[pan: 3/7] 1) Uses long long for timestamp when reading articles files 2) Adds some protection against strange group names
- From: Dominique Dumont <ddumont src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pan: 3/7] 1) Uses long long for timestamp when reading articles files 2) Adds some protection against strange group names
- Date: Sat, 24 Sep 2022 10:02:24 +0000 (UTC)
commit 578d09381d239354e02a685a6bc8191ad784aeb3
Author: Thomas Tanner <thosrtanner googlemail com>
Date: Fri Sep 9 15:34:11 2022 +0100
1) Uses long long for timestamp when reading articles files
2) Adds some protection against strange group names
pan/data-impl/data-io.cc | 85 +++++++++++++++++++++++++++++++++++++++++++++---
pan/data-impl/headers.cc | 12 +++----
2 files changed, 86 insertions(+), 11 deletions(-)
---
diff --git a/pan/data-impl/data-io.cc b/pan/data-impl/data-io.cc
index 5ec1c33..5027ec7 100644
--- a/pan/data-impl/data-io.cc
+++ b/pan/data-impl/data-io.cc
@@ -17,28 +17,33 @@
*
*/
+#include "data-io.h"
+
#include <config.h>
#include <cerrno>
#include <cstdio>
-#include <map>
-#include <iostream>
#include <fstream>
+#include <istream>
+#include <map>
+#include <ostream>
+#include <regex>
+
extern "C" {
#include <sys/types.h> // for chmod
#include <sys/stat.h> // for chmod
#include <unistd.h>
}
+
#include <glib.h>
#include <glib/gi18n.h>
#include <pan/general/debug.h>
#include <pan/general/file-util.h>
#include <pan/general/line-reader.h>
#include <pan/general/log.h>
-#include "data-io.h"
#include <pan/general/null.h>
-using namespace pan;
+namespace pan {
namespace
{
@@ -71,10 +76,78 @@ namespace
return get_pan_home_file ("newsgroups.xov");
}
+ bool is_reserved_char(char c) //Returns true if c has to be %-escaped when a group name is turned into a file name.
+ {
+#ifdef G_OS_WIN32
+ //Windows file systems aren't case sensitive, but there are groups out
+ //there with upper-case names, and apparently nothing validates group
+ //names for sanity. So on Windows every upper-case ASCII letter is reserved.
+ //This check could be removed if users were guaranteed to have run
+ //Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux
+ //fsutil.exe file SetCaseSensitiveInfo <path to groups> enable
+ //which for now seems unlikely.
+ if ('A' <= c && c <= 'Z')
+ {
+ return true;
+ }
+#endif
+ switch (c)
+ {
+ case '%': //Reserve % as we use it to escape things.
+ case '/': //Some group names contain '/', which would otherwise act as a directory separator.
+#ifdef G_OS_WIN32
+ case '<': //This and the following cases: characters Windows does not allow in file names.
+ case '>':
+ case ':':
+ case '"':
+ case '\\':
+ case '|':
+ case '?':
+ case '*':
+#endif
+ return true;
+
+ default: //Everything else is used in the file name unchanged.
+ return false;
+ }
+ }
+
+ char to_hex_char(char c) //Maps a value in 0..15 to its hex digit: '0'-'9', then upper-case 'A'-'F'.
+ {
+ return c < 10 ? c + '0' : c + 'A' - 10; //No range check: the callers in this file pass a value masked with 0xf.
+ }
+
std::string get_group_headers_filename (const Quark& group)
{
const std::string home (file::get_pan_home());
- char * filename (g_build_filename (home.c_str(), "groups", group.c_str(), NULL));
+ //A note. We do a lot of work encoding the names here, because
+ //1) Windows is case-insensitive, and there are groups whose names differ
+ // only in case (uk.legaL, uk.Legal, uk.legal)
+ //2) There are certain names you can't use on windows and there is a group
+ // called con.politics
+ //3) There are groups with '/' in the group name...
+ std::string encoded_group;
+ for (auto c : group.to_string())
+ {
+ if (is_reserved_char(c))
+ {
+ encoded_group += "%";
+ encoded_group += to_hex_char((c >> 4) & 0xf);
+ encoded_group += to_hex_char(c & 0xf);
+ }
+ else
+ {
+ encoded_group += c;
+ }
+ }
+#ifdef G_OS_WIN32
+ static std::regex reserved("^(CON|PRN|AUX|NUL|COM[0-9]|LPT[0-9])\\.",
+ std::regex::extended | std::regex::icase);
+
+ encoded_group = std::regex_replace(encoded_group, reserved, "$1%2E");
+#endif
+
+ char * filename (g_build_filename (home.c_str(), "groups", encoded_group.c_str(), NULL));
char * dirname (g_path_get_dirname (filename));
file :: ensure_dir_exists (dirname);
std::string retval (filename);
@@ -288,3 +361,5 @@ DataIO :: write_done (std::ostream* out)
{
finalize_ostream (dynamic_cast<std::ofstream*>(out));
}
+
+}
diff --git a/pan/data-impl/headers.cc b/pan/data-impl/headers.cc
index f3723c7..0a63dc6 100644
--- a/pan/data-impl/headers.cc
+++ b/pan/data-impl/headers.cc
@@ -405,13 +405,13 @@ DataImpl :: load_part (const Quark & group,
namespace
{
- unsigned long view_to_ul (const StringView& view)
+ unsigned long long view_to_ull (const StringView& view) // Parses view as a base-10 unsigned number; yields 0 if view is empty or strtoull sets errno.
{
- unsigned long val (0);
+ unsigned long long val (0ull);
if (!view.empty()) {
errno = 0;
- val = strtoul (view.str, 0, 10);
- if (errno) val = 0ul;
+ val = strtoull (view.str, 0, 10); // NOTE(review): ignores view.len; presumably view.str is followed by a non-digit or NUL -- confirm.
+ if (errno) val = 0ull; // errno was cleared above, so a non-zero value here comes from strtoull (e.g. out of range).
}
return val;
}
@@ -522,7 +522,7 @@ DataImpl :: load_headers (const DataIO & data_io,
}
// date-posted line
- a.time_posted = view_to_ul (s);
+ a.time_posted = view_to_ull (s);
const int days_old ((now - a.time_posted) / (24*60*60));
// xref line
@@ -581,7 +581,7 @@ DataImpl :: load_headers (const DataIO & data_io,
s.pop_token (part_mid);
if (part_mid.len==1 && *part_mid.str=='"')
part_mid = a.message_id.to_view ();
- s.pop_token(tok); part_bytes = view_to_ul (tok);
+ s.pop_token(tok); part_bytes = view_to_ull (tok);
part_batch.add_part (number, part_mid, part_bytes);
if (s.pop_token(tok)) a.lines += atoi (tok.str); // this field was removed in 0.115
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]