[tracker/rss-enclosures] Fixes GB#616165: Improved reading PS and PS-GZ files
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] Fixes GB#616165: Improved reading PS and PS-GZ files
- Date: Mon, 3 May 2010 00:41:33 +0000 (UTC)
commit 0aaca9c1287d17d468520052369a32b9dda6258f
Author: Aleksander Morgado <aleksander lanedo com>
Date: Mon Apr 19 14:10:42 2010 +0200
Fixes GB#616165: Improved reading PS and PS-GZ files
* Don't use a temporary file for the uncompressed ps-gz file contents
* Re-use the same buffer when iterating over each line in the PS extractor;
there is no need to allocate a new one each time.
* Stop reading the uncompressed PS file once a maximum limit is reached (the
same 20 MiB limit as before is kept, purely as a safety cap)
src/tracker-extract/tracker-extract-ps.c | 158 +++++++++++++----------------
1 files changed, 71 insertions(+), 87 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-ps.c b/src/tracker-extract/tracker-extract-ps.c
index 5541d0e..7ec62c0 100644
--- a/src/tracker-extract/tracker-extract-ps.c
+++ b/src/tracker-extract/tracker-extract-ps.c
@@ -204,23 +204,15 @@ date_to_iso8601 (const gchar *date)
}
static void
-extract_ps (const gchar *uri,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata)
+extract_ps_from_filestream (FILE *f,
+ TrackerSparqlBuilder *preupdate,
+ TrackerSparqlBuilder *metadata)
{
- FILE *f;
- gchar *filename;
gchar *line;
gsize length;
gssize read_char;
-
- filename = g_filename_from_uri (uri, NULL, NULL);
- f = tracker_file_open (filename, "r", TRUE);
- g_free (filename);
-
- if (!f) {
- return;
- }
+ gsize accum;
+ gsize max_bytes;
line = NULL;
length = 0;
@@ -228,10 +220,27 @@ extract_ps (const gchar *uri,
tracker_sparql_builder_predicate (metadata, "a");
tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
- while ((read_char = getline (&line, &length, f)) != -1) {
+ /* 20 MiB should be enough! (original safe limit) */
+ accum = 0;
+ max_bytes = 20u << 20;
+
+ /* Reuse the same buffer for all lines. Must be dynamically allocated with
+ * malloc family methods as getline() may re-size it with realloc() */
+ length = 1024;
+ line = g_malloc (length);
+
+ /* Halt the whole when one of these conditions is met:
+ * a) Reached max bytes to read
+ * b) No more lines to read
+ */
+ while ((accum < max_bytes) &&
+ (read_char = getline (&line, &length, f)) != -1) {
gboolean pageno_atend = FALSE;
gboolean header_finished = FALSE;
+ /* Update accumulated bytes read */
+ accum += read_char;
+
line[read_char - 1] = '\0'; /* overwrite '\n' char */
if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
@@ -274,15 +283,35 @@ extract_ps (const gchar *uri,
break;
}
}
-
- g_free (line);
- line = NULL;
- length = 0;
}
+ /* Deallocate the buffer */
if (line) {
g_free (line);
}
+}
+
+
+
+static void
+extract_ps (const gchar *uri,
+ TrackerSparqlBuilder *preupdate,
+ TrackerSparqlBuilder *metadata)
+{
+ FILE *f;
+ gchar *filename;
+
+ filename = g_filename_from_uri (uri, NULL, NULL);
+ f = tracker_file_open (filename, "r", TRUE);
+ g_free (filename);
+
+ if (!f) {
+ return;
+ }
+
+ /* Extract from filestream! */
+ g_debug ("Extracting PS '%s'...", uri);
+ extract_ps_from_filestream (f, preupdate, metadata);
tracker_file_close (f, FALSE);
}
@@ -294,23 +323,11 @@ extract_ps_gz (const gchar *uri,
TrackerSparqlBuilder *preupdate,
TrackerSparqlBuilder *metadata)
{
- FILE *fz, *f;
- GError *error = NULL;
- gchar *gunzipped;
+ FILE *fz;
gint fdz;
- gint fd;
- gboolean ptat;
const gchar *argv[4];
gchar *filename;
-
- fd = g_file_open_tmp ("tracker-extract-ps-gunzipped.XXXXXX",
- &gunzipped,
- &error);
-
- if (error) {
- g_error_free (error);
- return;
- }
+ GError *error = NULL;
filename = g_filename_from_uri (uri, NULL, NULL);
@@ -321,68 +338,35 @@ extract_ps_gz (const gchar *uri,
argv[2] = filename;
argv[3] = NULL;
- ptat = g_spawn_async_with_pipes (g_get_tmp_dir (),
- (gchar **) argv,
- NULL,
- G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
- tracker_spawn_child_func,
- GINT_TO_POINTER (10),
- NULL,
- NULL,
- &fdz,
- NULL,
- &error);
-
- if (!ptat) {
- g_free (filename);
- g_unlink (gunzipped);
+ /* Fork & spawn to gunzip the file */
+ if (!g_spawn_async_with_pipes (g_get_tmp_dir (),
+ (gchar **) argv,
+ NULL,
+ G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
+ tracker_spawn_child_func,
+ GINT_TO_POINTER (10),
+ NULL,
+ NULL,
+ &fdz,
+ NULL,
+ &error)) {
+ g_warning ("Couldn't fork & spawn to gunzip '%s': %s",
+ uri, error ? error->message : NULL);
g_clear_error (&error);
- close (fd);
- return;
}
-
- fz = fdopen (fdz, "r");
-
- if (!fz) {
- g_unlink (gunzipped);
+ /* Get FILE from FD */
+ else if ((fz = fdopen (fdz, "r")) == NULL) {
+ g_warning ("Couldn't open FILE from FD (%s)...", uri);
close (fdz);
- close (fd);
- return;
- }
-
- f = fdopen (fd, "w");
-
- if (!f) {
- g_unlink (gunzipped);
- fclose (fz);
- close (fd);
- return;
}
-
- if (f && fz) {
- unsigned char buf[8192];
- size_t w, b, accum;
- size_t max;
-
- /* 20 MiB should be enough! */
- accum = 0;
- max = 20u << 20;
-
- while ((b = fread (buf, 1, 8192, fz)) && accum <= max) {
- accum += b;
- w = 0;
-
- while (w < b) {
- w += fwrite (buf, 1, b, f);
- }
- }
-
+ /* Extract from filestream! */
+ else
+ {
+ g_debug ("Extracting compressed PS '%s'...", uri);
+ extract_ps_from_filestream (fz, preupdate, metadata);
fclose (fz);
- fclose (f);
}
- extract_ps (gunzipped, preupdate, metadata);
- g_unlink (gunzipped);
g_free (filename);
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]