[evince/wip/chpe/xmp: 20/29] libdocument: Move XMP parsing from backend/pdf to libdocument
- From: Germán Poo-Caamaño <gpoo src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evince/wip/chpe/xmp: 20/29] libdocument: Move XMP parsing from backend/pdf to libdocument
- Date: Fri, 7 Jan 2022 23:27:27 +0000 (UTC)
commit 53bb117becffd2a53125936e786c29f44d907b09
Author: Christian Persch <chpe src gnome org>
Date: Sat Dec 4 01:45:44 2021 +0100
libdocument: Move XMP parsing from backend/pdf to libdocument
In preparation to adding XMP support to DJVU.
https://gitlab.gnome.org/GNOME/evince/-/merge_requests/342
backend/pdf/ev-poppler.c | 464 +------------------------------------
libdocument/ev-document-info.c | 17 ++
libdocument/ev-document-info.h | 4 +-
libdocument/ev-xmp.c | 512 +++++++++++++++++++++++++++++++++++++++++
libdocument/ev-xmp.h | 32 +++
libdocument/meson.build | 3 +
po/POTFILES.in | 1 +
7 files changed, 569 insertions(+), 464 deletions(-)
---
diff --git a/backend/pdf/ev-poppler.c b/backend/pdf/ev-poppler.c
index f965c4c50..a0631e977 100644
--- a/backend/pdf/ev-poppler.c
+++ b/backend/pdf/ev-poppler.c
@@ -63,49 +63,10 @@
#include "ev-media.h"
#include "ev-file-helpers.h"
-#include <libxml/tree.h>
-#include <libxml/parser.h>
-#include <libxml/xpath.h>
-#include <libxml/xpathInternals.h>
-
#if (defined (HAVE_CAIRO_PDF) || defined (HAVE_CAIRO_PS))
#define HAVE_CAIRO_PRINT
#endif
-/* Fields for checking the license info suggested by Creative Commons
- * Main reference: http://wiki.creativecommons.org/XMP */
-
-/* fields from the XMP Rights Management Schema, XMP Specification Sept 2005, pag. 42 */
-#define LICENSE_MARKED "/rdf:RDF/rdf:Description/xmpRights:Marked"
-#define LICENSE_TEXT "/x:xmpmeta/rdf:RDF/rdf:Description/xmpRights:UsageTerms/rdf:Alt/rdf:li[lang('%s')]"
-#define LICENSE_WEB_STATEMENT "/rdf:RDF/rdf:Description/xmpRights:WebStatement"
-/* license field from Creative Commons schema, http://creativecommons.org/ns */
-#define LICENSE_URI "/rdf:RDF/rdf:Description/cc:license/@rdf:resource"
-
-/* alternative field from the Dublic Core Schema for checking the informal rights statement
- * as suggested by the Creative Commons template [1]. This field has been replaced or
- * complemented by its XMP counterpart [2].
- * References:
- * [1] http://wiki.creativecommons.org/XMP_help_for_Adobe_applications
- * [2] http://code.creativecommons.org/issues/issue505 */
-#define LICENSE_TEXT_ALT "/x:xmpmeta/rdf:RDF/rdf:Description/dc:rights/rdf:Alt/rdf:li[lang('%s')]"
-#define GET_LICENSE_TEXT(a) ( (a < 1) ? LICENSE_TEXT : LICENSE_TEXT_ALT )
-
-/* fields for authors and keywords */
-#define AUTHORS "/rdf:RDF/rdf:Description/dc:creator/rdf:Seq/rdf:li"
-#define KEYWORDS "/rdf:RDF/rdf:Description/dc:subject/rdf:Bag/rdf:li"
-/* fields for title and subject */
-#define TITLE "/rdf:RDF/rdf:Description/dc:title/rdf:Alt/rdf:li[lang('%s')]"
-#define SUBJECT "/rdf:RDF/rdf:Description/dc:description/rdf:Alt/rdf:li[lang('%s')]"
-/* fields for creation and modification dates */
-#define MOD_DATE "/rdf:RDF/rdf:Description/xmp:ModifyDate"
-#define CREATE_DATE "/rdf:RDF/rdf:Description/xmp:CreateDate"
-#define META_DATE "/rdf:RDF/rdf:Description/xmp:MetadataDate"
-/* fields for pdf creator tool and producer */
-#define CREATOR "/rdf:RDF/rdf:Description/xmp:CreatorTool"
-#define PRODUCER "/rdf:RDF/rdf:Description/pdf:Producer"
-
-
typedef struct {
EvFileExporterFormat format;
@@ -578,429 +539,6 @@ pdf_document_get_thumbnail_surface (EvDocument *document,
return surface;
}
-static xmlChar *
-pdf_document_get_xmptag_from_path (xmlXPathContextPtr xpathCtx,
- const char* xpath)
-{
- xmlXPathObjectPtr xpathObj;
- char *xmpmetapath = g_strdup_printf ("%s%s", "/x:xmpmeta", xpath);
- xmlChar *result = NULL;
-
- /* add pdf/a and pdf/x namespaces */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfaid", BAD_CAST "http://www.aiim.org/pdfa/ns/id/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfxid", BAD_CAST "http://www.npes.org/pdfx/ns/id/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfx", BAD_CAST "http://ns.adobe.com/pdfx/1.3/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdf", BAD_CAST "http://ns.adobe.com/pdf/1.3/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmp", BAD_CAST "http://ns.adobe.com/xap/1.0/");
- /* XMP Rights Management Schema */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmpRights", BAD_CAST "http://ns.adobe.com/xap/1.0/rights/");
- /* Creative Commons Schema */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "cc", BAD_CAST "http://creativecommons.org/ns#");
-
- /* Try in /rdf:RDF/ */
- xpathObj = xmlXPathEvalExpression (BAD_CAST xpath, xpathCtx);
- if (xpathObj == NULL)
- return NULL;
-
- if (xpathObj->nodesetval != NULL && xpathObj->nodesetval->nodeNr != 0)
- result = xmlNodeGetContent (xpathObj->nodesetval->nodeTab[0]);
-
- xmlXPathFreeObject (xpathObj);
-
- if (result != NULL)
- return result;
-
- /*
- Try in /x:xmpmeta/ (xmpmeta is optional)
- https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP SDK Release
cc-2016-08/XMPSpecificationPart1.pdf (Section 7.3.3)
- */
- xpathObj = xmlXPathEvalExpression (BAD_CAST xmpmetapath, xpathCtx);
- if (xpathObj == NULL)
- return NULL;
-
- if (xpathObj->nodesetval != NULL && xpathObj->nodesetval->nodeNr != 0)
- result = xmlNodeGetContent (xpathObj->nodesetval->nodeTab[0]);
-
- xmlXPathFreeObject (xpathObj);
- g_free (xmpmetapath);
- return result;
-}
-
-/* reference:
-http://www.pdfa.org/lib/exe/fetch.php?id=pdfa%3Aen%3Atechdoc&cache=cache&media=pdfa:techdoc:tn0001_pdfa-1_and_namespaces_2008-03-18.pdf
*/
-static char *
-pdf_document_get_format_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- xmlChar *part = NULL;
- xmlChar *conf = NULL;
- xmlChar *pdfxid = NULL;
- char *result = NULL;
- int i;
-
- /* reads pdf/a part */
- /* first syntax: child node */
- part = pdf_document_get_xmptag_from_path (xpathCtx, "/rdf:RDF/rdf:Description/pdfaid:part");
- if (part == NULL) {
- /* second syntax: attribute */
- part = pdf_document_get_xmptag_from_path (xpathCtx, "/rdf:RDF/rdf:Description/@pdfaid:part");
- }
-
- /* reads pdf/a conformance */
- /* first syntax: child node */
- conf = pdf_document_get_xmptag_from_path (xpathCtx, "/rdf:RDF/rdf:Description/pdfaid:conformance");
- if (conf == NULL) {
- /* second syntax: attribute */
- conf = pdf_document_get_xmptag_from_path (xpathCtx,
"/rdf:RDF/rdf:Description/@pdfaid:conformance");
- }
-
- /* reads pdf/x id */
- /* first syntax: pdfxid */
- pdfxid = pdf_document_get_xmptag_from_path (xpathCtx,
"/rdf:RDF/rdf:Description/pdfxid:GTS_PDFXVersion");
- if (pdfxid == NULL) {
- /* second syntax: pdfx */
- pdfxid = pdf_document_get_xmptag_from_path (xpathCtx,
"/rdf:RDF/rdf:Description/pdfx:GTS_PDFXVersion");
- }
-
- if (part != NULL && conf != NULL) {
- /* makes conf lowercase */
- for (i = 0; conf[i]; i++)
- conf[i] = g_ascii_tolower (conf[i]);
-
- /* return buffer */
- result = g_strdup_printf ("PDF/A - %s%s", part, conf);
- }
- else if (pdfxid != NULL) {
- result = g_strdup_printf ("%s", pdfxid);
- }
-
- /* Cleanup */
- xmlFree (part);
- xmlFree (conf);
- xmlFree (pdfxid);
- return result;
-}
-
-static char *
-pdf_document_get_lists_from_dc_tags (xmlXPathContextPtr xpathCtx,
- const char* xpath)
-{
- xmlXPathObjectPtr xpathObj;
- int i;
- char* elements = NULL;
- char* tmp_elements = NULL;
- char* result = NULL;
- xmlChar* content;
-
- /* add xmp namespaces */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "dc", BAD_CAST "http://purl.org/dc/elements/1.1/");
-
- /* reads pdf/a sequence*/
- xpathObj = xmlXPathEvalExpression (BAD_CAST xpath, xpathCtx);
- if (xpathObj == NULL)
- return NULL;
-
- if (xpathObj->nodesetval != NULL && xpathObj->nodesetval->nodeNr != 0) {
- for (i = 0; i < xpathObj->nodesetval->nodeNr; i++) {
- content = xmlNodeGetContent (xpathObj->nodesetval->nodeTab[i]);
- if (i) {
- tmp_elements = g_strdup (elements);
- g_free (elements);
- elements = g_strdup_printf ("%s, %s", tmp_elements, content);
- g_free (tmp_elements);
- } else {
- elements = g_strdup_printf ("%s", content);
- }
- xmlFree(content);
- }
- }
- xmlXPathFreeObject (xpathObj);
-
-
- if (elements != NULL) {
- /* return buffer */
- result = g_strdup (elements);
- }
-
- /* Cleanup */
- g_free (elements);
-
- return result;
-}
-
-static char *
-pdf_document_get_author_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- char* result = NULL;
- char* xmpmetapath = g_strdup_printf ("%s%s", "/x:xmpmeta", AUTHORS);
- /* Try in /rdf:RDF/ */
- result = pdf_document_get_lists_from_dc_tags (xpathCtx, AUTHORS);
- if (result != NULL)
- return result;
-
- /* Try in /x:xmpmeta/ */
- result = pdf_document_get_lists_from_dc_tags (xpathCtx, xmpmetapath);
- g_free (xmpmetapath);
-
- return result;
-}
-
-static char *
-pdf_document_get_keywords_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- char* result = NULL;
- char* xmpmetapath = g_strdup_printf ("%s%s", "/x:xmpmeta", KEYWORDS);
- /* Try in /rdf:RDF/ */
- result = pdf_document_get_lists_from_dc_tags (xpathCtx, KEYWORDS);
- if (result != NULL)
- return result;
-
- /* Try in /x:xmpmeta/ */
- result = pdf_document_get_lists_from_dc_tags (xpathCtx, xmpmetapath);
- g_free (xmpmetapath);
-
- return result;
-}
-
-__attribute__((__format__ (__printf__, 2, 0)))
-static char *
-pdf_document_get_localized_object_from_metadata (xmlXPathContextPtr xpathCtx,
- const char* xpath)
-{
- const char *language_string;
- char *aux;
- gchar **tags;
- gchar *tag, *tag_aux;
- int i, j;
- char *loc_object= NULL;
-
- /* register namespaces */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "dc", BAD_CAST "http://purl.org/dc/elements/1.1/");
- /* XMP Rights Management Schema */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmpRights", BAD_CAST "http://ns.adobe.com/xap/1.0/rights/");
- /* Creative Commons Schema */
- xmlXPathRegisterNs (xpathCtx, BAD_CAST "cc", BAD_CAST "http://creativecommons.org/ns#");
-
- /* 1) checking for a suitable localized string */
- language_string = pango_language_to_string (gtk_get_default_language ());
- tags = g_strsplit (language_string, "-", -1);
- i = g_strv_length (tags);
- while (i-- && !loc_object) {
- tag = g_strdup (tags[0]);
- for (j = 1; j <= i; j++) {
- tag_aux = g_strdup_printf ("%s-%s", tag, tags[j]);
- g_free (tag);
- tag = tag_aux;
- }
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wformat-nonliteral"
- aux = g_strdup_printf (xpath, tag);
- #pragma GCC diagnostic pop
- loc_object = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, aux);
- g_free (tag);
- g_free (aux);
- }
- g_strfreev (tags);
-
- /* 2) if not, use the default string */
- if (!loc_object) {
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wformat-nonliteral"
- aux = g_strdup_printf (xpath, "x-default");
- #pragma GCC diagnostic pop
- loc_object = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, aux);
- g_free (aux);
- }
- return loc_object;
-}
-
-static char *
-pdf_document_get_title_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- return pdf_document_get_localized_object_from_metadata (xpathCtx, TITLE);
-}
-
-static char *
-pdf_document_get_subject_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- return pdf_document_get_localized_object_from_metadata (xpathCtx, SUBJECT);
-}
-
-static EvDocumentLicense *
-pdf_document_get_license_from_metadata (xmlXPathContextPtr xpathCtx)
-{
- xmlChar *marked = NULL;
- EvDocumentLicense *license;
-
- /* checking if the document has been marked as defined on the XMP Rights
- * Management Schema */
- marked = pdf_document_get_xmptag_from_path (xpathCtx, LICENSE_MARKED);
-
- /* a) Not marked => No XMP Rights information */
- if (!marked) {
- xmlFree (marked);
- return NULL;
- }
-
- license = ev_document_license_new ();
-
- /* b) Marked False => Public Domain, no copyrighted material and no
- * license needed */
- if (g_strrstr ((char *) marked, "False") != NULL) {
- license->text = g_strdup (_("This work is in the Public Domain"));
- /* c) Marked True => Copyrighted material */
- } else {
- /* Checking usage terms as defined by the XMP Rights Management
- * Schema. This field is recomended to be checked by Creative
- * Commons */
- /* 1) checking for a suitable localized string */
- int lt;
-
- for (lt = 0; !license->text && lt < 2; lt++)
- license->text = pdf_document_get_localized_object_from_metadata (xpathCtx,
- GET_LICENSE_TEXT
(lt));
-
- /* Checking the license URI as defined by the Creative Commons
- * Schema. This field is recomended to be checked by Creative
- * Commons */
- license->uri = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, LICENSE_URI);
-
- /* Checking the web statement as defined by the XMP Rights
- * Management Schema. Checking it out is a sort of above-and-beyond
- * the basic recommendations by Creative Commons. It can be
- * considered as a "reinforcement" approach to add certainty. */
- license->web_statement = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx,
LICENSE_WEB_STATEMENT);
- }
- xmlFree (marked);
-
- if (!license->text && !license->uri && !license->web_statement) {
- ev_document_license_free (license);
- return NULL;
- }
-
- return license;
-}
-
-static void
-pdf_document_parse_metadata (const gchar *metadata,
- EvDocumentInfo *info)
-{
- xmlDocPtr doc;
- xmlXPathContextPtr xpathCtx;
- gchar *fmt;
- gchar *author;
- gchar *keywords;
- gchar *title;
- gchar *subject;
- gchar *creatortool;
- gchar *producer;
- gchar *modified_date;
- gchar *created_date;
- gchar *metadata_date;
- GDateTime *modified_datetime;
- GDateTime *metadata_datetime = NULL;
-
- doc = xmlParseMemory (metadata, strlen (metadata));
- if (doc == NULL)
- return; /* invalid xml metadata */
-
- xpathCtx = xmlXPathNewContext (doc);
- if (xpathCtx == NULL) {
- xmlFreeDoc (doc);
- return; /* invalid xpath context */
- }
-
- /* reads pdf metadata date */
- metadata_date = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, META_DATE);
- if (metadata_date != NULL) {
- metadata_datetime = g_date_time_new_from_iso8601 (metadata_date, NULL);
- g_free (metadata_date);
- }
-
- /* From PDF spec, if the PDF modified date is newer than metadata date,
- * it indicates that the file was edited by a non-XMP aware software.
- * Then, the information dictionary is considered authoritative and the
- * XMP metadata should not be displayed. */
- modified_datetime = ev_document_info_get_modified_datetime (info);
- if (modified_datetime == NULL ||
- metadata_datetime == NULL ||
- g_date_time_compare (metadata_datetime, modified_datetime) >= 0) {
-
- fmt = pdf_document_get_format_from_metadata (xpathCtx);
- if (fmt != NULL) {
- g_free (info->format);
- info->format = fmt;
- }
-
- author = pdf_document_get_author_from_metadata (xpathCtx);
- if (author != NULL) {
- g_free (info->author);
- info->author = author;
- }
-
- keywords = pdf_document_get_keywords_from_metadata (xpathCtx);
- if (keywords != NULL) {
- g_free (info->keywords);
- info->keywords = keywords;
- }
-
- title = pdf_document_get_title_from_metadata (xpathCtx);
- if (title != NULL) {
- g_free (info->title);
- info->title = title;
- }
-
- subject = pdf_document_get_subject_from_metadata (xpathCtx);
- if (subject != NULL) {
- g_free (info->subject);
- info->subject = subject;
- }
-
- creatortool = (char*)pdf_document_get_xmptag_from_path (xpathCtx, CREATOR);
- if (creatortool != NULL) {
- g_free (info->creator);
- info->creator = creatortool;
- }
-
- producer = (char*)pdf_document_get_xmptag_from_path (xpathCtx, PRODUCER);
- if (producer != NULL) {
- g_free (info->producer);
- info->producer = producer;
- }
-
- /* reads modify date */
- modified_date = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, MOD_DATE);
- if (modified_date != NULL) {
- GDateTime *datetime;
-
- datetime = g_date_time_new_from_iso8601 (modified_date, NULL);
- ev_document_info_take_modified_datetime (info, datetime);
- g_free (modified_date);
- }
-
- /* reads pdf create date */
- created_date = (gchar *)pdf_document_get_xmptag_from_path (xpathCtx, CREATE_DATE);
- if (created_date != NULL) {
- GDateTime *datetime;
-
- datetime = g_date_time_new_from_iso8601 (created_date, NULL);
- ev_document_info_take_created_datetime (info, datetime);
- g_free (created_date);
- }
- }
-
- info->license = pdf_document_get_license_from_metadata (xpathCtx);
-
- g_clear_pointer (&metadata_datetime, g_date_time_unref);
- xmlXPathFreeContext (xpathCtx);
- xmlFreeDoc (doc);
-}
-
static EvDocumentInfo *
pdf_document_get_info (EvDocument *document)
{
@@ -1056,7 +594,7 @@ pdf_document_get_info (EvDocument *document)
ev_document_info_take_modified_datetime (info, modified_datetime);
if (metadata != NULL) {
- pdf_document_parse_metadata (metadata, info);
+ ev_document_info_set_from_xmp (info, metadata);
g_free (metadata);
}
diff --git a/libdocument/ev-document-info.c b/libdocument/ev-document-info.c
index e17314a0c..5b2362069 100644
--- a/libdocument/ev-document-info.c
+++ b/libdocument/ev-document-info.c
@@ -25,6 +25,7 @@
#include <string.h>
#include "ev-document-info.h"
+#include "ev-xmp.h"
typedef struct _EvDocumentInfoExtended EvDocumentInfoExtended;
struct _EvDocumentInfoExtended {
@@ -223,6 +224,22 @@ ev_document_info_get_modified_datetime (const EvDocumentInfo *info)
return info_ex->modified_datetime;
}
+/*
+ * ev_document_info_set_from_xmp:
+ * @info: the #EvDocumentInfo to update
+ * @xmp: an XMP document as a NUL-terminated string
+ *
+ * Hands @xmp over to ev_xmp_parse(), which fills in @info from it.
+ *
+ * Returns: the result of ev_xmp_parse(), i.e. %TRUE iff @xmp could be
+ *   parsed
+ */
+gboolean
+ev_document_info_set_from_xmp (EvDocumentInfo *info,
+                               const char     *xmp)
+{
+        gboolean parsed = ev_xmp_parse (xmp, info);
+
+        return parsed;
+}
+
/* EvDocumentLicense */
G_DEFINE_BOXED_TYPE (EvDocumentLicense, ev_document_license, ev_document_license_copy,
ev_document_license_free)
diff --git a/libdocument/ev-document-info.h b/libdocument/ev-document-info.h
index 732d92627..a2a93127d 100644
--- a/libdocument/ev-document-info.h
+++ b/libdocument/ev-document-info.h
@@ -1,4 +1,3 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; c-indent-level: 8 -*- */
/*
* Copyright (C) 2000-2003 Marco Pesenti Gritti
* Copyright © 2021 Christian Persch
@@ -162,6 +161,9 @@ void ev_document_info_take_created_datetime (EvDocumentInfo *info,
EV_PRIVATE
void ev_document_info_take_modified_datetime (EvDocumentInfo *info,
GDateTime *datetime);
+EV_PRIVATE
+gboolean ev_document_info_set_from_xmp (EvDocumentInfo *info,
+ const char *xmp);
/* EvDocumentLicense */
#define EV_TYPE_DOCUMENT_LICENSE (ev_document_license_get_type())
diff --git a/libdocument/ev-xmp.c b/libdocument/ev-xmp.c
new file mode 100644
index 000000000..20d5cfd79
--- /dev/null
+++ b/libdocument/ev-xmp.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2018, Evangelos Rigas <erigas rnd2 org>
+ * Copyright (C) 2009, Juanjo Marín <juanj marin juntadeandalucia es>
+ * Copyright (C) 2004, Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ev-xmp.h"
+
+#include <glib/gi18n-lib.h>
+#include <pango/pango.h>
+
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+/* Fields for checking the license info suggested by Creative Commons
+ * Main reference: http://wiki.creativecommons.org/XMP
+ */
+
+/* fields from the XMP Rights Management Schema, XMP Specification Sept 2005, pag. 42 */
+#define LICENSE_MARKED "/rdf:RDF/rdf:Description/xmpRights:Marked"
+#define LICENSE_TEXT "/x:xmpmeta/rdf:RDF/rdf:Description/xmpRights:UsageTerms/rdf:Alt/rdf:li[lang('%s')]"
+#define LICENSE_WEB_STATEMENT "/rdf:RDF/rdf:Description/xmpRights:WebStatement"
+/* license field from Creative Commons schema, http://creativecommons.org/ns */
+#define LICENSE_URI "/rdf:RDF/rdf:Description/cc:license/@rdf:resource"
+
+/* alternative field from the Dublin Core Schema for checking the informal rights statement
+ * as suggested by the Creative Commons template [1]. This field has been replaced or
+ * complemented by its XMP counterpart [2].
+ * References:
+ * [1] http://wiki.creativecommons.org/XMP_help_for_Adobe_applications
+ * [2] http://code.creativecommons.org/issues/issue505
+ */
+#define LICENSE_TEXT_ALT "/x:xmpmeta/rdf:RDF/rdf:Description/dc:rights/rdf:Alt/rdf:li[lang('%s')]"
+/* Picks the XPath format to try: LICENSE_TEXT for an index below 1,
+ * LICENSE_TEXT_ALT otherwise. The argument is parenthesised so that an
+ * expression argument cannot change the meaning of the comparison. */
+#define GET_LICENSE_TEXT(a) ( ((a) < 1) ? LICENSE_TEXT : LICENSE_TEXT_ALT )
+
+/* fields for authors and keywords */
+#define AUTHORS "/rdf:RDF/rdf:Description/dc:creator/rdf:Seq/rdf:li"
+#define KEYWORDS "/rdf:RDF/rdf:Description/dc:subject/rdf:Bag/rdf:li"
+/* fields for title and subject */
+#define TITLE "/rdf:RDF/rdf:Description/dc:title/rdf:Alt/rdf:li[lang('%s')]"
+#define SUBJECT "/rdf:RDF/rdf:Description/dc:description/rdf:Alt/rdf:li[lang('%s')]"
+/* fields for creation and modification dates */
+#define MOD_DATE "/rdf:RDF/rdf:Description/xmp:ModifyDate"
+#define CREATE_DATE "/rdf:RDF/rdf:Description/xmp:CreateDate"
+#define META_DATE "/rdf:RDF/rdf:Description/xmp:MetadataDate"
+/* fields for pdf creator tool and producer */
+#define CREATOR "/rdf:RDF/rdf:Description/xmp:CreatorTool"
+#define PRODUCER "/rdf:RDF/rdf:Description/pdf:Producer"
+
+/*
+ * xmp_get_tag_from_xpath:
+ * @xpathCtx: XPath context of the parsed XMP document
+ * @xpath: absolute XPath expression to evaluate
+ *
+ * Evaluates @xpath as given; if that matches nothing, evaluates it again
+ * with "/x:xmpmeta" prepended, since the x:xmpmeta wrapper is optional.
+ *
+ * Returns: the content of the first matching node, as allocated by
+ *   xmlNodeGetContent(), or %NULL if nothing matched or evaluation failed
+ */
+static xmlChar *
+xmp_get_tag_from_xpath (xmlXPathContextPtr xpathCtx,
+                        const char        *xpath)
+{
+        xmlXPathObjectPtr xpathObj;
+        xmlChar *result = NULL;
+        char *xmpmetapath;
+
+        /* add pdf/a and pdf/x namespaces */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfaid", BAD_CAST "http://www.aiim.org/pdfa/ns/id/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfxid", BAD_CAST "http://www.npes.org/pdfx/ns/id/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdfx", BAD_CAST "http://ns.adobe.com/pdfx/1.3/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "pdf", BAD_CAST "http://ns.adobe.com/pdf/1.3/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmp", BAD_CAST "http://ns.adobe.com/xap/1.0/");
+        /* XMP Rights Management Schema */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmpRights", BAD_CAST "http://ns.adobe.com/xap/1.0/rights/");
+        /* Creative Commons Schema */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "cc", BAD_CAST "http://creativecommons.org/ns#");
+
+        /* Try in /rdf:RDF/ */
+        xpathObj = xmlXPathEvalExpression (BAD_CAST xpath, xpathCtx);
+        if (xpathObj == NULL)
+                return NULL;
+
+        if (xpathObj->nodesetval != NULL && xpathObj->nodesetval->nodeNr != 0)
+                result = xmlNodeGetContent (xpathObj->nodesetval->nodeTab[0]);
+
+        xmlXPathFreeObject (xpathObj);
+
+        if (result != NULL)
+                return result;
+
+        /*
+         * Try in /x:xmpmeta/ (xmpmeta is optional)
+         * https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP SDK Release cc-2016-08/XMPSpecificationPart1.pdf (Section 7.3.3)
+         *
+         * The prefixed expression is only built once it is needed and is
+         * released right after evaluation, so no return path leaks it.
+         */
+        xmpmetapath = g_strdup_printf ("%s%s", "/x:xmpmeta", xpath);
+        xpathObj = xmlXPathEvalExpression (BAD_CAST xmpmetapath, xpathCtx);
+        g_free (xmpmetapath);
+        if (xpathObj == NULL)
+                return NULL;
+
+        if (xpathObj->nodesetval != NULL && xpathObj->nodesetval->nodeNr != 0)
+                result = xmlNodeGetContent (xpathObj->nodesetval->nodeTab[0]);
+
+        xmlXPathFreeObject (xpathObj);
+        return result;
+}
+
+/* Reference:
+ * http://www.pdfa.org/lib/exe/fetch.php?id=pdfa%3Aen%3Atechdoc&cache=cache&media=pdfa:techdoc:tn0001_pdfa-1_and_namespaces_2008-03-18.pdf
+ *
+ * Builds a format description from the PDF/A or PDF/X identification in
+ * the XMP: "PDF/A - <part><conformance>" (conformance lowercased) when both
+ * pdfaid values are present, else the GTS_PDFXVersion value, else NULL.
+ * The result is a newly allocated string.
+ */
+static char *
+xmp_get_pdf_format (xmlXPathContextPtr xpathCtx)
+{
+        xmlChar *pdfa_part;
+        xmlChar *pdfa_conf;
+        xmlChar *pdfx_version;
+        char *format = NULL;
+
+        /* PDF/A part: child element first, attribute second */
+        pdfa_part = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/pdfaid:part");
+        if (!pdfa_part)
+                pdfa_part = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/@pdfaid:part");
+
+        /* PDF/A conformance: child element first, attribute second */
+        pdfa_conf = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/pdfaid:conformance");
+        if (!pdfa_conf)
+                pdfa_conf = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/@pdfaid:conformance");
+
+        /* PDF/X version: pdfxid namespace first, pdfx namespace second */
+        pdfx_version = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/pdfxid:GTS_PDFXVersion");
+        if (!pdfx_version)
+                pdfx_version = xmp_get_tag_from_xpath (xpathCtx, "/rdf:RDF/rdf:Description/pdfx:GTS_PDFXVersion");
+
+        if (pdfa_part && pdfa_conf) {
+                xmlChar *c;
+
+                /* the conformance level is shown in lowercase */
+                for (c = pdfa_conf; *c != '\0'; c++)
+                        *c = g_ascii_tolower (*c);
+
+                format = g_strdup_printf ("PDF/A - %s%s", pdfa_part, pdfa_conf);
+        } else if (pdfx_version) {
+                format = g_strdup ((const char *) pdfx_version);
+        }
+
+        xmlFree (pdfa_part);
+        xmlFree (pdfa_conf);
+        xmlFree (pdfx_version);
+
+        return format;
+}
+
+/*
+ * xmp_get_lists_from_dc_tags:
+ * @xpathCtx: XPath context of the parsed XMP document
+ * @xpath: XPath expression selecting the list items (rdf:li nodes)
+ *
+ * Joins the content of every node matched by @xpath with ", ".
+ * Nodes for which xmlNodeGetContent() yields %NULL are skipped.
+ *
+ * Returns: a newly allocated string, or %NULL if @xpath could not be
+ *   evaluated or produced no content
+ */
+static char *
+xmp_get_lists_from_dc_tags (xmlXPathContextPtr xpathCtx,
+                            const char        *xpath)
+{
+        xmlXPathObjectPtr xpathObj;
+        xmlNodeSetPtr nodes;
+        GString *joined = NULL;
+        int i;
+
+        /* add xmp namespaces */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "dc", BAD_CAST "http://purl.org/dc/elements/1.1/");
+
+        /* reads pdf/a sequence */
+        xpathObj = xmlXPathEvalExpression (BAD_CAST xpath, xpathCtx);
+        if (xpathObj == NULL)
+                return NULL;
+
+        nodes = xpathObj->nodesetval;
+        for (i = 0; nodes != NULL && i < nodes->nodeNr; i++) {
+                xmlChar *content = xmlNodeGetContent (nodes->nodeTab[i]);
+
+                if (content == NULL)
+                        continue;
+
+                /* The buffer is created by the first item, so an empty
+                 * first item still gets a separator after it. */
+                if (joined == NULL)
+                        joined = g_string_new ((const char *) content);
+                else
+                        g_string_append_printf (joined, ", %s", content);
+
+                xmlFree (content);
+        }
+        xmlXPathFreeObject (xpathObj);
+
+        if (joined == NULL)
+                return NULL;
+
+        /* hand the character data over to the caller */
+        return g_string_free (joined, FALSE);
+}
+
+/* Returns the dc:creator entries joined into one newly allocated string,
+ * looking below /rdf:RDF first and below /x:xmpmeta second; NULL if
+ * neither location yields anything. */
+static char *
+xmp_get_author (xmlXPathContextPtr xpathCtx)
+{
+        char *authors;
+
+        /* Try in /rdf:RDF/ */
+        authors = xmp_get_lists_from_dc_tags (xpathCtx, AUTHORS);
+
+        /* Try in /x:xmpmeta/ */
+        if (authors == NULL)
+                authors = xmp_get_lists_from_dc_tags (xpathCtx, "/x:xmpmeta" AUTHORS);
+
+        return authors;
+}
+
+/* Returns the dc:subject entries joined into one newly allocated string,
+ * looking below /rdf:RDF first and below /x:xmpmeta second; NULL if
+ * neither location yields anything. */
+static char *
+xmp_get_keywords (xmlXPathContextPtr xpathCtx)
+{
+        char *keywords;
+
+        /* Try in /rdf:RDF/ */
+        keywords = xmp_get_lists_from_dc_tags (xpathCtx, KEYWORDS);
+
+        /* Try in /x:xmpmeta/ */
+        if (keywords == NULL)
+                keywords = xmp_get_lists_from_dc_tags (xpathCtx, "/x:xmpmeta" KEYWORDS);
+
+        return keywords;
+}
+
+/*
+ * xmp_get_localized_object_from_xpath_format:
+ * @xpathCtx: XPath context of the parsed XMP document
+ * @xpath_format: XPath expression containing one "%s" for the language tag
+ *
+ * Looks the value up for the default Pango language, retrying with one
+ * trailing "-" separated subtag removed each time, and finally with the
+ * "x-default" tag.
+ *
+ * Returns: the value found by xmp_get_tag_from_xpath(), or %NULL
+ */
+static G_GNUC_FORMAT (2) char *
+xmp_get_localized_object_from_xpath_format (xmlXPathContextPtr xpathCtx,
+                                            const char* xpath_format)
+{
+        const char *lang_name;
+        gchar **subtags;
+        guint n_subtags;
+        char *query;
+        char *localized = NULL;
+
+        /* register namespaces */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "x", BAD_CAST "adobe:ns:meta/");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "rdf", BAD_CAST "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "dc", BAD_CAST "http://purl.org/dc/elements/1.1/");
+        /* XMP Rights Management Schema */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "xmpRights", BAD_CAST "http://ns.adobe.com/xap/1.0/rights/");
+        /* Creative Commons Schema */
+        xmlXPathRegisterNs (xpathCtx, BAD_CAST "cc", BAD_CAST "http://creativecommons.org/ns#");
+
+        /* 1) most specific language tag first, then progressively shorter ones */
+        lang_name = pango_language_to_string (pango_language_get_default ());
+        subtags = g_strsplit (lang_name, "-", -1);
+        for (n_subtags = g_strv_length (subtags);
+             n_subtags > 0 && localized == NULL;
+             n_subtags--) {
+                GString *lang_tag = g_string_new (subtags[0]);
+                guint k;
+
+                /* re-join the first n_subtags pieces with "-" */
+                for (k = 1; k < n_subtags; k++)
+                        g_string_append_printf (lang_tag, "-%s", subtags[k]);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+                query = g_strdup_printf (xpath_format, lang_tag->str);
+#pragma GCC diagnostic pop
+                localized = (gchar *) xmp_get_tag_from_xpath (xpathCtx, query);
+                g_free (query);
+                g_string_free (lang_tag, TRUE);
+        }
+        g_strfreev (subtags);
+
+        if (localized != NULL)
+                return localized;
+
+        /* 2) nothing matched: fall back to the default string */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+        query = g_strdup_printf (xpath_format, "x-default");
+#pragma GCC diagnostic pop
+        localized = (gchar *) xmp_get_tag_from_xpath (xpathCtx, query);
+        g_free (query);
+
+        return localized;
+}
+
+/* Localized dc:title value, or NULL when the XMP has none. */
+static char *
+xmp_get_title (xmlXPathContextPtr xpathCtx)
+{
+        char *title = xmp_get_localized_object_from_xpath_format (xpathCtx, TITLE);
+
+        return title;
+}
+
+/* Localized dc:description value, or NULL when the XMP has none. */
+static char *
+xmp_get_subject (xmlXPathContextPtr xpathCtx)
+{
+        char *subject = xmp_get_localized_object_from_xpath_format (xpathCtx, SUBJECT);
+
+        return subject;
+}
+
+/*
+ * xmp_get_license:
+ * @xpathCtx: XPath context of the parsed XMP document
+ *
+ * Collects the license information from the XMP Rights Management and
+ * Creative Commons fields.
+ *
+ * Returns: a new #EvDocumentLicense, or %NULL when xmpRights:Marked is
+ *   absent or none of text, URI and web statement could be found
+ */
+static EvDocumentLicense *
+xmp_get_license (xmlXPathContextPtr xpathCtx)
+{
+        EvDocumentLicense *license;
+        xmlChar *marked;
+
+        /* xmpRights:Marked tells whether rights information is present at all */
+        marked = xmp_get_tag_from_xpath (xpathCtx, LICENSE_MARKED);
+
+        /* a) Not marked => No XMP Rights information */
+        if (marked == NULL)
+                return NULL;
+
+        license = ev_document_license_new ();
+
+        if (g_strrstr ((char *) marked, "False") != NULL) {
+                /* b) Marked False => Public Domain, no copyrighted material
+                 * and no license needed */
+                license->text = g_strdup (_("This work is in the Public Domain"));
+        } else {
+                /* c) Marked True => Copyrighted material */
+                int attempt = 0;
+
+                /* Usage terms from the XMP Rights Management Schema first,
+                 * then the Dublin Core alternative; each one is looked up
+                 * as a localized string. Checking this field is
+                 * recommended by Creative Commons. */
+                while (!license->text && attempt < 2) {
+                        license->text = xmp_get_localized_object_from_xpath_format (xpathCtx,
+                                                                                    GET_LICENSE_TEXT (attempt));
+                        attempt++;
+                }
+
+                /* License URI from the Creative Commons Schema; checking
+                 * this field is recommended by Creative Commons as well. */
+                license->uri = (gchar *) xmp_get_tag_from_xpath (xpathCtx, LICENSE_URI);
+
+                /* Web statement from the XMP Rights Management Schema. It
+                 * goes beyond the basic Creative Commons recommendations
+                 * and serves as a "reinforcement" to add certainty. */
+                license->web_statement = (gchar *) xmp_get_tag_from_xpath (xpathCtx, LICENSE_WEB_STATEMENT);
+        }
+        xmlFree (marked);
+
+        /* an entirely empty license is reported as no license */
+        if (license->text || license->uri || license->web_statement)
+                return license;
+
+        ev_document_license_free (license);
+        return NULL;
+}
+
+/*
+ * ev_xmp_parse:
+ * @metadata: a XMP document as a NUL-terminated string
+ * @info: a #EvDocumentInfo
+ *
+ * Parses @metadata and copies the values found into @info. Format, author,
+ * keywords, title, subject, creator, producer and the modification and
+ * creation dates are only taken over when the XMP metadata date is not
+ * older than the modification date already stored in @info (or when
+ * either date is unknown). The license is always taken from the XMP.
+ *
+ * Returns: %TRUE iff @metadata could be successfully parsed
+ */
+gboolean
+ev_xmp_parse (const gchar    *metadata,
+              EvDocumentInfo *info)
+{
+        xmlDocPtr doc;
+        xmlXPathContextPtr xpathCtx;
+        xmlChar *metadata_date;
+        GDateTime *modified_datetime;
+        GDateTime *metadata_datetime = NULL;
+
+        g_return_val_if_fail (metadata != NULL, FALSE);
+        g_return_val_if_fail (info != NULL, FALSE);
+
+        doc = xmlParseMemory (metadata, strlen (metadata));
+        if (doc == NULL)
+                return FALSE; /* invalid xml metadata */
+
+        xpathCtx = xmlXPathNewContext (doc);
+        if (xpathCtx == NULL) {
+                xmlFreeDoc (doc);
+                return FALSE; /* invalid xpath context */
+        }
+
+        /* reads pdf metadata date; the string comes from libxml2 and is
+         * therefore released with xmlFree() */
+        metadata_date = xmp_get_tag_from_xpath (xpathCtx, META_DATE);
+        if (metadata_date != NULL) {
+                metadata_datetime = g_date_time_new_from_iso8601 ((const char *) metadata_date, NULL);
+                xmlFree (metadata_date);
+        }
+
+        /* From PDF spec, if the PDF modified date is newer than metadata date,
+         * it indicates that the file was edited by a non-XMP aware software.
+         * Then, the information dictionary is considered authoritative and the
+         * XMP metadata should not be displayed. */
+        modified_datetime = ev_document_info_get_modified_datetime (info);
+        if (modified_datetime == NULL ||
+            metadata_datetime == NULL ||
+            g_date_time_compare (metadata_datetime, modified_datetime) >= 0) {
+                gchar *fmt;
+                gchar *author;
+                gchar *keywords;
+                gchar *title;
+                gchar *subject;
+                gchar *creatortool;
+                gchar *producer;
+                xmlChar *modified_date;
+                xmlChar *created_date;
+
+                fmt = xmp_get_pdf_format (xpathCtx);
+                if (fmt != NULL) {
+                        g_free (info->format);
+                        info->format = fmt;
+                }
+
+                author = xmp_get_author (xpathCtx);
+                if (author != NULL) {
+                        g_free (info->author);
+                        info->author = author;
+                }
+
+                keywords = xmp_get_keywords (xpathCtx);
+                if (keywords != NULL) {
+                        g_free (info->keywords);
+                        info->keywords = keywords;
+                }
+
+                title = xmp_get_title (xpathCtx);
+                if (title != NULL) {
+                        g_free (info->title);
+                        info->title = title;
+                }
+
+                subject = xmp_get_subject (xpathCtx);
+                if (subject != NULL) {
+                        g_free (info->subject);
+                        info->subject = subject;
+                }
+
+                creatortool = (char *) xmp_get_tag_from_xpath (xpathCtx, CREATOR);
+                if (creatortool != NULL) {
+                        g_free (info->creator);
+                        info->creator = creatortool;
+                }
+
+                producer = (char *) xmp_get_tag_from_xpath (xpathCtx, PRODUCER);
+                if (producer != NULL) {
+                        g_free (info->producer);
+                        info->producer = producer;
+                }
+
+                /* reads modify date; a value that is not valid ISO 8601
+                 * leaves the date already stored in @info alone */
+                modified_date = xmp_get_tag_from_xpath (xpathCtx, MOD_DATE);
+                if (modified_date != NULL) {
+                        GDateTime *datetime;
+
+                        datetime = g_date_time_new_from_iso8601 ((const char *) modified_date, NULL);
+                        if (datetime != NULL)
+                                ev_document_info_take_modified_datetime (info, datetime);
+                        xmlFree (modified_date);
+                }
+
+                /* reads pdf create date, with the same handling of
+                 * unparseable values */
+                created_date = xmp_get_tag_from_xpath (xpathCtx, CREATE_DATE);
+                if (created_date != NULL) {
+                        GDateTime *datetime;
+
+                        datetime = g_date_time_new_from_iso8601 ((const char *) created_date, NULL);
+                        if (datetime != NULL)
+                                ev_document_info_take_created_datetime (info, datetime);
+                        xmlFree (created_date);
+                }
+        }
+
+        /* NOTE(review): a license already stored in @info is overwritten
+         * without being freed -- confirm callers never pass one. */
+        info->license = xmp_get_license (xpathCtx);
+
+        g_clear_pointer (&metadata_datetime, g_date_time_unref);
+        xmlXPathFreeContext (xpathCtx);
+        xmlFreeDoc (doc);
+
+        return TRUE;
+}
diff --git a/libdocument/ev-xmp.h b/libdocument/ev-xmp.h
new file mode 100644
index 000000000..ae59ba14c
--- /dev/null
+++ b/libdocument/ev-xmp.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2021 Christian Persch
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+#if !defined (EVINCE_COMPILATION)
+#error "This is a private header."
+#endif
+
+#include "ev-document-info.h"
+
+G_BEGIN_DECLS
+
+gboolean ev_xmp_parse (const char *xmp,
+ EvDocumentInfo *info);
+
+G_END_DECLS
diff --git a/libdocument/meson.build b/libdocument/meson.build
index 11143f788..2f04fa1dd 100644
--- a/libdocument/meson.build
+++ b/libdocument/meson.build
@@ -86,6 +86,8 @@ sources = files(
'ev-render-context.c',
'ev-selection.c',
'ev-transition-effect.c',
+ 'ev-xmp.c',
+ 'ev-xmp.h',
)
version_conf = configuration_data()
@@ -124,6 +126,7 @@ common_deps = [
deps = common_deps + [
gmodule_dep,
gmodule_no_export_dep,
+ libxml_dep,
m_dep,
synctex_dep,
zlib_dep,
diff --git a/po/POTFILES.in b/po/POTFILES.in
index a6b5bf409..11bacb347 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -26,6 +26,7 @@ data/org.gnome.Evince-previewer.desktop.in.in
libdocument/ev-attachment.c
libdocument/ev-document-factory.c
libdocument/ev-file-helpers.c
+libdocument/ev-xmp.c
libmisc/ev-page-action-widget.c
libmisc/ev-search-box.c
libview/ev-jobs.c
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]