[gnome-maps/wip/mlundblad/wikidata: 1/3] WIP: wikipedia: Add function to fetch article from Wikidata




commit 4b3f5faf2c1a432a42cf7443ce0152d7e7d2c9ec
Author: Marcus Lundblad <ml dfupdate se>
Date:   Tue Oct 4 23:23:49 2022 +0200

    WIP: wikipedia: Add function to fetch article from Wikidata

 src/wikipedia.js | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)
---
diff --git a/src/wikipedia.js b/src/wikipedia.js
index 80ad3a7d..e1d312d0 100644
--- a/src/wikipedia.js
+++ b/src/wikipedia.js
@@ -43,6 +43,7 @@ function _getSoupSession() {
 
 let _thumbnailCache = {};
 let _metadataCache = {};
+let _wikidataCache = {};
 
 export function getLanguage(wiki) {
     return wiki.split(':')[0];
@@ -155,6 +156,92 @@ export function fetchArticleInfo(wiki, size, metadataCb, thumbnailCb) {
     });
 }
 
+/* Request the Wikidata entity "wikidata" (cached per ID in _wikidataCache)
+ * and pass the response on to _onWikidataFetched(); on a failed request calls
+ * metadataCb(null, {}) and, if given, thumbnailCb(null). */
+export function fetchArticleInfoForWikidata(wikidata, defaultArticle,
+                                            size, metadataCb, thumbnailCb) {
+    let cachedWikidata = _wikidataCache[wikidata];
+    if (cachedWikidata) {
+        _onWikidataFetched(wikidata, defaultArticle, cachedWikidata, size,
+                           metadataCb, thumbnailCb);
+        return;
+    }
+
+    let uri = 'https://www.wikidata.org/w/api.php';
+    let encodedForm = Soup.form_encode_hash({ action: 'wbgetentities',
+                                              ids:    wikidata,
+                                              format: 'json' });
+    let msg = Soup.Message.new_from_encoded_form('GET', uri, encodedForm);
+    let session = _getSoupSession();
+
+    session.send_and_read_async(msg, GLib.PRIORITY_DEFAULT, null,
+                                     (source, res) => {
+        if (msg.get_status() !== Soup.Status.OK) {
+            log("Failed to request Wikidata entities: " + msg.reason_phrase);
+            metadataCb(null, {});
+            thumbnailCb?.(null);
+            return;
+        }
+        let buffer = session.send_and_read_finish(res).get_data();
+        let response = JSON.parse(Utils.getBufferText(buffer));
+        Utils.debug('entities: ' + JSON.stringify(response, '', 2));
+        _wikidataCache[wikidata] = response;
+        _onWikidataFetched(wikidata, defaultArticle, response, size,
+                           metadataCb, thumbnailCb);
+    });
+}
+
+/* Choose the article for a Wikidata response and fetch its info: the sitelink
+ * of the first language in _getLanguages(), else defaultArticle, else the
+ * first sitelink; calls metadataCb(null, {}) and thumbnailCb(null) if none. */
+function _onWikidataFetched(wikidata, defaultArticle, response, size,
+                            metadataCb, thumbnailCb) {
+    let sitelinks = response?.entities?.[wikidata]?.sitelinks;
+
+    Utils.debug('sitelinks: ' + JSON.stringify(sitelinks, '', 2));
+
+    if (!sitelinks) {
+        Utils.debug('No sitelinks element in response');
+        metadataCb(null, {});
+        thumbnailCb?.(null);
+        return;
+    }
+
+    for (let language of _getLanguages()) {
+        /* sitelinks are keyed in the form langwiki, e.g. "enwiki" */
+        if (sitelinks[language + 'wiki']) {
+            let article = `${language}:${sitelinks[language + 'wiki'].title}`;
+
+            Utils.debug('fetching article info: ' + article);
+            fetchArticleInfo(article, size, metadataCb, thumbnailCb);
+            return;
+        }
+    }
+
+    // if no article reference matches a preferred language
+    if (defaultArticle) {
+        // if there's a default article from the "wikipedia" tag, use it
+        fetchArticleInfo(defaultArticle, size, metadataCb, thumbnailCb);
+    } else {
+        // otherwise use the first sitelink as fallback
+        let sitelink = Object.values(sitelinks)[0];
+        let site = sitelink?.site;
+
+        // give up on an empty sitelinks object or a site not ending in "wiki"
+        if (!site?.endsWith('wiki')) {
+            metadataCb(null, {});
+            thumbnailCb?.(null);
+            return;
+        }
+
+        let language = site.substring(0, site.length - 4);
+        let article = `${language}:${sitelink.title}`;
+        Utils.debug('fallback to article: ' + article);
+        fetchArticleInfo(article, size, metadataCb, thumbnailCb);
+    }
+}
+
 function _onMetadataFetched(wiki, page, size, metadataCb, thumbnailCb) {
     /* Try to get a thumbnail *before* following language links--the primary
        article probably has the best thumbnail image */
@@ -218,7 +305,7 @@ function _fetchThumbnailImage(wiki, size, source, callback) {
    the original article should be used. */
 function _findLanguageLink(wiki, page) {
     let originalLang = getLanguage(wiki);
-    let languages = GLib.get_language_names().map((lang) => lang.split(/[\._\-]/)[0]);
+    let languages = _getLanguages();
 
     if (!languages.includes(originalLang)) {
         let langlinks = {};
@@ -233,3 +320,7 @@ function _findLanguageLink(wiki, page) {
         }
     }
 }
+
+function _getLanguages() { // first part of each name, cut at '.', '_' or '-'
+    return GLib.get_language_names().map((name) => name.split(/[\._\-]/, 1)[0]);
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]