[glib] xdgmime: Add better detection for text vs. binary and zero-sized files



commit 1c177ce0ab36991d33f4fbe0cfe6e003ca121b7e
Author: David Faure <faure kde org>
Date:   Wed May 2 15:46:25 2018 +0200

    xdgmime: Add better detection for text vs. binary and zero-sized files
    
    This detects zero-sized files and returns the special-case
    "application/x-zerosize" mime-type for them. It also tries to classify
    otherwise unknown file types as text or binary based on their first
    128 bytes of data, so that text editors can automatically handle
    unknown text files.
    
    Based on:
    https://cgit.freedesktop.org/xdg/xdgmime/commit/?id=5181175d5fdaa3832b0fd094cda0120b1fe92af6
    https://cgit.freedesktop.org/xdg/xdgmime/commit/?id=9c5802b8da56187c5c6abaf70042d14b12d832a9
    
    https://bugzilla.gnome.org/show_bug.cgi?id=795544

 gio/xdgmime/xdgmime.c      | 18 +++++++++++++-----
 gio/xdgmime/xdgmime.h      |  6 ++++++
 gio/xdgmime/xdgmimecache.c |  9 +++++++--
 gio/xdgmime/xdgmimeint.c   | 15 +++++++++++++++
 gio/xdgmime/xdgmimeint.h   |  1 +
 5 files changed, 42 insertions(+), 7 deletions(-)
---
diff --git a/gio/xdgmime/xdgmime.c b/gio/xdgmime/xdgmime.c
index 95adf7ee0..12028927d 100644
--- a/gio/xdgmime/xdgmime.c
+++ b/gio/xdgmime/xdgmime.c
@@ -60,6 +60,8 @@ XdgMimeCache **_caches = NULL;
 static int n_caches = 0;
 
 const char xdg_mime_type_unknown[] = "application/octet-stream";
+const char xdg_mime_type_empty[] = "application/x-zerosize";
+const char xdg_mime_type_textplain[] = "text/plain";
 
 
 enum
@@ -463,17 +465,23 @@ xdg_mime_get_mime_type_for_data (const void *data,
 {
   const char *mime_type;
 
+  if (len == 0)
+    {
+      *result_prio = 100;
+      return XDG_MIME_TYPE_EMPTY;
+    }
+
   xdg_mime_init ();
 
   if (_caches)
-    return _xdg_mime_cache_get_mime_type_for_data (data, len, result_prio);
-
-  mime_type = _xdg_mime_magic_lookup_data (global_magic, data, len, result_prio, NULL, 0);
+    mime_type = _xdg_mime_cache_get_mime_type_for_data (data, len, result_prio);
+  else
+    mime_type = _xdg_mime_magic_lookup_data (global_magic, data, len, result_prio, NULL, 0);
 
   if (mime_type)
     return mime_type;
 
-  return XDG_MIME_TYPE_UNKNOWN;
+  return _xdg_binary_or_text_fallback(data, len);
 }
 
 #ifdef NOT_USED_IN_GIO
@@ -554,7 +562,7 @@ xdg_mime_get_mime_type_for_file (const char  *file_name,
   if (mime_type)
     return mime_type;
 
-  return XDG_MIME_TYPE_UNKNOWN;
+  return _xdg_binary_or_text_fallback(data, bytes_read);
 }
 
 const char *
diff --git a/gio/xdgmime/xdgmime.h b/gio/xdgmime/xdgmime.h
index d30106245..cd20c77a9 100644
--- a/gio/xdgmime/xdgmime.h
+++ b/gio/xdgmime/xdgmime.h
@@ -66,6 +66,8 @@ typedef void (*XdgMimeDestroy)  (void *user_data);
 #define xdg_mime_register_reload_callback     XDG_ENTRY(register_reload_callback)
 #define xdg_mime_remove_callback              XDG_ENTRY(remove_callback)
 #define xdg_mime_type_unknown                 XDG_ENTRY(type_unknown)
+#define xdg_mime_type_empty                   XDG_ENTRY(type_empty)
+#define xdg_mime_type_textplain               XDG_ENTRY(type_textplain)
 #define xdg_mime_get_icon                     XDG_ENTRY(get_icon)
 #define xdg_mime_get_generic_icon             XDG_ENTRY(get_generic_icon)
 
@@ -75,7 +77,11 @@ typedef void (*XdgMimeDestroy)  (void *user_data);
 #endif
 
 extern const char xdg_mime_type_unknown[];
+extern const char xdg_mime_type_empty[];
+extern const char xdg_mime_type_textplain[];
 #define XDG_MIME_TYPE_UNKNOWN xdg_mime_type_unknown
+#define XDG_MIME_TYPE_EMPTY xdg_mime_type_empty
+#define XDG_MIME_TYPE_TEXTPLAIN xdg_mime_type_textplain
 
 const char  *xdg_mime_get_mime_type_for_data       (const void *data,
                                                    size_t      len,
diff --git a/gio/xdgmime/xdgmimecache.c b/gio/xdgmime/xdgmimecache.c
index eab6119b6..8010f9e5b 100644
--- a/gio/xdgmime/xdgmimecache.c
+++ b/gio/xdgmime/xdgmimecache.c
@@ -760,12 +760,11 @@ cache_get_mime_type_for_data (const void *data,
 
   for (n = 0; n < n_mime_types; n++)
     {
-      
       if (mime_types[n])
        return mime_types[n];
     }
 
-  return XDG_MIME_TYPE_UNKNOWN;
+  return NULL;
 }
 
 const char *
@@ -812,6 +811,9 @@ _xdg_mime_cache_get_mime_type_for_file (const char  *file_name,
       statbuf = &buf;
     }
 
+  if (statbuf->st_size == 0)
+    return XDG_MIME_TYPE_EMPTY;
+
   if (!S_ISREG (statbuf->st_mode))
     return XDG_MIME_TYPE_UNKNOWN;
 
@@ -841,6 +843,9 @@ _xdg_mime_cache_get_mime_type_for_file (const char  *file_name,
   mime_type = cache_get_mime_type_for_data (data, bytes_read, NULL,
                                            mime_types, n);
 
+  if (!mime_type)
+    mime_type = _xdg_binary_or_text_fallback(data, bytes_read);
+
   free (data);
   fclose (file);
 
diff --git a/gio/xdgmime/xdgmimeint.c b/gio/xdgmime/xdgmimeint.c
index d56bb8340..35c3635e2 100644
--- a/gio/xdgmime/xdgmimeint.c
+++ b/gio/xdgmime/xdgmimeint.c
@@ -185,3 +185,18 @@ _xdg_reverse_ucs4 (xdg_unichar_t *source, int len)
     }
 }
 
+const char * /* XDG_MIME_TYPE_TEXTPLAIN, or XDG_MIME_TYPE_UNKNOWN if the data looks binary */
+_xdg_binary_or_text_fallback(const void *data, size_t len)
+{
+  unsigned char *chardata; /* byte-wise view of data */
+  int i;
+
+  chardata = (unsigned char *) data;
+  for (i = 0; i < 128 && i < len; ++i) /* inspect at most the first 128 bytes of data */
+    { /* a byte below 32 other than TAB (9), LF (10) or CR (13) marks the data as binary */
+       if (chardata[i] < 32 && chardata[i] != 9 && chardata[i] != 10 && chardata[i] != 13)
+         return XDG_MIME_TYPE_UNKNOWN; /* binary data */
+    }
+
+  return XDG_MIME_TYPE_TEXTPLAIN; /* no such byte was found in the inspected prefix (also reached when len is 0) */
+}
diff --git a/gio/xdgmime/xdgmimeint.h b/gio/xdgmime/xdgmimeint.h
index 8acd8d5cd..c9270139e 100644
--- a/gio/xdgmime/xdgmimeint.h
+++ b/gio/xdgmime/xdgmimeint.h
@@ -71,5 +71,6 @@ int            _xdg_utf8_validate (const char    *source);
 xdg_unichar_t *_xdg_convert_to_ucs4 (const char *source, int *len);
 void           _xdg_reverse_ucs4 (xdg_unichar_t *source, int len);
 const char    *_xdg_get_base_name (const char    *file_name);
+const char    *_xdg_binary_or_text_fallback(const void *data, size_t len);
 
 #endif /* __XDG_MIME_INT_H__ */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]