[geary/wip/db-opt] Various improvements to database reading



commit 705724a96afbedd8d437876eca19c05feee71329
Author: Jim Nelson <jim yorba org>
Date:   Mon Mar 3 19:39:07 2014 -0800

    Various improvements to database reading
    
    Mostly "loop unrolling" by combining multiple DB reads into a single
    long request.  Some other optimizations are attempted by reordering
    query parameter order; unsure at this point if this is truly helping.

 src/engine/imap-db/imap-db-folder.vala             |  238 ++++++++++++++------
 .../imap-engine/imap-engine-replay-queue.vala      |   11 +-
 .../replay-ops/imap-engine-remove-email.vala       |    2 +-
 3 files changed, 176 insertions(+), 75 deletions(-)
---
diff --git a/src/engine/imap-db/imap-db-folder.vala b/src/engine/imap-db/imap-db-folder.vala
index df244f3..dff7fbe 100644
--- a/src/engine/imap-db/imap-db-folder.vala
+++ b/src/engine/imap-db/imap-db-folder.vala
@@ -18,7 +18,8 @@
 private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
     public const Geary.Email.Field REQUIRED_FIELDS = Geary.Email.Field.PROPERTIES;
     
-    private const int LIST_EMAIL_CHUNK_COUNT = 5;
+    private const int LIST_EMAIL_WITH_MESSAGE_CHUNK_COUNT = 10;
+    private const int LIST_EMAIL_METADATA_COUNT = 100;
     private const int LIST_EMAIL_FIELDS_CHUNK_COUNT = 500;
     
     [Flags]
@@ -235,6 +236,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         // Break up work so all reading isn't done in single transaction that locks up the
         // database ... first, gather locations of all emails in database
         Gee.List<LocationIdentifier>? locations = null;
+        Timer timer = new Timer();
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
             // convert initial_id into UID to start walking the list
             Imap.UID? start_uid = null;
@@ -275,16 +277,16 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
                 """);
             }
             
-            sql.append("WHERE folder_id = ? ");
-            
             if (oldest_to_newest)
-                sql.append("AND ordering >= ? ");
+                sql.append("WHERE ordering >= ? ");
             else
-                sql.append("AND ordering <= ? ");
+                sql.append("WHERE ordering <= ? ");
             
             if (only_incomplete)
                 sql.append_printf("AND fields != %d ", Geary.Email.Field.ALL);
             
+            sql.append("AND folder_id = ? ");
+            
             if (oldest_to_newest)
                 sql.append("ORDER BY ordering ASC ");
             else
@@ -293,14 +295,16 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
             sql.append("LIMIT ?");
             
             Db.Statement stmt = cx.prepare(sql.str);
-            stmt.bind_rowid(0, folder_id);
-            stmt.bind_int64(1, start_uid.value);
+            stmt.bind_int64(0, start_uid.value);
+            stmt.bind_rowid(1, folder_id);
             stmt.bind_int(2, count);
             
             locations = do_results_to_locations(stmt.exec(cancellable), flags, cancellable);
             
             return Db.TransactionOutcome.SUCCESS;
         }, cancellable);
+        if (timer.elapsed() > 1.0 && locations != null)
+            debug("list_email_by_id_async %s locations=%d flags=%Xh required=%Xh elapsed=%lf", 
path.to_string(), locations.size, flags, required_fields, timer.elapsed());
         
         // Next, read in email in chunks
         return yield list_email_in_chunks_async(locations, required_fields, flags, cancellable);
@@ -316,6 +320,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         // Break up work so all reading isn't done in single transaction that locks up the
         // database ... first, gather locations of all emails in database
         Gee.List<LocationIdentifier>? locations = null;
+        Timer timer = new Timer();
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
             // use INCLUDE_MARKED_FOR_REMOVE because this is a ranged list ...
             // do_results_to_location() will deal with removing EmailIdentifiers if necessary
@@ -355,6 +360,8 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
             
             return Db.TransactionOutcome.SUCCESS;
         }, cancellable);
+        if (timer.elapsed() > 1.0)
+            debug("list_email_by-range: locations=%d required=%X elapsed=%lf", locations.size, 
required_fields, timer.elapsed());
         
         // Next, read in email in chunks
         return yield list_email_in_chunks_async(locations, required_fields, flags, cancellable);
@@ -423,7 +430,14 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         // Break up work so all reading isn't done in single transaction that locks up the
         // database ... first, gather locations of all emails in database
         Gee.List<LocationIdentifier> locations = new Gee.ArrayList<LocationIdentifier>();
+        Timer timer = new Timer();
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
+            // convert ids into LocationIdentifiers
+            Gee.List<LocationIdentifier>? locs = do_get_location_for_ids(cx, ids, flags,
+                cancellable);
+            if (locs == null || locs.size == 0)
+                return Db.TransactionOutcome.DONE;
+            
             StringBuilder sql = new StringBuilder("""
                 SELECT MessageLocationTable.message_id, ordering, remove_marker
                 FROM MessageLocationTable
@@ -436,24 +450,25 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
                 """);
             }
             
-            sql.append("WHERE folder_id = ? ");
+            if (locs.size != 1) {
+                sql.append("WHERE ordering IN (");
+                bool first = true;
+                foreach (LocationIdentifier location in locs) {
+                    if (!first)
+                        sql.append(", ");
+                    
+                    sql.append(location.uid.to_string());
+                    first = false;
+                }
+                sql.append(")");
+            } else {
+                sql.append_printf("WHERE ordering = '%s' ", locs[0].uid.to_string());
+            }
+            
             if (only_incomplete)
                 sql.append_printf(" AND fields != %d ", Geary.Email.Field.ALL);
             
-            sql.append("AND ordering IN (");
-            bool first = true;
-            foreach (ImapDB.EmailIdentifier id in ids) {
-                LocationIdentifier? location = do_get_location_for_id(cx, id, flags, cancellable);
-                if (location == null)
-                    continue;
-                
-                if (!first)
-                    sql.append(", ");
-                
-                sql.append(location.uid.to_string());
-                first = false;
-            }
-            sql.append(")");
+            sql.append("AND folder_id = ? ");
             
             Db.Statement stmt = cx.prepare(sql.str);
             stmt.bind_rowid(0, folder_id);
@@ -462,6 +477,8 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
             
             return Db.TransactionOutcome.SUCCESS;
         }, cancellable);
+        if (timer.elapsed() > 1.0)
+            debug("list_email_by_sparse_id %s locations=%d flags=%Xh required=%X elapsed=%lf", 
path.to_string(), locations.size, flags, required_fields, timer.elapsed());
         
         // Next, read in email in chunks
         return yield list_email_in_chunks_async(locations, required_fields, flags, cancellable);
@@ -472,26 +489,36 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         if (ids == null || ids.size == 0)
             return null;
         
-        int length_rounded_up = Numeric.int_round_up(ids.size, LIST_EMAIL_CHUNK_COUNT);
+        // chunk count depends on whether or not the message -- body + headers -- is being fetched
+        int chunk_count = required_fields.requires_any(Email.Field.BODY | Email.Field.HEADER)
+            ? LIST_EMAIL_WITH_MESSAGE_CHUNK_COUNT : LIST_EMAIL_METADATA_COUNT;
+        
+        int length_rounded_up = Numeric.int_round_up(ids.size, chunk_count);
         
         Gee.List<Geary.Email> results = new Gee.ArrayList<Geary.Email>();
-        for (int start = 0; start < length_rounded_up; start += LIST_EMAIL_CHUNK_COUNT) {
+        Timer timer = new Timer();
+        for (int start = 0; start < length_rounded_up; start += chunk_count) {
             // stop is the index *after* the end of the slice
-            int stop = Numeric.int_ceiling((start + LIST_EMAIL_CHUNK_COUNT), ids.size);
+            int stop = Numeric.int_ceiling((start + chunk_count), ids.size);
             
             Gee.List<LocationIdentifier>? slice = ids.slice(start, stop);
             assert(slice != null && slice.size > 0);
             
             Gee.List<Geary.Email>? list = null;
+            Timer inner_timer = new Timer();
             yield db.exec_transaction_async(Db.TransactionType.RO, (cx, cancellable) => {
                 list = do_list_email(cx, slice, required_fields, flags, cancellable);
                 
                 return Db.TransactionOutcome.SUCCESS;
             }, cancellable);
+            if (inner_timer.elapsed() > 1.0)
+                debug("Inner list_email_in_chunks %s chunk_count=%d list=%d required=%X elapsed=%lf", 
path.to_string(), chunk_count, (list != null) ? list.size : 0, required_fields, timer.elapsed());
             
             if (list != null)
                 results.add_all(list);
         }
+        if (timer.elapsed() > 1.0)
+            debug("Outer list_email_in_chunks %s chunk_count=%d locations=%d required=%X elapsed=%lf", 
path.to_string(), chunk_count, ids.size, required_fields, timer.elapsed());
         
         if (results.size != ids.size)
             debug("list_email_in_chunks_async: Requested %d email, returned %d", ids.size, results.size);
@@ -608,9 +635,10 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         // for another Folder
         Gee.Set<Imap.UID> uids = new Gee.HashSet<Imap.UID>();
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
-            foreach (ImapDB.EmailIdentifier id in ids) {
-                LocationIdentifier? location = do_get_location_for_id(cx, id, flags, cancellable);
-                if (location != null)
+            Gee.List<LocationIdentifier>? locs = do_get_location_for_ids(cx, ids, flags,
+                cancellable);
+            if (locs != null) {
+                foreach (LocationIdentifier location in locs)
                     uids.add(location.uid);
             }
             
@@ -640,10 +668,10 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         ListFlags flags, Cancellable? cancellable) throws Error {
         Gee.Set<ImapDB.EmailIdentifier> ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
-            foreach (Imap.UID uid in uids) {
-                LocationIdentifier? location = do_get_location_for_uid(cx, uid, flags,
-                    cancellable);
-                if (location != null)
+            Gee.List<LocationIdentifier>? locs = do_get_location_for_uids(cx, uids, flags,
+                cancellable);
+            if (locs != null) {
+                foreach (LocationIdentifier location in locs)
                     ids.add(location.email_id);
             }
             
@@ -692,30 +720,31 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         // (since it may be located in multiple folders).  This means at some point in the future
         // a vacuum will be required to remove emails that are completely unassociated with the
         // account.
-        yield db.exec_transaction_async(Db.TransactionType.WO, (cx) => {
+        yield db.exec_transaction_async(Db.TransactionType.RW, (cx) => {
+            Gee.List<LocationIdentifier>? locs = do_get_location_for_ids(cx, ids,
+                ListFlags.INCLUDE_MARKED_FOR_REMOVE, cancellable);
+            if (locs == null || locs.size == 0)
+                return Db.TransactionOutcome.DONE;
+            
             unread_count = do_get_unread_count_for_ids(cx, ids, cancellable);
             do_add_to_unread_count(cx, -unread_count, cancellable);
             
-            // prepare DELETE Statement and invariants
-            Db.Statement delete_stmt = cx.prepare(
-                "DELETE FROM MessageLocationTable WHERE folder_id=? AND message_id=?");
-            delete_stmt.bind_rowid(0, folder_id);
-            
-            // remove one at a time, gather UIDs
-            Gee.HashSet<Imap.UID> uids = new Gee.HashSet<Imap.UID>();
-            foreach (ImapDB.EmailIdentifier id in ids) {
-                LocationIdentifier? location = do_get_location_for_id(cx, id,
-                    ListFlags.INCLUDE_MARKED_FOR_REMOVE, cancellable);
-                if (location == null)
-                    continue;
-                
-                delete_stmt.reset(Db.ResetScope.SAVE_BINDINGS);
-                delete_stmt.bind_rowid(1, location.message_id);
-                
-                delete_stmt.exec(cancellable);
-                
-                uids.add(location.uid);
+            StringBuilder sql = new StringBuilder("""
+                DELETE FROM MessageLocationTable WHERE (
+            """);
+            bool first = true;
+            foreach (LocationIdentifier location in locs) {
+                if (!first)
+                    sql.append(" OR ");
+                sql.append_printf(" message_id='%s' ", location.message_id.to_string());
+                first = false;
             }
+            sql.append(") AND folder_id=?");
+            
+            Db.Statement stmt = cx.prepare(sql.str);
+            stmt.bind_rowid(0, folder_id);
+            
+            stmt.exec(cancellable);
             
             return Db.TransactionOutcome.COMMIT;
         }, cancellable);
@@ -907,16 +936,17 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         int unread_count = 0;
         Gee.Set<ImapDB.EmailIdentifier> removed_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
         yield db.exec_transaction_async(Db.TransactionType.RW, (cx) => {
+            Gee.List<LocationIdentifier?> locs = do_get_location_for_ids(cx, ids,
+                ListFlags.INCLUDE_MARKED_FOR_REMOVE, cancellable);
+            if (locs == null || locs.size == 0)
+                return Db.TransactionOutcome.DONE;
+            
             unread_count = do_get_unread_count_for_ids(cx, ids, cancellable);
             
             Gee.HashSet<Imap.UID> uids = new Gee.HashSet<Imap.UID>();
-            foreach (ImapDB.EmailIdentifier id in ids) {
-                LocationIdentifier? location = do_get_location_for_id(cx, id,
-                    ListFlags.INCLUDE_MARKED_FOR_REMOVE, cancellable);
-                if (location != null) {
-                    uids.add(location.uid);
-                    removed_ids.add(location.email_id);
-                }
+            foreach (LocationIdentifier location in locs) {
+                uids.add(location.uid);
+                removed_ids.add(location.email_id);
             }
             
             if (uids.size > 0)
@@ -1023,21 +1053,21 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
                 continue;
             
             yield db.exec_transaction_async(Db.TransactionType.RO, (cx, cancellable) => {
+                Gee.List<LocationIdentifier>? locs = do_get_location_for_ids(cx, ids, flags,
+                    cancellable);
+                if (locs == null || locs.size == 0)
+                    return Db.TransactionOutcome.DONE;
+                
                 Db.Statement fetch_stmt = cx.prepare(
                     "SELECT fields FROM MessageTable WHERE id = ?");
                 
-                foreach (ImapDB.EmailIdentifier id in list) {
-                    LocationIdentifier? location_id = do_get_location_for_id(cx, id, flags,
-                        cancellable);
-                    if (location_id == null)
-                        continue;
-                    
+                foreach (LocationIdentifier location in locs) {
                     fetch_stmt.reset(Db.ResetScope.CLEAR_BINDINGS);
-                    fetch_stmt.bind_rowid(0, location_id.message_id);
+                    fetch_stmt.bind_rowid(0, location.message_id);
                     
                     Db.Result results = fetch_stmt.exec(cancellable);
                     if (!results.finished)
-                        map.set(id, (Geary.Email.Field) results.int_at(0));
+                        map.set(location.email_id, (Geary.Email.Field) results.int_at(0));
                 }
                 
                 return Db.TransactionOutcome.SUCCESS;
@@ -1085,6 +1115,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         return !results.finished ? results.int_at(0) : 0;
     }
     
+    // TODO: Unroll loop
     private void do_mark_unmark_removed(Db.Connection cx, Gee.Collection<Imap.UID> uids,
         bool mark_removed, Cancellable? cancellable) throws Error {
         // prepare Statement for reuse
@@ -1491,16 +1522,18 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
     
     private Gee.Map<ImapDB.EmailIdentifier, Geary.EmailFlags>? do_get_email_flags(Db.Connection cx,
         Gee.Collection<ImapDB.EmailIdentifier> ids, Cancellable? cancellable) throws Error {
+        Gee.List<LocationIdentifier>? locs = do_get_location_for_ids(cx, ids, ListFlags.NONE,
+            cancellable);
+        if (locs == null || locs.size == 0)
+            return null;
+        
         // prepare Statement for reuse
         Db.Statement fetch_stmt = cx.prepare("SELECT flags FROM MessageTable WHERE id=?");
         
         Gee.Map<ImapDB.EmailIdentifier, Geary.EmailFlags> map = new Gee.HashMap<
             Geary.EmailIdentifier, Geary.EmailFlags>();
-        foreach (ImapDB.EmailIdentifier id in ids) {
-            LocationIdentifier? location = do_get_location_for_id(cx, id, ListFlags.NONE, cancellable);
-            if (location == null)
-                continue;
-            
+        // TODO: Unroll this loop
+        foreach (LocationIdentifier location in locs) {
             fetch_stmt.reset(Db.ResetScope.CLEAR_BINDINGS);
             fetch_stmt.bind_rowid(0, location.message_id);
             
@@ -1528,6 +1561,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         return new Geary.Imap.EmailFlags(Geary.Imap.MessageFlags.deserialize(results.string_at(0)));
     }
     
+    // TODO: Unroll loop
     private void do_set_email_flags(Db.Connection cx, Gee.Map<ImapDB.EmailIdentifier, Geary.EmailFlags> map,
         Cancellable? cancellable) throws Error {
         Db.Statement update_stmt = cx.prepare(
@@ -2049,6 +2083,36 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         return (!flags.include_marked_for_remove() && location.marked_removed) ? null : location;
     }
     
+    private Gee.List<LocationIdentifier>? do_get_location_for_ids(Db.Connection cx,
+        Gee.Collection<ImapDB.EmailIdentifier>? ids, ListFlags flags, Cancellable? cancellable)
+        throws Error {
+        if (ids == null || ids.size == 0)
+            return null;
+        
+        StringBuilder sql = new StringBuilder("""
+            SELECT message_id, ordering, remove_marker
+            FROM MessageLocationTable
+            WHERE (
+        """);
+        bool first = true;
+        foreach (ImapDB.EmailIdentifier id in ids) {
+            if (!first)
+                sql.append(" OR ");
+            sql.append_printf("message_id = '%s' ", id.message_id.to_string());
+            
+            first = false;
+        }
+        sql.append(") AND folder_id = ?");
+        
+        Db.Statement stmt = cx.prepare(sql.str);
+        stmt.bind_rowid(0, folder_id);
+        
+        Gee.List<LocationIdentifier> locs = do_results_to_locations(stmt.exec(cancellable), flags,
+            cancellable);
+        
+        return (locs.size > 0) ? locs : null;
+    }
+    
     private LocationIdentifier? do_get_location_for_uid(Db.Connection cx, Imap.UID uid,
         ListFlags flags, Cancellable? cancellable) throws Error {
         Db.Statement stmt = cx.prepare("""
@@ -2068,6 +2132,36 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         return (!flags.include_marked_for_remove() && location.marked_removed) ? null : location;
     }
     
+    private Gee.List<LocationIdentifier>? do_get_location_for_uids(Db.Connection cx,
+        Gee.Collection<Imap.UID>? uids, ListFlags flags, Cancellable? cancellable)
+        throws Error {
+        if (uids == null || uids.size == 0)
+            return null;
+        
+        StringBuilder sql = new StringBuilder("""
+            SELECT message_id, ordering, remove_marker
+            FROM MessageLocationTable
+            WHERE (
+        """);
+        bool first = true;
+        foreach (Imap.UID uid in uids) {
+            if (!first)
+                sql.append(" OR ");
+            sql.append_printf("ordering = '%s' ", uid.to_string());
+            
+            first = false;
+        }
+        sql.append(") AND folder_id = ?");
+        
+        Db.Statement stmt = cx.prepare(sql.str);
+        stmt.bind_rowid(0, folder_id);
+        
+        Gee.List<LocationIdentifier> locs = do_results_to_locations(stmt.exec(cancellable), flags,
+            cancellable);
+        
+        return (locs.size > 0) ? locs : null;
+    }
+    
     private int do_get_unread_count_for_ids(Db.Connection cx,
         Gee.Collection<ImapDB.EmailIdentifier> ids, Cancellable? cancellable) throws Error {
         // Fetch flags for each email and update this folder's unread count.
diff --git a/src/engine/imap-engine/imap-engine-replay-queue.vala 
b/src/engine/imap-engine/imap-engine-replay-queue.vala
index 9b78bd1..ad9f333 100644
--- a/src/engine/imap-engine/imap-engine-replay-queue.vala
+++ b/src/engine/imap-engine/imap-engine-replay-queue.vala
@@ -389,7 +389,14 @@ private class Geary.ImapEngine.ReplayQueue : Geary.BaseObject {
                 locally_executing(op);
                 
                 try {
-                    switch (yield op.replay_local_async()) {
+                    Timer timer = new Timer();
+                    ReplayOperation.Status status = yield op.replay_local_async();
+                    double elapsed = timer.elapsed();
+                    if (elapsed > 1.0) {
+                        debug("ReplayQueue %s local-op %s elapsed=%lf", owner.to_string(),
+                            op.to_string(), elapsed);
+                    }
+                    switch (status) {
                         case ReplayOperation.Status.COMPLETED:
                             // done
                             remote_enqueue = false;
@@ -499,7 +506,7 @@ private class Geary.ImapEngine.ReplayQueue : Geary.BaseObject {
                     remote_err = replay_err;
                 }
             } else if (!is_close_op) {
-                remote_err = new EngineError.SERVER_UNAVAILABLE("Folder %s not available", to_string());
+                remote_err = new EngineError.SERVER_UNAVAILABLE("Folder %s not available", 
owner.to_string());
             }
             
             bool has_failed = !is_close_op && (status == ReplayOperation.Status.FAILED);
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
index b2a5ab3..99bc2d4 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
@@ -79,7 +79,7 @@ private class Geary.ImapEngine.RemoveEmail : Geary.ImapEngine.SendReplayOperatio
     }
     
     public override string describe_state() {
-        return "to_remove=%d removed_ids=%d".printf(to_remove.size,
+        return "to_remove.size=%d removed_ids.size=%d".printf(to_remove.size,
             (removed_ids != null) ? removed_ids.size : 0);
     }
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]