fantasdic r350 - in trunk: . lib/fantasdic lib/fantasdic/sources test test/data



Author: mblondel
Date: Sun Aug 31 11:57:07 2008
New Revision: 350
URL: http://svn.gnome.org/viewvc/fantasdic?rev=350&view=rev

Log:
    * lib/fantasdic/binary_search.rb: Renamed BinarySearch module to
    FileBinarySearch. Renamed match_binary_search to binary_search_all.
    Added an implementation of Array#binary_search and Array#binary_search_all.
    In FileBinarySearch, handle the case when an index entry is hit exactly.
    * test/test_binary_search.rb: Unit-test.

    * lib/fantasdic/sources/dictd_file.rb: Follow the above.
    * test/test_dictd_file.rb: More test-cases.

    * lib/fantasdic/sources/stardict_file.rb: Beginning of a stardict file
    source. Still todo:
        - Due to the stardict file format, it is not possible to find
          the boundaries of an index entry without parsing the whole index.
          As a result, the whole index needs to be read in order to perform
          binary_search on it... To speed up lookups, an offset cache should
          be generated and saved to #{GLib.user_cache_dir}/fantasdic. This is what
          Stardict does!
        - Add user interface.
        - Write a DictzipReader class. This will be useful for the dictd source
          file as well.

    * test/test_stardict_file.rb: Unit test.
    * test/data/dictd_www.freedict.de_eng-swa.ifo: Test data.
    * test/data/dictd_www.freedict.de_eng-swa.dict.dz: Test data.
    * test/data/dictd_www.freedict.de_eng-swa.dict: Test data.
    * test/data/dictd_www.freedict.de_eng-swa.idx: Test data.


Added:
   trunk/lib/fantasdic/sources/stardict_file.rb
   trunk/test/data/dictd_www.freedict.de_eng-swa.dict
   trunk/test/data/dictd_www.freedict.de_eng-swa.dict.dz   (contents, props changed)
   trunk/test/data/dictd_www.freedict.de_eng-swa.idx   (contents, props changed)
   trunk/test/data/dictd_www.freedict.de_eng-swa.ifo
   trunk/test/test_binary_search.rb
   trunk/test/test_stardict_file.rb
Modified:
   trunk/ChangeLog
   trunk/lib/fantasdic/binary_search.rb
   trunk/lib/fantasdic/sources/dictd_file.rb
   trunk/test/test_dictd_file.rb

Modified: trunk/lib/fantasdic/binary_search.rb
==============================================================================
--- trunk/lib/fantasdic/binary_search.rb	(original)
+++ trunk/lib/fantasdic/binary_search.rb	Sun Aug 31 11:57:07 2008
@@ -15,6 +15,58 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+class Array
+
+    def binary_search(value, &comp)
+        low = 0
+        high = self.length - 1
+
+        while low <= high
+            mid = (low + high) / 2
+
+            case comp.call(self[mid], value)
+                when 1 # greater than
+                    high = mid - 1
+                when -1 # less than
+                    low = mid + 1
+                when 0 # equals
+                    return mid
+            end
+        end
+    end
+
+    def binary_search_all(word, &comp)
+        mid_offset = binary_search(word, &comp)
+
+        if mid_offset
+            arr = []
+
+            (mid_offset - 1).downto(0) do |i|
+                if comp.call(self[i], word) == 0
+                    arr.push_head(i)
+                else
+                    break
+                end
+            end
+
+            arr << mid_offset
+
+            (mid_offset + 1).upto(self.length - 1) do |i|
+                if comp.call(self[i], word) == 0
+                    arr << i
+                else
+                    break
+                end
+            end
+
+            arr
+        else
+            []
+        end
+    end
+
+end
+
 module Fantasdic
 
 # Classes that include this module must be derived from File
@@ -22,22 +74,31 @@
 # 
 # - get_prev_offset (instance method)
 # - get_next_offset (instance method)
+# - is_entry? (instance method)
 # - get_fields (class method)
 # - get_word_end (class method)
 #
-module BinarySearch
+module FileBinarySearch
 
-BUFFER_SIZE = 100
+BUFFER_SIZE = 300
 
 # Returns the first match found using the comp block for comparison.
 def binary_search(word, &comp)
     low = 0
-    high = File.size(self) - 1
+    file_size = File.size(self)
+    high = file_size
 
     while low <= high
         mid = (low + high) / 2
 
-        start = get_next_offset(mid)
+        if is_entry?(mid)
+            # by chance we hit the exact location of an entry
+            start = mid
+        else
+            # mid is not exactly the offset of an entry
+            # so we look for the closest entry before mid
+            start = get_prev_offset(mid)
+        end
         self.seek(start)
 
         buf = self.read(BUFFER_SIZE)
@@ -48,10 +109,10 @@
 
         case comp.call(curr_word, word)
             when 1 # greater than
-                high = get_prev_offset(mid)
+                high = get_prev_offset(start)
                 return nil if high.nil?
             when -1 # less than
-                low = get_next_offset(mid)
+                low = get_next_offset(start)
                 return nil if low.nil?
             when 0 # equals
                 return start
@@ -61,7 +122,7 @@
     nil
 end
 
-def match_binary_search(word, &comp)
+def binary_search_all(word, &comp)
     mid_offset = binary_search(word, &comp)
 
     if mid_offset

Modified: trunk/lib/fantasdic/sources/dictd_file.rb
==============================================================================
--- trunk/lib/fantasdic/sources/dictd_file.rb	(original)
+++ trunk/lib/fantasdic/sources/dictd_file.rb	Sun Aug 31 11:57:07 2008
@@ -21,7 +21,7 @@
 module Source
 
 class DictdIndex < File
-    include BinarySearch
+    include FileBinarySearch
 
     B64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".
           split(//)
@@ -48,6 +48,8 @@
 
     # Returns the offset of the previous word in the index or nil.
     def get_prev_offset(offset)
+        return nil if offset <= 1
+
         offset -= 1
 
         if offset - BUFFER_SIZE < 0
@@ -63,13 +65,22 @@
 
         i = buf.rindex("\n")
         if i.nil?
-            nil
+            0
         else
             offset += i + 1
             offset            
         end
     end
 
+    # Returns whether the current offset is the beginning of an entry or not
+    def is_entry?(offset)
+        return true if offset == 0
+        return false if offset < 0
+        self.seek(offset - 1)
+        char = self.read(1)
+        char == "\n" ? true : false
+    end
+
     # Returns the offset of the next word in the index or nil.
     def get_next_offset(offset)
         self.seek(offset)
@@ -97,13 +108,13 @@
     end
 
     def match_exact(word)
-        match_binary_search(word) do |s1, s2|
+        binary_search_all(word) do |s1, s2|
             s1 <=> s2
         end
     end
 
     def match_prefix(word)
-        match_binary_search(word) do |s1, s2|
+        binary_search_all(word) do |s1, s2|
             if s1 =~ /^#{s2}/
                 0
             else

Added: trunk/lib/fantasdic/sources/stardict_file.rb
==============================================================================
--- (empty file)
+++ trunk/lib/fantasdic/sources/stardict_file.rb	Sun Aug 31 11:57:07 2008
@@ -0,0 +1,173 @@
+# Fantasdic
+# Copyright (C) 2008 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+module Fantasdic
+module Source
+
+class StardictInfo < Hash
+
+    def initialize(file_path)
+        File.open(file_path) { |f| parse(f) }
+    end
+
+    private
+
+    def parse(f)
+        f.each_line do |line|
+            key, value = line.strip.split("=").map { |s| s.strip }
+            next if value.nil?
+            if ["wordcount", "idxfilesize"].include?(key)
+                self[key] = value.to_i
+            else
+                self[key] = value
+            end
+        end
+    end
+
+end
+
+class StardictIndex < File
+
+    OFFSET_INT_SIZE = 4
+    LEN_INT_SIZE = 4
+
+    def initialize(*args)
+        super(*args)
+    end
+
+    def open(*args)
+        super(*args)
+    end
+
+    def self.get_fields(str)
+        i = str.index("\0")
+        word = str.slice(0...i)
+        word_offset = str.slice((i+1)..(i+OFFSET_INT_SIZE))
+        word_len = \
+            str.slice((i+OFFSET_INT_SIZE+1)..(i+OFFSET_INT_SIZE+LEN_INT_SIZE))
+
+        word_offset = word_offset.nbo32_to_integer
+        word_len = word_len.nbo32_to_integer
+
+        [word, word_offset, word_len]
+    end
+
+    def get_fields(offset, len=0)
+        self.seek(offset)
+        if len > 0
+            buf = self.read(len)
+        else
+            # we don't know the size so we read the maximum entry size
+            buf = self.read(256 + 1 + OFFSET_INT_SIZE + LEN_INT_SIZE)
+        end
+        self.class.get_fields(buf)
+    end
+
+    def match_with_index_file(word, &comp)
+        offsets = self.get_index_offsets
+
+        found_indices = offsets.binary_search_all(word) do |offset, word|
+            curr_word, curr_offset, curr_len = self.get_fields(offset)
+            comp.call(curr_word, word)
+        end
+
+        found_offsets = found_indices.map { |i| offsets[i] }
+
+        found_offsets.map { |offset| self.get_fields(offset) }
+    end
+
+    def match_exact(word)
+        match_with_index_file(word) do |s1, s2|
+            s1 <=> s2
+        end
+    end
+
+    def match_prefix(word)
+        match_with_index_file(word) do |s1, s2|
+            if s1 =~ /^#{s2}/
+                0
+            else
+                s1 <=> s2
+            end
+        end
+    end
+
+    def match_suffix(word)
+        get_word_list.find_all do |curr_word, offset, len|
+            curr_word =~ /#{word}$/
+        end
+    end
+
+    def match_substring(word)
+        get_word_list.find_all do |curr_word, offset, len|
+            curr_word.include?(word)
+        end
+    end
+
+    def match_word(word)
+        match_substring(word).find_all do |curr_word, offset, len|
+            ret = false
+            curr_word.split(" ").each do |single_word|
+                if single_word == word
+                    ret = true
+                    break
+                end
+            end
+            ret
+        end         
+    end
+
+    # Returns the offsets of the beginning of each entry in the index
+    def get_index_offsets
+        self.rewind
+        buf = self.read # FIXME: don't load the whole index into memory
+        len = buf.length
+        offset = 0
+
+        offsets = []
+
+        while offset < len
+            offsets << offset
+            i = buf.index("\0", offset)
+            offset = i + OFFSET_INT_SIZE + LEN_INT_SIZE + 1
+        end
+
+        offsets
+    end
+
+    def get_word_list
+        self.rewind
+        buf = self.read # FIXME: don't load the whole index into memory
+        len = buf.length
+        offset = 0
+
+        words = []
+
+        while offset < len
+            i = buf.index("\0", offset)
+            end_offset = i + OFFSET_INT_SIZE + LEN_INT_SIZE
+            words << StardictIndex.get_fields(buf.slice(offset..end_offset))
+            offset = end_offset + 1
+        end
+
+        words
+    end
+
+end
+
+end
+end
\ No newline at end of file

Added: trunk/test/data/dictd_www.freedict.de_eng-swa.dict
==============================================================================
--- (empty file)
+++ trunk/test/data/dictd_www.freedict.de_eng-swa.dict	Sun Aug 31 11:57:07 2008
@@ -0,0 +1,3131 @@
+English-Swahili xFried/FreeDict Dictionary
+
+compiled by Beata Wojtowicz, wierzchob wp pl : 
+
+Edition: 0.0.1
+
+Size: about 1500 headwords
+
+Published by: FreeDict, 2004
+
+at: http://freedict.org/
+
+Availability:
+
+Available under the terms of the GNU General Public Licence.
+
+Source(s):
+
+This file was compiled from other electronic documents. It is based
+on Swahili-Kiswahili to English Translation Program by Morris Fried
+available from http://www.dict.org/links.html,which has been
+supplemented by entries from Freedict Swahili-English Dictionary
+created by Horst Eyermann (http://www.freedict.de) from the
+Swahili-Esperanto and Esperanto-English Ergane dictionaries,
+available from http://www.travlang.com .
+
+The Project: This dictionary comes to you trough nice people making
+it available for free and for good. It is part of the FreeDict
+project, http://www.freedict.de / http://freedict.org .00-database-short
+  English-Swahili xFried/FreeDict Dictionary00-database-url http://freedict.org/00-database-utf8
+  This dictionary is UTF-8 encoded. If you use dictd, make sure to start it with the appropriate --locale option.10c piece <n.>
+
+    kikumi5c piece <n.>
+
+    kitanoa while <n.>
+
+    kitamboabove <adv.>
+
+    juuabundance <n.>
+
+    wingiabundant <adj.>
+
+    teleabuse <n.>
+
+    matukanoaccident <n.>
+
+    ajaliaccustom <v.>
+
+    zoezaacid <n.>
+
+    asidiact <n.>
+
+    mpangoaddress <n.>
+
+    anwaniAfrica <n.>
+
+    AfrikaAfrican <n.>
+
+    Mwafrikaafter <adv.>
+
+    baadaafternoon <n.>
+
+    alasiriafterwards <adv.>
+
+    baadaye
+
+there is/are notafterwards <adv.>
+
+    hakuna
+   
+    halafuagain <adv.>
+
+    tenaagree <v.>
+
+    patana
+
+agree <v.>
+
+    kubaliagreement <n.>
+
+    mapatanoair <n.>
+
+    hewaairplane <n.>
+
+    eroplenialive <adj.>
+
+    haiall <adj.>
+
+    ote
+
+all <adj.>
+
+    piaalone <adj.>
+
+    pekealthough <conj.>
+
+    ingawaalways <adv.>
+
+    sikuzoteamount <n.>
+
+    kiasiamuse <v.>
+
+    chekeshaand <conj.>
+
+    naand I
+
+    namiangel <n.>
+
+    malaikaanimal <n.>
+
+    mnyamaanswer <n.>
+
+    jibu
+
+answer <v.>
+
+    itika
+
+answer <v.>
+
+    jibuappear <v.>
+
+    tokeaapply <v.>
+
+    pakaappointment <n.>
+
+    miadiApril <n.>
+
+    ApriliArabia <n.>
+
+    Mangaare <irrv.>
+
+    niarm <n.>
+
+    mkonoarrange <v.>
+
+    pangaarrive <v.>
+
+    fikaarrive at <v.>
+
+    fikiaart <n.>
+
+    sanaaas <conj.>
+
+    kamaash <n.>
+
+    jivuask <v.>
+
+    ulizaassociation <n.>
+
+    chamaattention <n.>
+
+    uangalifuAugust <n.>
+
+    Agostiaunt <n.>
+
+    shangaziauthority <n.>
+
+    mamlakaawaken <v.>
+
+    amshababoon <n.>
+
+    nyaniback <n.>
+
+    mgongobad <adj.>
+
+    bayabag <n.>
+
+    mfuko
+
+bag <n.>
+
+    mkobabalcony <n.>
+
+    roshanibanana <n.>
+
+    ndizibanana plant <n.>
+
+    mgombabarbarous <adj.>
+
+    shenzibarber <n.>
+
+    kinyozibasket <n.>
+
+    kikapubathe <v.>
+
+    ogabathe in <v.>
+
+    ogeabathing suit <n.>
+
+    kiogeobe <v.>
+
+    wabe able <v.>
+
+    wezabe apparent <v.>
+
+    onekanabe awakened <v.>
+
+    amshwabe boiled <v.>
+
+    chemshwabe born <v.>
+
+    zaliwabe broken <v.>
+
+    vunjikabe bruised <v.>
+
+    chubukabe called <v.>
+
+    itwabe careful <v.>
+
+    tahadharibe changed <v.>
+
+    badilishwabe clear <v.>
+
+    eleabe closed <v.>
+
+    fungwabe collected <v.>
+
+    tozwabe comfortable <v.>
+
+    starehebe complete <v.>
+
+    timiabe cut <v.>
+
+    katwabe delivered <v.>
+
+    fikishwabe disturbed <v.>
+
+    ghasiwabe done <v.>
+
+    fanyikabe dried up <v.>
+
+    kaukwabe enough <v.>
+
+    toshabe full <v.>
+
+    shibabe given <v.>
+
+    pewabe helped <v.>
+
+    saidiwabe in service <v.>
+
+    tumikabe in time <v.>
+
+    wahibe injured <v.>
+
+    umiabe kneaded <v.>
+
+    kandwabe laid out <v.>
+
+    tandikwabe loose <v.>
+
+    pwayabe married <v.>
+
+    olewabe obtainable <v.>
+
+    patikanabe opened <v.>
+
+    funguliwabe ordered <v.>
+
+    amriwabe overcome <v.>
+
+    shindwabe permitted <v.>
+
+    ruhusiwabe possible <v.>
+
+    wezekanabe put <v.>
+
+    tiwa
+
+be put <v.>
+
+    wekwabe quiet <v.>
+
+    nyamazabe rented <v.>
+
+    kodishwabe repaired <v.>
+
+    tengenezwabe requested <v.>
+
+    takiwabe required of <v.>
+
+    lazimube robbed <v.>
+
+    ibiwabe sent <v.>
+
+    pelekwabe sent to <v.>
+
+    pelekewabe sick <v.>
+
+    umwabe sorry <v.>
+
+    sikitikabe torn <v.>
+
+    chanikabe twisted <v.>
+
+    pindwabe used <v.>
+
+    tumiwabe useful <v.>
+
+    faabe washed <v.>
+
+    fuliwabe written <v.>
+
+    andikwabean <n.>
+
+    ukundebean cake <n.>
+
+    bajiabear offspring <v.>
+
+    zaabeard <n.>
+
+    ndevubeauty <n.>
+
+    uzuribed <n.>
+
+    kitandabee <n.>
+
+    nyukibeer <n.>
+
+    pombebefore <prep.>
+
+    kablabefore dawn <n.>
+
+    alfajiribeg <v.>
+
+    ombabeggar <n.>
+
+    mwombajibegin <v.>
+
+    anzabehavior <n.>
+
+    mwenendoall of youbehind <adv.>
+
+    nyote
+   
+    nyumabelieve <v.>
+
+    amini
+
+believe <v.>
+
+    sadikibell <n.>
+
+    kengelebelow <adv.>
+
+    chinibend <n.>
+
+    pindibend down <v.>
+
+    inamabeverage <n.>
+
+    kinywajiBible <n.>
+
+    bibliabig <adj.>
+
+    kubwabird <n.>
+
+    ndegebite <v.>
+
+    umabiting ant <n.>
+
+    siafubitter <adj.>
+
+    chungublack <adj.>
+
+    eusibless <v.>
+
+    barikiblessing <n.>
+
+    barakablind person <n.>
+
+    kipofublister <n.>
+
+    lengelengeblood <n.>
+
+    damublue <adj.>
+
+    buluuboard <n.>
+
+    baoboat <n.>
+
+    mashuabody <n.>
+
+    mwilibody organ <n.>
+
+    mshipaboil <n.>
+
+    jipu
+
+boil <v.>
+
+    chemshabone <n.>
+
+    mfupabook <n.>
+
+    kitabu
+
+book <n.>
+
+    msahafuborrow <v.>
+
+    kopabottle <n.>
+
+    chupabox <n.>
+
+    sandukuboy <n.>
+
+    mvulanabracelet <n.>
+
+    kikukubrain <n.>
+
+    ubongobrake <n.>
+
+    kizuizobrass <n.>
+
+    shababrassiere <n.>
+
+    sidiriabrave <adj.>
+
+    hodaribread <n.>
+
+    mkatebreak <v.>
+
+    vunjabreak down <v.>
+
+    bomoa
+
+break down <v.>
+
+    haribikabream <n.>
+
+    changubridge <n.>
+
+    darajabrightness <n.>
+
+    mwangazabring <v.>
+
+    letabring to <v.>
+
+    leteabring up <v.>
+
+    leabroom <n.>
+
+    ufagiobrother <n.>
+
+    kakabrown <adj.>
+
+    kahawiabubble up <v.>
+
+    chemkabuffalo <n.>
+
+    nyatibuild <v.>
+
+    jengabunch <n.>
+
+    shadabundle <n.>
+
+    furushi
+
+bundle <n.>
+
+    titaburn <v.>
+
+    wakabut <conj.>
+
+    bali
+
+but <conj.>
+
+    lakinibutter <n.>
+
+    siagibuttock <n.>
+
+    kitakobuy <v.>
+
+    nunuacafe <n.>
+
+    mkahawacalf <n.>
+
+    ndamacall <v.>
+
+    itacall on <v.>
+
+    tembeleacamel <n.>
+
+    ngamiacan <n.>
+
+    mkebecaptain <n.>
+
+    nahodhacare for <v.>
+
+    tunzacarelessly <adv.>
+
+    ovyocaretaker <n.>
+
+    mtunzacarpenter <n.>
+
+    seremalacarry <v.>
+
+    beba
+
+carry <v.>
+
+    chukuacarry for <v.>
+
+    chukuliacassava <n.>
+
+    muhogocat <n.>
+
+    pakacaution <n.>
+
+    hadharicent <n.>
+
+    senticentipede <n.>
+
+    tanduceremonial dance <n.>
+
+    unyagocertainly not <interj.>
+
+    hasha!certainty <n.>
+
+    hakikacertificate <n.>
+
+    chetichain <n.>
+
+    mnyororochair <n.>
+
+    kitichange <v.>
+
+    badilishacheap <adj.>
+
+    rahisicheck <n.>
+
+    hundicheek <n.>
+
+    shavucheese <n.>
+
+    jibinichest <n.>
+
+    kifuachest cold <n.>
+
+    mafuachew <v.>
+
+    tafunachicken pox <n.>
+
+    tetekuwangachief <n.>
+
+    jumbechild <n.>
+
+    mtotochildhood <n.>
+
+    utotochin <n.>
+
+    kidevuchoose <v.>
+
+    chaguachurch <n.>
+
+    kanisacircuit <n.>
+
+    mzungukocitizen <n.>
+
+    raiaclass <n.>
+
+    darasaclean <adj.>
+
+    safi
+
+clean <adj.>
+
+    takatifucleanse <v.>
+
+    safishaclerk <n.>
+
+    karaniclose <v.>
+
+    fumbacloth <n.>
+
+    kitambaaclothes <n.>
+
+    nguoclothes iron <n.>
+
+    pasicoast <n.>
+
+    pwanicoat <n.>
+
+    koticock <n.>
+
+    jogoococonut <n.>
+
+    dafu
+
+coconut <n.>
+
+    nazicoconut palm <n.>
+
+    mnazicod <n.>
+
+    chewacoffee <n.>
+
+    kahawacoin <n.>
+
+    sarafucold <n.>
+
+    baridicollarbone <n.>
+
+    mtulingacollect <v.>
+
+    tozacollective work <n.>
+
+    ujimacollector <n.>
+
+    mkusanyacolor <n.>
+
+    rangicomb <n.>
+
+    kitanacome <v.>
+
+    ja
+
+come! <irrv.>
+
+    njoo!come across <v.>
+
+    kutacommerce <n.>
+
+    biasharacompanion <n.>
+
+    mwenzicompetition <n.>
+
+    shindanocomplete <v.>
+
+    kamili
+
+complete <adj.>
+
+    kamiliconductor <n.>
+
+    utingoconfuse <v.>
+
+    ghasiconfusion <n.>
+
+    ghasiacongratulations <n.>
+
+    pongeziconsider <v.>
+
+    fikiricontent <adj.>
+
+    radhicontinue <v.>
+
+    endeleacontradict <v.>
+
+    gombaconverse <v.>
+
+    ongeaconverse in <v.>
+
+    zungumziacook <n.>
+
+    mpishi
+
+cook <v.>
+
+    pikacook in <v.>
+
+    pikiacooking pot <n.>
+
+    chungucool <v.>
+
+    pozacool down <v.>
+
+    poacool season <n.>
+
+    kipupwecool south wind <n.>
+
+    kusicopy <v.>
+
+    fuatishacord <n.>
+
+    uzicork <n.>
+
+    kizibocorpse <n.>
+
+    maiticorrect <v.>
+
+    sahihisha
+
+correct <adj.>
+
+    sahihicotton <n.>
+
+    pambacough <n.>
+
+    kikohozicount <v.>
+
+    hesabucountry <n.>
+
+    nchicourse <n.>
+
+    mwendocover <v.>
+
+    funikacow <n.>
+
+    ng'ombecrack <n.>
+
+    ufacraftsman <n.>
+
+    fundicreate <v.>
+
+    umbacrocodile <n.>
+
+    mambacross <n.>
+
+    msalabacross over <v.>
+
+    vukacrossing <n.>
+
+    kivukocrowbar <n.>
+
+    mtalimbocrush <v.>
+
+    pondacry out <v.>
+
+    liacucumber <n.>
+
+    tangocunning <adj.>
+
+    erevucup <n.>
+
+    kikombecure <v.>
+
+    ponyacustom <n.>
+
+    desturi
+
+custom <n.>
+
+    kawaidaCustom House <n.>
+
+    forodhacut <v.>
+
+    katadanger <n.>
+
+    hataridarkness <n.>
+
+    gizadate <n.>
+
+    tarehe
+
+date <n.>
+
+    tendedate palm <n.>
+
+    mtendedaughter <n.>
+
+    bintidawn <v.>
+
+    pambazukaday <n.>
+
+    sikudaytime <n.>
+
+    mchanadeaf person <n.>
+
+    kiziwiDecember <n.>
+
+    Desembadeck <n.>
+
+    sitahadeed <n.>
+
+    tendodelay <v.>
+
+    chelewadeny <v.>
+
+    kanadescent <n.>
+
+    mtelemkodesire <v.>
+
+    tumaidespise <v.>
+
+    dharaudestroy <v.>
+
+    haribudictionary <n.>
+
+    kamusidie <v.>
+
+    fadifference <n.>
+
+    tofautidifferent <adj.>
+
+    mbalimbalidiminish <v.>
+
+    punguadirect <v.>
+
+    elekezadirty <adj.>
+
+    chafudiscount <n.>
+
+    kipunguzidistance <n.>
+
+    mbalidivide <v.>
+
+    gawadivision <n.>
+
+    pandado for <v.>
+
+    fanyiadoctor <n.>
+
+    daktaridocument <n.>
+
+    hatidog <n.>
+
+    mbwadoll <n.>
+
+    bandiadonkey <n.>
+
+    pundadoor <n.>
+
+    mlangodormitory <n.>
+
+    bwenidream <n.>
+
+    ndotodress <v.>
+
+    vaadrink <v.>
+
+    nywadrinking glass <n.>
+
+    bilauridrive <v.>
+
+    ongozadrive away <v.>
+
+    kimbizadrive out <v.>
+
+    fukuzadrop <n.>
+
+    tonedrum <n.>
+
+    ngomadry <adj.>
+
+    kavudry up <v.>
+
+    kaukadry with <v.>
+
+    kaushiaduck <n.>
+
+    bataeagle <n.>
+
+    taiear <n.>
+
+    sikioearly <adv.>
+
+    mapemaeast <n.>
+
+    masharikieat <v.>
+
+    laeconomics <n.>
+
+    uchumieffort <n.>
+
+    bidiiegg <n.>
+
+    yaieight <num.>
+
+    naneeighty <num.>
+
+    themaninielder <n.>
+
+    mzeeelephant <n.>
+
+    temboeleven <num.>
+
+    edasharaembassy <n.>
+
+    ubaloziempty <adj.>
+
+    tupuenable <v.>
+
+    wezeshaenemy <n.>
+
+    aduiEnglish language <n.>
+
+    Kiingerezaenlarge <v.>
+
+    kuzaenter <v.>
+
+    ingiaenter into <v.>
+
+    ingiliaentirely <adv.>
+
+    kabisaentry <n.>
+
+    kiingilioenvelope <n.>
+
+    bahashaequal <adj.>
+
+    sawaespecially <adv.>
+
+    hasaeven if <conj.>
+
+    ijapoevening <n.>
+
+    jionievery <adj.>
+
+    kilaexactly <adv.>
+
+    halisiexcellent <adj.>
+
+    boraexcept <conj.>
+
+    ilaexpense <n.>
+
+    gharamaexpensive <adj.>
+
+    ghaliexplain <v.>
+
+    elezaexplanation <n.>
+
+    elezoeye <n.>
+
+    jichoeyeglasses <n.>
+
+    miwaniface <n.>
+
+    uso
+
+face <v.>
+
+    elekeafade <v.>
+
+    kwajukafail <v.>
+
+    kosekanafaint <v.>
+
+    zimiafaith <n.>
+
+    imanifall <v.>
+
+    angukafamily  <n.>
+
+    jamaafar <adv.>
+
+    mbalifare <n.>
+
+    naulifarmer <n.>
+
+    mlimajifarther <n.>
+
+    ng'ambofasten <v.>
+
+    fungafasten with <v.>
+
+    fungiafastening <n.>
+
+    kifungofat <adj.>
+
+    nene
+
+fat <adj.>
+
+    nonofather <n.>
+
+    babafault <n.>
+
+    kosafavor <v.>
+
+    pendeleafear <n.>
+
+    hofu
+
+fear <v.>
+
+    ogopaFebruary <n.>
+
+    Februarifeed <v.>
+
+    lishafemale <n.>
+
+    kikeferry <n.>
+
+    likonifever <n.>
+
+    homafew <adj.>
+
+    chache
+
+few <adj.>
+
+    habafiance <n.>
+
+    mchumbafiancee <n.>
+
+    mchumbafifty <num.>
+
+    hamsini
+
+fifty <num.>
+
+    khamsinifig <n.>
+
+    tinifig tree <n.>
+
+    mtinifight <v.>
+
+    piganafile <n.>
+
+    tupafill <v.>
+
+    jazafill up <v.>
+
+    jaafinally <adv.>
+
+    hatimayefinger <n.>
+
+    kidolefingernail <n.>
+
+    ukuchafinish <v.>
+
+    isha
+
+finish <v.>
+
+    malizafire <n.>
+
+    motofireplace <n.>
+
+    jikofirst <adj.>
+
+    kwanzafish <n.>
+
+    samakifive <num.>
+
+    tanoflour <n.>
+
+    ungaflower  <n.>
+
+    uafly <n.>
+
+    inzifold <v.>
+
+    kunjafollow <v.>
+
+    fuatafolly <n.>
+
+    ujingafood <n.>
+
+    chakulafool <n.>
+
+    mjingafor <prep.>
+
+    kwaforbid <v.>
+
+    katazaforeign <adj.>
+
+    geniforest <n.>
+
+    msituforget <v.>
+
+    sahauforgive <v.>
+
+    achilia
+
+forgive <v.>
+
+    samehefork <n.>
+
+    umaforty <num.>
+
+    arobainifour <adj.>
+
+    nne
+
+four <num.>
+
+    nnefowl <n.>
+
+    kukufreedom <n.>
+
+    uhuruFrench language <n.>
+
+    KifaransaFriday <n.>
+
+    Ijumaafriend <n.>
+
+    rafikifrog <n.>
+
+    churafruit <n.>
+
+    tundafry <v.>
+
+    kaangagain <n.>
+
+    nafuugame <n.>
+
+    mchezogarden <n.>
+
+    bustanigas <n.>
+
+    hewagasoline <n.>
+
+    petroligather <v.>
+
+    chumagauze <n.>
+
+    shashigem <n.>
+
+    kitogentle <adj.>
+
+    polegentleman <n.>
+
+    bwanagentleness <n.>
+
+    upoleget <v.>
+
+    pataget drunk <v.>
+
+    lewaget for <v.>
+
+    patiaget lost <v.>
+
+    poteaget tired <v.>
+
+    chokaget well <v.>
+
+    ponagiant <n.>
+
+    jitugiraffe <n.>
+
+    twigagirl <n.>
+
+    msichanagive <v.>
+
+    pagive a share <v.>
+
+    husugive for <v.>
+
+    piaglass <n.>
+
+    kioogo <v.>
+
+    enda
+
+go <irrv.>
+
+    nenda!go away <v.>
+
+    ondokago out <v.>
+
+    tokago round <v.>
+
+    zungukago to <v.>
+
+    endeagoat <n.>
+
+    mbuziGod <n.>
+
+    Mungugold <n.>
+
+    dhahabugood <adj.>
+
+    ema
+
+good <adj.>
+
+    zurigood fortune <n.>
+
+    herigown <n.>
+
+    kanzugrape <n.>
+
+    zabibugrape vine <n.>
+
+    mzabibugrapefruit <n.>
+
+    balungigreat <adj.>
+
+    kuugreater rains <n.>
+
+    masikagreet <v.>
+
+    amkia
+
+greet <v.>
+
+    salimugreetings <n.>
+
+    salamugrind <v.>
+
+    sagagrow <v.>
+
+    otagrow up <v.>
+
+    kuaguarantee <n.>
+
+    kithibitishoguard <n.>
+
+    mlinzi
+
+guard <v.>
+
+    lindaguava <n.>
+
+    peraguide <n.>
+
+    kiongozigum <n.>
+
+    gundigums <n.>
+
+    ufizihair <n.>
+
+    nywelehalf <n.>
+
+    nusuhalf gallon <n.>
+
+    pishihammer <n.>
+
+    nyundohang up on <v.>
+
+    tundikiahard <adj.>
+
+    gumuhare <n.>
+
+    sunguraharm <v.>
+
+    dhuruharvest <n.>
+
+    mavunohaste <n.>
+
+    harakahat <n.>
+
+    kofiahate <v.>
+
+    chukiahave <v.>
+
+    nahaving <adj.>
+
+    enyehe <pron.>
+
+    yeyehe she is <irrv.>
+
+    yuhead <n.>
+
+    kichwahealth <n.>
+
+    afyahear <v.>
+
+    sikiaheart <n.>
+
+    moyoheat <n.>
+
+    jotoheavy <adj.>
+
+    zitoheel <n.>
+
+    kisiginohelp <n.>
+
+    msaada
+
+help <v.>
+
+    saidiaherdsman <n.>
+
+    mchungajihere <adv.>
+
+    hapa
+
+here <adv.>
+
+    hukuhesitate <v.>
+
+    sitahide <v.>
+
+    fichahighway <n.>
+
+    barabarahill <n.>
+
+    kilimahinder <v.>
+
+    zuiahip <n.>
+
+    nyongahippo <n.>
+
+    kibokohit <v.>
+
+    pigahit for <v.>
+
+    pigiahoe <n.>
+
+    jembe
+
+hoe <v.>
+
+    limahold on to <v.>
+
+    shikaholder <n.>
+
+    mshikahole <n.>
+
+    tunduholiday <n.>
+
+    sikukuuhoney <n.>
+
+    asalihorn <n.>
+
+    honihorse <n.>
+
+    farasihospital <n.>
+
+    hospitalihot season <n.>
+
+    kaskazi
+
+hot season <n.>
+
+    kiangazihotel <n.>
+
+    hotelihour <n.>
+
+    saahouse <n.>
+
+    nyumbahow <adv.>
+
+    je?how many <adj.>
+
+    ngapi?hundred <num.>
+
+    miahundred thousand <adj.>
+
+    laki
+
+hundred thousand <num.>
+
+    lakihunger <n.>
+
+    njaahunt <v.>
+
+    windahusband <n.>
+
+    mumehut <n.>
+
+    kibandahyena <n.>
+
+    fisiI <pron.>
+
+    mimiI do not have <irrv.>
+
+    sinaI have <irrv.>
+
+    ninaice <n.>
+
+    barafuidle <adj.>
+
+    vivuif <conj.>
+
+    iwapoill-feeling <n.>
+
+    chukiimplement <n.>
+
+    kifaaimportance <n.>
+
+    umuhimuimportant <adj.>
+
+    muhimuimprove <v.>
+
+    boreshain <prep.>
+
+    katikain a corner <adv.>
+
+    pembeniin front <adv.>
+
+    mkabala
+
+in front <adv.>
+
+    mbelein here <adv.>
+
+    humuin order that <conj.>
+
+    iliin there <adv.>
+
+    mlein vain <adv.>
+
+    bureincrease <v.>
+
+    ongeza
+
+increase <v.>
+
+    zidiinfect <v.>
+
+    ambukizainfidel <n.>
+
+    kafiriinform <v.>
+
+    arifuinhabitant <n.>
+
+    mwenyejiinitiation rite <n.>
+
+    jandoink <n.>
+
+    winoin-law <n.>
+
+    mkweinquisitiveness <n.>
+
+    utafitiinsect <n.>
+
+    mduduand theyinside <adv.>
+
+    nao
+   
+    ndaniinstrument <n.>
+
+    alainsult <n.>
+
+    tusiinsurance <n.>
+
+    bimaintention <n.>
+
+    kusudi
+
+intention <n.>
+
+    mradiintestines <n.>
+
+    utumbois <irrv.>
+
+    niisland <n.>
+
+    kisiwahishersits <adj.> <adj.> <adj.>
+
+    ake
+    ake
+    akeJanuary <n.>
+
+    Januarijaw <n.>
+
+    tayajealous <adj.>
+
+    wivujeweller <n.>
+
+    sonarajoin <v.>
+
+    ungajourney <n.>
+
+    safarijoy <n.>
+
+    furahaJuly <n.>
+
+    Julaijump <v.>
+
+    rukaJune <n.>
+
+    Junikey <n.>
+
+    ufunguokidney <n.>
+
+    figokill <v.>
+
+    uakilogram <n.>
+
+    kilokind <n.>
+
+    jinsi
+
+kind <n.>
+
+    namna
+
+kind <n.>
+
+    ainaking <n.>
+
+    mfalmekingdom <n.>
+
+    ufalmekingfish <n.>
+
+    nguruknead <v.>
+
+    kandaknee <n.>
+
+    gotiknife <n.>
+
+    kisuknow <v.>
+
+    elewa
+
+know <v.>
+
+    juaknowledge <n.>
+
+    elimu
+
+knowledge <n.>
+
+    maarifaKoran <n.>
+
+    Kuranilace <n.>
+
+    almarialady <n.>
+
+    bibilamp <n.>
+
+    taaland <n.>
+
+    ardhilanguage <n.>
+
+    lughalarge basket <n.>
+
+    kapulast <adj.>
+
+    mwisholatrine <n.>
+
+    choolaugh <v.>
+
+    chekalaxative <n.>
+
+    halulilay down <v.>
+
+    lazalay out <v.>
+
+    tandikaleader <n.>
+
+    mkurugenzileaf <n.>
+
+    janileak <v.>
+
+    vujalearn <v.>
+
+    jifunzaleather strap <n.>
+
+    ukandaleave <v.>
+
+    achaleft hand <n.>
+
+    shotoleft side <n.>
+
+    kushotoleg <n.>
+
+    mguulemon <n.>
+
+    limaulend <v.>
+
+    kopeshalength <n.>
+
+    urefuleopard <n.>
+
+    chuiless <adv.>
+
+    kasa
+
+less <adv.>
+
+    kasorolesser rains <n.>
+
+    vulilesson <n.>
+
+    fundisholetter <n.>
+
+    barualid <n.>
+
+    kifunikolie down <v.>
+
+    lalalife <n.>
+
+    maisha
+
+life <n.>
+
+    uhailift up <v.>
+
+    inualight weight <adj.>
+
+    epesilightning <n.>
+
+    umemelike <conj.>
+
+    kamaliken <v.>
+
+    fananishalime <n.>
+
+    ndimuline <n.>
+
+    mstarilink <n.>
+
+    kiungolion <n.>
+
+    simbalip <n.>
+
+    mdomolist <n.>
+
+    orodhalisten <v.>
+
+    sikilizaliver <n.>
+
+    iniload <n.>
+
+    mzigolock <n.>
+
+    kitasalocust <n.>
+
+    nzigelong <adj.>
+
+    refulook at <v.>
+
+    tazamalook for <v.>
+
+    tafutalose <v.>
+
+    potezaloss <n.>
+
+    hasaralot <n.>
+
+    kuralove <v.>
+
+    pendalung <n.>
+
+    pafumachine <n.>
+
+    mashinemaiden <n.>
+
+    mwalimail <n.>
+
+    postamaize <n.>
+
+    hindimaize plant <n.>
+
+    muhindimake <v.>
+
+    fanyamale <n.>
+
+    kiumeman  <n.>
+
+    mwanamumemango <n.>
+
+    embemango tree <n.>
+
+    mwembemansion <n.>
+
+    jumbamanure <n.>
+
+    mbolea
+
+manure <n.>
+
+    samadimany <adj.>
+
+    ingimap <n.>
+
+    ramaniMarch <n.>
+
+    machimark <n.>
+
+    chapamarket <n.>
+
+    sokomarriage <n.>
+
+    ndoamarry <v.>
+
+    oamatch <n.>
+
+    kiberitimatter <n.>
+
+    jamboMay <n.>
+
+    Meimeaning <n.>
+
+    maanameasles <n.>
+
+    shuruameasure <v.>
+
+    pima
+
+measure <n.>
+
+    cheomeasurement <n.>
+
+    kipimomeat <n.>
+
+    nyamameat pie <n.>
+
+    sambusamedicine <n.>
+
+    dawameet <v.>
+
+    kutana
+
+meet <v.>
+
+    onanamen <n.>
+
+    wanaumemercy <n.>
+
+    hurumamerge <v.>
+
+    unganishamessenger <n.>
+
+    mjumbemetal <n.>
+
+    madinimannersmidday <n.> <n.>
+
+    adabu
+   
+    adhuhuriless a quartermiddle <adv.>
+
+    kasorobo
+   
+    kati
+
+middle <adv.>
+
+    katikatimile <n.>
+
+    mailimilk <n.>
+
+    maziwamillet <n.>
+
+    mtamamillion <num.>
+
+    milionimimic <v.>
+
+    igizaminister <n.>
+
+    kasisiminute <n.>
+
+    dakikamirage <n.>
+
+    sarabimiss <v.>
+
+    kosamix <v.>
+
+    changanyamixture <n.>
+
+    mchanganyikomolar <n.>
+
+    gegoMonday <n.>
+
+    Jumatatumoney <n.>
+
+    fedha
+
+money <n.>
+
+    pesamoney order <n.>
+
+    hawalamonkey <n.>
+
+    tumbilimoon <n.>
+
+    mwezimore <adv.>
+
+    zaidimorning <n.>
+
+    asubuhimosque <n.>
+
+    msikitimosquito <n.>
+
+    mbumosquito net <n.>
+
+    chandaruamother <n.>
+
+    mamamountain <n.>
+
+    mlimamouth <n.>
+
+    kinywamove from <v.>
+
+    hamamove to <v.>
+
+    hamiamovies <n.>
+
+    sinemamuch <adj.>
+
+    ingimud <n.>
+
+    topemultiply <v.>
+
+    zidishamuscle <n.>
+
+    musulimushroom <n.>
+
+    uyogamusic <n.>
+
+    muzikimy <adj.>
+
+    angunail <n.>
+
+    msumarinaked <adj.>
+
+    tupuname <n.>
+
+    jinanarrow <adj.>
+
+    embambanation <n.>
+
+    taifanausea <n.>
+
+    kichefuchefunear <adv.>
+
+    karibunecessity <n.>
+
+    lazimaneck <n.>
+
+    shingonecklace <n.>
+
+    kidanineed <v.>
+
+    hitajineedle <n.>
+
+    sindanonephew <n.>
+
+    mpwanest <n.>
+
+    kiotanet <n.>
+
+    wavunew <adj.>
+
+    pyaNew Testament <n.>
+
+    injilinews <n.>
+
+    habarinewspaper <n.>
+
+    gazetiniece <n.>
+
+    mpwanight <n.>
+
+    usikunine <num.>
+
+    tisaninety <num.>
+
+    tisinino <interj.>
+
+    la
+
+no <adv.>
+
+    siyonorth <n.>
+
+    kaskazininose <n.>
+
+    puanot <adv.>
+
+    sinot at all <adv.>
+
+    kamwenot inside <irrv.>
+
+    hamnanot only <prep.>
+
+    lichanot so <adv.>
+
+    siyonot yet <adv.>
+
+    badonotebook <n.>
+
+    daftariNovember <n.>
+
+    Novembanow <adv.>
+
+    sasanumber <n.>
+
+    hesabu
+
+number <n.>
+
+    nambarioblige <v.>
+
+    bidiOctober <n.>
+
+    Oktobaof <prep.>
+
+    aoffend <v.>
+
+    chukizaoffice <n.>
+
+    ofisiofficer <n.>
+
+    afisaofficial <adj.>
+
+    rasmioil <n.>
+
+    mafutaointment <n.>
+
+    marhamuokra <n.>
+
+    bamiaold <adj.>
+
+    kukuuold times <n.>
+
+    kaleone <num.>
+
+    mojaone who sews <n.>
+
+    mshonajionion <n.>
+
+    kitunguuonly <adv.>
+
+    tuopen <v.>
+
+    fumbua
+
+open <adj.>
+
+    waziopportunity <n.>
+
+    nafasior <conj.>
+
+    ama
+
+thank youor <conj.>
+
+    asante
+   
+    auorange <n.>
+
+    chungwaorange tree <n.>
+
+    mchungwaorator <n.>
+
+    msemajiorder <v.>
+
+    agizaorder for <v.>
+
+    agiziaornament <n.>
+
+    pamboornamentation <n.>
+
+    nakshiother <adj.>
+
+    ingineour <adj.>
+
+    etuoutside <adv.>
+
+    njeovercome <v.>
+
+    shindaoverseer <n.>
+
+    msimamiziowner <n.>
+
+    mwenye
+
+owner <n.>
+
+    mwenyeweoyster <n.>
+
+    chazapacify <v.>
+
+    tulizapackage <n.>
+
+    kifurushipage <n.>
+
+    ukurasapail <n.>
+
+    ndoopain <n.>
+
+    umivupair <n.>
+
+    jozipalm <n.>
+
+    kofipants <n.>
+
+    surualipapaya <n.>
+
+    papaipaper <n.>
+
+    karatasiparadise <n.>
+
+    peponiparent <n.>
+
+    mzazipark <v.>
+
+    egeshapart <n.>
+
+    sehemupass <v.>
+
+    pitapass by <v.>
+
+    pitiapassenger <n.>
+
+    abiriapaste <n.>
+
+    gundipay <v.>
+
+    lipapayment <n.>
+
+    lipopeace <n.>
+
+    amanipen <n.>
+
+    kalamupencil <n.>
+
+    kalamupenknife <n.>
+
+    kijembepepper <n.>
+
+    pilipilipeppermint <n.>
+
+    peremendeperhaps <adv.>
+
+    huenda
+
+perhaps <adv.>
+
+    labdaperiod <n.>
+
+    kipindiperiod of time <n.>
+
+    mudapermission <n.>
+
+    ruhusapermit <v.>
+
+    ruhusuperson <n.>
+
+    mtuphonograph <n.>
+
+    santuripicture <n.>
+
+    pichapiece <n.>
+
+    kipandepier <n.>
+
+    gatipig <n.>
+
+    nguruwepigeon <n.>
+
+    njiwapill <n.>
+
+    kidongepillar <n.>
+
+    nguzopillow case <n.>
+
+    foronyapineapple <n.>
+
+    nanasipint <n.>
+
+    kibabapipe <n.>
+
+    kikopit <n.>
+
+    shimoplace <n.>
+
+    mahaliplague <n.>
+
+    tauniplaited mat <n.>
+
+    mkekaplanet <n.>
+
+    sayariplank <n.>
+
+    ubaoplate <n.>
+
+    sahaniplay <v.>
+
+    chezaplay at <v.>
+
+    chezachezaplaying cards <n.>
+
+    karataplease <v.>
+
+    pendeza
+
+please <adv.>
+
+    tafadhaliplot <n.>
+
+    kiwanjapoor <adj.>
+
+    maskinipoor quality <adj.>
+
+    hafifuport <n.>
+
+    bandariporter <n.>
+
+    mchukuzipossessions <n.>
+
+    malipost <n.>
+
+    postapotato <n.>
+
+    kiazipound <n.>
+
+    ratlipray <v.>
+
+    saliprayer <n.>
+
+    salaprecede <v.>
+
+    tanguliaprefer <v.>
+
+    hiariprepare for <v.>
+
+    tayarishiapresent <n.>
+
+    zawadipress upon <v.>
+
+    lemeaprice <n.>
+
+    beipriest <n.>
+
+    kahiniproclaim <v.>
+
+    tangazaproduce <n.>
+
+    zaoprofit  <n.>
+
+    faidaprogram <n.>
+
+    utaratibuprogress <n.>
+
+    maendeleoprohibition <n.>
+
+    marufukuprophet <n.>
+
+    mtume
+
+prophet <n.>
+
+    nabiipull <v.>
+
+    vutapump <n.>
+
+    bombapupil <n.>
+
+    mwanafunzipurple <adj.>
+
+    zambaraupurpose <n.>
+
+    niapush <v.>
+
+    sukumaput <v.>
+
+    tia
+
+put <v.>
+
+    wekaput aside for <v.>
+
+    wekeaput in <v.>
+
+    ingiza
+
+put in <v.>
+
+    tiliaput out <v.>
+
+    zimaput out for <v.>
+
+    toleaquarrel <n.>
+
+    ugomvi
+
+quarrel <v.>
+
+    gombana
+
+quarrel <v.>
+
+    kosanaquarter <n.>
+
+    roboquestion <n.>
+
+    swaliquickly <adv.>
+
+    upesirain <n.>
+
+    mvua
+
+rain <v.>
+
+    nyesharat <n.>
+
+    panyaration <n.>
+
+    poshoraw <adj.>
+
+    bichirazor <n.>
+
+    wemberead <v.>
+
+    somareader <n.>
+
+    msomajiready <adj.>
+
+    tayarireap <v.>
+
+    vunareason <n.>
+
+    sababurebuke <v.>
+
+    kanyarecommend <v.>
+
+    sifiared <adj.>
+
+    ekundureduce <v.>
+
+    punguzarefuse <v.>
+
+    kataarejoice <v.>
+
+    furahirelative <n.>
+
+    ndugureligion <n.>
+
+    diniremain <v.>
+
+    bakiremember <v.>
+
+    kumbukaremind <v.>
+
+    kumbusharent <v.>
+
+    kodirent out <v.>
+
+    kodisharepair <v.>
+
+    tengenezarepair for <v.>
+
+    tengenezearepent <v.>
+
+    tuburesemble <v.>
+
+    fananareserve <n.>
+
+    akibarespect <n.>
+
+    heshimarest <v.>
+
+    pumzikarest-place/time <n.>
+
+    pumzikoreturn <v.>
+
+    rudirhino <n.>
+
+    kifarurib <n.>
+
+    ubavurice <n.>
+
+    mchelerice plant <n.>
+
+    mpungaright side <n.>
+
+    kuliarighteousness <n.>
+
+    hakiring <n.>
+
+    peterinse mouth <v.>
+
+    sukutuaripe <adj.>
+
+    bivurise up <v.>
+
+    ondokeariver <n.>
+
+    mtoroad <n.>
+
+    njiarob <v.>
+
+    nyang'anyaroll <n.>
+
+    kikutoroom <n.>
+
+    chumbaroot <n.>
+
+    mzizirope <n.>
+
+    kambarot <v.>
+
+    ozarotten <adj.>
+
+    bovurug <n.>
+
+    zuliarun away <v.>
+
+    kimbia
+
+run away <v.>
+
+    torokarun to <v.>
+
+    kimbiliasack <n.>
+
+    guniasailor <n.>
+
+    bahariasake <n.>
+
+    ajilisaliva <n.>
+
+    matesalt <n.>
+
+    chumviSaturday <n.>
+
+    Jumamosisave <v.>
+
+    okoascarf <n.>
+
+    lesoschedule <n.>
+
+    ratibaschool <n.>
+
+    shule
+
+school <n.>
+
+    skuli
+
+school <n.>
+
+    chuoscissors <n.>
+
+    mkasiscore (twenty) <n.>
+
+    korijascorpion <n.>
+
+    ngescrew <n.>
+
+    parafujo
+
+screw <n.>
+
+    skrubuscrewdriver <n.>
+
+    bisibisisea <n.>
+
+    baharisecond <adj.>
+
+    pilisecret <n.>
+
+    sirisee <v.>
+
+    onaseed <n.>
+
+    mbegusell <v.>
+
+    uzasend <v.>
+
+    pelekasend back <v.>
+
+    rudishasend for <v.>
+
+    pelekeasend off <v.>
+
+    safirishasense <n.>
+
+    akiliSeptember <n.>
+
+    Septembaserpent <n.>
+
+    jokaservant <n.>
+
+    mtumishiservice <n.>
+
+    uandikiajiset down <v.>
+
+    tuaseven <num.>
+
+    sabaseventy <num.>
+
+    sabinisew <v.>
+
+    shonashadow <n.>
+
+    kivulishare <v.>
+
+    gawanyashark <n.>
+
+    papasharp <adj.>
+
+    kalisharpness <n.>
+
+    ukalishave <v.>
+
+    nyoashe <pron.>
+
+    yeyeshed <n.>
+
+    bandasheep <n.>
+
+    kondooshell <n.>
+
+    bufurushilling <n.>
+
+    shilingishine on <v.>
+
+    angaziashirt <n.>
+
+    shatishiver <v.>
+
+    tetemekashoe <n.>
+
+    kiatushop <n.>
+
+    dukashort <adj.>
+
+    fupishort-cut <n.>
+
+    mkatoshorten <v.>
+
+    fupishashoulder <n.>
+
+    begashow <n.>
+
+    tamasha
+
+show <v.>
+
+    onyeshasick person <n.>
+
+    mgonjwasickle <n.>
+
+    mundusickness <n.>
+
+    maradhi
+
+sickness <n.>
+
+    ugonjwaside <n.>
+
+    kando
+
+side <n.>
+
+    upandesilk <n.>
+
+    haririsince <prep.>
+
+    tangusing <v.>
+
+    imbasister <n.>
+
+    dadasit <v.>
+
+    kaasix <num.>
+
+    sitasixty <num.>
+
+    sitinisize <n.>
+
+    ukubwaskin <n.>
+
+    ngozisky <n.>
+
+    mbinguslaughter <v.>
+
+    chinjaslave <n.>
+
+    mtumwasleep <n.>
+
+    usingizisleep in <v.>
+
+    laliaslippers <n.>
+
+    sapatuslippery place <n.>
+
+    utelezislit <v.>
+
+    chanaslowly <adv.>
+
+    polepolesmall <adj.>
+
+    dogosmall fish <n.>
+
+    dagaasmall lump <n.>
+
+    kinundusmall thing <n.>
+
+    kidudesmallpox <n.>
+
+    nduismell <n.>
+
+    harufu
+
+smell <v.>
+
+    nusasmell bad <v.>
+
+    nukasmell good <v.>
+
+    nukiasmile <v.>
+
+    chekeleasmoke <n.>
+
+    moshismooth <v.>
+
+    lainisha
+
+smooth <adj.>
+
+    lainisnake <n.>
+
+    nyokaso <interj.>
+
+    basi
+
+so <adv.>
+
+    kumbesoap <n.>
+
+    sabunisocks <n.>
+
+    soksisoft <adj.>
+
+    lainisoil <n.>
+
+    udongosoldier <n.>
+
+    askarisole <n.>
+
+    unyayosolitary <adj.>
+
+    gumbasome <n.>
+
+    baadhison <n.>
+
+    mwanasong <n.>
+
+    wimbosorceror <n.>
+
+    mchawisore <n.>
+
+    kidondasorrow <n.>
+
+    huzunisoul <n.>
+
+    rohosound <n.>
+
+    sautisoup <n.>
+
+    supusouth <n.>
+
+    kusinisparkle <n.>
+
+    kimetaspeak <v.>
+
+    nena
+
+speak <v.>
+
+    semaspecial <adj.>
+
+    maalumuspice <n.>
+
+    bizarispider <n.>
+
+    buibuispinach <n.>
+
+    mchichasplit <v.>
+
+    pasuaspoon <n.>
+
+    kijikospot <n.>
+
+    doaspring <n.>
+
+    kamanispring-mechanism <n.>
+
+    mtambosprout <v.>
+
+    chipuasqueeze <v.>
+
+    bana
+
+squeeze <v.>
+
+    kamasqueeze out <v.>
+
+    kamuastage <n.>
+
+    jukwaastairs <n.>
+
+    daraja
+
+stairs <n.>
+
+    ngazistand <v.>
+
+    simamastar <n.>
+
+    nyotastart up <v.>
+
+    anzishastate <n.>
+
+    halistation <n.>
+
+    steshenisteal <v.>
+
+    ibasteamship <n.>
+
+    melisteering-gear <n.>
+
+    usukanistep <n.>
+
+    hatua
+
+step- <n.>
+
+    kambostick <n.>
+
+    fimbo
+
+stick <v.>
+
+    kwamastick on <v.>
+
+    bandikastiff porridge <n.>
+
+    ugalistill <adv.>
+
+    badostir up <v.>
+
+    vurugastomach <n.>
+
+    tumbostone <n.>
+
+    jiwestop <n.>
+
+    kituostop up <v.>
+
+    zibastory <n.>
+
+    hadithistranger <n.>
+
+    mgenistream <n.>
+
+    kijitostrength <n.>
+
+    nguvustrengthen <v.>
+
+    imarishastriped <adj.>
+
+    miliastrong <adj.>
+
+    imarasucceed <v.>
+
+    fanikiwasuch a one <n.>
+
+    fulanisuddenly <adv.>
+
+    ghafulasugar <n.>
+
+    sukarisugar cane <n.>
+
+    muwasun <n.>
+
+    juaSunday <n.>
+
+    JumapiliSwahili language <n.>
+
+    Kiswahilisweep <v.>
+
+    fagiasweet <adj.>
+
+    tamuswell <v.>
+
+    vimbaswim <v.>
+
+    ogeleatable <n.>
+
+    mezatail <n.>
+
+    mkiatailor <n.>
+
+    mshonitake away <v.>
+
+    ondoatake leave of <v.>
+
+    agatake off <v.>
+
+    banduatake on <v.>
+
+    pakiatake out <v.>
+
+    toatamarind <n.>
+
+    ukwajutank <n.>
+
+    tangitap <v.>
+
+    gogotatape <n.>
+
+    utepetar  <n.>
+
+    lamitax <n.>
+
+    ushurutea  <n.>
+
+    chaiteach <v.>
+
+    funza
+
+teach <v.>
+
+    fundisha
+
+teach <v.>
+
+    someshateacher <n.>
+
+    mwalimuteapot <n.>
+
+    bulitelephone <n.>
+
+    simutell <v.>
+
+    ambiatemple <n.>
+
+    hekaluten <num.>
+
+    kumitermite <n.>
+
+    mchwatest <v.>
+
+    jaributhank <v.>
+
+    shukuruthat <adj.>
+
+    lethat place <adv.>
+
+    hapotheft <n.>
+
+    wizitheir <adj.>
+
+    aoit is Ithen <adv.>
+
+    ndimi
+   
+    ndipo
+
+then <adv.>
+
+    huko
+
+then <adv.>
+
+    kishathere <adv.>
+
+    huko
+
+there <adv.>
+
+    kule
+
+there <adv.>
+
+    palethere is <irrv.>
+
+    mna
+    hapanathey <pron.>
+
+    waothief <n.>
+
+    mwizithin porridge <n.>
+
+    ujithing <n.>
+
+    kituthink <v.>
+
+    dhani
+
+think <v.>
+
+    wazathirst <n.>
+
+    kiuthirty <num.>
+
+    thelathinithorn <n.>
+
+    mwibathose <adj.>
+
+    lethought <n.>
+
+    wazothousand <adj.>
+
+    elfuthree <num.>
+
+    tatuthroat <n.>
+
+    koothrow <v.>
+
+    tupaThursday <n.>
+
+    Alhamisitighten <v.>
+
+    kazatime <n.>
+
+    mara
+
+time  <n.>
+
+    wakatitin can <n.>
+
+    kopoto <prep.>
+
+    kwatoday <adv.>
+
+    leotogether <adv.>
+
+    pamojatomato <n.>
+
+    tunguletomorrow <adv.>
+
+    keshotongs <n.>
+
+    koleotongue <n.>
+
+    ulimitooth <n.>
+
+    jinotoothbrush <n.>
+
+    mswakitop <n.>
+
+    kipeotortoise <n.>
+
+    kobetouch <v.>
+
+    gusatourniquet <n.>
+
+    kisongotow <v.>
+
+    fungashatown <n.>
+
+    mjitrain <n.>
+
+    relitranslate <v.>
+
+    fasiritravel <v.>
+
+    safiritravel by <v.>
+
+    safiriatraveller <n.>
+
+    msafiritray <n.>
+
+    siniatreasury <n.>
+
+    hazinatree <n.>
+
+    mtitribe <n.>
+
+    kabilatrouble <n.>
+
+    matata
+
+trouble <n.>
+
+    shida
+
+trouble <v.>
+
+    sumbuatruly <adv.>
+
+    kwelitrunk <n.>
+
+    utitrust <v.>
+
+    tumainiTuesday <n.>
+
+    Jumanneturmeric <n.>
+
+    manjanoturn <v.>
+
+    pindaturn around <v.>
+
+    geukatwelve <num.>
+
+    thenasharatwenty <num.>
+
+    ishirinitwo <num.>
+
+    wiliumbrella <n.>
+
+    mwavuliuncle <n.>
+
+    mjombauncover <v.>
+
+    funuaundershirt <n.>
+
+    fulanaunderstand <v.>
+
+    fahamuundress <v.>
+
+    vuaunfasten <v.>
+
+    funguaunfold <v.>
+
+    kunjuaunity <n.>
+
+    umojaunravel <v.>
+
+    fumuaunripe <adj.>
+
+    bichiunstop <v.>
+
+    zibuauntil <prep.>
+
+    mpaka
+
+until <prep.>
+
+    hadi
+
+until <prep.>
+
+    hataup to <prep.>
+
+    mpakaupbringing <n.>
+
+    maleziupper floor <n.>
+
+    ghorofauproot <v.>
+
+    ng'oaus <pron.>
+
+    sisiuse <v.>
+
+    tumiause for <v.>
+
+    tumiliavacation <n.>
+
+    likizovaccinate <v.>
+
+    chanjiavalue <n.>
+
+    thamanivegetable <n.>
+
+    mbogavery <adv.>
+
+    mno
+
+very <adv.>
+
+    sanavessel <n.>
+
+    chomboview <n.>
+
+    mandharivillage <n.>
+
+    kijijivinegar <n.>
+
+    sikiwage <n.>
+
+    mshaharawages <n.>
+
+    ujirawait <v.>
+
+    ngojawait for <v.>
+
+    ngojeawaiter <n.>
+
+    mwandishiwake <v.>
+
+    amkawalk <v.>
+
+    tembeawall <n.>
+
+    ukutawant <v.>
+
+    takawarm <adj.>
+
+    vuguvuguwarn <v.>
+
+    onyawash <v.>
+
+    fua
+
+wash <v.>
+
+    osha
+
+wash <v.>
+
+    osheawash hands <v.>
+
+    nawawatch out <v.>
+
+    angaliawatch over <v.>
+
+    chungawater <n.>
+
+    majiwater bottle <n.>
+
+    kiribawater-channel <n.>
+
+    mferejiwave <n.>
+
+    wimbiwax <n.>
+
+    ntawe <pron.>
+
+    sisiwealthy <adj.>
+
+    tajiriweave <v.>
+
+    fumawedding <n.>
+
+    arusiWednesday <n.>
+
+    Jumatanoweek <n.>
+
+    juma
+
+week <n.>
+
+    wikiweight <n.>
+
+    uzitowell <n.>
+
+    kisima
+
+well <adv.>
+
+    vizuriwest <n.>
+
+    magharibiwhat <pron.>
+
+    nini?what kind <adj.>
+
+    gani?wheat <n.>
+
+    nganowheel <n.>
+
+    gurudumuwheeled vehicle <n.>
+
+    gariwhen <adv.>
+
+    lini?where <adv.>
+
+    wapi?which <adj.>
+
+    pi?which is <irrv.>
+
+    kulikowhite <adj.>
+
+    eupewhite hair <n.>
+
+    mviwho <pron.>
+
+    nani?whole <adj.>
+
+    zimawhole day <n.>
+
+    kutwawhy <adv.>
+
+    kwani?
+
+why <adv.>
+
+    mbona?wickedness <n.>
+
+    ubayawide <adj.>
+
+    panawidow <n.>
+
+    mjanewidower <n.>
+
+    mjanewidth <n.>
+
+    upanawife <n.>
+
+    mkewind <n.>
+
+    upepowindow <n.>
+
+    dirishawine <n.>
+
+    mvinyowing <n.>
+
+    ubawawipe <v.>
+
+    futawipe with <v.>
+
+    futiawith <prep.>
+
+    kwa
+    namiwithhold from <v.>
+
+    zuiliawithout <prep.>
+
+    bila
+
+without <prep.>
+
+    pasipowoman  <n.>
+
+    mwanamkewomen <n.>
+
+    wanawakeword <n.>
+
+    nenowork <n.>
+
+    kaziworld <n.>
+
+    duniaworship <n.>
+
+    ibada
+
+worship <v.>
+
+    abuduwound <n.>
+
+    jerahawrist <n.>
+
+    kiwikowrite <v.>
+
+    andikawriting <n.>
+
+    andikoyard <n.>
+
+    yadiyear <n.>
+
+    mwakayes <adv.>
+
+    naam
+
+it is weyes <adv.>
+
+    ndisi
+   
+    ndiyohelloyesterday <adv.>
+
+    jambo
+   
+    janayou <pron.>
+
+    weweyou plural <pron.>
+
+    ninyiyour <adj.>
+
+    akoyour plural <adj.>
+
+    enuyouth <n.>
+
+    kijanaZanzibar <n.>
+
+    Unguja
\ No newline at end of file

Added: trunk/test/data/dictd_www.freedict.de_eng-swa.dict.dz
==============================================================================
Binary file. No diff available.

Added: trunk/test/data/dictd_www.freedict.de_eng-swa.idx
==============================================================================
Binary file. No diff available.

Added: trunk/test/data/dictd_www.freedict.de_eng-swa.ifo
==============================================================================
--- (empty file)
+++ trunk/test/data/dictd_www.freedict.de_eng-swa.ifo	Sun Aug 31 11:57:07 2008
@@ -0,0 +1,8 @@
+StarDict's dict ifo file
+version=2.4.2
+wordcount=1341
+idxfilesize=20566
+bookname=English - Swahili
+description=Made by Hu Zheng
+date=2007.8.29
+sametypesequence=m

Added: trunk/test/test_binary_search.rb
==============================================================================
--- (empty file)
+++ trunk/test/test_binary_search.rb	Sun Aug 31 11:57:07 2008
@@ -0,0 +1,57 @@
+# Fantasdic
+# Copyright (C) 2008 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+$test_dir = File.expand_path(File.dirname(__FILE__))
+$top_dir = File.expand_path(File.join($test_dir, ".."))
+$lib_dir = File.expand_path(File.join($top_dir, "lib"))
+$test_data_dir = File.expand_path(File.join($test_dir, "data"))
+$LOAD_PATH.unshift($lib_dir)
+
+require "test/unit"
+require "fantasdic"
+
+class TestBinarySearch < Test::Unit::TestCase
+
+    def test_array_binary_search
+        arr = %w(a b c d e f g h i j k l m n o p q r s t u v w x y z)
+
+        arr.each_with_index do |value, index|
+            assert_equal(arr.binary_search(value) { |a,b| a <=> b }, index)
+        end
+
+        assert_equal(arr.binary_search("notfound") { |a,b| a <=> b }, nil)
+    end
+
+    def test_array_binary_search_all_1
+        arr = %w(a b c d e f g h i j k l m n o p q r s t u v w x y z)
+
+        arr.each_with_index do |value, index|
+            assert_equal(arr.binary_search_all(value) { |a,b| a <=> b },
+                         [index])
+        end
+
+        assert_equal(arr.binary_search_all("notfound") { |a,b| a <=> b }, [])
+    end
+
+    def test_array_binary_search_all_2
+        arr = %w(a b c d e f g h i i i i i i i i i r s t u v w x y z)
+
+        assert_equal(arr.binary_search_all("i") { |a,b| a <=> b },
+                    (8..16).to_a)
+    end
+
+end

Modified: trunk/test/test_dictd_file.rb
==============================================================================
--- trunk/test/test_dictd_file.rb	(original)
+++ trunk/test/test_dictd_file.rb	Sun Aug 31 11:57:07 2008
@@ -52,7 +52,8 @@
         DictdIndex.open(@index_file) do |index|
             assert_equal(index.get_prev_offset(52), 25)
             assert_equal(index.get_prev_offset(2005), 1994)
-            assert_equal(index.get_prev_offset(25), nil)
+            assert_equal(index.get_prev_offset(25), 0)
+            assert_equal(index.get_prev_offset(0), nil)
         end
     end
 
@@ -84,7 +85,13 @@
                          [])
 
             assert_equal(index.match_exact("caredig").map { |a| a.first },
-                         ["caredig"])            
+                         ["caredig"])
+
+            assert_equal(index.match_exact("00databasealphabet").
+                         map { |a| a.first }, ["00databasealphabet"])
+
+            assert_equal(index.match_exact("\303\242b").map { |a| a.first },
+                         ["\303\242b"])  
         end
     end
 

Added: trunk/test/test_stardict_file.rb
==============================================================================
--- (empty file)
+++ trunk/test/test_stardict_file.rb	Sun Aug 31 11:57:07 2008
@@ -0,0 +1,1489 @@
+# Fantasdic
+# Copyright (C) 2008 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+$test_dir = File.expand_path(File.dirname(__FILE__))
+$top_dir = File.expand_path(File.join($test_dir, ".."))
+$lib_dir = File.expand_path(File.join($top_dir, "lib"))
+$test_data_dir = File.expand_path(File.join($test_dir, "data"))
+$LOAD_PATH.unshift($lib_dir)
+
+require "test/unit"
+require "fantasdic"
+require "fantasdic/sources/stardict_file"
+
+class TestStardictFileSource < Test::Unit::TestCase
+    include Fantasdic::Source
+
+    def setup
+        @index_file = File.join($test_data_dir,
+                                "dictd_www.freedict.de_eng-swa.idx")
+        @info_file = File.join($test_data_dir,
+                               "dictd_www.freedict.de_eng-swa.ifo")
+        @dict_file = File.join($test_data_dir,
+                               "dictd_www.freedict.de_eng-swa.dict")
+        @dict_dz_file = File.join($test_data_dir,
+                                  "dictd_www.freedict.de_eng-swa.dict.dz")
+    end
+
+    def test_info_file
+        info_file = StardictInfo.new(@info_file)
+        assert_equal(info_file.keys.sort,
+                    ["bookname", "date", "description", "idxfilesize",
+                     "sametypesequence", "version", "wordcount"])
+        assert_equal(info_file,
+                    {"bookname"=>"English - Swahili",
+                     "sametypesequence"=>"m",
+                     "date"=>"2007.8.29",
+                     "idxfilesize"=> 20566,
+                     "version"=>"2.4.2",
+                     "wordcount"=> 1341,
+                     "description"=>"Made by Hu Zheng"})
+    end
+
+    def test_match_suffix
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.match_suffix("ness"),
+                         [["brightness", 5735, 29], ["darkness", 9156, 23],
+                          ["gentleness", 13137, 26],
+                          ["inquisitiveness", 16249, 33],
+                          ["righteousness", 25319, 28],                       
+                          ["sharpness", 26665, 25], ["sickness", 27125, 54],
+                          ["wickedness", 33816, 26]])
+        end
+    end
+
+    def test_match_substring
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.match_substring("fter"),
+                         [["after", 1453, 23], ["afternoon", 1476, 27],
+                          ["afterwards", 1503, 92]])
+        end
+    end
+
+    def test_match_suffix
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.match_word("piece"),
+                         [["10c piece", 1121, 26], ["5c piece", 1147, 25],
+                          ["piece", 22857, 23]])
+        end
+    end
+
+    def test_match_exact
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.match_exact("acid"),
+                         [["acid", 1340, 20]])
+            #assert_equal(index.match_exact("00databaseinfo"),
+            #             [["00databaseinfo", 0, 893]])
+            assert_equal(index.match_exact("pupil"),
+                         [["pupil", 24035, 26]])
+            assert_equal(index.match_exact("soldier"),
+                         [["soldier", 28096, 24]])
+            assert_equal(index.match_exact("wealthy"),
+                         [["wealthy", 33207, 26]])
+            assert_equal(index.match_exact("width"),
+                         [["width", 33907, 21]])
+            assert_equal(index.match_exact("zanzibar"),
+                         [["zanzibar", 34703, 25]])
+            assert_equal(index.match_exact("doesnexist"), [])
+        end
+    end
+
+    def test_match_prefix
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.match_prefix("wa"),
+                         [["wage", 32692, 23],
+                          ["wages", 32715, 21],
+                          ["wait", 32736, 20],
+                          ["wait for", 32756, 25],
+                          ["waiter", 32781, 26],
+                          ["wake", 32807, 19],
+                          ["walk", 32826, 21],
+                          ["wall", 32847, 20],
+                          ["want", 32867, 19],
+                          ["warm", 32886, 25],
+                          ["warn", 32911, 19],
+                          ["wash", 32930, 61],
+                          ["wash hands", 32991, 25],
+                          ["watch out", 33016, 27],
+                          ["watch over", 33043, 27],
+                          ["water", 33070, 20],
+                          ["water bottle", 33090, 29],
+                          ["waterchannel", 33119, 31],
+                          ["wave", 33150, 20],
+                          ["wax", 33170, 17]])
+        end
+    end
+
+    def test_get_word_list
+        StardictIndex.open(@index_file) do |index|
+            assert_equal(index.get_word_list,
+                        [["00databaseinfo", 0, 893],
+                        ["00databaseshort", 893, 62],
+                        ["00databaseurl", 955, 36],
+                        ["00databaseutf8", 991, 130],
+                        ["10c piece", 1121, 26],
+                        ["5c piece", 1147, 25],
+                        ["a while", 1172, 25],
+                        ["above", 1197, 21],
+                        ["abundance", 1218, 25],
+                        ["abundant", 1243, 25],
+                        ["abuse", 1268, 24],
+                        ["accident", 1292, 24],
+                        ["accustom", 1316, 24],
+                        ["acid", 1340, 20],
+                        ["act", 1360, 20],
+                        ["address", 1380, 24],
+                        ["africa", 1404, 23],
+                        ["african", 1427, 26],
+                        ["after", 1453, 23],
+                        ["afternoon", 1476, 27],
+                        ["afterwards", 1503, 92],
+                        ["again", 1595, 22],
+                        ["agree", 1617, 46],
+                        ["agreement", 1663, 28],
+                        ["air", 1691, 18],
+                        ["airplane", 1709, 27],
+                        ["alive", 1736, 21],
+                        ["all", 1757, 40],
+                        ["alone", 1797, 22],
+                        ["although", 1819, 28],
+                        ["always", 1847, 27],
+                        ["amount", 1874, 22],
+                        ["amuse", 1896, 24],
+                        ["and", 1920, 19],
+                        ["and i", 1939, 15],
+                        ["angel", 1954, 23],
+                        ["animal", 1977, 23],
+                        ["answer", 2000, 68],
+                        ["appear", 2068, 22],
+                        ["apply", 2090, 20],
+                        ["appointment", 2110, 27],
+                        ["april", 2137, 22],
+                        ["arabia", 2159, 22],
+                        ["are", 2181, 19],
+                        ["arm", 2200, 19],
+                        ["arrange", 2219, 23],
+                        ["arrive", 2242, 21],
+                        ["arrive at", 2263, 25],
+                        ["art", 2288, 19],
+                        ["as", 2307, 20],
+                        ["ash", 2327, 18],
+                        ["ask", 2345, 19],
+                        ["association", 2364, 27],
+                        ["attention", 2391, 29],
+                        ["august", 2420, 23],
+                        ["aunt", 2443, 23],
+                        ["authority", 2466, 27],
+                        ["awaken", 2493, 22],
+                        ["baboon", 2515, 22],
+                        ["back", 2537, 21],
+                        ["bad", 2558, 20],
+                        ["bag", 2578, 40],
+                        ["balcony", 2618, 25],
+                        ["banana", 2643, 22],
+                        ["banana plant", 2665, 29],
+                        ["barbarous", 2694, 28],
+                        ["barber", 2722, 24],
+                        ["basket", 2746, 23],
+                        ["bathe", 2769, 19],
+                        ["bathe in", 2788, 23],
+                        ["bathing suit", 2811, 29],
+                        ["be", 2840, 15],
+                        ["be able", 2855, 22],
+                        ["be apparent", 2877, 29],
+                        ["be awakened", 2906, 28],
+                        ["be boiled", 2934, 28],
+                        ["be born", 2962, 24],
+                        ["be broken", 2986, 27],
+                        ["be bruised", 3013, 28],
+                        ["be called", 3041, 24],
+                        ["be careful", 3065, 30],
+                        ["be changed", 3095, 31],
+                        ["be clear", 3126, 23],
+                        ["be closed", 3149, 26],
+                        ["be collected", 3175, 28],
+                        ["be comfortable", 3203, 32],
+                        ["be complete", 3235, 27],
+                        ["be cut", 3262, 22],
+                        ["be delivered", 3284, 31],
+                        ["be disturbed", 3315, 30],
+                        ["be done", 3345, 25],
+                        ["be dried up", 3370, 28],
+                        ["be enough", 3398, 25],
+                        ["be full", 3423, 23],
+                        ["be given", 3446, 23],
+                        ["be helped", 3469, 27],
+                        ["be in service", 3496, 30],
+                        ["be in time", 3526, 25],
+                        ["be injured", 3551, 25],
+                        ["be kneaded", 3576, 27],
+                        ["be laid out", 3603, 30],
+                        ["be loose", 3633, 24],
+                        ["be married", 3657, 26],
+                        ["be obtainable", 3683, 32],
+                        ["be opened", 3715, 29],
+                        ["be ordered", 3744, 27],
+                        ["be overcome", 3771, 29],
+                        ["be permitted", 3800, 31],
+                        ["be possible", 3831, 30],
+                        ["be put", 3861, 45],
+                        ["be quiet", 3906, 26],
+                        ["be rented", 3932, 28],
+                        ["be repaired", 3960, 32],
+                        ["be requested", 3992, 29],
+                        ["be required of", 4021, 31],
+                        ["be robbed", 4052, 25],
+                        ["be sent", 4077, 25],
+                        ["be sent to", 4102, 29],
+                        ["be sick", 4131, 22],
+                        ["be sorry", 4153, 27],
+                        ["be torn", 4180, 25],
+                        ["be twisted", 4205, 27],
+                        ["be used", 4232, 24],
+                        ["be useful", 4256, 23],
+                        ["be washed", 4279, 26],
+                        ["be written", 4305, 28],
+                        ["bean", 4333, 21],
+                        ["bean cake", 4354, 25],
+                        ["bear offspring", 4379, 28],
+                        ["beard", 4407, 21],
+                        ["beauty", 4428, 22],
+                        ["bed", 4450, 21],
+                        ["bee", 4471, 19],
+                        ["beer", 4490, 20],
+                        ["before", 4510, 25],
+                        ["before dawn", 4535, 30],
+                        ["beg", 4565, 18],
+                        ["beggar", 4583, 25],
+                        ["begin", 4608, 20],
+                        ["behavior", 4628, 27],
+                        ["behind", 4655, 48],
+                        ["believe", 4703, 49],
+                        ["bell", 4752, 22],
+                        ["below", 4774, 23],
+                        ["bend", 4797, 20],
+                        ["bend down", 4817, 25],
+                        ["beverage", 4842, 27],
+                        ["bible", 4869, 22],
+                        ["big", 4891, 21],
+                        ["bird", 4912, 20],
+                        ["bite", 4932, 18],
+                        ["biting ant", 4950, 26],
+                        ["bitter", 4976, 25],
+                        ["black", 5001, 22],
+                        ["bless", 5023, 22],
+                        ["blessing", 5045, 25],
+                        ["blind person", 5070, 29],
+                        ["blister", 5099, 28],
+                        ["blood", 5127, 20],
+                        ["blue", 5147, 22],
+                        ["board", 5169, 19],
+                        ["boat", 5188, 21],
+                        ["body", 5209, 20],
+                        ["body organ", 5229, 27],
+                        ["boil", 5256, 43],
+                        ["bone", 5299, 20],
+                        ["book", 5319, 45],
+                        ["borrow", 5364, 21],
+                        ["bottle", 5385, 22],
+                        ["box", 5407, 21],
+                        ["boy", 5428, 21],
+                        ["bracelet", 5449, 25],
+                        ["brain", 5474, 22],
+                        ["brake", 5496, 23],
+                        ["brass", 5519, 21],
+                        ["brassiere", 5540, 27],
+                        ["brave", 5567, 24],
+                        ["bread", 5591, 21],
+                        ["break", 5612, 21],
+                        ["break down", 5633, 57],
+                        ["bream", 5690, 22],
+                        ["bridge", 5712, 23],
+                        ["brightness", 5735, 29],
+                        ["bring", 5764, 20],
+                        ["bring to", 5784, 24],
+                        ["bring up", 5808, 22],
+                        ["broom", 5830, 22],
+                        ["brother", 5852, 22],
+                        ["brown", 5874, 25],
+                        ["bubble up", 5899, 26],
+                        ["buffalo", 5925, 23],
+                        ["build", 5948, 21],
+                        ["bunch", 5969, 21],
+                        ["bundle", 5990, 47],
+                        ["burn", 6037, 19],
+                        ["but", 6056, 46],
+                        ["butter", 6102, 22],
+                        ["buttock", 6124, 24],
+                        ["buy", 6148, 19],
+                        ["cafe", 6167, 22],
+                        ["calf", 6189, 20],
+                        ["call", 6209, 18],
+                        ["call on", 6227, 26],
+                        ["camel", 6253, 22],
+                        ["can", 6275, 19],
+                        ["captain", 6294, 25],
+                        ["care for", 6319, 24],
+                        ["carelessly", 6343, 27],
+                        ["caretaker", 6370, 26],
+                        ["carpenter", 6396, 28],
+                        ["carry", 6424, 44],
+                        ["carry for", 6468, 28],
+                        ["cassava", 6496, 24],
+                        ["cat", 6520, 18],
+                        ["caution", 6538, 25],
+                        ["cent", 6563, 20],
+                        ["centipede", 6583, 25],
+                        ["ceremonial dance", 6608, 33],
+                        ["certainly not", 6641, 35],
+                        ["certainty", 6676, 26],
+                        ["certificate", 6702, 27],
+                        ["chain", 6729, 24],
+                        ["chair", 6753, 20],
+                        ["change", 6773, 26],
+                        ["cheap", 6799, 24],
+                        ["check", 6823, 21],
+                        ["cheek", 6844, 21],
+                        ["cheese", 6865, 23],
+                        ["chest", 6888, 21],
+                        ["chest cold", 6909, 26],
+                        ["chew", 6935, 21],
+                        ["chicken pox", 6956, 33],
+                        ["chief", 6989, 21],
+                        ["child", 7010, 21],
+                        ["childhood", 7031, 25],
+                        ["chin", 7056, 21],
+                        ["choose", 7077, 23],
+                        ["church", 7100, 23],
+                        ["circuit", 7123, 26],
+                        ["citizen", 7149, 22],
+                        ["class", 7171, 22],
+                        ["clean", 7193, 50],
+                        ["cleanse", 7243, 25],
+                        ["clerk", 7268, 22],
+                        ["close", 7290, 21],
+                        ["cloth", 7311, 24],
+                        ["clothes", 7335, 22],
+                        ["clothes iron", 7357, 27],
+                        ["coast", 7384, 21],
+                        ["coat", 7405, 19],
+                        ["cock", 7424, 20],
+                        ["coconut", 7444, 46],
+                        ["coconut palm", 7490, 28],
+                        ["cod", 7518, 19],
+                        ["coffee", 7537, 23],
+                        ["coin", 7560, 21],
+                        ["cold", 7581, 21],
+                        ["collarbone", 7602, 29],
+                        ["collect", 7631, 22],
+                        ["collective work", 7653, 31],
+                        ["collector", 7684, 28],
+                        ["color", 7712, 21],
+                        ["comb", 7733, 21],
+                        ["come", 7754, 43],
+                        ["come across", 7797, 26],
+                        ["commerce", 7823, 27],
+                        ["companion", 7850, 26],
+                        ["competition", 7876, 30],
+                        ["complete", 7906, 54],
+                        ["conductor", 7960, 26],
+                        ["confuse", 7986, 23],
+                        ["confusion", 8009, 26],
+                        ["congratulations", 8035, 33],
+                        ["consider", 8068, 25],
+                        ["content", 8093, 25],
+                        ["continue", 8118, 26],
+                        ["contradict", 8144, 26],
+                        ["converse", 8170, 24],
+                        ["converse in", 8194, 31],
+                        ["cook", 8225, 42],
+                        ["cook in", 8267, 23],
+                        ["cooking pot", 8290, 28],
+                        ["cool", 8318, 19],
+                        ["cool down", 8337, 23],
+                        ["cool season", 8360, 29],
+                        ["cool south wind", 8389, 30],
+                        ["copy", 8419, 23],
+                        ["cord", 8442, 18],
+                        ["cork", 8460, 21],
+                        ["corpse", 8481, 22],
+                        ["correct", 8503, 55],
+                        ["cotton", 8558, 22],
+                        ["cough", 8580, 24],
+                        ["count", 8604, 22],
+                        ["country", 8626, 22],
+                        ["course", 8648, 23],
+                        ["cover", 8671, 22],
+                        ["cow", 8693, 21],
+                        ["crack", 8714, 19],
+                        ["craftsman", 8733, 25],
+                        ["create", 8758, 21],
+                        ["crocodile", 8779, 25],
+                        ["cross", 8804, 23],
+                        ["cross over", 8827, 25],
+                        ["crossing", 8852, 25],
+                        ["crowbar", 8877, 26],
+                        ["crush", 8903, 21],
+                        ["cry out", 8924, 21],
+                        ["cucumber", 8945, 24],
+                        ["cunning", 8969, 25],
+                        ["cup", 8994, 21],
+                        ["cure", 9015, 20],
+                        ["custom", 9035, 50],
+                        ["custom house", 9085, 30],
+                        ["cut", 9115, 18],
+                        ["danger", 9133, 23],
+                        ["darkness", 9156, 23],
+                        ["date", 9179, 43],
+                        ["date palm", 9222, 26],
+                        ["daughter", 9248, 24],
+                        ["dawn", 9272, 24],
+                        ["day", 9296, 18],
+                        ["daytime", 9314, 24],
+                        ["deaf person", 9338, 28],
+                        ["december", 9366, 26],
+                        ["deck", 9392, 21],
+                        ["deed", 9413, 20],
+                        ["delay", 9433, 23],
+                        ["deny", 9456, 19],
+                        ["descent", 9475, 26],
+                        ["desire", 9501, 22],
+                        ["despise", 9523, 24],
+                        ["destroy", 9547, 24],
+                        ["dictionary", 9571, 27],
+                        ["die", 9598, 16],
+                        ["difference", 9614, 28],
+                        ["different", 9642, 32],
+                        ["diminish", 9674, 25],
+                        ["direct", 9699, 24],
+                        ["dirty", 9723, 23],
+                        ["discount", 9746, 28],
+                        ["distance", 9774, 24],
+                        ["divide", 9798, 21],
+                        ["division", 9819, 24],
+                        ["do for", 9843, 23],
+                        ["doctor", 9866, 24],
+                        ["document", 9890, 23],
+                        ["dog", 9913, 18],
+                        ["doll", 9931, 21],
+                        ["donkey", 9952, 22],
+                        ["door", 9974, 21],
+                        ["dormitory", 9995, 25],
+                        ["dream", 10020, 21],
+                        ["dress", 10041, 19],
+                        ["drink", 10060, 20],
+                        ["drinking glass", 10080, 32],
+                        ["drive", 10112, 22],
+                        ["drive away", 10134, 28],
+                        ["drive out", 10162, 26],
+                        ["drop", 10188, 19],
+                        ["drum", 10207, 20],
+                        ["dry", 10227, 20],
+                        ["dry up", 10247, 22],
+                        ["dry with", 10269, 26],
+                        ["duck", 10295, 19],
+                        ["eagle", 10314, 19],
+                        ["ear", 10333, 19],
+                        ["early", 10352, 24],
+                        ["east", 10376, 24],
+                        ["eat", 10400, 16],
+                        ["economics", 10416, 26],
+                        ["effort", 10442, 22],
+                        ["egg", 10464, 17],
+                        ["eight", 10481, 22],
+                        ["eighty", 10503, 28],
+                        ["elder", 10531, 20],
+                        ["elephant", 10551, 24],
+                        ["eleven", 10575, 27],
+                        ["embassy", 10602, 25],
+                        ["empty", 10627, 22],
+                        ["enable", 10649, 24],
+                        ["enemy", 10673, 20],
+                        ["english language", 10693, 37],
+                        ["enlarge", 10730, 22],
+                        ["enter", 10752, 21],
+                        ["enter into", 10773, 28],
+                        ["entirely", 10801, 27],
+                        ["entry", 10828, 25],
+                        ["envelope", 10853, 26],
+                        ["equal", 10879, 22],
+                        ["especially", 10901, 27],
+                        ["even if", 10928, 26],
+                        ["evening", 10954, 23],
+                        ["every", 10977, 22],
+                        ["exactly", 10999, 26],
+                        ["excellent", 11025, 26],
+                        ["except", 11051, 23],
+                        ["expense", 11074, 25],
+                        ["expensive", 11099, 27],
+                        ["explain", 11126, 23],
+                        ["explanation", 11149, 27],
+                        ["eye", 11176, 19],
+                        ["eyeglasses", 11195, 27],
+                        ["face", 11222, 41],
+                        ["fade", 11263, 22],
+                        ["fail", 11285, 23],
+                        ["faint", 11308, 21],
+                        ["faith", 11329, 21],
+                        ["fall", 11350, 21],
+                        ["family", 11371, 23],
+                        ["far", 11394, 21],
+                        ["fare", 11415, 20],
+                        ["farmer", 11435, 24],
+                        ["farther", 11459, 25],
+                        ["fasten", 11484, 22],
+                        ["fasten with", 11506, 28],
+                        ["fastening", 11534, 27],
+                        ["fat", 11561, 42],
+                        ["father", 11603, 21],
+                        ["fault", 11624, 20],
+                        ["favor", 11644, 24],
+                        ["fear", 11668, 41],
+                        ["february", 11709, 27],
+                        ["feed", 11736, 20],
+                        ["female", 11756, 21],
+                        ["ferry", 11777, 22],
+                        ["fever", 11799, 20],
+                        ["few", 11819, 44],
+                        ["fiance", 11863, 24],
+                        ["fiancee", 11887, 25],
+                        ["fifty", 11912, 53],
+                        ["fig", 11965, 18],
+                        ["fig tree", 11983, 24],
+                        ["fight", 12007, 22],
+                        ["file", 12029, 19],
+                        ["fill", 12048, 19],
+                        ["fill up", 12067, 21],
+                        ["finally", 12088, 28],
+                        ["finger", 12116, 23],
+                        ["fingernail", 12139, 27],
+                        ["finish", 12166, 46],
+                        ["fire", 12212, 19],
+                        ["fireplace", 12231, 24],
+                        ["first", 12255, 24],
+                        ["fish", 12279, 21],
+                        ["five", 12300, 21],
+                        ["flour", 12321, 20],
+                        ["flower", 12341, 20],
+                        ["fly", 12361, 18],
+                        ["fold", 12379, 20],
+                        ["follow", 12399, 22],
+                        ["folly", 12421, 22],
+                        ["food", 12443, 22],
+                        ["fool", 12465, 21],
+                        ["for", 12486, 20],
+                        ["forbid", 12506, 23],
+                        ["foreign", 12529, 24],
+                        ["forest", 12553, 22],
+                        ["forget", 12575, 22],
+                        ["forgive", 12597, 51],
+                        ["fork", 12648, 18],
+                        ["forty", 12666, 26],
+                        ["four", 12692, 42],
+                        ["fowl", 12734, 19],
+                        ["freedom", 12753, 23],
+                        ["french language", 12776, 35],
+                        ["friday", 12811, 23],
+                        ["friend", 12834, 23],
+                        ["frog", 12857, 20],
+                        ["fruit", 12877, 21],
+                        ["fry", 12898, 20],
+                        ["gain", 12918, 20],
+                        ["game", 12938, 21],
+                        ["garden", 12959, 24],
+                        ["gas", 12983, 18],
+                        ["gasoline", 13001, 26],
+                        ["gather", 13027, 22],
+                        ["gauze", 13049, 22],
+                        ["gem", 13071, 18],
+                        ["gentle", 13089, 23],
+                        ["gentleman", 13112, 25],
+                        ["gentleness", 13137, 26],
+                        ["get", 13163, 18],
+                        ["get drunk", 13181, 24],
+                        ["get for", 13205, 23],
+                        ["get lost", 13228, 24],
+                        ["get tired", 13252, 25],
+                        ["get well", 13277, 23],
+                        ["giant", 13300, 20],
+                        ["giraffe", 13320, 23],
+                        ["girl", 13343, 23],
+                        ["give", 13366, 17],
+                        ["give a share", 13383, 27],
+                        ["give for", 13410, 22],
+                        ["glass", 13432, 20],
+                        ["go", 13452, 41],
+                        ["go away", 13493, 24],
+                        ["go out", 13517, 21],
+                        ["go round", 13538, 26],
+                        ["go to", 13564, 21],
+                        ["goat", 13585, 20],
+                        ["god", 13605, 19],
+                        ["gold", 13624, 22],
+                        ["good", 13646, 43],
+                        ["good fortune", 13689, 27],
+                        ["gown", 13716, 20],
+                        ["grape", 13736, 22],
+                        ["grape vine", 13758, 28],
+                        ["grapefruit", 13786, 28],
+                        ["great", 13814, 21],
+                        ["greater rains", 13835, 30],
+                        ["greet", 13865, 45],
+                        ["greetings", 13910, 26],
+                        ["grind", 13936, 20],
+                        ["grow", 13956, 18],
+                        ["grow up", 13974, 21],
+                        ["guarantee", 13995, 32],
+                        ["guard", 14027, 45],
+                        ["guava", 14072, 20],
+                        ["guide", 14092, 24],
+                        ["gum", 14116, 19],
+                        ["gums", 14135, 20],
+                        ["hair", 14155, 21],
+                        ["half", 14176, 19],
+                        ["half gallon", 14195, 27],
+                        ["hammer", 14222, 23],
+                        ["hang up on", 14245, 29],
+                        ["hard", 14274, 21],
+                        ["hare", 14295, 22],
+                        ["harm", 14317, 20],
+                        ["harvest", 14337, 24],
+                        ["haste", 14361, 22],
+                        ["hat", 14383, 19],
+                        ["hate", 14402, 21],
+                        ["have", 14423, 17],
+                        ["having", 14440, 23],
+                        ["he", 14463, 20],
+                        ["he she is", 14483, 25],
+                        ["head", 14508, 21],
+                        ["health", 14529, 21],
+                        ["hear", 14550, 20],
+                        ["heart", 14570, 20],
+                        ["heat", 14590, 19],
+                        ["heavy", 14609, 22],
+                        ["heel", 14631, 23],
+                        ["help", 14654, 44],
+                        ["herdsman", 14698, 28],
+                        ["here", 14726, 44],
+                        ["hesitate", 14770, 23],
+                        ["hide", 14793, 20],
+                        ["highway", 14813, 26],
+                        ["hill", 14839, 21],
+                        ["hinder", 14860, 21],
+                        ["hip", 14881, 20],
+                        ["hippo", 14901, 22],
+                        ["hit", 14923, 18],
+                        ["hit for", 14941, 23],
+                        ["hoe", 14964, 39],
+                        ["hold on to", 15003, 26],
+                        ["holder", 15029, 23],
+                        ["hole", 15052, 20],
+                        ["holiday", 15072, 25],
+                        ["honey", 15097, 21],
+                        ["horn", 15118, 19],
+                        ["horse", 15137, 22],
+                        ["hospital", 15159, 28],
+                        ["hot season", 15187, 59],
+                        ["hotel", 15246, 22],
+                        ["hour", 15268, 18],
+                        ["house", 15286, 22],
+                        ["how", 15308, 19],
+                        ["how many", 15327, 27],
+                        ["hundred", 15354, 23],
+                        ["hundred thousand", 15377, 68],
+                        ["hunger", 15445, 21],
+                        ["hunt", 15466, 20],
+                        ["husband", 15486, 22],
+                        ["hut", 15508, 21],
+                        ["hyena", 15529, 20],
+                        ["i", 15549, 19],
+                        ["i do not have", 15568, 31],
+                        ["i have", 15599, 24],
+                        ["ice", 15623, 20],
+                        ["idle", 15643, 21],
+                        ["if", 15664, 21],
+                        ["illfeeling", 15685, 27],
+                        ["implement", 15712, 25],
+                        ["importance", 15737, 28],
+                        ["important", 15765, 28],
+                        ["improve", 15793, 25],
+                        ["in", 15818, 22],
+                        ["in a corner", 15840, 31],
+                        ["in front", 15871, 56],
+                        ["in here", 15927, 24],
+                        ["in order that", 15951, 30],
+                        ["in there", 15981, 24],
+                        ["in vain", 16005, 24],
+                        ["increase", 16029, 50],
+                        ["infect", 16079, 25],
+                        ["infidel", 16104, 24],
+                        ["inform", 16128, 22],
+                        ["inhabitant", 16150, 29],
+                        ["initiation rite", 16179, 31],
+                        ["ink", 16210, 18],
+                        ["inlaw", 16228, 21],
+                        ["inquisitiveness", 16249, 33],
+                        ["insect", 16282, 22],
+                        ["inside", 16304, 44],
+                        ["instrument", 16348, 24],
+                        ["insult", 16372, 21],
+                        ["insurance", 16393, 24],
+                        ["intention", 16417, 53],
+                        ["intestines", 16470, 27],
+                        ["is", 16497, 18],
+                        ["island", 16515, 23],
+                        ["its", 16538, 56],
+                        ["january", 16594, 25],
+                        ["jaw", 16619, 18],
+                        ["jealous", 16637, 24],
+                        ["jeweller", 16661, 25],
+                        ["join", 16686, 19],
+                        ["journey", 16705, 24],
+                        ["joy", 16729, 20],
+                        ["july", 16749, 20],
+                        ["jump", 16769, 19],
+                        ["june", 16788, 19],
+                        ["key", 16807, 21],
+                        ["kidney", 16828, 21],
+                        ["kill", 16849, 17],
+                        ["kilogram", 16866, 23],
+                        ["kind", 16889, 63],
+                        ["king", 16952, 21],
+                        ["kingdom", 16973, 24],
+                        ["kingfish", 16997, 24],
+                        ["knead", 17021, 21],
+                        ["knee", 17042, 19],
+                        ["knife", 17061, 20],
+                        ["know", 17081, 40],
+                        ["knowledge", 17121, 54],
+                        ["koran", 17175, 22],
+                        ["lace", 17197, 22],
+                        ["lady", 17219, 19],
+                        ["lamp", 17238, 18],
+                        ["land", 17256, 20],
+                        ["language", 17276, 24],
+                        ["large basket", 17300, 27],
+                        ["last", 17327, 23],
+                        ["latrine", 17350, 22],
+                        ["laugh", 17372, 21],
+                        ["laxative", 17393, 25],
+                        ["lay down", 17418, 23],
+                        ["lay out", 17441, 25],
+                        ["leader", 17466, 27],
+                        ["leaf", 17493, 19],
+                        ["leak", 17512, 19],
+                        ["learn", 17531, 23],
+                        ["leather strap", 17554, 30],
+                        ["leave", 17584, 20],
+                        ["left hand", 17604, 25],
+                        ["left side", 17629, 27],
+                        ["leg", 17656, 18],
+                        ["lemon", 17674, 21],
+                        ["lend", 17695, 22],
+                        ["length", 17717, 22],
+                        ["leopard", 17739, 22],
+                        ["less", 17761, 46],
+                        ["lesser rains", 17807, 27],
+                        ["lesson", 17834, 25],
+                        ["letter", 17859, 22],
+                        ["lid", 17881, 22],
+                        ["lie down", 17903, 23],
+                        ["life", 17926, 42],
+                        ["lift up", 17968, 22],
+                        ["light weight", 17990, 30],
+                        ["lightning", 18020, 25],
+                        ["like", 18045, 22],
+                        ["liken", 18067, 25],
+                        ["lime", 18092, 20],
+                        ["line", 18112, 21],
+                        ["link", 18133, 21],
+                        ["lion", 18154, 20],
+                        ["lip", 18174, 19],
+                        ["list", 18193, 21],
+                        ["listen", 18214, 25],
+                        ["liver", 18239, 19],
+                        ["load", 18258, 20],
+                        ["lock", 18278, 21],
+                        ["locust", 18299, 22],
+                        ["long", 18321, 21],
+                        ["look at", 18342, 24],
+                        ["look for", 18366, 25],
+                        ["lose", 18391, 21],
+                        ["loss", 18412, 21],
+                        ["lot", 18433, 18],
+                        ["love", 18451, 20],
+                        ["lung", 18471, 19],
+                        ["machine", 18490, 25],
+                        ["maiden", 18515, 22],
+                        ["mail", 18537, 20],
+                        ["maize", 18557, 21],
+                        ["maize plant", 18578, 29],
+                        ["make", 18607, 20],
+                        ["male", 18627, 20],
+                        ["man", 18647, 24],
+                        ["mango", 18671, 20],
+                        ["mango tree", 18691, 27],
+                        ["mansion", 18718, 23],
+                        ["manure", 18741, 48],
+                        ["many", 18789, 21],
+                        ["map", 18810, 20],
+                        ["march", 18830, 21],
+                        ["mark", 18851, 20],
+                        ["market", 18871, 21],
+                        ["marriage", 18892, 23],
+                        ["marry", 18915, 18],
+                        ["match", 18933, 24],
+                        ["matter", 18957, 22],
+                        ["may", 18979, 17],
+                        ["meaning", 18996, 23],
+                        ["measles", 19019, 24],
+                        ["measure", 19043, 46],
+                        ["measurement", 19089, 28],
+                        ["meat", 19117, 20],
+                        ["meat pie", 19137, 26],
+                        ["medicine", 19163, 23],
+                        ["meet", 19186, 43],
+                        ["men", 19229, 21],
+                        ["mercy", 19250, 22],
+                        ["merge", 19272, 25],
+                        ["messenger", 19297, 26],
+                        ["metal", 19323, 22],
+                        ["midday", 19345, 51],
+                        ["middle", 19396, 83],
+                        ["mile", 19479, 20],
+                        ["milk", 19499, 21],
+                        ["millet", 19520, 22],
+                        ["million", 19542, 27],
+                        ["mimic", 19569, 21],
+                        ["minister", 19590, 25],
+                        ["minute", 19615, 23],
+                        ["mirage", 19638, 23],
+                        ["miss", 19661, 19],
+                        ["mix", 19680, 23],
+                        ["mixture", 19703, 30],
+                        ["molar", 19733, 20],
+                        ["monday", 19753, 25],
+                        ["money", 19778, 43],
+                        ["money order", 19821, 28],
+                        ["monkey", 19849, 24],
+                        ["moon", 19873, 20],
+                        ["more", 19893, 22],
+                        ["morning", 19915, 25],
+                        ["mosque", 19940, 24],
+                        ["mosquito", 19964, 22],
+                        ["mosquito net", 19986, 32],
+                        ["mother", 20018, 21],
+                        ["mountain", 20039, 24],
+                        ["mouth", 20063, 22],
+                        ["move from", 20085, 24],
+                        ["move to", 20109, 23],
+                        ["movies", 20132, 23],
+                        ["much", 20155, 21],
+                        ["mud", 20176, 18],
+                        ["multiply", 20194, 26],
+                        ["muscle", 20220, 23],
+                        ["mushroom", 20243, 24],
+                        ["music", 20267, 22],
+                        ["my", 20289, 19],
+                        ["nail", 20308, 22],
+                        ["naked", 20330, 22],
+                        ["name", 20352, 19],
+                        ["narrow", 20371, 26],
+                        ["nation", 20397, 22],
+                        ["nausea", 20419, 29],
+                        ["near", 20448, 23],
+                        ["necessity", 20471, 26],
+                        ["neck", 20497, 21],
+                        ["necklace", 20518, 25],
+                        ["need", 20543, 21],
+                        ["needle", 20564, 24],
+                        ["nephew", 20588, 21],
+                        ["nest", 20609, 20],
+                        ["net", 20629, 18],
+                        ["new", 20647, 19],
+                        ["new testament", 20666, 30],
+                        ["news", 20696, 21],
+                        ["newspaper", 20717, 26],
+                        ["niece", 20743, 20],
+                        ["night", 20763, 21],
+                        ["nine", 20784, 21],
+                        ["ninety", 20805, 25],
+                        ["no", 20830, 41],
+                        ["north", 20871, 25],
+                        ["nose", 20896, 18],
+                        ["not", 20914, 18],
+                        ["not at all", 20932, 28],
+                        ["not inside", 20960, 29],
+                        ["not only", 20989, 27],
+                        ["not so", 21016, 23],
+                        ["not yet", 21039, 24],
+                        ["notebook", 21063, 26],
+                        ["november", 21089, 26],
+                        ["now", 21115, 20],
+                        ["number", 21135, 49],
+                        ["oblige", 21184, 21],
+                        ["october", 21205, 24],
+                        ["of", 21229, 17],
+                        ["offend", 21246, 24],
+                        ["office", 21270, 22],
+                        ["officer", 21292, 23],
+                        ["official", 21315, 26],
+                        ["oil", 21341, 20],
+                        ["ointment", 21361, 26],
+                        ["okra", 21387, 20],
+                        ["old", 21407, 21],
+                        ["old times", 21428, 24],
+                        ["one", 21452, 20],
+                        ["one who sews", 21472, 31],
+                        ["onion", 21503, 24],
+                        ["only", 21527, 19],
+                        ["open", 21546, 44],
+                        ["opportunity", 21590, 28],
+                        ["or", 21618, 63],
+                        ["orange", 21681, 24],
+                        ["orange tree", 21705, 30],
+                        ["orator", 21735, 24],
+                        ["order", 21759, 21],
+                        ["order for", 21780, 26],
+                        ["ornament", 21806, 24],
+                        ["ornamentation", 21830, 30],
+                        ["other", 21860, 24],
+                        ["our", 21884, 19],
+                        ["outside", 21903, 23],
+                        ["overcome", 21926, 25],
+                        ["overseer", 21951, 28],
+                        ["owner", 21979, 48],
+                        ["oyster", 22027, 22],
+                        ["pacify", 22049, 23],
+                        ["package", 22072, 27],
+                        ["page", 22099, 22],
+                        ["pail", 22121, 19],
+                        ["pain", 22140, 20],
+                        ["pair", 22160, 19],
+                        ["palm", 22179, 19],
+                        ["pants", 22198, 23],
+                        ["papaya", 22221, 22],
+                        ["paper", 22243, 24],
+                        ["paradise", 22267, 25],
+                        ["parent", 22292, 22],
+                        ["park", 22314, 21],
+                        ["part", 22335, 21],
+                        ["pass", 22356, 19],
+                        ["pass by", 22375, 23],
+                        ["passenger", 22398, 26],
+                        ["paste", 22424, 21],
+                        ["pay", 22445, 18],
+                        ["payment", 22463, 22],
+                        ["peace", 22485, 21],
+                        ["pen", 22506, 20],
+                        ["pencil", 22526, 23],
+                        ["penknife", 22549, 26],
+                        ["pepper", 22575, 25],
+                        ["peppermint", 22600, 30],
+                        ["perhaps", 22630, 53],
+                        ["period", 22683, 24],
+                        ["period of time", 22707, 29],
+                        ["permission", 22736, 27],
+                        ["permit", 22763, 23],
+                        ["person", 22786, 20],
+                        ["phonograph", 22806, 28],
+                        ["picture", 22834, 23],
+                        ["piece", 22857, 23],
+                        ["pier", 22880, 19],
+                        ["pig", 22899, 21],
+                        ["pigeon", 22920, 22],
+                        ["pill", 22942, 22],
+                        ["pillar", 22964, 22],
+                        ["pillow case", 22986, 29],
+                        ["pineapple", 23015, 26],
+                        ["pint", 23041, 21],
+                        ["pipe", 23062, 19],
+                        ["pit", 23081, 19],
+                        ["place", 23100, 22],
+                        ["plague", 23122, 22],
+                        ["plaited mat", 23144, 27],
+                        ["planet", 23171, 23],
+                        ["plank", 23194, 20],
+                        ["plate", 23214, 22],
+                        ["play", 23236, 20],
+                        ["play at", 23256, 28],
+                        ["playing cards", 23284, 30],
+                        ["please", 23314, 54],
+                        ["plot", 23368, 22],
+                        ["poor", 23390, 24],
+                        ["poor quality", 23414, 31],
+                        ["port", 23445, 22],
+                        ["porter", 23467, 25],
+                        ["possessions", 23492, 26],
+                        ["post", 23518, 20],
+                        ["potato", 23538, 22],
+                        ["pound", 23560, 21],
+                        ["pray", 23581, 19],
+                        ["prayer", 23600, 21],
+                        ["precede", 23621, 26],
+                        ["prefer", 23647, 22],
+                        ["prepare for", 23669, 32],
+                        ["present", 23701, 24],
+                        ["press upon", 23725, 26],
+                        ["price", 23751, 19],
+                        ["priest", 23770, 23],
+                        ["proclaim", 23793, 26],
+                        ["produce", 23819, 21],
+                        ["profit", 23840, 23],
+                        ["program", 23863, 27],
+                        ["progress", 23890, 28],
+                        ["prohibition", 23918, 30],
+                        ["prophet", 23948, 48],
+                        ["pull", 23996, 19],
+                        ["pump", 24015, 20],
+                        ["pupil", 24035, 26],
+                        ["purple", 24061, 27],
+                        ["purpose", 24088, 21],
+                        ["push", 24109, 21],
+                        ["put", 24130, 37],
+                        ["put aside for", 24167, 29],
+                        ["put in", 24196, 47],
+                        ["put out", 24243, 22],
+                        ["put out for", 24265, 27],
+                        ["quarrel", 24292, 77],
+                        ["quarter", 24369, 22],
+                        ["question", 24391, 24],
+                        ["quickly", 24415, 25],
+                        ["rain", 24440, 42],
+                        ["rat", 24482, 19],
+                        ["ration", 24501, 22],
+                        ["raw", 24523, 21],
+                        ["razor", 24544, 21],
+                        ["read", 24565, 19],
+                        ["reader", 24584, 24],
+                        ["ready", 24608, 24],
+                        ["reap", 24632, 19],
+                        ["reason", 24651, 23],
+                        ["rebuke", 24674, 22],
+                        ["recommend", 24696, 25],
+                        ["red", 24721, 22],
+                        ["reduce", 24743, 24],
+                        ["refuse", 24767, 22],
+                        ["rejoice", 24789, 24],
+                        ["relative", 24813, 24],
+                        ["religion", 24837, 23],
+                        ["remain", 24860, 21],
+                        ["remember", 24881, 26],
+                        ["remind", 24907, 25],
+                        ["rent", 24932, 19],
+                        ["rent out", 24951, 26],
+                        ["repair", 24977, 26],
+                        ["repair for", 25003, 31],
+                        ["repent", 25034, 21],
+                        ["resemble", 25055, 25],
+                        ["reserve", 25080, 23],
+                        ["respect", 25103, 25],
+                        ["rest", 25128, 22],
+                        ["restplacetime", 25150, 33],
+                        ["return", 25183, 21],
+                        ["rhino", 25204, 22],
+                        ["rib", 25226, 19],
+                        ["rice", 25245, 21],
+                        ["rice plant", 25266, 27],
+                        ["right side", 25293, 26],
+                        ["righteousness", 25319, 28],
+                        ["ring", 25347, 19],
+                        ["rinse mouth", 25366, 29],
+                        ["ripe", 25395, 21],
+                        ["rise up", 25416, 25],
+                        ["river", 25441, 19],
+                        ["road", 25460, 19],
+                        ["rob", 25479, 24],
+                        ["roll", 25503, 21],
+                        ["room", 25524, 21],
+                        ["root", 25545, 20],
+                        ["rope", 25565, 20],
+                        ["rot", 25585, 17],
+                        ["rotten", 25602, 23],
+                        ["rug", 25625, 19],
+                        ["run away", 25644, 52],
+                        ["run to", 25696, 25],
+                        ["sack", 25721, 20],
+                        ["sailor", 25741, 24],
+                        ["sake", 25765, 20],
+                        ["saliva", 25785, 21],
+                        ["salt", 25806, 21],
+                        ["saturday", 25827, 27],
+                        ["save", 25854, 19],
+                        ["scarf", 25873, 20],
+                        ["schedule", 25893, 25],
+                        ["school", 25918, 69],
+                        ["scissors", 25987, 24],
+                        ["score twenty", 26011, 31],
+                        ["scorpion", 26042, 22],
+                        ["screw", 26064, 48],
+                        ["screwdriver", 26112, 30],
+                        ["sea", 26142, 20],
+                        ["second", 26162, 23],
+                        ["secret", 26185, 21],
+                        ["see", 26206, 17],
+                        ["seed", 26223, 20],
+                        ["sell", 26243, 18],
+                        ["send", 26261, 21],
+                        ["send back", 26282, 27],
+                        ["send for", 26309, 26],
+                        ["send off", 26335, 28],
+                        ["sense", 26363, 21],
+                        ["september", 26384, 28],
+                        ["serpent", 26412, 22],
+                        ["servant", 26434, 26],
+                        ["service", 26460, 28],
+                        ["set down", 26488, 22],
+                        ["seven", 26510, 22],
+                        ["seventy", 26532, 26],
+                        ["sew", 26558, 19],
+                        ["shadow", 26577, 23],
+                        ["share", 26600, 23],
+                        ["shark", 26623, 20],
+                        ["sharp", 26643, 22],
+                        ["sharpness", 26665, 25],
+                        ["shave", 26690, 20],
+                        ["she", 26710, 21],
+                        ["shed", 26731, 20],
+                        ["sheep", 26751, 22],
+                        ["shell", 26773, 22],
+                        ["shilling", 26795, 27],
+                        ["shine on", 26822, 26],
+                        ["shirt", 26848, 21],
+                        ["shiver", 26869, 25],
+                        ["shoe", 26894, 20],
+                        ["shop", 26914, 19],
+                        ["short", 26933, 22],
+                        ["shortcut", 26955, 25],
+                        ["shorten", 26980, 25],
+                        ["shoulder", 27005, 23],
+                        ["show", 27028, 46],
+                        ["sick person", 27074, 29],
+                        ["sickle", 27103, 22],
+                        ["sickness", 27125, 54],
+                        ["side", 27179, 43],
+                        ["silk", 27222, 21],
+                        ["since", 27243, 24],
+                        ["sing", 27267, 19],
+                        ["sister", 27286, 21],
+                        ["sit", 27307, 17],
+                        ["six", 27324, 20],
+                        ["sixty", 27344, 24],
+                        ["size", 27368, 21],
+                        ["skin", 27389, 20],
+                        ["sky", 27409, 20],
+                        ["slaughter", 27429, 26],
+                        ["slave", 27455, 22],
+                        ["sleep", 27477, 24],
+                        ["sleep in", 27501, 24],
+                        ["slippers", 27525, 25],
+                        ["slippery place", 27550, 32],
+                        ["slit", 27582, 20],
+                        ["slowly", 27602, 27],
+                        ["small", 27629, 22],
+                        ["small fish", 27651, 26],
+                        ["small lump", 27677, 28],
+                        ["small thing", 27705, 28],
+                        ["smallpox", 27733, 23],
+                        ["smell", 27756, 44],
+                        ["smell bad", 27800, 24],
+                        ["smell good", 27824, 26],
+                        ["smile", 27850, 24],
+                        ["smoke", 27874, 21],
+                        ["smooth", 27895, 51],
+                        ["snake", 27946, 21],
+                        ["so", 27967, 44],
+                        ["soap", 28011, 21],
+                        ["socks", 28032, 21],
+                        ["soft", 28053, 22],
+                        ["soil", 28075, 21],
+                        ["soldier", 28096, 24],
+                        ["sole", 28120, 21],
+                        ["solitary", 28141, 26],
+                        ["some", 28167, 21],
+                        ["son", 28188, 19],
+                        ["song", 28207, 20],
+                        ["sorceror", 28227, 25],
+                        ["sore", 28252, 22],
+                        ["sorrow", 28274, 23],
+                        ["soul", 28297, 19],
+                        ["sound", 28316, 21],
+                        ["soup", 28337, 19],
+                        ["south", 28356, 22],
+                        ["sparkle", 28378, 24],
+                        ["speak", 28402, 42],
+                        ["special", 28444, 27],
+                        ["spice", 28471, 22],
+                        ["spider", 28493, 23],
+                        ["spinach", 28516, 25],
+                        ["split", 28541, 21],
+                        ["spoon", 28562, 22],
+                        ["spot", 28584, 18],
+                        ["spring", 28602, 23],
+                        ["springmechanism", 28625, 33],
+                        ["sprout", 28658, 23],
+                        ["squeeze", 28681, 46],
+                        ["squeeze out", 28727, 27],
+                        ["stage", 28754, 22],
+                        ["stairs", 28776, 47],
+                        ["stand", 28823, 22],
+                        ["star", 28845, 20],
+                        ["start up", 28865, 26],
+                        ["state", 28891, 20],
+                        ["station", 28911, 26],
+                        ["steal", 28937, 19],
+                        ["steamship", 28956, 24],
+                        ["steeringgear", 28980, 31],
+                        ["step", 29011, 43],
+                        ["stick", 29054, 44],
+                        ["stick on", 29098, 26],
+                        ["stiff porridge", 29124, 30],
+                        ["still", 29154, 22],
+                        ["stir up", 29176, 24],
+                        ["stomach", 29200, 23],
+                        ["stone", 29223, 20],
+                        ["stop", 29243, 20],
+                        ["stop up", 29263, 22],
+                        ["story", 29285, 23],
+                        ["stranger", 29308, 24],
+                        ["stream", 29332, 23],
+                        ["strength", 29355, 24],
+                        ["strengthen", 29379, 29],
+                        ["striped", 29408, 25],
+                        ["strong", 29433, 24],
+                        ["succeed", 29457, 26],
+                        ["such a one", 29483, 27],
+                        ["suddenly", 29510, 28],
+                        ["sugar", 29538, 22],
+                        ["sugar cane", 29560, 25],
+                        ["sun", 29585, 17],
+                        ["sunday", 29602, 25],
+                        ["swahili language", 29627, 36],
+                        ["sweep", 29663, 21],
+                        ["sweet", 29684, 22],
+                        ["swell", 29706, 21],
+                        ["swim", 29727, 21],
+                        ["table", 29748, 20],
+                        ["tail", 29768, 19],
+                        ["tailor", 29787, 23],
+                        ["take away", 29810, 25],
+                        ["take leave of", 29835, 27],
+                        ["take off", 29862, 25],
+                        ["take on", 29887, 23],
+                        ["take out", 29910, 22],
+                        ["tamarind", 29932, 25],
+                        ["tank", 29957, 20],
+                        ["tap", 29977, 20],
+                        ["tape", 29997, 20],
+                        ["tar", 30017, 19],
+                        ["tax", 30036, 20],
+                        ["tea", 30056, 19],
+                        ["teach", 30075, 72],
+                        ["teacher", 30147, 25],
+                        ["teapot", 30172, 21],
+                        ["telephone", 30193, 24],
+                        ["tell", 30217, 20],
+                        ["temple", 30237, 23],
+                        ["ten", 30260, 20],
+                        ["termite", 30280, 23],
+                        ["test", 30303, 21],
+                        ["thank", 30324, 23],
+                        ["that", 30347, 19],
+                        ["that place", 30366, 27],
+                        ["theft", 30393, 20],
+                        ["their", 30413, 20],
+                        ["then", 30433, 90],
+                        ["there", 30523, 70],
+                        ["there is", 30593, 25],
+                        ["there is not there", 30618, 11],
+                        ["they", 30629, 21],
+                        ["thief", 30650, 21],
+                        ["thin porridge", 30671, 27],
+                        ["thing", 30698, 20],
+                        ["think", 30718, 43],
+                        ["thirst", 30761, 20],
+                        ["thirty", 30781, 29],
+                        ["thorn", 30810, 21],
+                        ["those", 30831, 20],
+                        ["thought", 30851, 22],
+                        ["thousand", 30873, 25],
+                        ["three", 30898, 22],
+                        ["throat", 30920, 20],
+                        ["throw", 30940, 20],
+                        ["thursday", 30960, 27],
+                        ["tighten", 30987, 22],
+                        ["time", 31009, 43],
+                        ["tin can", 31052, 22],
+                        ["to", 31074, 19],
+                        ["today", 31093, 21],
+                        ["together", 31114, 27],
+                        ["tomato", 31141, 24],
+                        ["tomorrow", 31165, 26],
+                        ["tongs", 31191, 21],
+                        ["tongue", 31212, 22],
+                        ["tooth", 31234, 20],
+                        ["toothbrush", 31254, 27],
+                        ["top", 31281, 19],
+                        ["tortoise", 31300, 23],
+                        ["touch", 31323, 20],
+                        ["tourniquet", 31343, 28],
+                        ["tow", 31371, 22],
+                        ["town", 31393, 18],
+                        ["train", 31411, 20],
+                        ["translate", 31431, 26],
+                        ["travel", 31457, 23],
+                        ["travel by", 31480, 27],
+                        ["traveller", 31507, 27],
+                        ["tray", 31534, 20],
+                        ["treasury", 31554, 25],
+                        ["tree", 31579, 18],
+                        ["tribe", 31597, 22],
+                        ["trouble", 31619, 75],
+                        ["truly", 31694, 23],
+                        ["trunk", 31717, 19],
+                        ["trust", 31736, 23],
+                        ["tuesday", 31759, 25],
+                        ["turmeric", 31784, 26],
+                        ["turn", 31810, 20],
+                        ["turn around", 31830, 27],
+                        ["twelve", 31857, 29],
+                        ["twenty", 31886, 27],
+                        ["two", 31913, 20],
+                        ["umbrella", 31933, 26],
+                        ["uncle", 31959, 22],
+                        ["uncover", 31981, 23],
+                        ["undershirt", 32004, 27],
+                        ["understand", 32031, 27],
+                        ["undress", 32058, 21],
+                        ["unfasten", 32079, 25],
+                        ["unfold", 32104, 23],
+                        ["unity", 32127, 21],
+                        ["unravel", 32148, 23],
+                        ["unripe", 32171, 24],
+                        ["unstop", 32195, 22],
+                        ["until", 32217, 74],
+                        ["up to", 32291, 24],
+                        ["upbringing", 32315, 27],
+                        ["upper floor", 32342, 29],
+                        ["uproot", 32371, 22],
+                        ["us", 32393, 20],
+                        ["use", 32413, 19],
+                        ["use for", 32432, 25],
+                        ["vacation", 32457, 25],
+                        ["vaccinate", 32482, 27],
+                        ["value", 32509, 23],
+                        ["vegetable", 32532, 25],
+                        ["very", 32557, 43],
+                        ["vessel", 32600, 23],
+                        ["view", 32623, 23],
+                        ["village", 32646, 24],
+                        ["vinegar", 32670, 22],
+                        ["wage", 32692, 23],
+                        ["wages", 32715, 21],
+                        ["wait", 32736, 20],
+                        ["wait for", 32756, 25],
+                        ["waiter", 32781, 26],
+                        ["wake", 32807, 19],
+                        ["walk", 32826, 21],
+                        ["wall", 32847, 20],
+                        ["want", 32867, 19],
+                        ["warm", 32886, 25],
+                        ["warn", 32911, 19],
+                        ["wash", 32930, 61],
+                        ["wash hands", 32991, 25],
+                        ["watch out", 33016, 27],
+                        ["watch over", 33043, 27],
+                        ["water", 33070, 20],
+                        ["water bottle", 33090, 29],
+                        ["waterchannel", 33119, 31],
+                        ["wave", 33150, 20],
+                        ["wax", 33170, 17],
+                        ["we", 33187, 20],
+                        ["wealthy", 33207, 26],
+                        ["weave", 33233, 20],
+                        ["wedding", 33253, 23],
+                        ["wednesday", 33276, 28],
+                        ["week", 33304, 40],
+                        ["weight", 33344, 22],
+                        ["well", 33366, 46],
+                        ["west", 33412, 24],
+                        ["what", 33436, 23],
+                        ["what kind", 33459, 27],
+                        ["wheat", 33486, 21],
+                        ["wheel", 33507, 24],
+                        ["wheeled vehicle", 33531, 30],
+                        ["when", 33561, 22],
+                        ["where", 33583, 23],
+                        ["which", 33606, 21],
+                        ["which is", 33627, 28],
+                        ["white", 33655, 22],
+                        ["white hair", 33677, 24],
+                        ["who", 33701, 22],
+                        ["whole", 33723, 22],
+                        ["whole day", 33745, 25],
+                        ["why", 33770, 46],
+                        ["wickedness", 33816, 26],
+                        ["wide", 33842, 21],
+                        ["widow", 33863, 21],
+                        ["widower", 33884, 23],
+                        ["width", 33907, 21],
+                        ["wife", 33928, 18],
+                        ["wind", 33946, 20],
+                        ["window", 33966, 24],
+                        ["wine", 33990, 21],
+                        ["wing", 34011, 20],
+                        ["wipe", 34031, 19],
+                        ["wipe with", 34050, 25],
+                        ["with", 34075, 21],
+                        ["with me", 34096, 9],
+                        ["withhold from", 34105, 30],
+                        ["without", 34135, 54],
+                        ["woman", 34189, 25],
+                        ["women", 34214, 24],
+                        ["word", 34238, 19],
+                        ["work", 34257, 19],
+                        ["world", 34276, 21],
+                        ["worship", 34297, 48],
+                        ["wound", 34345, 22],
+                        ["wrist", 34367, 22],
+                        ["write", 34389, 22],
+                        ["writing", 34411, 24],
+                        ["yard", 34435, 19],
+                        ["year", 34454, 20],
+                        ["yes", 34474, 65],
+                        ["yesterday", 34539, 45],
+                        ["you", 34584, 21],
+                        ["you plural", 34605, 29],
+                        ["your", 34634, 20],
+                        ["your plural", 34654, 27],
+                        ["youth", 34681, 22],
+                        ["zanzibar", 34703, 25]])
+        end
+    end
+
+    def test_get_index_offsets
+        StardictIndex.open(@index_file) do |index|
+            word_list = index.get_word_list
+
+            index.get_index_offsets.each_with_index do |index_offset, i|
+                fields = index.get_fields(index_offset)
+                assert_equal(fields, word_list[i])
+            end
+        end
+    end
+
+end



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]