fantasdic r327 - in trunk: . lib/fantasdic/sources test test/data
- From: mblondel svn gnome org
- To: svn-commits-list gnome org
- Subject: fantasdic r327 - in trunk: . lib/fantasdic/sources test test/data
- Date: Fri, 22 Aug 2008 18:22:18 +0000 (UTC)
Author: mblondel
Date: Fri Aug 22 18:22:17 2008
New Revision: 327
URL: http://svn.gnome.org/viewvc/fantasdic?rev=327&view=rev
Log:
* test/test_edict_file.rb:
* test/data/edict.eucjp.gz:
* test/data/edict.utf8.gz:
* test/data/edict.utf8:
* test/data/edict.eucjp: Unit test for EdictFile source + test data.
* lib/fantasdic/sources/edict_file.rb: Better way to choose between the two
available implementations. Fixed a bunch of bugs spotted by the unit test.
Added:
trunk/test/data/edict.eucjp
trunk/test/data/edict.eucjp.gz (contents, props changed)
trunk/test/data/edict.utf8
trunk/test/data/edict.utf8.gz (contents, props changed)
trunk/test/test_edict_file.rb
Modified:
trunk/ChangeLog
trunk/lib/fantasdic/sources/edict_file.rb
Modified: trunk/lib/fantasdic/sources/edict_file.rb
==============================================================================
--- trunk/lib/fantasdic/sources/edict_file.rb (original)
+++ trunk/lib/fantasdic/sources/edict_file.rb Fri Aug 22 18:22:17 2008
@@ -20,13 +20,7 @@
module Fantasdic
module Source
-class EdictFile < Base
- authors ["Mathieu Blondel"]
- title _("EDICT file")
- description _("Look up words in an EDICT file.")
- license Fantasdic::GPL
- copyright "Copyright (C) 2007 Mathieu Blondel"
- no_databases true
+class EdictFileBase < Base
STRATEGIES_DESC = {
"define" => "Results match with the word exactly.",
@@ -45,7 +39,6 @@
HAVE_EGREP = (File.which("egrep") and File.which("iconv") and
File.which("gunzip") and File.which("cat"))
-
class ConfigWidget < Base::ConfigWidget
def initialize(*arg)
super(*arg)
@@ -183,13 +176,13 @@
wesc = escape_string(word)
if word.latin?
- regexp = "\/#{wesc}\/"
+ regexp = "/#{wesc}/"
elsif word.kana?
- regexp = "^#{wesc} |\[#{wesc}\]"
+ regexp = "^#{wesc} |\\[#{wesc}\\]"
elsif word.japanese?
regexp = "^#{wesc} "
else
- regexp = "^#{wesc}|\[#{wesc}\]|\/#{wesc}\/"
+ regexp = "^#{wesc}|\\[#{wesc}\\]|/#{wesc}/"
end
db = File.basename(@hash[:filename])
@@ -198,7 +191,7 @@
match_with_regexp(regexp).map do |line|
defi = Definition.new
defi.word = word
- defi.body = line
+ defi.body = line.strip
defi.database = db
defi.description = db_capitalize
defi
@@ -232,7 +225,7 @@
def match_word(db, word)
arr = []
- match_suffix(db, word).each do |line|
+ match_substring(db, word).each do |line|
get_fields(line).each do |field|
field.split(" ").each do |w|
if w == word
@@ -249,13 +242,13 @@
def match_prefix(db, word)
wesc = escape_string(word)
if word.latin?
- regexp = "\/#{wesc}[^\/]*\/"
+ regexp = "/#{wesc}"
elsif word.kana?
- regexp = "^#{wesc}|\[#{wesc}[^\]]*\]"
+ regexp = "^#{wesc}| \\[#{wesc}"
elsif word.japanese?
regexp = "^#{wesc}"
else
- regexp = "^#{wesc}|\[#{wesc}[^\]]*\]|\/#{wesc}[^\/]*\/"
+ regexp = "^#{wesc}|\\[#{wesc}|/#{wesc}"
end
match_with_regexp(regexp)
@@ -264,13 +257,13 @@
def match_suffix(db, word)
wesc = escape_string(word)
if word.latin?
- regexp = "\/[^\/]*#{wesc}\/"
+ regexp = "#{wesc}/"
elsif word.kana?
- regexp = "^[^\[]*#{wesc} |\[[^\]]*#{wesc}\]"
+ regexp = "#{wesc} \\[|#{wesc}\\]"
elsif word.japanese?
- regexp = "^[^\[]*#{wesc} "
+ regexp = "#{wesc} \\["
else
- regexp = "^[^\[]*#{wesc} |\[[^\]]*#{wesc}\]|\/[^\/]*#{wesc}\/"
+ regexp = "#{wesc} \\[|#{wesc}\\]|#{wesc}/"
end
match_with_regexp(regexp)
@@ -321,67 +314,74 @@
Regexp.escape(str).sub('"', "\\\"")
end
-end # class EdictFile
+end # class EdictFileBase
-if EdictFile::HAVE_EGREP
- # Using egrep. This is significantly faster!
- class EdictFile
- def initialize(*args)
- super(*args)
- edict_file_open.close # Tries to open file to ensure it exists
- end
- private
+# Using egrep. This is significantly faster!
+class EdictFileEgrep < EdictFileBase
+ def initialize(*args)
+ super(*args)
+ edict_file_open.close # Tries to open file to ensure it exists
+ end
- def match_with_regexp(regexp)
- cmd = get_command(regexp)
- IO.popen(cmd).readlines
- end
+ private
- def get_command(regexp)
- cmd = []
+ def match_with_regexp(regexp)
+ cmd = get_command(regexp)
+ IO.popen(cmd).readlines
+ end
- cmd << "cat #{@hash[:filename]}"
+ def get_command(regexp)
+ cmd = []
- if @hash[:filename] =~ /.gz$/
- cmd << "gunzip -c"
- end
+ cmd << "cat #{@hash[:filename]}"
- if @hash[:encoding] and @hash[:encoding] != "UTF-8"
- cmd << "iconv -f #{@hash[:encoding]} -t UTF-8"
- end
+ if @hash[:filename] =~ /.gz$/
+ cmd << "gunzip -c"
+ end
- cmd << "egrep \"#{regexp}\""
-
- cmd.join(" | ")
+ if @hash[:encoding] and @hash[:encoding] != "UTF-8"
+ cmd << "iconv -f #{@hash[:encoding]} -t UTF-8"
end
+ cmd << "egrep \"#{regexp}\""
+
+ cmd.join(" | ")
end
-else
- # Pure Ruby
- class EdictFile
- def initialize(*args)
- super(*args)
- if @hash and @hash[:encoding] != "UTF-8"
- # FIXME: Find a way to look up words in EUC-JP with reasonable
- # performance...
- raise Source::SourceError,
- _("Encoding not supported.")
- end
+end
+
+# Pure Ruby
+class EdictFileRuby < EdictFileBase
+ def initialize(*args)
+ super(*args)
+ if @hash and @hash[:encoding] != "UTF-8"
+ # FIXME: Find a way to look up words in EUC-JP with reasonable
+ # performance...
+ raise Source::SourceError,
+ _("Encoding not supported.")
end
+ end
- private
+ private
- def match_with_regexp(regexp)
- edict_file_open do |file|
- file.grep(Regexp.new(regexp))
- end
+ def match_with_regexp(regexp)
+ edict_file_open do |file|
+ file.grep(Regexp.new(regexp))
end
-
end
+end
-end # if EdictFile::HAVE_EGREP
+class EdictFile < (EdictFileBase::HAVE_EGREP ? EdictFileEgrep : EdictFileRuby)
+ authors ["Mathieu Blondel"]
+ title _("EDICT file")
+ description _("Look up words in an EDICT file.")
+ license Fantasdic::GPL
+ copyright "Copyright (C) 2007 Mathieu Blondel"
+ no_databases true
+end
end
end
+
+Fantasdic::Source::Base.register_source(Fantasdic::Source::EdictFile)
Added: trunk/test/data/edict.eucjp
==============================================================================
--- (empty file)
+++ trunk/test/data/edict.eucjp Fri Aug 22 18:22:17 2008
@@ -0,0 +1,20 @@
+ó [¤¦¤·¤ç] /(n) tooth decay/
+󤢤´] /(n) chin/jaw/
+󤬤ó(n) niche or alcove for an image/
+ó [¤¬¤ó¦] /(n) Buddhist altar light/
+ó¹Ã[¤«¤á¤³¤¦] /(oK) (n) tortoise shell/
+ó»Ò[¤«¤á¤³] /(oK) (n) (1) young turtle (tortoise)/(2) turtle (tortoise) shell/
+ó¼ê¤«¤á¤Æ /(oK) (n) barnacle/
+ó [¤¤«¤ó(oK) (n) pattern/example/model/paragon/mirror/
+ó [¤¤³¤¦] /(oK) (n) tortoise shell/
+ó [¤¤Ã³¤¦] /(oK) (n) tortoise shell/
+ó [¤«¤á¤·] /(oK) (n) (uk) shield bug/stink bug/
+ó [¤«¤á¤é/(oK) (n) tortoise-shell divination/
+ó [¤¤Ü¯] /(oK) (n) tortoise-shell divination/
+ó [¤¤ì] /(oK) (n) crack/crevice/fissure/chap/
+ô¤Þ] /(oK) (n) (uk) yew plum pine (Podocarpus macrophyllus)/
+ô [¤Ïë¤ë/(adv) from afar/over a great distance/all the way/
+ô [¤Ïë] /(iK) (adj-na,adv,n) far/far away/distant/remote/far off/
+ô¤Ë[¤Ïë¤Ë /(iK) (adv) far off/in the distance/long ago/far/by far/far and away/
+ô [¤Ïë¤ë/(adv) from afar/over a great distance/all the way/
+ô¤ê] /(adj-t,adv-to) cold/
Added: trunk/test/data/edict.eucjp.gz
==============================================================================
Binary file. No diff available.
Added: trunk/test/data/edict.utf8
==============================================================================
--- (empty file)
+++ trunk/test/data/edict.utf8 Fri Aug 22 18:22:17 2008
@@ -0,0 +1,20 @@
+éè [ãããã] /(n) tooth decay/
+é [ãã] /(n) chin/jaw/
+é [ãã] /(n) niche or alcove for an image/
+éç [ãããã] /(n) Buddhist altar light/
+éãç [ããããã] /(oK) (n) tortoise shell/
+éãå [ãããã] /(oK) (n) (1) young turtle (tortoise)/(2) turtle (tortoise) shell/
+éãæ [ãããã] /(oK) (n) barnacle/
+éé [ããã] /(oK) (n) pattern/example/model/paragon/mirror/
+éç [ããã] /(oK) (n) tortoise shell/
+éç [ãããã] /(oK) (n) tortoise shell/
+éè [ãããã] /(oK) (n) (uk) shield bug/stink bug/
+éå [ãããã] /(oK) (n) tortoise-shell divination/
+éå [ããã] /(oK) (n) tortoise-shell divination/
+éè [ããã] /(oK) (n) crack/crevice/fissure/chap/
+æ [ãã] /(oK) (n) (uk) yew plum pine (Podocarpus macrophyllus)/
+éã [ãããã] /(adv) from afar/over a great distance/all the way/
+éã [ããã] /(iK) (adj-na,adv,n) far/far away/distant/remote/far off/
+éãã [ãããã] /(iK) (adv) far off/in the distance/long ago/far/by far/far and away/
+éé [ãããã] /(adv) from afar/over a great distance/all the way/
+å [ãã] /(adj-t,adv-to) cold/
Added: trunk/test/data/edict.utf8.gz
==============================================================================
Binary file. No diff available.
Added: trunk/test/test_edict_file.rb
==============================================================================
--- (empty file)
+++ trunk/test/test_edict_file.rb Fri Aug 22 18:22:17 2008
@@ -0,0 +1,157 @@
+# Fantasdic
+# Copyright (C) 2008 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+test_dir = File.expand_path(File.dirname(__FILE__))
+top_dir = File.expand_path(File.join(test_dir, ".."))
+lib_dir = File.expand_path(File.join(top_dir, "lib"))
+$test_data_dir = File.expand_path(File.join(test_dir, "data"))
+$LOAD_PATH.unshift(lib_dir)
+
+require "test/unit"
+require "fantasdic"
+require "fantasdic/sources/edict_file"
+
+$KCODE = "u"
+
+class TestEdictFileSource < Test::Unit::TestCase
+ include Fantasdic::Source
+
+ private
+
+ def test_define(source)
+ defs = source.define("*", "éç")
+ assert_equal(defs.length, 2)
+
+ assert_equal(defs[0].word, "éç")
+ assert_equal(defs[0].body, "éç [ããã] /(oK) (n) tortoise shell/")
+
+ assert_equal(defs[1].word, "éç")
+ assert_equal(defs[1].body, "éç [ãããã] /(oK) (n) tortoise shell/")
+
+ defs = source.define("*", "ããã")
+ assert_equal(defs.length, 1)
+
+ assert_equal(defs[0].word, "ããã")
+ assert_equal(defs[0].body, "éç [ããã] /(oK) (n) tortoise shell/")
+
+ defs = source.define("*", "tortoise")
+ assert_equal(defs.length, 0)
+ end
+
+ def test_match_prefix(source)
+ matches = source.match("*", "prefix", "é")
+ key = matches.keys.first
+ assert_equal(matches,
+ {key=>["éã", "éã", "éãã", "éé"]})
+
+ matches = source.match("*", "prefix", "ãã")
+ assert_equal(matches,
+ {key=>["éãç", "éãå", "éãæ", "éè", "éå"]})
+
+ matches = source.match("*", "prefix", "(adv)")
+ assert_equal(matches,
+ {key=>["(adv) from afar/over a great distance/all the way",
+ "(adv) from afar/over a great distance/all the way"]})
+
+ end
+
+ def test_match_suffix(source)
+ matches = source.match("*", "suffix", "ç")
+ key = matches.keys.first
+ assert_equal(matches,
+ {key=>["éãç", "éç", "éç"]})
+
+ matches = source.match("*", "suffix", "ãã")
+ assert_equal(matches,
+ {key=>["éãç", "éç", "éç"]})
+
+ matches = source.match("*", "suffix", "tion")
+ assert_equal(matches,
+ {key=>["(oK) (n) tortoise-shell divination",
+ "(oK) (n) tortoise-shell divination"]})
+ end
+
+ def test_match_word(source)
+ matches = source.match("*", "word", "éç")
+ key = matches.keys.first
+ assert_equal(matches,
+ {key=>["éç", "éç"]})
+
+ matches = source.match("*", "word", "ããã")
+ assert_equal(matches,
+ {key=>["éç"]})
+
+ matches = source.match("*", "word", "tortoise")
+ assert_equal(matches,
+ {key=>["(oK) (n) tortoise shell",
+ "(oK) (n) tortoise shell",
+ "(oK) (n) tortoise shell"]})
+ end
+
+ def test_match_substring(source)
+ matches = source.match("*", "substring", "é")
+ key = matches.keys.first
+ assert_equal(matches,
+ {key=>["éãç", "éãå", "éãæ", "éé",
+ "éç", "éç", "éè", "éå", "éå", "éè"]})
+
+ matches = source.match("*", "substring", "ããã")
+ assert_equal(matches,
+ {key=>["éãç", "éãå"]})
+
+ matches = source.match("*", "substring", "-shell")
+ assert_equal(matches,
+ {key=>["(oK) (n) tortoise-shell divination",
+ "(oK) (n) tortoise-shell divination"]})
+ end
+
+ public
+
+ utf8 = {:filename => File.join($test_data_dir, "edict.utf8"),
+ :encoding => "UTF-8"}
+ utf8gz = {:filename => File.join($test_data_dir, "edict.utf8.gz"),
+ :encoding => "UTF-8"}
+ eucjp = {:filename => File.join($test_data_dir, "edict.eucjp"),
+ :encoding => "EUC-JP"}
+ eucjpgz = {:filename => File.join($test_data_dir, "edict.eucjp.gz"),
+ :encoding => "EUC-JP"}
+
+ [EdictFileRuby, EdictFileEgrep].each do |klass|
+ [utf8, utf8gz, eucjp, eucjpgz].each do |hash|
+ encoding = hash[:encoding].gsub("-", "").downcase
+
+ # EUC-JP is not supported by EdictFileRuby implementation
+ next if klass == EdictFileRuby and encoding == "eucjp"
+
+ klass_short = klass.to_s.split("::").last.downcase
+ gz = hash[:filename] =~ /gz$/ ? "gz" : "nogz"
+
+ method = "test_#{klass_short}_#{encoding}_#{gz}_define"
+ define_method(method) do
+ send("test_define", klass.new(hash))
+ end
+
+ ["prefix", "suffix", "word", "substring"].each do |match|
+ method = "test_#{klass_short}_#{encoding}_#{gz}_#{match}"
+ define_method(method) do
+ send("test_match_#{match}", klass.new(hash))
+ end
+ end
+ end
+ end
+
+end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]