Ruby 1.9 で migemo
うっかり Ruby を 1.9 にバージョンアップしたら migemo が動かなくなったので、修正してみた。正しいかどうかは知らん。
追記 : またうっかり Ruby を 1.9.1-p430 にバージョンアップしてしまい、migemo が動かなくなったので再修正した。
修正版を GitHub に登録してみた。 https://github.com/yshl/migemo-for-Ruby-1.9
以下パッチ。
migemo が利用しているローマ字かな変換ライブラリ romkan の修正部分。
diff -ur ruby-romkan-0.4/romkan.rb ruby-romkan-0.4-1.9/romkan.rb
--- ruby-romkan-0.4/romkan.rb 2002-02-12 10:45:16.000000000 +0900
+++ ruby-romkan-0.4-1.9/romkan.rb 2009-08-14 05:09:57.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Romkan - a Romaji <-> Kana conversion library for Ruby.
 #
migemo の修正部分。
Ruby-1.9.1-p243 用
diff -ur migemo-0.40/genchars.sh migemo-0.40-1.9/genchars.sh
--- migemo-0.40/genchars.sh 2001-08-13 18:30:48.000000000 +0900
+++ migemo-0.40-1.9/genchars.sh 2009-08-14 05:03:43.000000000 +0900
@@ -1,6 +1,6 @@
 #! /bin/sh
 
-ruby -rromkan -nle 'head = split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
+ruby -rromkan -nle 'head = $_.split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
 
 # Get the top 500 frequent ngrams.
 for i in 1 2 3 4 5 6 7 8; do
diff -ur migemo-0.40/migemo migemo-0.40-1.9/migemo
--- migemo-0.40/migemo 2003-05-27 12:01:10.000000000 +0900
+++ migemo-0.40-1.9/migemo 2009-08-14 05:03:43.000000000 +0900
@@ -10,7 +10,7 @@
 # the GNU General Public License version 2.
 #
 
-$KCODE = "e"
+#$KCODE = "e"
 require 'migemo'
 require 'getoptlong'
 
diff -ur migemo-0.40/migemo-cache.rb migemo-0.40-1.9/migemo-cache.rb
--- migemo-0.40/migemo-cache.rb 2001-07-15 02:38:56.000000000 +0900
+++ migemo-0.40-1.9/migemo-cache.rb 2009-08-14 05:03:43.000000000 +0900
@@ -1,5 +1,5 @@
 require 'migemo'
-$KCODE="e"
+#$KCODE="e"
 raise if ARGV[0] == nil
 dict = ARGV[0]
 static_dict = MigemoStaticDict.new(dict)
@@ -18,10 +18,13 @@
   migemo = Migemo.new(static_dict, pattern)
   migemo.optimization = 3
   data = Marshal.dump(migemo.regex_tree)
-  output = [pattern.length].pack("N") + pattern +
-    [data.length].pack("N") + data
+#  output = [pattern.length].pack("N") + pattern +
+#    [data.length].pack("N") + data
+  output = [pattern.bytesize].pack("N") + pattern.dup.force_encoding("ASCII-8BIT") +
+    [data.bytesize].pack("N") + data
   cache.print output
   index.print [idx].pack("N")
-  idx += output.length
+#  idx += output.length
+  idx += output.bytesize
 end
 
diff -ur migemo-0.40/migemo-convert.rb migemo-0.40-1.9/migemo-convert.rb
--- migemo-0.40/migemo-convert.rb 2003-05-26 15:55:22.000000000 +0900
+++ migemo-0.40-1.9/migemo-convert.rb 2009-08-14 05:03:43.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,7 @@
 #
 # Convert a SKK's dictionary into Migemo's.
 #
-$KCODE= "e"
+#$KCODE= "e"
 require 'romkan'
 
 HIRAGANA = "[ぁ-んー〜]"
diff -ur migemo-0.40/migemo-dict.rb migemo-0.40-1.9/migemo-dict.rb
--- migemo-0.40/migemo-dict.rb 2002-10-22 14:38:14.000000000 +0900
+++ migemo-0.40-1.9/migemo-dict.rb 2009-08-14 05:03:43.000000000 +0900
@@ -122,8 +122,8 @@
   def lookup (pattern)
     raise if pattern == nil
     pattern = pattern.downcase
-    idx = @index.bsearch_first do |idx|
-      key, data = decompose(idx)
+    idx = @index.bsearch_first do |idx1|
+      key, data = decompose(idx1)
       key <=> pattern
     end
     if idx
diff -ur migemo-0.40/migemo-index.rb migemo-0.40-1.9/migemo-index.rb
--- migemo-0.40/migemo-index.rb 2003-05-26 15:45:53.000000000 +0900
+++ migemo-0.40-1.9/migemo-index.rb 2009-08-14 05:03:43.000000000 +0900
@@ -19,5 +19,6 @@
   unless line =~ /^;/
     print [offset].pack("N")
   end
-  offset += line.length
+#  offset += line.length
+  offset += line.bytesize
 end
diff -ur migemo-0.40/migemo.rb.in migemo-0.40-1.9/migemo.rb.in
--- migemo-0.40/migemo.rb.in 2003-05-28 21:00:52.000000000 +0900
+++ migemo-0.40-1.9/migemo.rb.in 2009-08-14 05:03:43.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,7 @@
 require 'migemo-dict'
 require 'migemo-regex'
 require 'romkan'
-require 'jcode'
+#require 'jcode'
 include MigemoRegex
 
 class String
@@ -177,7 +178,7 @@
     expand_kanas.each do |x|
       compiler.push(x)
       compiler.push(x.to_katakana)
-      expand_words(@static_dict, x).each do |x| compiler.push(x) end
+      expand_words(@static_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@static_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
@@ -188,7 +189,7 @@
   def lookup_user_dict
     compiler = RegexCompiler.new
     expand_kanas.each do |x|
-      expand_words(@user_dict, x).each do |x| compiler.push(x) end
+      expand_words(@user_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@user_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
diff -ur migemo-0.40/tests/Makefile.in migemo-0.40-1.9/tests/Makefile.in
--- migemo-0.40/tests/Makefile.in 2003-05-29 17:09:03.000000000 +0900
+++ migemo-0.40-1.9/tests/Makefile.in 2009-08-14 05:03:43.000000000 +0900
@@ -203,7 +203,7 @@
 test-dict.cache: test-dict test-dict.idx ../migemo-cache.rb
 	ruby -rromkan -ne 'puts $$1.to_roma if /^(.+?) /' test-dict |\
 	while read line; do\
-	    echo $$line | ruby -ne 'chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
+	    echo $$line | ruby -ne '$$_.chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
 	done | ruby -I.. ../migemo-cache.rb test-dict
 
 clean-local:
Ruby-1.9.1-p430 用
diff -ur migemo-0.40/genchars.sh migemo-0.40-1.9.1/genchars.sh
--- migemo-0.40/genchars.sh 2001-08-13 18:30:48.000000000 +0900
+++ migemo-0.40-1.9.1/genchars.sh 2010-09-24 00:32:26.000000000 +0900
@@ -1,6 +1,6 @@
 #! /bin/sh
 
-ruby -rromkan -nle 'head = split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
+ruby -rromkan -nle 'head = $_.split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
 
 # Get the top 500 frequent ngrams.
 for i in 1 2 3 4 5 6 7 8; do
diff -ur migemo-0.40/migemo migemo-0.40-1.9.1/migemo
--- migemo-0.40/migemo 2003-05-27 12:01:10.000000000 +0900
+++ migemo-0.40-1.9.1/migemo 2010-09-24 00:32:26.000000000 +0900
@@ -10,7 +10,6 @@
 # the GNU General Public License version 2.
 #
 
-$KCODE = "e"
 require 'migemo'
 require 'getoptlong'
 
diff -ur migemo-0.40/migemo-cache.rb migemo-0.40-1.9.1/migemo-cache.rb
--- migemo-0.40/migemo-cache.rb 2001-07-15 02:38:56.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-cache.rb 2010-09-24 00:32:26.000000000 +0900
@@ -1,5 +1,4 @@
 require 'migemo'
-$KCODE="e"
 raise if ARGV[0] == nil
 dict = ARGV[0]
 static_dict = MigemoStaticDict.new(dict)
@@ -18,10 +17,10 @@
   migemo = Migemo.new(static_dict, pattern)
   migemo.optimization = 3
   data = Marshal.dump(migemo.regex_tree)
-  output = [pattern.length].pack("N") + pattern +
-    [data.length].pack("N") + data
+  output = [pattern.bytesize].pack("N") + pattern.dup.force_encoding("ASCII-8BIT") +
+    [data.bytesize].pack("N") + data
   cache.print output
   index.print [idx].pack("N")
-  idx += output.length
+  idx += output.bytesize
 end
 
diff -ur migemo-0.40/migemo-convert.rb migemo-0.40-1.9.1/migemo-convert.rb
--- migemo-0.40/migemo-convert.rb 2003-05-26 15:55:22.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-convert.rb 2010-09-24 00:32:26.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,6 @@
 #
 # Convert a SKK's dictionary into Migemo's.
 #
-$KCODE= "e"
 require 'romkan'
 
 HIRAGANA = "[ぁ-んー〜]"
diff -ur migemo-0.40/migemo-dict.rb migemo-0.40-1.9.1/migemo-dict.rb
--- migemo-0.40/migemo-dict.rb 2002-10-22 14:38:14.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-dict.rb 2010-09-24 00:32:26.000000000 +0900
@@ -122,8 +122,8 @@
   def lookup (pattern)
     raise if pattern == nil
     pattern = pattern.downcase
-    idx = @index.bsearch_first do |idx|
-      key, data = decompose(idx)
+    idx = @index.bsearch_first do |idx1|
+      key, data = decompose(idx1)
       key <=> pattern
     end
     if idx
diff -ur migemo-0.40/migemo-index.rb migemo-0.40-1.9.1/migemo-index.rb
--- migemo-0.40/migemo-index.rb 2003-05-26 15:45:53.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-index.rb 2010-09-24 00:32:26.000000000 +0900
@@ -19,5 +19,5 @@
   unless line =~ /^;/
     print [offset].pack("N")
   end
-  offset += line.length
+  offset += line.bytesize
 end
diff -ur migemo-0.40/migemo.rb.in migemo-0.40-1.9.1/migemo.rb.in
--- migemo-0.40/migemo.rb.in 2003-05-28 21:00:52.000000000 +0900
+++ migemo-0.40-1.9.1/migemo.rb.in 2010-09-24 00:33:04.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,6 @@
 require 'migemo-dict'
 require 'migemo-regex'
 require 'romkan'
-require 'jcode'
 include MigemoRegex
 
 class String
@@ -24,7 +24,7 @@
   end
 
   def quotemeta
-    self.gsub(/([^ \w])/, '\\\\\\1')
+    self.gsub(/([[:punct:]])/, '\\\\\\1')
   end
 
   def first
@@ -177,7 +177,7 @@
     expand_kanas.each do |x|
       compiler.push(x)
       compiler.push(x.to_katakana)
-      expand_words(@static_dict, x).each do |x| compiler.push(x) end
+      expand_words(@static_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@static_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
@@ -188,7 +188,7 @@
   def lookup_user_dict
     compiler = RegexCompiler.new
     expand_kanas.each do |x|
-      expand_words(@user_dict, x).each do |x| compiler.push(x) end
+      expand_words(@user_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@user_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
diff -ur migemo-0.40/tests/Makefile.in migemo-0.40-1.9.1/tests/Makefile.in
--- migemo-0.40/tests/Makefile.in 2003-05-29 17:09:03.000000000 +0900
+++ migemo-0.40-1.9.1/tests/Makefile.in 2010-09-24 00:32:26.000000000 +0900
@@ -203,7 +203,7 @@
 test-dict.cache: test-dict test-dict.idx ../migemo-cache.rb
 	ruby -rromkan -ne 'puts $$1.to_roma if /^(.+?) /' test-dict |\
 	while read line; do\
-	    echo $$line | ruby -ne 'chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
+	    echo $$line | ruby -ne '$$_.chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
 	done | ruby -I.. ../migemo-cache.rb test-dict
 
 clean-local: