Ruby 1.9 で migemo

うっかり Ruby を 1.9 にバージョンアップしたら migemo (0.40) が動かなくなったので、修正してみた。ただし、この修正が正しいかどうかは保証できない。
追記 : またうっかり Ruby を 1.9.1-p430 にバージョンアップしてしまい、migemo が動かなくなったので再修正した。
GitHub に登録してみた。 https://github.com/yshl/migemo-for-Ruby-1.9
以下パッチ。
まず、migemo が利用しているローマ字かな変換ライブラリ romkan (ruby-romkan-0.4) の修正部分。ソースのエンコーディング (EUC-JP) を宣言するマジックコメントを追加するだけ。

diff -ur ruby-romkan-0.4/romkan.rb ruby-romkan-0.4-1.9/romkan.rb
--- ruby-romkan-0.4/romkan.rb	2002-02-12 10:45:16.000000000 +0900
+++ ruby-romkan-0.4-1.9/romkan.rb	2009-08-14 05:09:57.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Romkan - a Romaji <-> Kana conversion library for Ruby.
 #

migemo の修正部分。
Ruby-1.9.1-p243 用

diff -ur migemo-0.40/genchars.sh migemo-0.40-1.9/genchars.sh
--- migemo-0.40/genchars.sh	2001-08-13 18:30:48.000000000 +0900
+++ migemo-0.40-1.9/genchars.sh	2009-08-14 05:03:43.000000000 +0900
@@ -1,6 +1,6 @@
 #! /bin/sh
 
-ruby -rromkan -nle 'head = split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
+ruby -rromkan -nle 'head = $_.split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
 
 # Get the top 500 frequent ngrams.
 for i in 1 2 3 4 5 6 7 8; do
diff -ur migemo-0.40/migemo migemo-0.40-1.9/migemo
--- migemo-0.40/migemo	2003-05-27 12:01:10.000000000 +0900
+++ migemo-0.40-1.9/migemo	2009-08-14 05:03:43.000000000 +0900
@@ -10,7 +10,7 @@
 # the GNU General Public License version 2.
 #
 
-$KCODE = "e"
+#$KCODE = "e"
 
 require 'migemo'
 require 'getoptlong'
diff -ur migemo-0.40/migemo-cache.rb migemo-0.40-1.9/migemo-cache.rb
--- migemo-0.40/migemo-cache.rb	2001-07-15 02:38:56.000000000 +0900
+++ migemo-0.40-1.9/migemo-cache.rb	2009-08-14 05:03:43.000000000 +0900
@@ -1,5 +1,5 @@
 require 'migemo'
-$KCODE="e"
+#$KCODE="e"
 raise if ARGV[0] == nil
 dict = ARGV[0]
 static_dict = MigemoStaticDict.new(dict)
@@ -18,10 +18,13 @@
   migemo = Migemo.new(static_dict, pattern)
   migemo.optimization = 3
   data = Marshal.dump(migemo.regex_tree)
-  output = [pattern.length].pack("N") + pattern + 
-    [data.length].pack("N") + data
+#  output = [pattern.length].pack("N") + pattern + 
+#    [data.length].pack("N") + data
+  output = [pattern.bytesize].pack("N") + pattern.dup.force_encoding("ASCII-8BIT") + 
+    [data.bytesize].pack("N") + data
   cache.print output
   index.print [idx].pack("N")
-  idx += output.length
+#  idx += output.length
+  idx += output.bytesize
 end
 
diff -ur migemo-0.40/migemo-convert.rb migemo-0.40-1.9/migemo-convert.rb
--- migemo-0.40/migemo-convert.rb	2003-05-26 15:55:22.000000000 +0900
+++ migemo-0.40-1.9/migemo-convert.rb	2009-08-14 05:03:43.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,7 @@
 #
 # Convert a SKK's dictionary into Migemo's.
 #
-$KCODE= "e"
+#$KCODE= "e"
 require 'romkan'
 
 HIRAGANA = "[ぁ-んー〜]"
diff -ur migemo-0.40/migemo-dict.rb migemo-0.40-1.9/migemo-dict.rb
--- migemo-0.40/migemo-dict.rb	2002-10-22 14:38:14.000000000 +0900
+++ migemo-0.40-1.9/migemo-dict.rb	2009-08-14 05:03:43.000000000 +0900
@@ -122,8 +122,8 @@
   def lookup (pattern)
     raise if pattern == nil
     pattern = pattern.downcase
-    idx = @index.bsearch_first do |idx| 
-      key, data = decompose(idx)
+    idx = @index.bsearch_first do |idx1| 
+      key, data = decompose(idx1)
       key <=> pattern 
     end
     if idx
diff -ur migemo-0.40/migemo-index.rb migemo-0.40-1.9/migemo-index.rb
--- migemo-0.40/migemo-index.rb	2003-05-26 15:45:53.000000000 +0900
+++ migemo-0.40-1.9/migemo-index.rb	2009-08-14 05:03:43.000000000 +0900
@@ -19,5 +19,6 @@
   unless line =~ /^;/
     print [offset].pack("N")
   end
-  offset += line.length
+#  offset += line.length
+  offset += line.bytesize
 end
diff -ur migemo-0.40/migemo.rb.in migemo-0.40-1.9/migemo.rb.in
--- migemo-0.40/migemo.rb.in	2003-05-28 21:00:52.000000000 +0900
+++ migemo-0.40-1.9/migemo.rb.in	2009-08-14 05:03:43.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,7 @@
 require 'migemo-dict'
 require 'migemo-regex'
 require 'romkan'
-require 'jcode'
+#require 'jcode'
 include MigemoRegex
 
 class String
@@ -177,7 +178,7 @@
     expand_kanas.each do |x| 
       compiler.push(x)
       compiler.push(x.to_katakana)
-      expand_words(@static_dict, x).each do |x| compiler.push(x) end
+      expand_words(@static_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@static_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
@@ -188,7 +189,7 @@
   def lookup_user_dict
     compiler = RegexCompiler.new
     expand_kanas.each do |x| 
-      expand_words(@user_dict, x).each do |x| compiler.push(x) end
+      expand_words(@user_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@user_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
diff -ur migemo-0.40/tests/Makefile.in migemo-0.40-1.9/tests/Makefile.in
--- migemo-0.40/tests/Makefile.in	2003-05-29 17:09:03.000000000 +0900
+++ migemo-0.40-1.9/tests/Makefile.in	2009-08-14 05:03:43.000000000 +0900
@@ -203,7 +203,7 @@
 test-dict.cache: test-dict test-dict.idx ../migemo-cache.rb
 	ruby -rromkan -ne 'puts $$1.to_roma if /^(.+?)	/' test-dict |\
 	while read line; do\
-	    echo $$line | ruby -ne 'chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
+	    echo $$line | ruby -ne '$$_.chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
 	done | ruby -I.. ../migemo-cache.rb test-dict
 
 clean-local:

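Ruby-1.9.1-p430 用 (再修正版)。p243 用との違いは、不要になった行 ($KCODE の設定、require 'jcode'、length を使った旧コード) をコメントアウトではなく削除したことと、migemo.rb.in の quotemeta メソッドの正規表現を /([^ \w])/ から /([[:punct:]])/ に変更したこと。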

diff -ur migemo-0.40/genchars.sh migemo-0.40-1.9.1/genchars.sh
--- migemo-0.40/genchars.sh	2001-08-13 18:30:48.000000000 +0900
+++ migemo-0.40-1.9.1/genchars.sh	2010-09-24 00:32:26.000000000 +0900
@@ -1,6 +1,6 @@
 #! /bin/sh
 
-ruby -rromkan -nle 'head = split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
+ruby -rromkan -nle 'head = $_.split[0]; if /^\w+$/ =~ head then puts head else roma = head.to_roma; puts roma, roma.to_kunrei end' migemo-dict |uniq> tmp.ascii.words
 
 # Get the top 500 frequent ngrams.
 for i in 1 2 3 4 5 6 7 8; do
diff -ur migemo-0.40/migemo migemo-0.40-1.9.1/migemo
--- migemo-0.40/migemo	2003-05-27 12:01:10.000000000 +0900
+++ migemo-0.40-1.9.1/migemo	2010-09-24 00:32:26.000000000 +0900
@@ -10,7 +10,6 @@
 # the GNU General Public License version 2.
 #
 
-$KCODE = "e"
 
 require 'migemo'
 require 'getoptlong'
diff -ur migemo-0.40/migemo-cache.rb migemo-0.40-1.9.1/migemo-cache.rb
--- migemo-0.40/migemo-cache.rb	2001-07-15 02:38:56.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-cache.rb	2010-09-24 00:32:26.000000000 +0900
@@ -1,5 +1,4 @@
 require 'migemo'
-$KCODE="e"
 raise if ARGV[0] == nil
 dict = ARGV[0]
 static_dict = MigemoStaticDict.new(dict)
@@ -18,10 +17,10 @@
   migemo = Migemo.new(static_dict, pattern)
   migemo.optimization = 3
   data = Marshal.dump(migemo.regex_tree)
-  output = [pattern.length].pack("N") + pattern + 
-    [data.length].pack("N") + data
+  output = [pattern.bytesize].pack("N") + pattern.dup.force_encoding("ASCII-8BIT") + 
+    [data.bytesize].pack("N") + data
   cache.print output
   index.print [idx].pack("N")
-  idx += output.length
+  idx += output.bytesize
 end
 
diff -ur migemo-0.40/migemo-convert.rb migemo-0.40-1.9.1/migemo-convert.rb
--- migemo-0.40/migemo-convert.rb	2003-05-26 15:55:22.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-convert.rb	2010-09-24 00:32:26.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,6 @@
 #
 # Convert a SKK's dictionary into Migemo's.
 #
-$KCODE= "e"
 require 'romkan'
 
 HIRAGANA = "[ぁ-んー〜]"
diff -ur migemo-0.40/migemo-dict.rb migemo-0.40-1.9.1/migemo-dict.rb
--- migemo-0.40/migemo-dict.rb	2002-10-22 14:38:14.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-dict.rb	2010-09-24 00:32:26.000000000 +0900
@@ -122,8 +122,8 @@
   def lookup (pattern)
     raise if pattern == nil
     pattern = pattern.downcase
-    idx = @index.bsearch_first do |idx| 
-      key, data = decompose(idx)
+    idx = @index.bsearch_first do |idx1| 
+      key, data = decompose(idx1)
       key <=> pattern 
     end
     if idx
diff -ur migemo-0.40/migemo-index.rb migemo-0.40-1.9.1/migemo-index.rb
--- migemo-0.40/migemo-index.rb	2003-05-26 15:45:53.000000000 +0900
+++ migemo-0.40-1.9.1/migemo-index.rb	2010-09-24 00:32:26.000000000 +0900
@@ -19,5 +19,5 @@
   unless line =~ /^;/
     print [offset].pack("N")
   end
-  offset += line.length
+  offset += line.bytesize
 end
diff -ur migemo-0.40/migemo.rb.in migemo-0.40-1.9.1/migemo.rb.in
--- migemo-0.40/migemo.rb.in	2003-05-28 21:00:52.000000000 +0900
+++ migemo-0.40-1.9.1/migemo.rb.in	2010-09-24 00:33:04.000000000 +0900
@@ -1,3 +1,4 @@
+# -*- encoding:euc-jp -*-
 #
 # Ruby/Migemo - a library for Japanese incremental search.
 #
@@ -14,7 +15,6 @@
 require 'migemo-dict'
 require 'migemo-regex'
 require 'romkan'
-require 'jcode'
 include MigemoRegex
 
 class String
@@ -24,7 +24,7 @@
   end
 
   def quotemeta
-    self.gsub(/([^ \w])/, '\\\\\\1')
+    self.gsub(/([[:punct:]])/, '\\\\\\1')
   end
 
   def first
@@ -177,7 +177,7 @@
     expand_kanas.each do |x| 
       compiler.push(x)
       compiler.push(x.to_katakana)
-      expand_words(@static_dict, x).each do |x| compiler.push(x) end
+      expand_words(@static_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@static_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
@@ -188,7 +188,7 @@
   def lookup_user_dict
     compiler = RegexCompiler.new
     expand_kanas.each do |x| 
-      expand_words(@user_dict, x).each do |x| compiler.push(x) end
+      expand_words(@user_dict, x).each do |y| compiler.push(y) end
     end
     expand_words(@user_dict, @pattern).each do |x| compiler.push(x) end
     compiler.uniq
diff -ur migemo-0.40/tests/Makefile.in migemo-0.40-1.9.1/tests/Makefile.in
--- migemo-0.40/tests/Makefile.in	2003-05-29 17:09:03.000000000 +0900
+++ migemo-0.40-1.9.1/tests/Makefile.in	2010-09-24 00:32:26.000000000 +0900
@@ -203,7 +203,7 @@
 test-dict.cache: test-dict test-dict.idx ../migemo-cache.rb
 	ruby -rromkan -ne 'puts $$1.to_roma if /^(.+?)	/' test-dict |\
 	while read line; do\
-	    echo $$line | ruby -ne 'chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
+	    echo $$line | ruby -ne '$$_.chomp!;1.upto($$_.length) do |x| puts $$_[0,x] end';\
 	done | ruby -I.. ../migemo-cache.rb test-dict
 
 clean-local: