#!/usr/local/bin/ruby
# -*- coding: utf-8 -*-
# frozen_string_literal: true

# Downloads the soros.org open-access listing and scans it line by line.
# For every line that starts with a bolded table cell (<TD><b>NAME</b>), the
# name is converted to kana with romkan's String#to_kana; when the result is
# made up only of whitespace and hiragana it is printed as "<kana> :: <name>".
#
# NOTE(review): this script originally declared EUC-JP (magic comment plus
# `$KCODE = 'e'`). `$KCODE` is ignored by Ruby 1.9 and later, so it was
# dropped. The declaration is now UTF-8 on the assumption that this file is
# saved as UTF-8 and that the installed romkan handles UTF-8 strings --
# confirm both before relying on the output encoding.

require 'romkan'
require 'open-uri'

# Page that is scanned for names.
SOURCE_URL = 'http://www.soros.org/openaccess/browse.cfm?st=A&fn=Z'

# A line that begins with a bolded table cell; capture 1 is the cell text.
NAME_CELL_REGEXP = %r{\A<TD><b>(.*?)</b>}

# The whole string consists solely of whitespace and hiragana (ぁ through ん).
JP_REGEXP = /\A[\sぁ-ん]+\z/

# A word boundary immediately followed by one of the listed small or pa-row
# kana. Readings matching this are rejected (presumably because a word that
# starts this way does not look like natural Japanese -- TODO confirm intent).
NON_JP_REGEXP = /\b[ぁぃぅぇぉゃゅょぱぴぷぺぽ]/

# Opens +url+ through open-uri and yields the response IO to the block.
#
# URI.open is the supported entry point where it exists (Ruby 2.5+); since
# Ruby 3.0 Kernel#open no longer accepts URLs. Older Rubies fall back to the
# open-uri-patched Kernel#open that the script used originally.
def open_listing(url, &block)
  return URI.open(url, &block) if URI.respond_to?(:open)

  open(url, &block)
end

# Returns the name contained in +line+ with every ", " replaced by a single
# space and surrounding whitespace stripped, or nil when the line does not
# start with a bolded table cell.
def extract_name(line)
  cell = NAME_CELL_REGEXP.match(line)
  cell && cell[1].gsub(', ', ' ').strip
end

# Truthy when +yomi+ is entirely whitespace/hiragana and contains no word
# that starts with one of the kana rejected by NON_JP_REGEXP.
def japanese_reading?(yomi)
  yomi =~ JP_REGEXP && yomi !~ NON_JP_REGEXP
end

open_listing(SOURCE_URL) do |io|
  io.each_line do |line|
    name = extract_name(line)
    # Names of three characters or fewer are skipped.
    next if name.nil? || name.length <= 3

    yomi = name.downcase.to_kana
    puts "#{yomi} :: #{name}" if japanese_reading?(yomi)
  end
end