#!/usr/local/bin/ruby
# -*- coding: euc-jp -*-
$KCODE = 'e'
require 'romkan'
require "open-uri"
# Matches a line made up solely of whitespace and characters in the hiragana
# range written in the character class below. Used further down to accept a
# converted name only when every character of it became kana.
JP_REGEXP = /^[\sぁ-ん]+$/
# Matches one of the listed kana (the small vowels, the small ya/yu/yo, and the
# pa-row) when it appears immediately after a word boundary. Used further down
# to reject a converted name.
# NOTE(review): presumably these are excluded because a word that begins with
# one of them is unlikely to be a real Japanese reading -- confirm the intent.
NON_JP_REGEXP = /\b[ぁぃぅぇぉゃゅょぱぴぷぺぽ]/
# Download the index page and, for every listed name whose kana conversion
# passes the JP_REGEXP / NON_JP_REGEXP filters, print "<kana> :: <name>".
index_url = "http://www.soros.org/openaccess/browse.cfm?st=A&fn=Z"

# Per-response worker: receives the opened page and scans it line by line.
report_kana_names = lambda do |io|
  io.each do |line|
    # Only lines that start with a bold table cell are considered.
    next unless line =~ /^<TD><b>(.*?)<\/b>/

    # Replace every ", " with a single space (e.g. "A, B" -> "A B") and trim.
    name = $1.gsub(/, /, " ").strip
    # Names of three characters or fewer are skipped.
    next if name.length <= 3

    # String#to_kana is not core Ruby; presumably it is provided by the
    # romkan library required at the top of the file -- confirm.
    yomi = name.downcase.to_kana
    puts "#{yomi} :: #{name}" if yomi =~ JP_REGEXP && yomi !~ NON_JP_REGEXP
  end
end

# Kernel#open no longer accepts URLs on Ruby 3.0+, where open-uri exposes
# URI.open instead. On older interpreters URI.open is not publicly callable,
# so fall back to the open-uri-patched Kernel#open there.
if URI.respond_to?(:open)
  URI.open(index_url, &report_kana_names)
else
  open(index_url, &report_kana_names)
end