01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
Index: clsearch.cgi===================================================================RCS file: /home/masao/.cvsroot/public_html/d/clsearch.cgi,vretrieving revision 1.2diff -u -b -r1.2 clsearch.cgi--- clsearch.cgi 26 Apr 2006 16:12:32 -0000 1.2+++ clsearch.cgi 29 Apr 2006 06:57:18 -0000@@ -67,6 +67,9 @@ my $outstr = ""; my $cnt = 0; +my $ascii = '[\x00-\x7F]';+my $twoBytes = '[\x8E\xA1-\xFE][\xA1-\xFE]';+my $threeBytes = '\x8F[\xA1-\xFE][\xA1-\xFE]'; sub clean { local ($_) = @_;@@ -101,7 +104,8 @@ $match_num++ if ($c =~ /^.+\[$tmp\].*\t.*$/); } else { my $tmp = clean($k);- $match_num++ if ($c =~ m|$tmp|i);+ $match_num+++ if $c =~ /^(?:$ascii|$twoBytes|$threeBytes)*?(?:$tmp)/i; push @regular_keys, $tmp; } }@@ -128,7 +132,7 @@ $pos =~ s!^(([\x80-\xff]{2})*?)[\x80-\xff]$!$1!; $c = qq($pre$k$pos); my $p = join('|', @regular_keys);- $c =~ s!($p)!$open_tag$1$close_tag!gi;+ $c =~ s!\G((?:$ascii|$twoBytes|$threeBytes)*?)($p)!$1$open_tag$2$close_tag!gi; } } elsif ($mode == 1) { # アイテムモード my ($file, $id) = ($date =~ /href="(.*?.html).*?">\[(.+?)\]/);--- hosts.allow.20060428 2006-04-28 18:07:03.250482728 +0900+++ hosts.allow 2006-04-28 18:29:22.049954272 +0900@@ -67,10 +67,14 @@ # Allow connets to sshd from sample.foobar.edu. #sshd: sample.foobar.edu+sshd: .nii.ac.jp 136.187.+sshd: .bbtec.net 220.1.152. 220.1.155. 221.89.8.+sshd: .nier.go.jp .nicer.go.jp 210.164.30. 210.137.6. +sshd: .slis.tsukuba.ac.jp 133.51.+sshd: .sakura.ne.jp 59.106.13. 
################################################################################ # Allow all requests from hosts in .foobar.edu with one exception (notice the # leading dot) #ALL: .foobar.edu EXCEPT terminalserver.foobar.edu-sshd : ALL--- /etc/fstab 2006-04-27 19:53:07.000000000 +0900+++ /etc/fstab.20060427 2006-04-27 19:48:05.000000000 +0900@@ -1,11 +1,11 @@-/dev/hda2 / ext3 defaults,acl 1 1+/dev/hda2 / ext3 defaults 1 1 /dev/cdrom /mnt/cdrom iso9660 noauto,owner,ro 0 0 /dev/fd0 /mnt/floppy auto noauto,owner 0 0 none /proc proc defaults 0 0 none /dev/pts devpts gid=5,mode=620 0 0 sysfs /sys sysfs defaults 0 0 /dev/hda1 swap swap defaults 0 0-/dev/hdb /project ext3 defaults,acl 1 2+/dev/hdb /project ext3 defaults 1 2 #-- dynaplug -- # The following line will be automatically modified by dynaplug. /dev/hdb1 /mnt/hdb1 auto noauto,user,exec 0 0% sudo init 1% mount/dev/hda2 on / type ext3 (rw,acl)none on /proc type proc (rw)usbfs on /proc/bus/usb type usbfs (rw)sysfs on /sys type sysfs (rw)none on /dev/pts type devpts (rw,gid=5,mode=620)/dev/hdb on /project type ext3 (rw,acl) * MSR IME Corpus: This corpus provides a test data set for the task of Japanese character conversion for test input. See (Suzuki and Gao 2006) for a detailed description. 
* S-MSRSeg: Chinese word segmenter, named entity recognizer, described in
rml H :0:tlmincho
gbm H :0:tlgothic
rmlv V :0:tlmincho
gbmv V :0:tlgothic
% dvipdfmx -f cid-x.map main
% ruby -ne 'next if /^#/;code,prop,val = $_.chomp.split(/\t/);puts [[code.sub(/^U\+/,"").hex].pack("U*"),val].join("\t") if prop =~ /^kJapanese(Kun|On)$/' Unihan.txt | ruby -e 'w={};ARGF.each do |l|char,yomi=l.chomp.split(/\t/);w[char]||=[];w[char]<<yomi;end;w.keys.sort.each do|char|puts [char,w[char]].join("\t");end
単漢字の読みをダウンロードできるサイトを教えて下さい.CSV,Excel形式などで,シフトJISの全ての漢字,できればJIS X0213の範囲までカバーしていると嬉しいです.
これは知っていたのですが,加工するのがすごく大変そうです.読みがローマ字なのもちょっと.
% ruby -ne 'next if /^#/;code,prop,val = $_.chomp.split(/\t/);puts [[code.sub(/^U\+/,"").hex].pack("U*"),val].join("\t") if prop =~ /Japanese/' Unihan.txt
% sudo mkdir -p /usr/share/texmf-local/tex/latex/lncs/
% cd /usr/share/texmf-local/tex/latex/lncs
% sudo unzip ~/src/llncs2e.zip
% sudo mkdir -p /usr/share/texmf-local/bibtex
% cd /usr/share/texmf-local/bibtex
% sudo cp -p /usr/share/texmf-local/tex/latex/lncs/splncs.bst .
% sudo mktexlsr