リンク: [ホーム] [自己紹介] [リンク集] [アルバム] [ソフトウェア] [発表文献] [その他]

まさおのChangeLogメモ / 2005-12-05

01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31

2005-12-05 Mon

* pdf.rb

柱を付けるのを自動化できないか、検討する際に、PDFフォーマットを解
析するルーチンの初歩の部分を書いてみた。。。
結局使わ(え)なかったけど、一応、痕跡を残しておく:

#!/usr/local/bin/ruby

require "zlib"

# Regexp source fragment matching one end-of-line marker (CRLF, LF, or a
# lone CR). It is meant to be interpolated into patterns as #{CR}.
CR = "(?:\r?\n|\r)"

# Empty placeholder class: it only defines a constructor that takes no
# arguments and does nothing.
class PDF
   def initialize; end
end

# Read the whole input (files named on the command line, or stdin) as raw
# bytes. The input is expected to contain binary data, and on Ruby 1.9+
# matching a Regexp against a String that is not valid in its text encoding
# raises ArgumentError, so ARGF is switched to binary mode before reading.
ARGF.binmode
cont = ARGF.read

# Echo the first line of the input (for a PDF, presumably the "%PDF-x.y"
# header line).
puts cont[/\A.*?#{CR}/]

# Delete every line that begins with "%", one match per pass, so the
# parsing that follows never sees comment lines. sub! returns nil once no
# such line is left, which ends the loop.
# NOTE(review): this also deletes any "%"-initial line that happens to occur
# inside binary stream data -- confirm that is acceptable.
comment_line = /^%.*?#{CR}/
loop do
   break unless cont.sub!(comment_line, "")
end

# Results gathered while consuming the input text held in `cont`.
obj = {}        # "N G obj" header text => raw text up to the matching "endobj"
trailer = nil   # text between "<<" and the first ">>" following "trailer"
startxref = 0   # number that follows the "startxref" keyword

# Every pattern interpolates CR, so build each one once instead of on every
# loop iteration.
object_re    = /^(\d+ \d+ obj)(.*?)endobj#{CR}?/m
xref_re      = /^xref#{CR}0 (\d+)#{CR}/
xref_line_re = /^.*#{CR}+/
trailer_re   = /^trailer\s*<<(.*?)>>/m
startxref_re = /^startxref#{CR}(\d+)#{CR}/

# Remove one recognised piece from `cont` per iteration until it is empty.
until cont.empty?
   if cont.sub!(/\A\s+/, "")
      # Leading whitespace dropped; examine the remaining text again.
   elsif (m = object_re.match(cont))
      obj[m[1]] = m[2]
      p m[1]
      cont.sub!(object_re, "")
   elsif (m = xref_re.match(cont))
      cont.sub!(xref_re, "")
      # The header's second number is the entry count. The entries are not
      # used, so that many lines are simply discarded.
      m[1].to_i.times { cont.sub!(xref_line_re, "") }
   elsif (m = trailer_re.match(cont))
      trailer = m[1]
      cont.sub!(trailer_re, "")
   elsif (m = startxref_re.match(cont))
      # Stored as an Integer so the type matches the initial value above.
      startxref = m[1].to_i
      cont.sub!(startxref_re, "")
   else
      # None of the patterns applies to what is left (for example a final
      # "%%EOF" with no line terminator, or an xref table that does not
      # start at entry 0). Without this branch `cont` would never shrink
      # and the loop would spin forever.
      warn "unrecognized PDF data, stopping: #{cont[0, 40].inspect}"
      break
   end
end

# Dump the raw trailer dictionary text.
p trailer

# @info holds the raw text of each object reached while following
# trailer -> /Root -> /Pages -> /Kids. A key is only set when the
# reference leading to it was found.
@info = {}

# Object referenced by the trailer's /Root entry.
@info["/Root"] = obj["#{$1} obj"] if trailer =~ /\/Root\s*(\d+ \d+) R/

# Object referenced by the root's /Pages entry.
root = @info["/Root"]
if root && root =~ /\/Pages\s*(\d+ \d+) R/
   @info["/Pages"] = obj["#{$1} obj"]
   p @info["/Pages"]
end

# Objects referenced by the /Kids array of the pages object. Splitting on
# the reference pattern yields the captured "N G" numbers interleaved with
# empty strings, which are filtered out.
pages = @info["/Pages"]
if pages && pages =~ /\/Kids\s*\[((\d+ \d+ R\s*)+)\]/
   refs = $1.split(/(\d+ \d+) R\s*/).reject { |piece| piece.empty? }
   @info["/Kids"] = refs.map { |ref| obj["#{ref} obj"] }
   p @info["/Kids"]
end
# Collect the object referenced by /Contents for every page in /Kids.
if @info["/Kids"]
   # Create the list once, before the loop: re-creating it for each page
   # would discard everything except the last page's entry.
   @info["/Contents"] = []
   @info["/Kids"].each do |page|
      # Any run of whitespace may separate the key from the reference.
      if page =~ /\/Contents\s+(\d+ \d+) R/
         @info["/Contents"] << obj["#{$1} obj"]
      end
   end
   # Debug output: the first collected content object.
   p @info["/Contents"][0]
end

# Extract the stream data of each collected content object and store it in
# @info["stream"], inflating it first when the object text names the
# FlateDecode filter.
content_objects = @info["/Contents"]
if content_objects
   decoded = @info["stream"] = []
   content_objects.each do |body|
      puts body
      next unless body =~ /stream#{CR}(.*)endstream#{CR}/m

      # Save the capture now: the filter check below runs another match.
      data = $1
      data = Zlib::Inflate.inflate(data) if body =~ /\/Filter\s*\/FlateDecode/
      p data
      decoded << data
   end
end