01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
#!/usr/local/bin/ruby
# Dumps the structure of a PDF read from ARGF (file arguments or stdin).
#
# Printed, in order: the first line of the input, every "N G obj" header that
# is parsed, the trailer dictionary text, the /Pages object, the page objects
# listed in its /Kids array, the first page content object, and then each
# content object followed by its stream data (inflated when the object
# declares /Filter /FlateDecode).
#
# Only the direct /Kids of the root /Pages object are examined, and only
# /Contents entries that are a single indirect reference are followed.

require "strscan"
require "zlib"

# Regexp source fragment matching one PDF end-of-line marker (CRLF, LF or CR).
CR = "(?:\r?\n|\r)".freeze

# A "%" comment up to (not including) the end of the line.
PDF_COMMENT = /%[^\r\n]*/
# An indirect object: capture 1 is the "N G obj" header, capture 2 the body.
PDF_OBJECT = /(\d+ \d+ obj)(.*?)endobj#{CR}?/m
# The keyword line that opens a cross-reference table.
PDF_XREF = /xref[ \t]*#{CR}/
# A cross-reference subsection header "first count"; capture 1 is the count.
PDF_XREF_SECTION = /\d+ (\d+)[ \t]*#{CR}/
# The rest of the current line including its end-of-line marker, if any.
# "." is avoided on purpose: it matches "\r" and would run across CR-only
# line endings.
PDF_LINE = /[^\r\n]*#{CR}?/
# The trailer dictionary; capture 1 is the text between "<<" and the first ">>".
PDF_TRAILER = /trailer\s*<<(.*?)>>/m
# The startxref keyword and the byte offset that follows it.
PDF_STARTXREF = /startxref#{CR}(\d+)#{CR}?/
# Stream data inside an object body (capture 1).
PDF_STREAM = /stream#{CR}(.*)endstream#{CR}/m

# Result of parse_pdf: objects maps "N G obj" => body text, trailer is the
# text of the last trailer dictionary seen (or nil), startxref the last
# startxref offset seen (0 when absent).
ParsedPDF = Struct.new(:objects, :trailer, :startxref)

# Empty placeholder class; the script below does not use it.
class PDF
  def initialize
  end
end

# Skips the subsections of a cross-reference table. The scanner must be
# positioned just after the "xref" line.
#
# @param scanner [StringScanner]
# @return [void]
def skip_xref_sections(scanner)
  while scanner.scan(PDF_XREF_SECTION)
    scanner[1].to_i.times do
      # A bogus entry count must not keep us looping once the input is gone.
      break if scanner.eos?

      scanner.skip(PDF_LINE)
    end
  end
end

# Walks the PDF text once from start to end, collecting indirect objects, the
# trailer dictionary and the startxref offset. Comments are skipped only
# between top-level constructs, so "%" lines inside object bodies and stream
# data are left untouched. Text that matches no known construct is skipped one
# line at a time, which guarantees the scan terminates.
#
# @param cont [String] raw PDF text (not modified)
# @yieldparam header [String] the "N G obj" header of each object, in file order
# @return [ParsedPDF]
def parse_pdf(cont)
  scanner = StringScanner.new(cont)
  objects = {}
  trailer = nil
  startxref = 0

  until scanner.eos?
    next if scanner.skip(/\s+/) || scanner.skip(PDF_COMMENT)

    if scanner.scan(PDF_OBJECT)
      objects[scanner[1]] = scanner[2]
      yield scanner[1] if block_given?
    elsif scanner.skip(PDF_XREF)
      skip_xref_sections(scanner)
    elsif scanner.scan(PDF_TRAILER)
      trailer = scanner[1]
    elsif scanner.scan(PDF_STARTXREF)
      startxref = scanner[1].to_i
    else
      # Whitespace was ruled out above, so this always consumes at least one
      # character.
      scanner.skip(PDF_LINE)
    end
  end

  ParsedPDF.new(objects, trailer, startxref)
end

# Resolves "/<key> N G R" inside text to the body of the referenced object.
#
# @param objects [Hash{String => String}] object table from parse_pdf
# @param text [String, nil] dictionary text to search
# @param key [String] entry name without the leading slash, e.g. "Root"
# @return [String, nil] the object body; nil when text is nil, the entry is
#   missing, or the referenced object is not in the table
def referenced_object(objects, text, key)
  ref = text && text[%r{/#{key}\s*(\d+ \d+) R}, 1]
  ref && objects["#{ref} obj"]
end

# Lists the objects referenced by the /Kids array of a /Pages object.
#
# @param objects [Hash{String => String}] object table from parse_pdf
# @param pages [String, nil] body of the /Pages object
# @return [Array<String>, nil] bodies of the referenced objects (references
#   to objects missing from the table are dropped); nil when there is no
#   /Kids array
def page_objects(objects, pages)
  kids = pages && pages[%r{/Kids\s*\[\s*((?:\d+ \d+ R\s*)+)\]}, 1]
  return nil unless kids

  kids.scan(/(\d+ \d+) R/).flatten.map { |ref| objects["#{ref} obj"] }.compact
end

# Collects the /Contents object of every page, in page order.
#
# @param objects [Hash{String => String}] object table from parse_pdf
# @param pages [Array<String>] page object bodies
# @return [Array<String>] content object bodies; pages without a resolvable
#   /Contents reference contribute nothing
def page_contents(objects, pages)
  pages.map { |page| referenced_object(objects, page, "Contents") }.compact
end

# Extracts the stream data of an object, inflating it when the object
# declares /Filter /FlateDecode.
#
# @param object [String] object body
# @return [String, nil] stream data, or nil when the object has no stream
# @raise [Zlib::Error] when FlateDecode data cannot be inflated
def decode_stream(object)
  raw = object[PDF_STREAM, 1]
  return nil unless raw

  object =~ %r{/Filter\s*/FlateDecode} ? Zlib::Inflate.inflate(raw) : raw
end

# PDF files carry binary stream data; reading in binary mode keeps the regexp
# matches below from failing on bytes that are not valid text.
ARGF.binmode
cont = ARGF.read.to_s
puts cont[/\A.*?#{CR}/]

pdf = parse_pdf(cont) { |header| p header }
p pdf.trailer

@info = {}
@info["/Root"] = referenced_object(pdf.objects, pdf.trailer, "Root")
@info["/Pages"] = referenced_object(pdf.objects, @info["/Root"], "Pages")
p @info["/Pages"] if @info["/Pages"]

@info["/Kids"] = page_objects(pdf.objects, @info["/Pages"])
p @info["/Kids"] if @info["/Kids"]

if @info["/Kids"]
  # Accumulated across all pages, not reset per page.
  @info["/Contents"] = page_contents(pdf.objects, @info["/Kids"])
  p @info["/Contents"][0]
end

if @info["/Contents"]
  @info["stream"] = []
  @info["/Contents"].each do |content|
    puts content
    stream = decode_stream(content)
    next unless stream

    p stream
    @info["stream"] << stream
  end
end