# Dumps the indirect objects, trailer and page content streams of a PDF read from ARGF.
#!/usr/local/bin/ruby
require "zlib"
# Regexp source fragment (a String, interpolated into the patterns below) that
# matches one line terminator: CRLF, LF, or a lone CR. Frozen so the shared
# constant cannot be mutated by accident.
CR = "(?:\r?\n|\r)".freeze
# Placeholder class: it currently defines nothing but a constructor that takes
# no arguments and does no work.
class PDF
  def initialize; end
end
# Split the input into its top-level sections.
#
# Reads everything from ARGF, prints the first line, strips comment lines and
# then repeatedly removes one recognised section from `cont` until it is empty.
# Results left in top-level locals:
#   obj       - Hash: "N G obj" header => text between that header and "endobj"
#   trailer   - text between "<<" and the first ">>" of the last trailer seen
#               (nil if no trailer was found)
#   startxref - Integer that follows the last "startxref" keyword (0 if none)

# Read raw bytes: PDF data is generally not valid UTF-8, and matching a regexp
# against a text-mode string with invalid byte sequences raises ArgumentError.
ARGF.binmode
cont = ARGF.read

# Echo the first line of the file.
puts cont[/\A.*?#{CR}/]

# Drop every line that starts with "%". The scan restarts from the top after
# each removal, so a line exposed by a previous removal is dropped as well.
# NOTE(review): this is applied to the whole file, so a line inside a stream
# that happens to start with "%" is deleted too.
comment_line = /^%.*?#{CR}/
loop { break unless cont.sub!(comment_line, "") }

obj = {}
trailer = nil
startxref = 0

# Patterns are built once instead of being re-interpolated on every iteration.
# They are anchored with ^ (any line start), not \A, exactly as before.
object_section    = /^(\d+ \d+ obj)(.*?)endobj#{CR}?/m
xref_header       = /^xref#{CR}0 (\d+)#{CR}/
# Lazy `.*?` so that exactly one entry is removed even when entries end in a
# lone CR; a greedy `.*` would run on to the last CR before the next LF.
xref_entry        = /^.*?#{CR}+/
trailer_section   = /^trailer\s*<<(.*?)>>/m
startxref_section = /^startxref#{CR}(\d+)#{CR}/

until cont.empty?
  # `sub!` returns nil when nothing matched and sets $~ when it did, so one
  # call both tests for and consumes a section.
  next if cont.sub!(/\A\s+/, "")

  if cont.sub!(object_section, "")
    obj[$1] = $2
    p $1
  elsif cont.sub!(xref_header, "")
    # The header announces how many entry lines follow; discard them.
    xrefs = $1.to_i
    xrefs.times { cont.sub!(xref_entry, "") }
  elsif cont.sub!(trailer_section, "")
    trailer = $1
  elsif cont.sub!(startxref_section, "")
    startxref = $1.to_i
  else
    # Nothing recognisable is left anywhere in `cont` (for example a final
    # "%%EOF" without a newline, or an xref subsection not starting at 0).
    # Without this branch the loop would spin forever. `cont` starts with a
    # non-whitespace character here, so at least one character is removed.
    skipped = cont.slice!(/\A.*?(?:#{CR}|\z)/)
    warn "skipping unrecognized data: #{skipped.inspect}"
  end
end
# Follow trailer -> /Root -> /Pages -> /Kids -> /Contents and collect the
# content streams, printing each intermediate result for inspection.
#
# Everything found is stored in @info under these keys:
#   "/Root", "/Pages" - body text of the referenced objects
#   "/Kids"           - Array of body texts of the objects listed in /Kids
#   "/Contents"       - Array of body texts of the pages' /Contents objects
#   "stream"          - Array of stream payloads (inflated when FlateDecode)
# Only the direct /Kids of the root /Pages object are examined, and only a
# single "N G R" reference after /Contents is recognised.
p trailer
@info = {}

if trailer =~ /\/Root\s*(\d+ \d+) R/
  @info["/Root"] = obj["#{$1} obj"]
end

if @info["/Root"] && @info["/Root"] =~ /\/Pages\s*(\d+ \d+) R/
  @info["/Pages"] = obj["#{$1} obj"]
  p @info["/Pages"]
end

# Whitespace is allowed after "[" so that "/Kids [ 3 0 R ]" is accepted too.
if @info["/Pages"] && @info["/Pages"] =~ /\/Kids\s*\[\s*((\d+ \d+ R\s*)+)\]/
  refs = $1.scan(/(\d+ \d+) R/).flatten
  # References to objects that were never parsed are skipped, not kept as nil.
  @info["/Kids"] = refs.map { |ref| obj["#{ref} obj"] }.compact
  p @info["/Kids"]
end

if @info["/Kids"]
  # Created once, before the loop: resetting it for every page (as the code
  # used to do) kept only the last page's contents.
  @info["/Contents"] = []
  @info["/Kids"].each do |page|
    next unless page =~ /\/Contents\s*(\d+ \d+) R/

    content = obj["#{$1} obj"]
    @info["/Contents"] << content if content
  end
  p @info["/Contents"][0]
end

if @info["/Contents"]
  @info["stream"] = []
  @info["/Contents"].each do |content|
    puts content
    next unless content =~ /stream#{CR}(.*)endstream#{CR}/m

    # Save the capture first: the filter test below overwrites $1.
    stream = $1
    stream = Zlib::Inflate.inflate(stream) if content =~ /\/Filter\s*\/FlateDecode/
    p stream
    @info["stream"] << stream
  end
end