# -*- coding: binary -*-

###
#
# This module provides methods for parsing and interacting
# with the PDF format.
#
###

module Msf
  module Exploit::PDF_Parse

    # Registers the FILENAME option on the including module.
    #
    # @param info [Hash] module information, passed through to +super+
    def initialize(info = {})
      super

      register_options(
        [
          OptString.new('FILENAME', [ true, 'The file name.', 'some.pdf']),
        ], Msf::Exploit::PDF_Parse
      )
    end

    # Reads the file named by the INFILENAME datastore option.
    #
    # NOTE(review): this reads 'INFILENAME' while this mixin only registers
    # 'FILENAME'; presumably the including module registers 'INFILENAME'
    # itself -- confirm against callers before changing either name.
    #
    # @return [String] the file contents
    def read_pdf()
      IO.read(datastore['INFILENAME'].to_s)
    end

    # Extracts a cross-reference section together with its trailer
    # dictionary.
    #
    # The slice starts at +offset+ and runs through the first ">>" found at or
    # after +offset+, plus the single character following it (the historical
    # behaviour of this method, kept so existing callers see the same text).
    #
    # @param offset [Integer] index in +stream+ where the section starts
    # @param stream [String] the raw PDF contents
    # @return [String, nil] the section text, or nil when no ">>" follows
    #   +offset+ (previously this raised NoMethodError on nil)
    def xref_trailer_parse(offset, stream)
      dict_close = stream.index(/>>/, offset)
      return nil unless dict_close

      stream[offset..(dict_close + 2)]
    end

    # Pulls the known trailer keys out of a section returned by
    # #xref_trailer_parse.
    #
    # Only keys whose pattern matches are present in the result; all values
    # are the captured text (Strings), not Integers.
    #
    # @param xref_trailer [String, nil] cross-reference section and trailer
    # @return [Hash{String => String}] any of 'Size', 'Root', 'Info', 'ID',
    #   'Prev' and 'XRefStm'; empty when +xref_trailer+ is nil
    def trailer_parse(xref_trailer)
      patterns = {
        'Size'    => /Size (\d+)/m,
        'Root'    => /Root (\d+ \d)/m,
        'Info'    => /Info (\d+ \d)/m,
        'ID'      => /ID(\[.+\])/m,
        'Prev'    => /Prev (\d+)/m,
        'XRefStm' => /XRefStm (\d+)/m
      }

      section = xref_trailer.to_s
      patterns.each_with_object({}) do |(key, pattern), trailer|
        found = section.match(pattern)
        trailer[key] = found[1] if found
      end
    end

    # Looks up the byte offset recorded for an object in one cross-reference
    # section.
    #
    # The table is walked subsection by subsection. Every entry is assumed to
    # be exactly 20 bytes long, so the entry for an object is found by
    # arithmetic rather than by scanning entries.
    #
    # @param xref_trailer [String, nil] section from #xref_trailer_parse
    # @param obj_name [String] object reference such as "12 0"
    # @return [Integer, nil] the offset parsed from the entry, or nil when the
    #   reference is malformed, has a non-zero generation, the section has no
    #   "trailer" keyword, or the object is not listed in this section
    def object_locate(xref_trailer, obj_name)
      return nil unless xref_trailer

      ref = obj_name.to_s.match(/(\d+) (\d+)/)
      return nil unless ref

      obj = ref[1].to_i
      # Only generation 0 objects are looked up; other generations never
      # produced a result in this method.
      return nil unless ref[2].to_i.zero?

      trailer_at = xref_trailer.index(/trailer/)
      return nil unless trailer_at

      # Everything before the "trailer" keyword is the table itself.
      xrefs = xref_trailer[0, trailer_at]

      header = xrefs.match(/xref\r?\n?(\d+) (\d+)\r?\n?/m)
      while header
        first_obj = header[1].to_i
        count = header[2].to_i
        entries_at = header.end(0)

        if obj >= first_obj && obj < first_obj + count
          entry_at = entries_at + (obj - first_obj) * 20
          # 12 characters cover the 10-digit offset; to_i stops at the space
          # that follows it.
          return xrefs[entry_at..(entry_at + 11)].to_i
        end

        # Skip this subsection's entries and read the next subsection header.
        next_at = entries_at + count * 20
        break if next_at >= xrefs.length

        header = xrefs.match(/(\d+) (\d+)\r?\n?/m, next_at)
      end

      nil
    end

    # Returns the text of an object, using the first cross-reference section
    # that knows where the object lives.
    #
    # The slice runs from the object's offset through "endobj" plus the single
    # character following it (historical behaviour, preserved).
    #
    # @param xref_trailers [Array<String>] sections from #parse_pdf
    # @param obj_name [String] object reference such as "12 0"
    # @param stream [String] the raw PDF contents
    # @return [String, nil] the object text, or nil when the object cannot be
    #   located or no "endobj" follows its offset (the latter previously
    #   raised NoMethodError on nil)
    def parse_object(xref_trailers, obj_name, stream)
      offset = nil
      xref_trailers.each do |xrefs|
        offset = object_locate(xrefs, obj_name)
        break if offset
      end
      return nil unless offset

      terminator = stream.match(/endobj/, offset)
      return nil unless terminator

      stream[offset..terminator.end(0)]
    end

    # Builds a cross-reference subsection for objects found in +stream+.
    #
    # @param stream [String] PDF data to scan for "N G obj" headers
    # @param offset [Integer] index in +stream+ at which scanning starts
    # @param num_obj [#to_s] "1" to index only the first object found, "*" to
    #   index every object from +offset+ to the end; any other value yields a
    #   subsection with no entries
    # @return [String] a "<first object number> <entry count>\r\n" header
    #   followed by one "<10-digit offset> 00000 n\r\n" line per object
    def xref_create(stream, offset, num_obj)
      obj_header = /(\d+) \d obj/
      offsets = []
      first_obj = ''

      case num_obj.to_s
      when '1'
        found = stream.match(obj_header, offset)
        if found
          offsets << found.begin(0)
          first_obj = found[1]
        end
      when '*'
        pos = offset
        while pos < stream.length && (found = stream.match(obj_header, pos))
          offsets << found.begin(0)
          first_obj = found[1] if first_obj.empty?
          pos = found.end(0)
        end
      end

      output = "#{first_obj} #{offsets.length}\r\n"
      offsets.each { |obj_offset| output << format("%010d 00000 n\r\n", obj_offset) }
      output
    end

    # Parses the cross-reference sections and trailers of a PDF.
    #
    # Starts at the offset given by the last "startxref" keyword and follows
    # the chain of 'Prev' entries. The walk stops when a trailer has no
    # 'Prev', when a 'Prev' value has already been seen (cycle protection), or
    # when a 'Prev' offset does not lead to a trailer dictionary.
    #
    # Note that +trailers+ holds the first trailer plus only those later
    # trailers that themselves carry a not-yet-seen 'Prev'; the section text
    # of every visited trailer is still added to +xref_trailers+.
    #
    # @param stream [String] the raw PDF contents
    # @return [Array] four values: xref_trailers (Array<String>), trailers
    #   (Array<Hash>), startxrefs (Array<String>) and root_obj (String, the
    #   'Root' reference of the first trailer, e.g. "1 0")
    # @raise [ArgumentError] when no trailer dictionary is found at the
    #   startxref offset
    # @raise [KeyError] when the first trailer has no 'Root' entry
    def parse_pdf(stream)
      # Collect the number that follows each "startxref" keyword.
      startxrefs = []
      pos = 0
      while pos < stream.length
        keyword = stream.match(/startxref\r?\n?/m, pos)
        break unless keyword

        pos = keyword.end(0)
        startxrefs.push(stream.match(/\d+/, pos).to_s)
      end

      xref_trailers = []
      trailers = []

      xref_trailer = xref_trailer_parse(startxrefs.last.to_i, stream)
      unless xref_trailer
        raise ArgumentError, 'Unable to locate a trailer dictionary in the PDF stream'
      end

      xref_trailers.push(xref_trailer)
      trailer = trailer_parse(xref_trailer)
      trailers.push(trailer)
      root_obj = trailer.fetch('Root')

      while trailer['Prev']
        xref_trailer = xref_trailer_parse(trailer.fetch('Prev').to_i, stream)
        break unless xref_trailer

        xref_trailers.push(xref_trailer)
        trailer = trailer_parse(xref_trailer)

        # Dropping an already-seen 'Prev' ends the walk instead of looping.
        already_seen = trailers.any? { |known| known['Prev'] == trailer['Prev'] }
        trailer.delete('Prev') if already_seen

        trailers.push(trailer) if trailer.key?('Prev')
      end

      [xref_trailers, trailers, startxrefs, root_obj]
    end

  end
end