0e78719eaf
git-svn-id: file:///home/svn/framework3/trunk@9042 4d416f70-5f16-0410-b530-b9f4589650da
36 lines
533 B
Ruby
36 lines
533 B
Ruby
require 'rubygems'
|
|
require 'pathname'
|
|
require 'hpricot'
|
|
require 'uri'
|
|
|
|
# Crawler parser that looks for <embed> elements nested directly under
# <object> elements in an HTML response and hands each embed's +src+ URL
# to the crawler as a new GET request.
class CrawlerObjects < BaseParser
  # Parses one crawled response and queues the embedded-object URLs it finds.
  #
  # request - the request that produced the response; only request['uri']
  #           is read here and is passed to urltohash alongside each src
  #           (presumably as the base for resolving relative URLs - confirm
  #           against urltohash).
  # result  - the response; must answer result['Content-Type'] and
  #           result.body.
  #
  # Responses whose Content-Type does not contain "text/html" are ignored.
  def parse(request, result)
    # A response without a Content-Type header yields nil for the lookup;
    # to_s turns that into "" so the check skips the response instead of
    # raising NoMethodError.
    return unless result['Content-Type'].to_s.include?('text/html')

    doc = Hpricot(result.body.to_s)
    doc.search('//object/embed').each do |obj|
      src = obj['src']
      # An <embed> without a src attribute gives nothing to crawl.
      next if src.nil?

      begin
        insertnewpath(urltohash('GET', src, request['uri'], nil))
      rescue URI::InvalidURIError
        # Deliberate best-effort: a src that cannot be parsed as a URI is
        # skipped so the remaining embeds on the page are still processed.
      end
    end
  end
end
|
|
|