Retab all the things (except external/)

This commit is contained in:
Tab Assassin
2013-09-30 13:47:53 -05:00
parent 0ecba377f5
commit 2e8d19edcf
293 changed files with 32962 additions and 32962 deletions
+18 -18
View File
@@ -18,29 +18,29 @@ require 'uri'
class CrawlerSimple < BaseParser
def parse(request,result)
def parse(request,result)
if !result['Content-Type'].include? "text/html"
return
end
if !result['Content-Type'].include? "text/html"
return
end
doc = Hpricot(result.body.to_s)
doc.search('a').each do |link|
doc = Hpricot(result.body.to_s)
doc.search('a').each do |link|
hr = link.attributes['href']
hr = link.attributes['href']
if hr and !hr.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET',hr,request['uri'],nil)
if hr and !hr.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET',hr,request['uri'],nil)
insertnewpath(hreq)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end
end
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end
end
end