Files

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

32 lines
760 B
Ruby
Raw Permalink Normal View History

2010-05-03 17:13:09 +00:00
##
2017-07-24 06:26:21 -07:00
# This module requires Metasploit: https://metasploit.com/download
2017-03-13 17:36:21 +01:00
# Current source: https://github.com/rapid7/metasploit-framework
2010-05-03 17:13:09 +00:00
##
2010-02-06 05:16:29 +00:00
require 'pathname'
2014-07-17 00:14:07 +02:00
require 'nokogiri'
2010-01-26 04:21:07 +00:00
require 'uri'
2010-03-21 00:13:12 +00:00
# Crawler parser that collects the targets of HTML anchor tags.
#
# Each usable <a href="..."> found in an HTML response is converted with
# +urltohash+ and passed to +insertnewpath+. Neither helper is defined in
# this file; presumably both are provided by BaseParser (verify there).
class CrawlerSimple < BaseParser

  # Scans an HTML response for anchor tags and registers their targets.
  #
  # @param request object indexable with 'uri'; that value is forwarded to
  #   +urltohash+ as its third argument
  # @param result response object; must support header lookup via #[] and
  #   expose #body
  # @return [nil] when the response is not reported as text/html
  def parse(request, result)
    # The header lookup may yield nil when the response carries no
    # Content-Type; coerce to a String so such responses are skipped
    # instead of raising NoMethodError.
    return unless result['Content-Type'].to_s.include?('text/html')

    doc = Nokogiri::HTML(result.body.to_s)
    doc.css('a').each do |anchor_tag|
      hr = anchor_tag['href']

      # Ignore anchors without an href, in-page fragments and javascript: links.
      next if hr.nil? || hr.match(/^(\#|javascript\:)/)

      begin
        hreq = urltohash('GET', hr, request['uri'], nil)
        insertnewpath(hreq)
      rescue URI::InvalidURIError
        # Best effort: a link that cannot be parsed as a URI is skipped so
        # the remaining anchors are still processed.
      end
    end
  end
end