##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres et [at] metasploit.com 2010
#
#

# openssl must be required before rubygems on Mac OS
require 'English'
require 'openssl'
require 'pathname'
require 'uri'
require 'rinda/rinda'
require 'rinda/tuplespace'

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(
      update_info(
        info,
        'Name' => 'Metasploit Web Crawler',
        'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
        'Author' => 'et',
        'License' => MSF_LICENSE,
        'Notes' => {
          'Stability' => [CRASH_SAFE],
          'SideEffects' => [IOC_IN_LOGS],
          'Reliability' => []
        }
      )
    )

    register_options([
      OptString.new('PATH', [true, 'Starting crawling path', '/']),
      OptInt.new('RPORT', [true, 'Remote port', 80 ])
    ])

    register_advanced_options([
      OptPath.new(
        'CrawlerModulesDir',
        [
          true,
          'The base directory containing the crawler modules',
          File.join(Msf::Config.data_directory, 'msfcrawler')
        ]
      ),
      OptBool.new('EnableUl', [ false, 'Enable maximum number of requests per URI', true ]),
      OptBool.new('StoreDB', [ false, 'Store requests in database', false ]),
      OptInt.new('MaxUriLimit', [ true, 'Maximum number of requests per URI', 10]),
      OptInt.new('SleepTime', [ true, 'Sleep time (secs) between requests', 0]),
      OptInt.new('TakeTimeout', [ true, 'Timeout for loop ending', 15]),
      OptInt.new('ReadTimeout', [ true, 'Read timeout (-1 forever)', 3]),
      OptInt.new('ThreadNum', [ true, 'Number of threads', 20]),
      OptString.new('DontCrawl', [true, 'Filetypes not to crawl', '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end

  attr_accessor :ctarget, :cport, :cssl, :cinipath
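
  # Main crawl loop: seed the Rinda tuplespace with the initial request,
  # load the crawler modules, then keep taking pending requests from the
  # queue and fetching them until TakeTimeout expires with no new work.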
  def run
    # i = 0
    # a = []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    # Keep the starting path on an accessor so crawler modules can reach it
    # through BaseParser#targetinipath
    self.cinipath = (inipath.nil? || inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost' => ctarget,
      'rport' => cport,
      'uri' => cinipath,
      'method' => 'GET',
      'ctype' => 'text/plain',
      'ssl' => cssl,
      'query' => nil,
      'data' => nil
    }

    @not_viewed_queue = ::Rinda::TupleSpace.new
    @viewed_queue = Hash.new
    @uri_limits = Hash.new
    @current_site = ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status('OK')

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per URI)")
    end

    print_status("Target: #{ctarget} Port: #{cport} Path: #{cinipath} SSL: #{cssl}")

    begin
      reqfilter = reqtemplate(ctarget, cport, cssl)

      # i = 0

      loop do
        ####
        # if i <= datastore['ThreadNum']
        # a.push(Thread.new {
        ####

        hashreq = @not_viewed_queue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @uri_limits.include?(hashreq['uri']) && datastore['EnableUl']
          # puts "Request #{@uri_limits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @uri_limits[hashreq['uri']] >= datastore['MaxUriLimit']
            # puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @uri_limits[hashreq['uri']] = 0
        end

        if !@viewed_queue.include?(hashsig(hashreq)) && !ul

          @viewed_queue[hashsig(hashreq)] = Time.now
          @uri_limits[hashreq['uri']] += 1

          if !File.extname(hashreq['uri']).empty? && datastore['DontCrawl'].include?(File.extname(hashreq['uri']))
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            # if self.useproxy
            #   prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            # end

            c = Rex::Proto::Http::Client.new(
              ctarget,
              cport.to_i,
              {},
              cssl,
              nil,
              prx
            )

            sendreq(c, hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited."
        end

        ####
        # })

        # i += 1
        # else
        #   sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        #   i = 0
        # end
        ####
      end
    rescue ::Rinda::RequestExpiredError
      print_status('END.')
      return
    end

    print_status('Finished crawling')
  end
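
  # Rinda tuple template for the current target/port/SSL setting; nil
  # fields act as wildcards when taking entries from the not-viewed queue.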
  def reqtemplate(target, port, ssl)
    hreq = {
      'rhost' => target,
      'rport' => port,
      'uri' => nil,
      'method' => nil,
      'ctype' => nil,
      'ssl' => ssl,
      'query' => nil,
      'data' => nil
    }

    return hreq
  end
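
  # Persist a crawled page to the database via report_web_page.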
  def storedb(hashreq, response)
    # Added host/port/ssl for report_web_page support
    info = {
      web_site: @current_site,
      path: hashreq['uri'],
      query: hashreq['query'],
      host: hashreq['rhost'],
      port: hashreq['rport'],
      ssl: !hashreq['ssl'].nil?,
      data: hashreq['data'],
      code: response.code,
      body: response.body,
      headers: response.headers
    }

    # if response['content-type']
    #   info[:ctype] = response['content-type'][0]
    # end

    # if response['set-cookie']
    #   info[:cookie] = page.headers['set-cookie'].join("\n")
    # end

    # if page.headers['authorization']
    #   info[:auth] = page.headers['authorization'].join("\n")
    # end

    # if page.headers['location']
    #   info[:location] = page.headers['location'][0]
    # end

    # if page.headers['last-modified']
    #   info[:mtime] = page.headers['last-modified'][0]
    # end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <mmo@remote-exploit.org>
  #

  def load_modules(crawlermodulesdir)
    base = crawlermodulesdir
    if !File.directory?(base)
      raise 'The Crawler modules parameter is set to an invalid directory'
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = ::Regexp.last_match(1)
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue StandardError => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end
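
  # Send one HTTP request for a queue entry, optionally store it via
  # storedb, hand 200 responses to every loaded crawler module, and queue
  # 301-303 redirect targets as new GET requests.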
  def sendreq(nclient, reqopts = {})
    r = nclient.request_raw(reqopts)
    resp = nclient.send_recv(r, datastore['ReadTimeout'])

    unless resp
      print_status('No response')
      sleep(datastore['SleepTime'])
      return
    end

    #
    # Quickfix for bug packet.rb to_s line: 190
    # In case modules or crawler calls to_s on de-chunked responses
    #
    resp.transfer_chunked = false

    if datastore['StoreDB']
      storedb(reqopts, resp)
    end

    print_status ">> [#{resp.code}] #{reqopts['uri']}"

    if reqopts['query'] && !reqopts['query'].empty?
      print_status ">>> [Q] #{reqopts['query']}"
    end

    if reqopts['data']
      print_status ">>> [D] #{reqopts['data']}"
    end

    case resp.code
    when 200
      @crawlermodules.each_key do |k|
        @crawlermodules[k].parse(reqopts, resp)
      end
    when 301..303
      print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
      vprint_status urltohash('GET', resp['Location'], reqopts['uri'], nil)
      insertnewpath(urltohash('GET', resp['Location'], reqopts['uri'], nil))
    when 404
      print_status "[404] Invalid link #{reqopts['uri']}"
    else
      print_status "Unhandled #{resp.code}"
    end

    sleep(datastore['SleepTime'])
  rescue StandardError => e
    print_status("Error: #{e.message}")
    vprint_status("#{$ERROR_INFO}: #{$ERROR_INFO.backtrace}")
  end

  #
  # Add a new path (URI) to the not-yet-viewed queue of requests to test
  #

  def insertnewpath(hashreq)
    hashreq['uri'] = canonicalize(hashreq['uri'])

    if (hashreq['rhost'] == datastore['RHOSTS']) && (hashreq['rport'] == datastore['RPORT'])
      if !@viewed_queue.include?(hashsig(hashreq))
        if !@not_viewed_queue.read_all(hashreq).empty?
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @not_viewed_queue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@viewed_queue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new request hash for a URL, resolving relative paths against basepath
  #

  def urltohash(method, url, basepath, dat)
    # method: HTTP method
    # url: uri?[query]
    # basepath: base path/uri to determine absolute path when relative
    # dat: request data; becomes the query string for GET, the body otherwise

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == 'https') ? true : false

    uritargethost = uri.host
    if uri.host.nil? || uri.host.empty?
      uritargethost = ctarget
      uritargetssl = cssl
    end

    uritargetport = uri.port
    if uri.port.nil?
      uritargetport = cport
    end

    uritargetpath = uri.path
    if uri.path.nil? || uri.path.empty?
      uritargetpath = '/'
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1, 1] == '/'
        newp = oldp + newp
      elsif !newp.to_s.empty?
        newp = File.join(oldp.dirname, newp)
      end
    end

    hashreq = {
      'rhost' => uritargethost,
      'rport' => uritargetport,
      'uri' => newp.to_s,
      'method' => method,
      'ctype' => 'text/plain',
      'ssl' => uritargetssl,
      'query' => uri.query,
      'data' => nil
    }

    if (method == 'GET') && !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end
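
  # Normalize a URI and collapse '.'/'..' path segments so equivalent
  # paths produce identical queue entries.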
  def canonicalize(uri)
    uri = URI(uri) unless uri.is_a?(URI)
    uri.normalize!

    path = uri.path.dup
    segments = path.split('/')
    resolved = []

    segments.each do |segment|
      next if segment == '.' || segment.empty?

      if segment == '..'
        resolved.pop unless resolved.empty?
      else
        resolved << segment
      end
    end

    uri.path = '/' + resolved.join('/')
    uri.to_s
  end
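
  # Signature used to de-duplicate requests in @viewed_queue (the string
  # form of the whole request hash).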
  def hashsig(hashreq)
    hashreq.to_s
  end
end
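
# Helper base class for the crawler modules loaded from CrawlerModulesDir;
# each module defines a Crawler* class that overrides parse to extract new
# paths from a response and feed them back through insertnewpath.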
class BaseParser
  attr_accessor :crawler

  def initialize(crawler)
    self.crawler = crawler
  end

  def parse(_request, _result)
    nil
  end

  #
  # Add a new path (URI) to the crawler's queue
  #
  def insertnewpath(hashreq)
    crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    crawler.hashsig(hashreq)
  end

  def urltohash(method, url, basepath, dat)
    crawler.urltohash(method, url, basepath, dat)
  end

  def targetssl
    crawler.cssl
  end

  def targetport
    crawler.cport
  end

  def targethost
    crawler.ctarget
  end

  def targetinipath
    crawler.cinipath
  end
end