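Fixed some things: wait and re-check before stopping on an empty queue, remove debug `pp` output, and group status-code results by referrer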

This commit is contained in:
Chris Watson 2019-07-01 10:27:55 -07:00
parent 18eb31c0ce
commit 11207b60f6
No known key found for this signature in database
GPG Key ID: 37DAEF5F446370A4
3 changed files with 11 additions and 7 deletions

View File

@@ -70,7 +70,11 @@ module Arachnid
@pool.shift.get @pool.shift.get
end end
break if @queue.empty? || @paused break if @paused
if @queue.empty?
sleep(1)
break if @queue.empty?
end
end end
end end
end end

View File

@@ -20,7 +20,6 @@ module Arachnid
opts.ignore.each do |pattern| opts.ignore.each do |pattern|
pattern = Regex.new(pattern) pattern = Regex.new(pattern)
pp pattern
spider.ignore_urls_like(pattern) spider.ignore_urls_like(pattern)
end end

View File

@@ -17,14 +17,13 @@ module Arachnid
opts.ignore.each do |pattern| opts.ignore.each do |pattern|
pattern = Regex.new(pattern) pattern = Regex.new(pattern)
pp pattern
spider.ignore_urls_like(pattern) spider.ignore_urls_like(pattern)
end end
pages = 0 pages = 0
internal_links = Hash(String, Array(String)).new internal_links = Hash(String, Array(String)).new
external_links = Hash(String, Array(String)).new external_links = Hash(String, Array(String)).new
codes = Hash(Int32, Array(String)).new codes = Hash(Int32, Hash(String, Array(String))).new
spinner.start("Crawling...") spinner.start("Crawling...")
@@ -32,8 +31,10 @@ module Arachnid
pages += 1 pages += 1
if opts.codes.includes?(page.code) if opts.codes.includes?(page.code)
codes[page.code] ||= [] of String referrer = page.headers["Referer"]? || "unknown"
codes[page.code] << page.url.to_s codes[page.code] ||= {} of String => Array(String)
codes[page.code][referrer] ||= [] of String
codes[page.code][referrer] << page.url.to_s
end end
spinner.message = "Scanning #{page.url.to_s}" spinner.message = "Scanning #{page.url.to_s}"
@@ -62,7 +63,7 @@ module Arachnid
end end
def generate_report(outfile, pages, internal_links, external_links, codes) def generate_report(outfile, pages, internal_links, external_links, codes)
report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32 report = {} of String => Hash(String, Array(String)) | Hash(Int32, Hash(String, Array(String))) | Int32
report["pages"] = pages report["pages"] = pages
report["internal_links"] = internal_links if internal_links report["internal_links"] = internal_links if internal_links