Fix premature crawl-loop exit on an empty queue, remove debug output, and group status codes by referrer
This commit is contained in:
parent
18eb31c0ce
commit
11207b60f6
|
@ -70,7 +70,11 @@ module Arachnid
|
|||
@pool.shift.get
|
||||
end
|
||||
|
||||
break if @queue.empty? || @paused
|
||||
break if @paused
|
||||
if @queue.empty?
|
||||
sleep(1)
|
||||
break if @queue.empty?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -20,7 +20,6 @@ module Arachnid
|
|||
|
||||
opts.ignore.each do |pattern|
|
||||
pattern = Regex.new(pattern)
|
||||
pp pattern
|
||||
spider.ignore_urls_like(pattern)
|
||||
end
|
||||
|
||||
|
|
|
@ -17,14 +17,13 @@ module Arachnid
|
|||
|
||||
opts.ignore.each do |pattern|
|
||||
pattern = Regex.new(pattern)
|
||||
pp pattern
|
||||
spider.ignore_urls_like(pattern)
|
||||
end
|
||||
|
||||
pages = 0
|
||||
internal_links = Hash(String, Array(String)).new
|
||||
external_links = Hash(String, Array(String)).new
|
||||
codes = Hash(Int32, Array(String)).new
|
||||
codes = Hash(Int32, Hash(String, Array(String))).new
|
||||
|
||||
spinner.start("Crawling...")
|
||||
|
||||
|
@ -32,8 +31,10 @@ module Arachnid
|
|||
pages += 1
|
||||
|
||||
if opts.codes.includes?(page.code)
|
||||
codes[page.code] ||= [] of String
|
||||
codes[page.code] << page.url.to_s
|
||||
referrer = page.headers["Referer"]? || "unknown"
|
||||
codes[page.code] ||= {} of String => Array(String)
|
||||
codes[page.code][referrer] ||= [] of String
|
||||
codes[page.code][referrer] << page.url.to_s
|
||||
end
|
||||
|
||||
spinner.message = "Scanning #{page.url.to_s}"
|
||||
|
@ -62,7 +63,7 @@ module Arachnid
|
|||
end
|
||||
|
||||
def generate_report(outfile, pages, internal_links, external_links, codes)
|
||||
report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32
|
||||
report = {} of String => Hash(String, Array(String)) | Hash(Int32, Hash(String, Array(String))) | Int32
|
||||
|
||||
report["pages"] = pages
|
||||
report["internal_links"] = internal_links if internal_links
|
||||
|
|
Loading…
Reference in New Issue