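Fixed some things: wait one second and re-check an empty queue before stopping, remove the debug `pp pattern` output, and group matched status-code URLs by referrer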
This commit is contained in:
parent
18eb31c0ce
commit
11207b60f6
|
@ -70,7 +70,11 @@ module Arachnid
|
||||||
@pool.shift.get
|
@pool.shift.get
|
||||||
end
|
end
|
||||||
|
|
||||||
break if @queue.empty? || @paused
|
break if @paused
|
||||||
|
if @queue.empty?
|
||||||
|
sleep(1)
|
||||||
|
break if @queue.empty?
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -20,7 +20,6 @@ module Arachnid
|
||||||
|
|
||||||
opts.ignore.each do |pattern|
|
opts.ignore.each do |pattern|
|
||||||
pattern = Regex.new(pattern)
|
pattern = Regex.new(pattern)
|
||||||
pp pattern
|
|
||||||
spider.ignore_urls_like(pattern)
|
spider.ignore_urls_like(pattern)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -17,14 +17,13 @@ module Arachnid
|
||||||
|
|
||||||
opts.ignore.each do |pattern|
|
opts.ignore.each do |pattern|
|
||||||
pattern = Regex.new(pattern)
|
pattern = Regex.new(pattern)
|
||||||
pp pattern
|
|
||||||
spider.ignore_urls_like(pattern)
|
spider.ignore_urls_like(pattern)
|
||||||
end
|
end
|
||||||
|
|
||||||
pages = 0
|
pages = 0
|
||||||
internal_links = Hash(String, Array(String)).new
|
internal_links = Hash(String, Array(String)).new
|
||||||
external_links = Hash(String, Array(String)).new
|
external_links = Hash(String, Array(String)).new
|
||||||
codes = Hash(Int32, Array(String)).new
|
codes = Hash(Int32, Hash(String, Array(String))).new
|
||||||
|
|
||||||
spinner.start("Crawling...")
|
spinner.start("Crawling...")
|
||||||
|
|
||||||
|
@ -32,8 +31,10 @@ module Arachnid
|
||||||
pages += 1
|
pages += 1
|
||||||
|
|
||||||
if opts.codes.includes?(page.code)
|
if opts.codes.includes?(page.code)
|
||||||
codes[page.code] ||= [] of String
|
referrer = page.headers["Referer"]? || "unknown"
|
||||||
codes[page.code] << page.url.to_s
|
codes[page.code] ||= {} of String => Array(String)
|
||||||
|
codes[page.code][referrer] ||= [] of String
|
||||||
|
codes[page.code][referrer] << page.url.to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
spinner.message = "Scanning #{page.url.to_s}"
|
spinner.message = "Scanning #{page.url.to_s}"
|
||||||
|
@ -62,7 +63,7 @@ module Arachnid
|
||||||
end
|
end
|
||||||
|
|
||||||
def generate_report(outfile, pages, internal_links, external_links, codes)
|
def generate_report(outfile, pages, internal_links, external_links, codes)
|
||||||
report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32
|
report = {} of String => Hash(String, Array(String)) | Hash(Int32, Hash(String, Array(String))) | Int32
|
||||||
|
|
||||||
report["pages"] = pages
|
report["pages"] = pages
|
||||||
report["internal_links"] = internal_links if internal_links
|
report["internal_links"] = internal_links if internal_links
|
||||||
|
|
Loading…
Reference in New Issue