Fix some things

This commit is contained in:
Chris Watson 2019-07-01 15:56:46 -07:00
parent 642ff912b5
commit 3ccc63da7d
No known key found for this signature in database
GPG Key ID: 37DAEF5F446370A4
5 changed files with 19 additions and 14 deletions

View File

@ -70,11 +70,7 @@ module Arachnid
@pool.shift.get @pool.shift.get
end end
break if @paused break if @paused || @queue.empty?
if @queue.empty?
sleep(1)
break if @queue.empty?
end
end end
end end
end end

View File

@ -105,8 +105,10 @@ module Arachnid
USAGE USAGE
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan" option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
option "-d", "--depth NUM", type: Int32, desc: "maximum depth to scan"
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10 option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)" option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)"
option "-a", "--match PATTERNS", type: Array(String), desc: "url patterns to match (regex)"
option "-o DIR", "--outdir=DIR", type: String, desc: "directory to save images to", default: "./imgd-downloads" option "-o DIR", "--outdir=DIR", type: String, desc: "directory to save images to", default: "./imgd-downloads"
option "-m NUM", "--minsize=NUM", type: Int32, desc: "image minimum size (in bytes)" option "-m NUM", "--minsize=NUM", type: Int32, desc: "image minimum size (in bytes)"
option "-x NUM", "--maxsize=NUM", type: Int32, desc: "image maximum size (in bytes)" option "-x NUM", "--maxsize=NUM", type: Int32, desc: "image maximum size (in bytes)"

View File

@ -13,7 +13,7 @@ module Arachnid
count = 0 count = 0
outdir = File.expand_path(opts.outdir, __DIR__) outdir = File.expand_path(opts.outdir, __DIR__)
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers) spider = Arachnid::Agent.new(limit: opts.limit, max_depth: opts.depth, fibers: opts.fibers)
spider.visit_urls_like(Regex.new(url.to_s)) spider.visit_urls_like(Regex.new(url.to_s))
opts.ignore.each do |pattern| opts.ignore.each do |pattern|
@ -21,6 +21,15 @@ module Arachnid
spider.ignore_urls_like(pattern) spider.ignore_urls_like(pattern)
end end
opts.match.each do |pattern|
pattern = Regex.new(pattern)
spider.visit_urls_like(pattern)
end
spider.every_html_page do |page|
spinner.message = "Scanning #{page.url.to_s}"
end
spider.every_image do |res| spider.every_image do |res|
next if opts.minsize && res.body.bytesize < opts.minsize.not_nil! next if opts.minsize && res.body.bytesize < opts.minsize.not_nil!
next if opts.maxsize && res.body.bytesize > opts.maxsize.not_nil! next if opts.maxsize && res.body.bytesize > opts.maxsize.not_nil!
@ -52,7 +61,7 @@ module Arachnid
ext = extensions.first? || ".unknown" ext = extensions.first? || ".unknown"
end end
filename = basename + ext "#{basename}-#{index}#{ext}"
end end
end end
end end

View File

@ -16,7 +16,7 @@ module Arachnid
spinner = Spinner::Spinner.new("Wait...") spinner = Spinner::Spinner.new("Wait...")
spider = Arachnid::Agent.new(fibers: opts.fibers) spider = Arachnid::Agent.new(fibers: opts.fibers)
spider.visit_urls_like(Regex.new(Regex.escape(url.to_s))) spider.visit_urls_like(Regex.new(url.to_s))
opts.ignore.each do |pattern| opts.ignore.each do |pattern|
pattern = Regex.new(pattern) pattern = Regex.new(pattern)

View File

@ -6,14 +6,12 @@ module Arachnid
class Cli < Clim class Cli < Clim
class Summarize < Cli::Action class Summarize < Cli::Action
def run(opts, urls) def run(opts, args)
url = URI.parse(args[0])
spinner = Spinner::Spinner.new("Wait...") spinner = Spinner::Spinner.new("Wait...")
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers) spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers)
spider.visit_urls_like(Regex.new(url.to_s))
urls.each do |url|
spider.visit_urls_like(Regex.new(url))
end
opts.ignore.each do |pattern| opts.ignore.each do |pattern|
pattern = Regex.new(pattern) pattern = Regex.new(pattern)
@ -50,7 +48,7 @@ module Arachnid
end end
end end
spider.start_at(urls[0]) spider.start_at(url)
spinner.stop("Finished scanning!\n") spinner.stop("Finished scanning!\n")
generate_report( generate_report(