Fix some things
This commit is contained in:
parent
642ff912b5
commit
3ccc63da7d
|
@ -70,11 +70,7 @@ module Arachnid
|
||||||
@pool.shift.get
|
@pool.shift.get
|
||||||
end
|
end
|
||||||
|
|
||||||
break if @paused
|
break if @paused || @queue.empty?
|
||||||
if @queue.empty?
|
|
||||||
sleep(1)
|
|
||||||
break if @queue.empty?
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -105,8 +105,10 @@ module Arachnid
|
||||||
USAGE
|
USAGE
|
||||||
|
|
||||||
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
|
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
|
||||||
|
option "-d", "--depth NUM", type: Int32, desc: "maximum depth to scan"
|
||||||
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
||||||
option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)"
|
option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)"
|
||||||
|
option "-a", "--match PATTERNS", type: Array(String), desc: "url patterns to match (regex)"
|
||||||
option "-o DIR", "--outdir=DIR", type: String, desc: "directory to save images to", default: "./imgd-downloads"
|
option "-o DIR", "--outdir=DIR", type: String, desc: "directory to save images to", default: "./imgd-downloads"
|
||||||
option "-m NUM", "--minsize=NUM", type: Int32, desc: "image minimum size (in bytes)"
|
option "-m NUM", "--minsize=NUM", type: Int32, desc: "image minimum size (in bytes)"
|
||||||
option "-x NUM", "--maxsize=NUM", type: Int32, desc: "image maximum size (in bytes)"
|
option "-x NUM", "--maxsize=NUM", type: Int32, desc: "image maximum size (in bytes)"
|
||||||
|
|
|
@ -13,7 +13,7 @@ module Arachnid
|
||||||
count = 0
|
count = 0
|
||||||
outdir = File.expand_path(opts.outdir, __DIR__)
|
outdir = File.expand_path(opts.outdir, __DIR__)
|
||||||
|
|
||||||
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers)
|
spider = Arachnid::Agent.new(limit: opts.limit, max_depth: opts.depth, fibers: opts.fibers)
|
||||||
spider.visit_urls_like(Regex.new(url.to_s))
|
spider.visit_urls_like(Regex.new(url.to_s))
|
||||||
|
|
||||||
opts.ignore.each do |pattern|
|
opts.ignore.each do |pattern|
|
||||||
|
@ -21,6 +21,15 @@ module Arachnid
|
||||||
spider.ignore_urls_like(pattern)
|
spider.ignore_urls_like(pattern)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.match.each do |pattern|
|
||||||
|
pattern = Regex.new(pattern)
|
||||||
|
spider.visit_urls_like(pattern)
|
||||||
|
end
|
||||||
|
|
||||||
|
spider.every_html_page do |page|
|
||||||
|
spinner.message = "Scanning #{page.url.to_s}"
|
||||||
|
end
|
||||||
|
|
||||||
spider.every_image do |res|
|
spider.every_image do |res|
|
||||||
next if opts.minsize && res.body.bytesize < opts.minsize.not_nil!
|
next if opts.minsize && res.body.bytesize < opts.minsize.not_nil!
|
||||||
next if opts.maxsize && res.body.bytesize > opts.maxsize.not_nil!
|
next if opts.maxsize && res.body.bytesize > opts.maxsize.not_nil!
|
||||||
|
@ -52,7 +61,7 @@ module Arachnid
|
||||||
ext = extensions.first? || ".unknown"
|
ext = extensions.first? || ".unknown"
|
||||||
end
|
end
|
||||||
|
|
||||||
filename = basename + ext
|
"#{basename}-#{index}#{ext}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,7 +16,7 @@ module Arachnid
|
||||||
spinner = Spinner::Spinner.new("Wait...")
|
spinner = Spinner::Spinner.new("Wait...")
|
||||||
|
|
||||||
spider = Arachnid::Agent.new(fibers: opts.fibers)
|
spider = Arachnid::Agent.new(fibers: opts.fibers)
|
||||||
spider.visit_urls_like(Regex.new(Regex.escape(url.to_s)))
|
spider.visit_urls_like(Regex.new(url.to_s))
|
||||||
|
|
||||||
opts.ignore.each do |pattern|
|
opts.ignore.each do |pattern|
|
||||||
pattern = Regex.new(pattern)
|
pattern = Regex.new(pattern)
|
||||||
|
|
|
@ -6,14 +6,12 @@ module Arachnid
|
||||||
class Cli < Clim
|
class Cli < Clim
|
||||||
class Summarize < Cli::Action
|
class Summarize < Cli::Action
|
||||||
|
|
||||||
def run(opts, urls)
|
def run(opts, args)
|
||||||
|
url = URI.parse(args[0])
|
||||||
spinner = Spinner::Spinner.new("Wait...")
|
spinner = Spinner::Spinner.new("Wait...")
|
||||||
|
|
||||||
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers)
|
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers)
|
||||||
|
spider.visit_urls_like(Regex.new(url.to_s))
|
||||||
urls.each do |url|
|
|
||||||
spider.visit_urls_like(Regex.new(url))
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.ignore.each do |pattern|
|
opts.ignore.each do |pattern|
|
||||||
pattern = Regex.new(pattern)
|
pattern = Regex.new(pattern)
|
||||||
|
@ -50,7 +48,7 @@ module Arachnid
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
spider.start_at(urls[0])
|
spider.start_at(url)
|
||||||
spinner.stop("Finished scanning!\n")
|
spinner.stop("Finished scanning!\n")
|
||||||
|
|
||||||
generate_report(
|
generate_report(
|
||||||
|
|
Loading…
Reference in New Issue