Updated cli to use fibers

Chris Watson 2019-07-01 09:17:41 -07:00
parent 4db9350336
commit bfe66ad290
GPG Key ID: 37DAEF5F446370A4
4 changed files with 8 additions and 2 deletions


@@ -96,6 +96,9 @@ module Arachnid
@fetch_delay = fetch_delay || 0
@history = history || Set(URI).new
@failures = Set(URI).new
raise "Cannot have less than 1 fiber" unless fibers.nil? || fibers > 0
@queue = Queue(URI).new(queue, fibers)
@limit = limit
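
For reference, a minimal sketch of how the new argument behaves, assuming only the constructor keywords visible in this hunk (the values are placeholders, not part of the diff):

    # The fiber count is handed to the internal work queue, which presumably
    # caps how many worker fibers are spun up at once.
    agent = Arachnid::Agent.new(fibers: 10)

    # The new guard clause rejects anything below 1.
    Arachnid::Agent.new(fibers: 0) # raises "Cannot have less than 1 fiber"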


@@ -8,6 +8,7 @@ module Arachnid
desc "Arachnid CLI - Simple utilities for scanning the web."
usage "arachnid [options] [subcommand] [arguments] ..."
version Arachnid::VERSION
run do |opts, args|
puts opts.help_string # => help string.
end
@@ -37,6 +38,7 @@ module Arachnid
option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages \
that responded with that code"
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to (if undefined \
output will be printed to STDOUT"
@@ -72,6 +74,7 @@ module Arachnid
option "--json", type: Bool, desc: "generate the sitemap in JSON format"
option "-o FILE", "--output=FILE", type: String, desc: "filename to write the report to. \
default is the hostname + .json or .xml"
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
run do |opts, args|
if args.size != 1
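
Both subcommands above pick up the same flag, so the fiber count can now be set per run from the shell, roughly like this (the subcommand names and URL are assumptions for illustration and do not appear in this diff; only -n, -f, and --json come from the options above):

    arachnid summarize https://example.com -n 100 -f 20
    arachnid sitemap https://example.com --json -f 20

If -f/--fibers is omitted, the declared default of 10 fibers applies.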


@@ -15,7 +15,7 @@ module Arachnid
date = Time.now
spinner = Spinner::Spinner.new("Wait...")
spider = Arachnid::Agent.new
spider = Arachnid::Agent.new(fibers: opts.fibers)
spider.visit_urls_like(Regex.new(Regex.escape(url.to_s)))
map = {


@@ -10,7 +10,7 @@ module Arachnid
def run(opts, urls)
spinner = Spinner::Spinner.new("Wait...")
spider = Arachnid::Agent.new(limit: opts.limit)
spider = Arachnid::Agent.new(limit: opts.limit, fibers: opts.fibers)
urls.each do |url|
spider.visit_urls_like(Regex.new(url))
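
Put together, the subcommands now build their agents roughly as follows, using only calls that appear in these hunks (the URL and numbers are placeholders). Regex.new(Regex.escape(url)) turns the start URL into a literal pattern, so visit_urls_like presumably keeps the spider on URLs containing that exact string:

    spider = Arachnid::Agent.new(limit: 100, fibers: 20)
    spider.visit_urls_like(Regex.new(Regex.escape("https://example.com")))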