Added cli
Commit 611a0091cb (parent: 6f63c92780)

@@ -8,6 +8,15 @@ dependencies:
  halite:
    github: icyleaf/halite
    version: ~> 0.10.2
  termspinner:
    github: eliobr/termspinner
  clim:
    github: at-grandpa/clim
    version: 0.7.0

targets:
  arachnid:
    main: src/arachnid/cli.cr

crystal: 0.29.0
@ -0,0 +1,29 @@
|
|||
require "../spec_helper"

describe Arachnid::Rules do
  it "should create a new Rules object" do
    rules = Arachnid::Rules(String).new
    rules.should_not be_nil
    # A freshly created Rules object starts with no accept/reject filters.
    rules.accept.should be_empty
    rules.reject.should be_empty
  end

  it "should allow values for 'accept' and 'reject' in initializer" do
    accept_proc = ->(string : String) { true }
    reject_proc = ->(string : String) { false }

    rules = Arachnid::Rules(String).new(accept: [accept_proc], reject: [reject_proc])
    rules.accept.should contain accept_proc
    rules.reject.should contain reject_proc
  end

  # TODO(review): pending examples — behavior of #accept? / #reject? is not
  # yet specified here.
  describe "#accept?" do
  end

  describe "#reject?" do
  end
end
|
@ -0,0 +1,5 @@
|
|||
require "./spec_helper"

# Top-level spec placeholder for the Arachnid module; examples live in
# the per-class spec files.
describe Arachnid do
end
|
@ -1,9 +0,0 @@
|
|||
require "./spec_helper"

# NOTE(review): this is the DELETED version of the spec (diff hunk -1,9 +0,0);
# preserved verbatim for the record. The intentionally failing placeholder
# ("works" expecting false == true) was removed by this commit.
describe Arachnid do
  # TODO: Write tests

  it "works" do
    false.should eq(true)
  end
end
|
@ -1,2 +1 @@
|
|||
require "./arachnid/version"
require "./arachnid/arachnid"
|
|
@ -1,3 +1,4 @@
|
|||
require "./version"
require "./resource"
require "./agent"
@ -0,0 +1,39 @@
|
|||
require "clim"
require "./version"
require "./cli/**"

module Arachnid
  # Command-line interface for Arachnid, built on the Clim CLI framework.
  # Running with no subcommand prints the generated help text.
  class Cli < Clim
    main do
      desc "Arachnid CLI - Simple utilities for scanning the web."
      usage "arachnid [options] [subcommand] [arguments] ..."
      version Arachnid::VERSION
      run do |opts, args|
        # No subcommand given: show usage.
        puts opts.help_string
      end

      sub "summarize" do
        desc "Scan a site (or sites) and generate a JSON report"
        usage "arachnid summarize [sites] [options] ..."

        option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
        option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
        option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages that responded with that code"
        option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
        option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to", default: "arachnid.json"

        run do |opts, args|
          count = Arachnid::Cli::Count.new
          if args.empty?
            # At least one positional site URL is required to start a scan.
            STDERR.puts "At least one site is required"
          else
            count.run(opts, args)
          end
        end
      end
    end
  end
end

Arachnid::Cli.start(ARGV)
|
@ -0,0 +1,9 @@
|
|||
module Arachnid
  class Cli < Clim
    # Base class for CLI subcommand implementations. Concrete actions
    # (e.g. Count) implement #run, receiving the parsed Clim options and
    # the positional arguments.
    abstract class Action

      abstract def run(opts, args) : Nil

    end
  end
end
|
@ -0,0 +1,72 @@
|
|||
require "./action"
require "../arachnid"
require "termspinner"
require "json"

module Arachnid
  class Cli < Clim
    # Implements the `summarize` subcommand: crawls the given URLs and
    # writes a JSON report of page counts, link maps, and status codes.
    class Count < Cli::Action

      # Crawls every URL in `urls` (the first is the start page; all are
      # added as allowed URL patterns) and generates the report selected
      # by `opts` (-l/-L/-c/-n/-o). Blocks until the crawl finishes.
      def run(opts, urls)
        spinner = Spinner::Spinner.new("Wait...")

        spider = Arachnid::Agent.new(limit: opts.limit)

        # Restrict the crawl to URLs matching any of the given sites.
        urls.each do |url|
          spider.visit_urls_like(Regex.new(url))
        end

        pages = 0
        internal_links = Hash(String, Array(String)).new
        external_links = Hash(String, Array(String)).new
        codes = Hash(Int32, Array(String)).new

        spinner.start("Crawling...")

        spider.every_resource do |page|
          pages += 1

          # Only track status codes the user asked for via -c/--codes.
          if opts.codes.includes?(page.code)
            codes[page.code] ||= [] of String
            codes[page.code] << page.url.to_s
          end

          spinner.message = "Scanning #{page.url.to_s}"
        end

        spider.every_link do |orig, dest|
          # A destination that contains the origin URL, or is relative,
          # is treated as an internal link; everything else is external.
          if dest.to_s.includes?(orig.to_s) || dest.relative?
            internal_links[orig.to_s] ||= [] of String
            internal_links[orig.to_s] << dest.to_s
          else
            external_links[orig.to_s] ||= [] of String
            external_links[orig.to_s] << dest.to_s
          end
        end

        spider.start_at(urls[0])
        spinner.stop("Finished scanning!\n")

        # nil sections are omitted from the report entirely.
        generate_report(
          opts.output,
          pages,
          opts.ilinks ? internal_links : nil,
          opts.elinks ? external_links : nil,
          opts.codes.empty? ? nil : codes
        )
      end

      # Serializes the collected crawl data to JSON and writes it to
      # `outfile`. Sections passed as nil are left out of the report.
      def generate_report(outfile, pages, internal_links, external_links, codes)
        report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32

        report["pages"] = pages
        report["internal_links"] = internal_links if internal_links
        report["external_links"] = external_links if external_links
        report["codes"] = codes if codes

        File.write(outfile, report.to_json, mode: "w+")
        puts "Report saved to #{outfile}"
      end
    end
  end
end
Loading…
Reference in New Issue