Added cli

Chris Watson 2019-06-30 14:07:41 -07:00
parent 6f63c92780
commit 611a0091cb
16 changed files with 165 additions and 11 deletions


@@ -8,6 +8,15 @@ dependencies:
  halite:
    github: icyleaf/halite
    version: ~> 0.10.2
  termspinner:
    github: eliobr/termspinner
  clim:
    github: at-grandpa/clim
    version: 0.7.0
targets:
  arachnid:
    main: src/arachnid/cli.cr
crystal: 0.29.0
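With clim and termspinner added to the dependency list and the arachnid target now pointing at src/arachnid/cli.cr, the usual workflow would be to run shards install to fetch the new shards and shards build arachnid to compile the CLI (standard shards usage; the commands themselves are not part of this commit).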


@@ -0,0 +1,29 @@
require "../spec_helper"

describe Arachnid::Rules do
  it "should create a new Rules object" do
    rules = Arachnid::Rules(String).new
    rules.should_not be_nil
    rules.accept.should be_empty
    rules.reject.should be_empty
  end

  it "should allow values for 'accept' and 'reject' in initializer" do
    accept_proc = ->(string : String) { true }
    reject_proc = ->(string : String) { false }
    rules = Arachnid::Rules(String).new(accept: [accept_proc], reject: [reject_proc])
    rules.accept.should contain accept_proc
    rules.reject.should contain reject_proc
  end

  describe "#accept?" do
  end

  describe "#reject?" do
  end
end
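The spec only pins down the constructor, so a minimal Rules(T) consistent with it might look roughly like this (a sketch inferred from the spec; the accept?/reject? semantics are assumptions, and the actual implementation in this repository may differ):

module Arachnid
  # Sketch only: inferred from the spec above, not the file shipped in this commit.
  class Rules(T)
    getter accept : Array(Proc(T, Bool))
    getter reject : Array(Proc(T, Bool))

    def initialize(@accept = [] of Proc(T, Bool), @reject = [] of Proc(T, Bool))
    end

    # Assumed: accept everything when no accept rules exist, otherwise
    # accept when any accept rule returns true.
    def accept?(value : T) : Bool
      accept.empty? || accept.any?(&.call(value))
    end

    # Assumed: reject when any reject rule returns true.
    def reject?(value : T) : Bool
      reject.any?(&.call(value))
    end
  end
end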


spec/arachnid_spec.cr (new file, 5 lines)

@@ -0,0 +1,5 @@
require "./spec_helper"

describe Arachnid do
end


@@ -1,9 +0,0 @@
require "./spec_helper"

describe Arachnid do
  # TODO: Write tests
  it "works" do
    false.should eq(true)
  end
end


@@ -1,2 +1 @@
require "./arachnid/version"
require "./arachnid/arachnid"


@@ -1,3 +1,4 @@
require "./version"
require "./resource"
require "./agent"


@@ -18,7 +18,7 @@ module Arachnid
      return nil unless paths

      # longest path first
      ordered_paths = paths.keys.sort { |path_key| -path_key.size }

      # directories of the path
      path_dirs = URI.expand_path(url.path).split('/').reject(&.empty?)
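As a standalone illustration of the "longest path first" ordering, sorting by negative length puts the most specific path ahead of its parents (sort_by is the single-block-argument form of this sort):

paths = ["/", "/blog", "/blog/2019"]
p paths.sort_by { |path| -path.size } # => ["/blog/2019", "/blog", "/"]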

src/arachnid/cli.cr (new file, 39 lines)

@@ -0,0 +1,39 @@
require "clim"
require "./version"
require "./cli/**"

module Arachnid
  class Cli < Clim
    main do
      desc "Arachnid CLI - Simple utilities for scanning the web."
      usage "arachnid [options] [subcommand] [arguments] ..."
      version Arachnid::VERSION

      run do |opts, args|
        puts opts.help_string # => help string.
      end

      sub "summarize" do
        desc "Scan a site (or sites) and generate a JSON report"
        usage "arachnid summarize [sites] [options] ..."

        option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
        option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
        option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages that responded with that code"
        option "-n NUM", "--limit=NUM", type: Int32, desc: "maximum number of pages to scan"
        option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to", default: "arachnid.json"

        run do |opts, args|
          count = Arachnid::Cli::Count.new

          if args.empty?
            STDERR.puts "At least one site is required"
          else
            count.run(opts, args)
          end
        end
      end
    end
  end
end

Arachnid::Cli.start(ARGV)
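For context, an illustrative invocation of the CLI defined above (example.com is a placeholder, and clim's Array(Int32) options such as -c are assumed to be passed once per value):

# print the generated help text
bin/arachnid --help

# map pages that responded with 404 or 500, stop after 100 pages, write report.json
bin/arachnid summarize https://example.com -c 404 -c 500 -n 100 -o report.json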


@@ -0,0 +1,9 @@
module Arachnid
  class Cli < Clim
    abstract class Action
      abstract def run(opts, args) : Nil
    end
  end
end

src/arachnid/cli/count.cr (new file, 72 lines)

@@ -0,0 +1,72 @@
require "./action"
require "../arachnid"
require "termspinner"
require "json"

module Arachnid
  class Cli < Clim
    class Count < Cli::Action
      def run(opts, urls)
        spinner = Spinner::Spinner.new("Wait...")
        spider = Arachnid::Agent.new(limit: opts.limit)

        urls.each do |url|
          spider.visit_urls_like(Regex.new(url))
        end

        pages = 0
        internal_links = Hash(String, Array(String)).new
        external_links = Hash(String, Array(String)).new
        codes = Hash(Int32, Array(String)).new

        spinner.start("Crawling...")

        spider.every_resource do |page|
          pages += 1

          if opts.codes.includes?(page.code)
            codes[page.code] ||= [] of String
            codes[page.code] << page.url.to_s
          end

          spinner.message = "Scanning #{page.url.to_s}"
        end

        spider.every_link do |orig, dest|
          if dest.to_s.includes?(orig.to_s) || dest.relative?
            internal_links[orig.to_s] ||= [] of String
            internal_links[orig.to_s] << dest.to_s
          else
            external_links[orig.to_s] ||= [] of String
            external_links[orig.to_s] << dest.to_s
          end
        end

        spider.start_at(urls[0])
        spinner.stop("Finished scanning!\n")

        generate_report(
          opts.output,
          pages,
          opts.ilinks ? internal_links : nil,
          opts.elinks ? external_links : nil,
          opts.codes.empty? ? nil : codes
        )
      end

      def generate_report(outfile, pages, internal_links, external_links, codes)
        report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32
        report["pages"] = pages
        report["internal_links"] = internal_links if internal_links
        report["external_links"] = external_links if external_links
        report["codes"] = codes if codes

        File.write(outfile, report.to_json, mode: "w+")
        puts "Report saved to #{outfile}"
      end
    end
  end
end
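For reference, the report written by generate_report is a flat JSON object with a "pages" count plus the optional "internal_links", "external_links", and "codes" maps (integer status codes become string keys once serialized). A hypothetical way to inspect a saved report, using only Crystal's standard library:

require "json"

# Load a previously generated report (the filename is a placeholder).
report = JSON.parse(File.read("arachnid.json"))

puts report["pages"].as_i # total pages crawled
if codes = report["codes"]?
  # e.g. "404" => ["https://example.com/missing", ...]
  codes.as_h.each do |status, urls|
    puts "#{status}: #{urls.as_a.size} pages"
  end
end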