From 611a0091cbdb82f0bb79a8c9208eab4465bf60b3 Mon Sep 17 00:00:00 2001
From: Chris Watson
Date: Sun, 30 Jun 2019 14:07:41 -0700
Subject: [PATCH] Added cli

Add an `arachnid` executable target built with clim. Its `summarize`
subcommand crawls one or more sites and writes a JSON report with the
page count and, on request, internal/external link maps and a map of
status codes to pages.

Also add spec files, move the version require into
src/arachnid/arachnid.cr, and order auth store paths with `sort_by`.
---
 shard.yml                             |  9 +++
 spec/arachnid/agent_spec.cr           |  0
 spec/arachnid/auth_credential_spec.cr |  0
 spec/arachnid/auth_store_spec.cr      |  0
 spec/arachnid/cookie_jar_spec.cr      |  0
 spec/arachnid/resource_spec.cr        |  0
 spec/arachnid/rules_spec.cr           | 29 ++++++++++
 spec/arachnid/session_cache_spec.cr   |  0
 spec/arachnid_spec.cr                 |  5 ++
 spec/crepe_spec.cr                    |  9 ---
 src/arachnid.cr                       |  1 -
 src/arachnid/arachnid.cr              |  1 +
 src/arachnid/auth_store.cr            |  2 +-
 src/arachnid/cli.cr                   | 44 +++++++++++++++
 src/arachnid/cli/action.cr            | 12 ++++
 src/arachnid/cli/count.cr             | 81 +++++++++++++++++++++++++++
 16 files changed, 182 insertions(+), 11 deletions(-)
 create mode 100644 spec/arachnid/agent_spec.cr
 create mode 100644 spec/arachnid/auth_credential_spec.cr
 create mode 100644 spec/arachnid/auth_store_spec.cr
 create mode 100644 spec/arachnid/cookie_jar_spec.cr
 create mode 100644 spec/arachnid/resource_spec.cr
 create mode 100644 spec/arachnid/rules_spec.cr
 create mode 100644 spec/arachnid/session_cache_spec.cr
 create mode 100644 spec/arachnid_spec.cr
 delete mode 100644 spec/crepe_spec.cr
 create mode 100644 src/arachnid/cli.cr
 create mode 100644 src/arachnid/cli/action.cr
 create mode 100644 src/arachnid/cli/count.cr

diff --git a/shard.yml b/shard.yml
index 37b3c96..f7c1ac7 100644
--- a/shard.yml
+++ b/shard.yml
@@ -8,6 +8,15 @@ dependencies:
   halite:
     github: icyleaf/halite
     version: ~> 0.10.2
+  termspinner:
+    github: eliobr/termspinner
+  clim:
+    github: at-grandpa/clim
+    version: 0.7.0
+
+targets:
+  arachnid:
+    main: src/arachnid/cli.cr
 
 crystal: 0.29.0
 
diff --git a/spec/arachnid/agent_spec.cr b/spec/arachnid/agent_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid/auth_credential_spec.cr b/spec/arachnid/auth_credential_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid/auth_store_spec.cr b/spec/arachnid/auth_store_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid/cookie_jar_spec.cr b/spec/arachnid/cookie_jar_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid/resource_spec.cr b/spec/arachnid/resource_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid/rules_spec.cr b/spec/arachnid/rules_spec.cr
new file mode 100644
index 0000000..df88765
--- /dev/null
+++ b/spec/arachnid/rules_spec.cr
@@ -0,0 +1,29 @@
+require "../spec_helper"
+
+describe Arachnid::Rules do
+  it "should create a new Rules object" do
+    rules = Arachnid::Rules(String).new
+    rules.should_not be_nil
+    rules.accept.should be_empty
+    rules.reject.should be_empty
+  end
+
+  it "should allow values for 'accept' and 'reject' in initializer" do
+    accept_proc = ->(string : String) { true }
+    reject_proc = ->(string : String) { false }
+
+    rules = Arachnid::Rules(String).new(accept: [accept_proc], reject: [reject_proc])
+    rules.accept.should contain accept_proc
+    rules.reject.should contain reject_proc
+  end
+
+  describe "#accept?" do
+
+
+
+  end
+
+  describe "#reject?" do
+
+  end
+end
diff --git a/spec/arachnid/session_cache_spec.cr b/spec/arachnid/session_cache_spec.cr
new file mode 100644
index 0000000..e69de29
diff --git a/spec/arachnid_spec.cr b/spec/arachnid_spec.cr
new file mode 100644
index 0000000..60fa79e
--- /dev/null
+++ b/spec/arachnid_spec.cr
@@ -0,0 +1,5 @@
+require "./spec_helper"
+
+describe Arachnid do
+
+end
diff --git a/spec/crepe_spec.cr b/spec/crepe_spec.cr
deleted file mode 100644
index 7f2574a..0000000
--- a/spec/crepe_spec.cr
+++ /dev/null
@@ -1,9 +0,0 @@
-require "./spec_helper"
-
-describe Arachnid do
-  # TODO: Write tests
-
-  it "works" do
-    false.should eq(true)
-  end
-end
diff --git a/src/arachnid.cr b/src/arachnid.cr
index 21b7dd2..5ed08a4 100644
--- a/src/arachnid.cr
+++ b/src/arachnid.cr
@@ -1,2 +1 @@
-require "./arachnid/version"
 require "./arachnid/arachnid"
diff --git a/src/arachnid/arachnid.cr b/src/arachnid/arachnid.cr
index 44ceea1..33b8ebd 100644
--- a/src/arachnid/arachnid.cr
+++ b/src/arachnid/arachnid.cr
@@ -1,3 +1,4 @@
+require "./version"
 require "./resource"
 require "./agent"
 
diff --git a/src/arachnid/auth_store.cr b/src/arachnid/auth_store.cr
index bd41f66..39250b9 100644
--- a/src/arachnid/auth_store.cr
+++ b/src/arachnid/auth_store.cr
@@ -18,7 +18,7 @@ module Arachnid
       return nil unless paths
 
       # longest path first
-      ordered_paths = paths.keys.sort { |path_key| -path_key.size }
+      ordered_paths = paths.keys.sort_by { |path_key| -path_key.size }
 
       # directories of the path
       path_dirs = URI.expand_path(url.path).split('/').reject(&.empty?)
diff --git a/src/arachnid/cli.cr b/src/arachnid/cli.cr
new file mode 100644
index 0000000..d4071f7
--- /dev/null
+++ b/src/arachnid/cli.cr
@@ -0,0 +1,44 @@
+require "clim"
+require "./version"
+require "./cli/**"
+
+module Arachnid
+  # Command line entry point for Arachnid, built on the Clim DSL.
+  #
+  # Running the bare command prints the help text; the `summarize`
+  # subcommand hands its options and arguments to `Cli::Count`.
+  class Cli < Clim
+    main do
+      desc "Arachnid CLI - Simple utilities for scanning the web."
+      usage "arachnid [options] [subcommand] [arguments] ..."
+      version Arachnid::VERSION
+      run do |opts, args|
+        puts opts.help_string # => help string.
+      end
+
+      sub "summarize" do
+        desc "Scan a site (or sites) and generate a JSON report"
+        usage "arachnid summarize [sites] [options] ..."
+
+        option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
+        option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
+        option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages \
+          that responded with that code"
+        option "-n NUM", "--limit=NUM", type: Int32, desc: "maximum number of pages to scan"
+        option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to", default: "arachnid.json"
+
+        run do |opts, args|
+          count = Arachnid::Cli::Count.new
+          if args.empty?
+            STDERR.puts "At least one site is required"
+          else
+            count.run(opts, args)
+          end
+        end
+      end
+    end
+  end
+end
+
+# Parse ARGV and dispatch as soon as this file is loaded.
+Arachnid::Cli.start(ARGV)
diff --git a/src/arachnid/cli/action.cr b/src/arachnid/cli/action.cr
new file mode 100644
index 0000000..6cd05c4
--- /dev/null
+++ b/src/arachnid/cli/action.cr
@@ -0,0 +1,12 @@
+module Arachnid
+  class Cli < Clim
+    # Base class for CLI subcommand implementations.
+    abstract class Action
+
+      # Runs the action with the options and positional arguments
+      # handed over by the command's `run` block.
+      abstract def run(opts, args) : Nil
+
+    end
+  end
+end
diff --git a/src/arachnid/cli/count.cr b/src/arachnid/cli/count.cr
new file mode 100644
index 0000000..8572363
--- /dev/null
+++ b/src/arachnid/cli/count.cr
@@ -0,0 +1,81 @@
+require "./action"
+require "../arachnid"
+require "termspinner"
+require "json"
+
+module Arachnid
+  class Cli < Clim
+    # Crawls the given sites and writes a JSON summary report.
+    class Count < Cli::Action
+
+      # Crawls every URL in *urls*, tallying pages, links and status
+      # codes, then writes the report to `opts.output`.
+      #
+      # Reads `limit`, `codes`, `ilinks`, `elinks` and `output` from *opts*.
+      def run(opts, urls) : Nil
+        spinner = Spinner::Spinner.new("Wait...")
+
+        spider = Arachnid::Agent.new(limit: opts.limit)
+
+        urls.each do |url|
+          # Escape the URL so characters such as `.` and `?` match literally.
+          spider.visit_urls_like(Regex.new(Regex.escape(url)))
+        end
+
+        pages = 0
+        internal_links = Hash(String, Array(String)).new
+        external_links = Hash(String, Array(String)).new
+        codes = Hash(Int32, Array(String)).new
+
+        spinner.start("Crawling...")
+
+        spider.every_resource do |page|
+          pages += 1
+
+          if opts.codes.includes?(page.code)
+            codes[page.code] ||= [] of String
+            codes[page.code] << page.url.to_s
+          end
+
+          spinner.message = "Scanning #{page.url.to_s}"
+        end
+
+        spider.every_link do |orig, dest|
+          if dest.to_s.includes?(orig.to_s) || dest.relative?
+            internal_links[orig.to_s] ||= [] of String
+            internal_links[orig.to_s] << dest.to_s
+          else
+            external_links[orig.to_s] ||= [] of String
+            external_links[orig.to_s] << dest.to_s
+          end
+        end
+
+        # Crawl from every site given, not just the first one.
+        urls.each { |url| spider.start_at(url) }
+        spinner.stop("Finished scanning!\n")
+
+        generate_report(
+          opts.output,
+          pages,
+          opts.ilinks ? internal_links : nil,
+          opts.elinks ? external_links : nil,
+          opts.codes.empty? ? nil : codes
+        )
+      end
+
+      # Writes the report as JSON to *outfile*. "pages" is always
+      # included; each of the other sections is skipped when nil.
+      def generate_report(outfile, pages, internal_links, external_links, codes)
+        report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32
+
+        report["pages"] = pages
+        report["internal_links"] = internal_links if internal_links
+        report["external_links"] = external_links if external_links
+        report["codes"] = codes if codes
+
+        File.write(outfile, report.to_json, mode: "w+")
+        puts "Report saved to #{outfile}"
+      end
+    end
+  end
+end