Added cli
This commit is contained in:
parent
6f63c92780
commit
611a0091cb
|
@ -8,6 +8,15 @@ dependencies:
|
||||||
halite:
|
halite:
|
||||||
github: icyleaf/halite
|
github: icyleaf/halite
|
||||||
version: ~> 0.10.2
|
version: ~> 0.10.2
|
||||||
|
termspinner:
|
||||||
|
github: eliobr/termspinner
|
||||||
|
clim:
|
||||||
|
github: at-grandpa/clim
|
||||||
|
version: 0.7.0
|
||||||
|
|
||||||
|
targets:
|
||||||
|
arachnid:
|
||||||
|
main: src/arachnid/cli.cr
|
||||||
|
|
||||||
crystal: 0.29.0
|
crystal: 0.29.0
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
require "../spec_helper"
|
||||||
|
|
||||||
|
# Specs for Arachnid::Rules, instantiated here with String as its type argument.
describe Arachnid::Rules do
  it "should create a new Rules object" do
    ruleset = Arachnid::Rules(String).new

    ruleset.should_not be_nil
    # Nothing was passed to the initializer, so both lists are expected to be empty.
    ruleset.accept.should be_empty
    ruleset.reject.should be_empty
  end

  it "should allow values for 'accept' and 'reject' in initializer" do
    always = ->(_candidate : String) { true }
    never = ->(_candidate : String) { false }

    ruleset = Arachnid::Rules(String).new(
      accept: [always],
      reject: [never]
    )

    # The procs handed to the initializer must be retrievable afterwards.
    ruleset.accept.should contain(always)
    ruleset.reject.should contain(never)
  end

  # No examples have been written for #accept? yet.
  describe "#accept?" do
  end

  # No examples have been written for #reject? yet.
  describe "#reject?" do
  end
end
|
|
@ -0,0 +1,5 @@
|
||||||
|
require "./spec_helper"
|
||||||
|
|
||||||
|
# Top-level spec group for the Arachnid module; it currently contains no examples.
describe Arachnid do
|
||||||
|
|
||||||
|
end
|
|
@ -1,9 +0,0 @@
|
||||||
require "./spec_helper"
|
|
||||||
|
|
||||||
describe Arachnid do
|
|
||||||
# TODO: Write tests
|
|
||||||
|
|
||||||
it "works" do
|
|
||||||
false.should eq(true)
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,2 +1 @@
|
||||||
require "./arachnid/version"
|
|
||||||
require "./arachnid/arachnid"
|
require "./arachnid/arachnid"
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
require "./version"
|
||||||
require "./resource"
|
require "./resource"
|
||||||
require "./agent"
|
require "./agent"
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ module Arachnid
|
||||||
return nil unless paths
|
return nil unless paths
|
||||||
|
|
||||||
# longest path first
|
# longest path first
|
||||||
ordered_paths = paths.keys.sort { |path_key| -path_key.size }
|
ordered_paths = paths.keys.sort { |path_key| -path_key.size }
|
||||||
|
|
||||||
# directories of the path
|
# directories of the path
|
||||||
path_dirs = URI.expand_path(url.path).split('/').reject(&.empty?)
|
path_dirs = URI.expand_path(url.path).split('/').reject(&.empty?)
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
require "clim"
|
||||||
|
require "./version"
|
||||||
|
require "./cli/**"
|
||||||
|
|
||||||
|
module Arachnid
  # Command-line interface for Arachnid, declared with the Clim DSL.
  #
  # Running the binary without a subcommand prints the help text. The
  # `summarize` subcommand hands its options and site arguments to
  # `Arachnid::Cli::Count`.
  class Cli < Clim
    main do
      desc "Arachnid CLI - Simple utilities for scanning the web."
      usage "arachnid [options] [subcommand] [arguments] ..."
      version Arachnid::VERSION

      # With no subcommand there is nothing to do, so show the help text.
      run do |opts, args|
        puts opts.help_string # => help string.
      end

      sub "summarize" do
        desc "Scan a site (or sites) and generate a JSON report"
        usage "arachnid summarize [sites] [options] ..."

        option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
        option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
        option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages \
                                                                       that responded with that code"
        # Declared in the same "-x VALUE", "--long=VALUE" form as the other
        # value-taking options so the help output shows that -n needs a number.
        option "-n NUM", "--limit=NUM", type: Int32, desc: "maximum number of pages to scan"
        option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to", default: "arachnid.json"

        run do |opts, args|
          # Validate before building anything, and report the failure through
          # the exit status as well as STDERR so scripts can detect it.
          if args.empty?
            STDERR.puts "At least one site is required"
            exit 1
          end

          Arachnid::Cli::Count.new.run(opts, args)
        end
      end
    end
  end
end
|
||||||
|
|
||||||
|
Arachnid::Cli.start(ARGV)
|
|
@ -0,0 +1,9 @@
|
||||||
|
module Arachnid
  class Cli < Clim
    # Base class for the objects that implement CLI subcommands.
    #
    # Concrete actions subclass this and provide `#run`.
    abstract class Action
      # Executes the action.
      #
      # *opts* and *args* are whatever the invoking command passes along;
      # their types are not constrained here. Declared to return `Nil`.
      abstract def run(opts, args) : Nil
    end
  end
end
|
|
@ -0,0 +1,72 @@
|
||||||
|
require "./action"
|
||||||
|
require "../arachnid"
|
||||||
|
require "termspinner"
|
||||||
|
require "json"
|
||||||
|
|
||||||
|
module Arachnid
  class Cli < Clim
    # Action that crawls the given sites and writes a JSON summary report.
    #
    # The report always contains the number of pages seen. Internal links,
    # external links and status-code maps are added only when the matching
    # option was supplied.
    class Count < Cli::Action
      # Crawls every URL in *urls* and writes the report to `opts.output`.
      #
      # *opts* must respond to `limit`, `codes`, `ilinks`, `elinks` and
      # `output`; *urls* is the list of site URLs given on the command line.
      # Returns `nil`, matching the abstract `Action#run` declaration.
      def run(opts, urls) : Nil
        spinner = Spinner::Spinner.new("Wait...")

        spider = Arachnid::Agent.new(limit: opts.limit)

        # Escape each URL before compiling it: characters such as '.', '?'
        # and '+' are common in URLs and would otherwise act as regex
        # metacharacters and match unintended addresses.
        urls.each do |url|
          spider.visit_urls_like(Regex.new(Regex.escape(url)))
        end

        pages = 0
        internal_links = Hash(String, Array(String)).new
        external_links = Hash(String, Array(String)).new
        codes = Hash(Int32, Array(String)).new

        spinner.start("Crawling...")

        spider.every_resource do |page|
          pages += 1

          # Only record the status codes the user asked for.
          if opts.codes.includes?(page.code)
            (codes[page.code] ||= [] of String) << page.url.to_s
          end

          spinner.message = "Scanning #{page.url}"
        end

        spider.every_link do |orig, dest|
          origin = orig.to_s

          # A link is internal when it is relative or points at the same host
          # as the page it was found on. The previous substring test against
          # the full origin URL reported same-site links from any non-root
          # page as external.
          # NOTE(review): assumes both yielded values respond to #host — confirm against Agent#every_link.
          bucket = (dest.relative? || dest.host == orig.host) ? internal_links : external_links
          (bucket[origin] ||= [] of String) << dest.to_s
        end

        # Crawl every requested site, not just the first one.
        # NOTE(review): assumes Agent#start_at may be called repeatedly on one agent — confirm.
        urls.each do |url|
          spider.start_at(url)
        end

        spinner.stop("Finished scanning!\n")

        generate_report(
          opts.output,
          pages,
          opts.ilinks ? internal_links : nil,
          opts.elinks ? external_links : nil,
          opts.codes.empty? ? nil : codes
        )
      end

      # Serializes the collected data to JSON and writes it to *outfile*.
      #
      # *pages* is always included; *internal_links*, *external_links* and
      # *codes* are included only when they are not nil. Prints a
      # confirmation line to standard output once the file is written.
      def generate_report(outfile, pages, internal_links, external_links, codes)
        report = {} of String => Hash(String, Array(String)) | Hash(Int32, Array(String)) | Int32

        report["pages"] = pages
        report["internal_links"] = internal_links if internal_links
        report["external_links"] = external_links if external_links
        report["codes"] = codes if codes

        # File.write already creates or truncates the file; no explicit mode is needed.
        File.write(outfile, report.to_json)
        puts "Report saved to #{outfile}"
      end
    end
  end
end
|
Loading…
Reference in New Issue