Added ignore option to cli
This commit is contained in:
parent
19e022faae
commit
18eb31c0ce
|
@ -33,14 +33,15 @@ module Arachnid
|
||||||
arachnid summarize https://crystal-lang.org -c 404 500
|
arachnid summarize https://crystal-lang.org -c 404 500
|
||||||
USAGE
|
USAGE
|
||||||
|
|
||||||
option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
|
option "-l", "--ilinks", type: Bool, desc: "generate a map of pages to internal links"
|
||||||
option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
|
option "-L", "--elinks", type: Bool, desc: "generate a map of pages to external links"
|
||||||
option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages \
|
option "-c CODES", "--codes=CODES", type: Array(Int32), desc: "generate a map of status codes to pages \
|
||||||
that responded with that code"
|
that responded with that code"
|
||||||
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
|
option "-n", "--limit NUM", type: Int32, desc: "maximum number of pages to scan"
|
||||||
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
||||||
option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to (if undefined \
|
option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)"
|
||||||
output will be printed to STDOUT"
|
option "-o FILE", "--output=FILE", type: String, desc: "file to write the report to (if undefined \
|
||||||
|
output will be printed to STDOUT"
|
||||||
|
|
||||||
run do |opts, args|
|
run do |opts, args|
|
||||||
if args.empty?
|
if args.empty?
|
||||||
|
@ -70,11 +71,12 @@ module Arachnid
|
||||||
USAGE
|
USAGE
|
||||||
|
|
||||||
|
|
||||||
option "--xml", type: Bool, desc: "generate the sitemap in XML format"
|
option "--xml", type: Bool, desc: "generate the sitemap in XML format"
|
||||||
option "--json", type: Bool, desc: "generate the sitemap in JSON format"
|
option "--json", type: Bool, desc: "generate the sitemap in JSON format"
|
||||||
option "-o FILE", "--output=FILE", type: String, desc: "filename to write the report to. \
|
option "-o FILE", "--output=FILE", type: String, desc: "filename to write the report to. \
|
||||||
default is the hostname + .json or .xml"
|
default is the hostname + .json or .xml"
|
||||||
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
option "-f", "--fibers NUM", type: Int32, desc: "maximum amount of fibers to spin up", default: 10
|
||||||
|
option "-i", "--ignore PATTERNS", type: Array(String), desc: "url patterns to ignore (regex)"
|
||||||
|
|
||||||
run do |opts, args|
|
run do |opts, args|
|
||||||
if args.size != 1
|
if args.size != 1
|
||||||
|
|
|
@ -18,6 +18,12 @@ module Arachnid
|
||||||
spider = Arachnid::Agent.new(fibers: opts.fibers)
|
spider = Arachnid::Agent.new(fibers: opts.fibers)
|
||||||
spider.visit_urls_like(Regex.new(Regex.escape(url.to_s)))
|
spider.visit_urls_like(Regex.new(Regex.escape(url.to_s)))
|
||||||
|
|
||||||
|
opts.ignore.each do |pattern|
|
||||||
|
pattern = Regex.new(pattern)
|
||||||
|
pp pattern
|
||||||
|
spider.ignore_urls_like(pattern)
|
||||||
|
end
|
||||||
|
|
||||||
map = {
|
map = {
|
||||||
domain: url.to_s,
|
domain: url.to_s,
|
||||||
lastmod: {
|
lastmod: {
|
||||||
|
|
|
@ -15,6 +15,12 @@ module Arachnid
|
||||||
spider.visit_urls_like(Regex.new(url))
|
spider.visit_urls_like(Regex.new(url))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.ignore.each do |pattern|
|
||||||
|
pattern = Regex.new(pattern)
|
||||||
|
pp pattern
|
||||||
|
spider.ignore_urls_like(pattern)
|
||||||
|
end
|
||||||
|
|
||||||
pages = 0
|
pages = 0
|
||||||
internal_links = Hash(String, Array(String)).new
|
internal_links = Hash(String, Array(String)).new
|
||||||
external_links = Hash(String, Array(String)).new
|
external_links = Hash(String, Array(String)).new
|
||||||
|
|
Loading…
Reference in New Issue