diff --git a/README.md b/README.md index 13ec901..45fcd9c 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ Arachnid is a fast and powerful web scraping framework for Crystal. It provides - [Arachnid](#Arachnid) - [Installation](#Installation) + - [The CLI](#The-CLI) + - [Summarize](#Summarize) + - [Sitemap](#Sitemap) - [Examples](#Examples) - [Usage](#Usage) - [Configuration](#Configuration) @@ -65,6 +68,45 @@ Arachnid is a fast and powerful web scraping framework for Crystal. It provides 2. Run `shards install` +To build the CLI: + +1. Run `shards build --release` + +2. Add the `./bin` directory to your path or symlink `./bin/arachnid` with `sudo ln -s /home/path/to/arachnid /usr/local/bin` + +## The CLI + +Arachnid provides a CLI for basic scanning tasks. Here is what you can do with it so far: + +### Summarize + +The `summarize` subcommand allows you to generate a report for a website. It can give you the number of pages, the internal and external links for every page, and a list of pages and their status codes (helpful for finding broken pages). + +You can use it like this: + +``` +arachnid summarize https://crystal-lang.org --ilinks --elinks -c 404 503 +``` + +This will generate a report for crystal-lang.org, which will include every page and its internal and external links, and a list of every page that returned a 404 or 503 status. For complete help, use `arachnid summarize --help`. + +### Sitemap + +Arachnid can also generate an XML or JSON sitemap for a website by scanning the entire site, following internal links. To do so, just use the `arachnid sitemap` subcommand. 
+ +``` +# XML sitemap +arachnid sitemap https://crystal-lang.org --xml + +# JSON sitemap +arachnid sitemap https://crystal-lang.org --json + +# Custom output file +arachnid sitemap https://crystal-lang.org --xml -o ~/Desktop/crystal-lang.org-sitemap.xml +``` + +Full help is available with `arachnid sitemap --help` + ## Examples Arachnid provides an easy to use, powerful DSL for scraping websites. diff --git a/src/arachnid/cli.cr b/src/arachnid/cli.cr index d00020e..7db4bf0 100644 --- a/src/arachnid/cli.cr +++ b/src/arachnid/cli.cr @@ -44,8 +44,8 @@ module Arachnid if args.empty? STDERR.puts "At least one site is required" else - count = Arachnid::Cli::Count.new - count.run(opts, args) + summarize = Arachnid::Cli::Summarize.new + summarize.run(opts, args) end end end diff --git a/src/arachnid/cli/forum.crystal-lang.org.xml b/src/arachnid/cli/forum.crystal-lang.org.xml deleted file mode 100644 index 55d79fa..0000000 --- a/src/arachnid/cli/forum.crystal-lang.org.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - https://forum.crystal-lang.org - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/privacy - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/tos - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/guidelines - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/categories - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/c/offtopic - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/c/offtopic?page=1 - 2019-06-30 - never - 0.5 - - - https://forum.crystal-lang.org/c/offtopic?page=2 - 2019-06-30 - never - 0.5 - - diff --git a/src/arachnid/cli/count.cr b/src/arachnid/cli/summarize.cr similarity index 94% rename from src/arachnid/cli/count.cr rename to src/arachnid/cli/summarize.cr index 7fa2e9a..41d93bc 100644 --- a/src/arachnid/cli/count.cr +++ b/src/arachnid/cli/summarize.cr @@ -5,7 +5,7 @@ require "json" module Arachnid class Cli < Clim - class Count < Cli::Action + class Summarize < Cli::Action 
def run(opts, urls) spinner = Spinner::Spinner.new("Wait...") @@ -65,7 +65,7 @@ module Arachnid report["codes"] = codes if codes if outfile - File.write(outfile.to_s, report.to_json, mode: "w+") + File.write(File.expand_path(outfile.to_s, __DIR__), report.to_json, mode: "w+") puts "Report saved to #{outfile}" else pp report