From e11abaaf98f7b7fc33be1468228c1a61b7eb7825 Mon Sep 17 00:00:00 2001 From: Chris Watson Date: Wed, 26 Jun 2019 12:46:10 -0700 Subject: [PATCH] Update examples --- README.md | 12 ++++++------ src/arachnid/agent.cr | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 20c3ec4..81403a4 100644 --- a/README.md +++ b/README.md @@ -30,15 +30,15 @@ links = {} of String => String # not match on subdomains. Arachnid.host("https://crystal-lang.org") do |spider| # Ignore the API secion. It's a little big. - spider.ignore_urls_like(/.*\/api.*/) + spider.ignore_urls_like(/\/(api)\//) - spider.every_resource do |resource| - puts "Visiting #{resource.url.to_s}" + spider.every_html_page do |page| + puts "Visiting #{page.url.to_s}" # Ignore redirects for our sitemap - unless resource.redirect? - # Add the url of every visited resource to our sitemap - links[resource.url.to_s] = resource.title.to_s.strip + unless page.redirect? + # Add the url of every visited page to our sitemap + links[page.url.to_s] = page.title.to_s.strip end end end diff --git a/src/arachnid/agent.cr b/src/arachnid/agent.cr index 85e7717..9c263ba 100644 --- a/src/arachnid/agent.cr +++ b/src/arachnid/agent.cr @@ -138,7 +138,7 @@ module Arachnid # at the given URL. def self.site(url, **options, &block : Agent ->) url = url.is_a?(URI) ? url : URI.parse(url) - url_regex = Regex.new(Regex.escape(url.host.to_s)) + url_regex = Regex.new(url.host.to_s) agent = new(**options, &block) agent.visit_hosts_like(url_regex)