Update examples

This commit is contained in:
Chris Watson 2019-06-26 12:46:10 -07:00
parent 8869483986
commit e11abaaf98
No known key found for this signature in database
GPG Key ID: 37DAEF5F446370A4
2 changed files with 7 additions and 7 deletions

View File

@ -30,15 +30,15 @@ links = {} of String => String
# not match on subdomains.
Arachnid.host("https://crystal-lang.org") do |spider|
# Ignore the API section. It's a little big.
spider.ignore_urls_like(/.*\/api.*/)
spider.ignore_urls_like(/\/(api)\//)
spider.every_resource do |resource|
puts "Visiting #{resource.url.to_s}"
spider.every_html_page do |page|
puts "Visiting #{page.url.to_s}"
# Ignore redirects for our sitemap
unless resource.redirect?
# Add the url of every visited resource to our sitemap
links[resource.url.to_s] = resource.title.to_s.strip
unless page.redirect?
# Add the url of every visited page to our sitemap
links[page.url.to_s] = page.title.to_s.strip
end
end
end

View File

@ -138,7 +138,7 @@ module Arachnid
# at the given URL.
def self.site(url, **options, &block : Agent ->)
url = url.is_a?(URI) ? url : URI.parse(url)
url_regex = Regex.new(Regex.escape(url.host.to_s))
url_regex = Regex.new(url.host.to_s)
agent = new(**options, &block)
agent.visit_hosts_like(url_regex)