Update examples
This commit is contained in:
parent
8869483986
commit
e11abaaf98
12
README.md
12
README.md
|
@ -30,15 +30,15 @@ links = {} of String => String
|
|||
# not match on subdomains.
|
||||
Arachnid.host("https://crystal-lang.org") do |spider|
|
||||
# Ignore the API section. It's a little big.
|
||||
spider.ignore_urls_like(/.*\/api.*/)
|
||||
spider.ignore_urls_like(/\/(api)\//)
|
||||
|
||||
spider.every_resource do |resource|
|
||||
puts "Visiting #{resource.url.to_s}"
|
||||
spider.every_html_page do |page|
|
||||
puts "Visiting #{page.url.to_s}"
|
||||
|
||||
# Ignore redirects for our sitemap
|
||||
unless resource.redirect?
|
||||
# Add the url of every visited resource to our sitemap
|
||||
links[resource.url.to_s] = resource.title.to_s.strip
|
||||
unless page.redirect?
|
||||
# Add the url of every visited page to our sitemap
|
||||
links[page.url.to_s] = page.title.to_s.strip
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -138,7 +138,7 @@ module Arachnid
|
|||
# at the given URL.
|
||||
def self.site(url, **options, &block : Agent ->)
|
||||
url = url.is_a?(URI) ? url : URI.parse(url)
|
||||
url_regex = Regex.new(Regex.escape(url.host.to_s))
|
||||
url_regex = Regex.new(url.host.to_s)
|
||||
|
||||
agent = new(**options, &block)
|
||||
agent.visit_hosts_like(url_regex)
|
||||
|
|
Loading…
Reference in New Issue