From 217a22bb2c297fa151c08556db78973845855193 Mon Sep 17 00:00:00 2001 From: Mauro Asprea Date: Tue, 29 Nov 2011 20:31:20 +0100 Subject: [PATCH] Add support for base HTML element Add support in the Page's to_absolute method for the base HTML element. This way it can correctly convert relative links for a given page document. --- lib/anemone/page.rb | 17 ++++++++++++++- spec/fakeweb_helper.rb | 7 ++++++- spec/page_spec.rb | 47 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/lib/anemone/page.rb b/lib/anemone/page.rb index ed5b3a96..30aff37b 100644 --- a/lib/anemone/page.rb +++ b/lib/anemone/page.rb @@ -131,6 +131,21 @@ def not_found? 404 == @code end + # + # Base URI from the HTML doc head element + # http://www.w3.org/TR/html4/struct/links.html#edef-BASE + # + def base + @base = if doc + href = doc.search('//head/base/@href') + URI(href.to_s) unless href.nil? rescue nil + end unless @base + + return nil if @base && @base.to_s().empty? + @base + end + + # # Converts relative URL *link* into an absolute URL based on the # location of the page @@ -142,7 +157,7 @@ def to_absolute(link) link = URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))) relative = URI(link) - absolute = @url.merge(relative) + absolute = base ? base.merge(relative) : @url.merge(relative) absolute.path = '/' if absolute.path.empty? diff --git a/spec/fakeweb_helper.rb b/spec/fakeweb_helper.rb index 6894f96a..5153d7b2 100644 --- a/spec/fakeweb_helper.rb +++ b/spec/fakeweb_helper.rb @@ -15,6 +15,7 @@ def initialize(name = '', options = {}) @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs) @redirect = options[:redirect] if options.has_key?(:redirect) @auth = options[:auth] if options.has_key?(:auth) + @base = options[:base] if options.has_key?(:base) @content_type = options[:content_type] || "text/html" @body = options[:body] @@ -33,7 +34,11 @@ def auth_url private def create_body - @body = "" + if @base + @body = "" + else + @body = "" + end @links.each{|l| @body += ""} if @links @hrefs.each{|h| @body += ""} if @hrefs @body += "" diff --git a/spec/page_spec.rb b/spec/page_spec.rb index aa1e2293..63bd9a70 100644 --- a/spec/page_spec.rb +++ b/spec/page_spec.rb @@ -126,5 +126,52 @@ module Anemone end end + it "should detect, store and expose the base url for the page head" do + base = "#{SPEC_DOMAIN}path/to/base_url/" + page = @http.fetch_page(FakePage.new('body_test', {:base => base}).url) + page.base.should == URI(base) + @page.base.should be_nil + end + + it "should have a method to convert a relative url to an absolute one" do + @page.should respond_to(:to_absolute) + + # Identity + @page.to_absolute(@page.url).should == @page.url + @page.to_absolute("").should == @page.url + + # Root-ness + @page.to_absolute("/").should == URI("#{SPEC_DOMAIN}") + + # Relativeness + relative_path = "a/relative/path" + @page.to_absolute(relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}") + + deep_page = @http.fetch_page(FakePage.new('home/deep', :links => '1').url) + upward_relative_path = "../a/relative/path" + deep_page.to_absolute(upward_relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}") + + # The base URL case + base_path = "path/to/base_url/" + base = "#{SPEC_DOMAIN}#{base_path}" + page = @http.fetch_page(FakePage.new('home', {:base => base}).url) + + # Identity + page.to_absolute(page.url).should == page.url + # It should revert to the base url + page.to_absolute("").should_not == page.url + + # Root-ness + page.to_absolute("/").should == URI("#{SPEC_DOMAIN}") + + # Relativeness + relative_path = "a/relative/path" + page.to_absolute(relative_path).should == URI("#{base}#{relative_path}") + + upward_relative_path = "../a/relative/path" + upward_base = "#{SPEC_DOMAIN}path/to/" + page.to_absolute(upward_relative_path).should == URI("#{upward_base}#{relative_path}") + end + end end