Skip to content

Commit

Permalink
pythongh-125926: Fix urllib.parse.urljoin() for base URI with undefin…
Browse files Browse the repository at this point in the history
…ed authority

Although this goes beyond the application of RFC 3986, urljoin()
should support relative base URIs for backward compatibility.
  • Loading branch information
serhiy-storchaka committed Oct 25, 2024
1 parent 58241e8 commit 09f28b6
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
72 changes: 72 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,78 @@ def test_urljoins(self):
self.checkJoin(RFC1808_BASE, 'https:;', 'https:;')
self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x')

def test_urljoins_relative_base(self):
    # RFC 3986, Section 5.1 requires the base URI to follow the
    # absolute-URI syntax rule (Section 4.3).  urljoin() has no context
    # from which to fill in the components missing from a relative base
    # URI, but it must still return a sensible result for backward
    # compatibility.  The expectations below are not derived from any
    # standard; they are figments of the imagination and artifacts of
    # the current implementation.

    # Extra keyword arguments for the cases that pass relroundtrip=False.
    no_roundtrip = {'relroundtrip': False}
    plain = {}

    def check_group(base, cases, prefix=''):
        # Join every (reference, expected, options) triple against *base*,
        # in order; *prefix* is prepended to each reference.
        for ref, expected, options in cases:
            self.checkJoin(base, prefix + ref, expected, **options)

    # Bases that have neither a scheme nor a path.
    check_group('', [
        ('', '', plain),
        ('//', '//', no_roundtrip),
        ('//v', '//v', plain),
        ('//v/w', '//v/w', plain),
        ('/w', '/w', plain),
        ('///w', '///w', no_roundtrip),
        ('w', 'w', plain),
    ])
    check_group('//', [
        ('', '//', plain),
        ('//', '//', plain),
        ('//v', '//v', plain),
        ('//v/w', '//v/w', plain),
        ('/w', '/w', plain),
        ('///w', '///w', no_roundtrip),
        ('w', '/w', plain),
    ])
    check_group('//a', [
        ('', '//a', plain),
        ('//', '//', no_roundtrip),
        ('//v', '//v', plain),
        ('//v/w', '//v/w', plain),
        ('/w', '/w', plain),
        ('///w', '///w', no_roundtrip),
        ('w', '/w', plain),
    ])

    # Bases with the http scheme; every reference is tried both without
    # a scheme and with the 'http:' prefix.
    http_groups = [
        ('http:', [
            ('', 'http:', plain),
            ('//', 'http:', plain),
            ('//v', 'http://v', plain),
            ('//v/w', 'http://v/w', plain),
            ('/w', 'http:/w', plain),
            ('///w', 'http:/w', plain),
            ('w', 'http:/w', plain),
        ]),
        ('http://', [
            ('', 'http://', plain),
            ('//', 'http://', plain),
            ('//v', 'http://v', plain),
            ('//v/w', 'http://v/w', plain),
            ('/w', 'http:///w', plain),
            ('///w', 'http:///w', plain),
            ('w', 'http:///w', plain),
        ]),
        ('http://a', [
            ('', 'http://a', plain),
            ('//', 'http://a', plain),
            ('//v', 'http://v', plain),
            ('//v/w', 'http://v/w', plain),
            ('/w', 'http://a/w', plain),
            ('///w', 'http://a/w', plain),
            ('w', 'http://a/w', plain),
        ]),
    ]
    for scheme in '', 'http:':
        for base, cases in http_groups:
            check_group(base, cases, prefix=scheme)

    # Bases that consist of a path, without and with an empty authority.
    check_group('/b/c', [
        ('', '/b/c', plain),
        ('//', '///b/c', no_roundtrip),
        ('//v', '//v/b/c', plain),
        ('//v/w', '//v/w', plain),
        ('/w', '/w', plain),
        ('///w', '///w', no_roundtrip),
        ('w', '/b/w', plain),
    ])
    check_group('///b/c', [
        ('', '///b/c', plain),
        ('//', '///b/c', plain),
        ('//v', '//v/b/c', plain),
        ('//v/w', '//v/w', plain),
        ('/w', '/w', plain),
        ('///w', '///w', no_roundtrip),
        ('w', '/b/w', plain),
    ])


def test_RFC2732(self):
str_cases = [
('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
Expand Down
2 changes: 1 addition & 1 deletion Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def urljoin(base, url, allow_fragments=True):

if scheme is None:
scheme = bscheme
if scheme != bscheme or scheme not in uses_relative:
if scheme != bscheme or (scheme and scheme not in uses_relative):
return _coerce_result(url)
if scheme in uses_netloc:
if netloc:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix :func:`urllib.parse.urljoin` for base URI with undefined authority.
Although :rfc:`3986` only specifies reference resolution for an absolute base
URI, :func:`!urljoin` should continue to return a sensible result for a
relative base URI.

0 comments on commit 09f28b6

Please sign in to comment.