Skip to content

Commit

Permalink
GH-85168: Use filesystem encoding when converting to/from file URIs
Browse files Browse the repository at this point in the history
Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Python's
filesystem encoding on Windows is always UTF-8, per PEP 529.
  • Loading branch information
barneygale committed Nov 15, 2024
1 parent 3fecbe9 commit 11b9602
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 11 deletions.
17 changes: 13 additions & 4 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,10 +709,6 @@ def tearDown(self):

def constructLocalFileUrl(self, filePath):
filePath = os.path.abspath(filePath)
try:
filePath.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filePath is not encodable to utf8")
return "file://%s" % urllib.request.pathname2url(filePath)

def createNewTempFile(self, data=b""):
Expand Down Expand Up @@ -1561,6 +1557,13 @@ def test_pathname2url_posix(self):
self.assertEqual(fn('/'), '/')
self.assertEqual(fn('/a/b.c'), '/a/b.c')
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
try:
expect = os.fsencode('\xe9')
except UnicodeEncodeError:
pass
else:
expect = urllib.parse.quote_from_bytes(expect)
self.assertEqual(fn('\xe9'), expect)

@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
Expand Down Expand Up @@ -1611,6 +1614,12 @@ def test_url2pathname_posix(self):
self.assertEqual(fn('///foo/bar'), '/foo/bar')
self.assertEqual(fn('////foo/bar'), '//foo/bar')
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
try:
expect = os.fsdecode(b'\xe9')
except UnicodeDecodeError:
pass
else:
self.assertEqual(fn('%e9'), expect)

class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""
Expand Down
4 changes: 0 additions & 4 deletions Lib/test/test_urllib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,10 +718,6 @@ def test_processors(self):


def sanepathname2url(path):
try:
path.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("path is not encodable to utf8")
urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
Expand Down
6 changes: 3 additions & 3 deletions Lib/urllib/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
_splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
unquote_to_bytes, urlunparse)
quote_from_bytes, unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook

# check for SSL
Expand Down Expand Up @@ -1660,12 +1660,12 @@ def url2pathname(pathname):
# URL has an empty authority section, so the path begins on the
# third character.
pathname = pathname[2:]
return unquote(pathname)
return os.fsdecode(unquote_to_bytes(pathname))

def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
return quote(pathname)
return quote_from_bytes(os.fsencode(pathname))


ftpcache = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix issue where :func:`urllib.request.url2pathname` and
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
unquoting file URIs. They now use the :term:`filesystem encoding and error
handler`.

0 comments on commit 11b9602

Please sign in to comment.