Updates py2 str/unicode and py3 bytes/str support with custom encoding/decoding (Fixes #100) (#101)

* Updates py2 str/unicode and py3 bytes/str support with custom encoding/decoding (Fixes #100)
* Fix bytes decoding test on windows
justinfx · Mar 24, 2021 · 95e9a8f · 95e9a8f
1 parent a839256
commit 95e9a8f
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 15 deletions.
diff --git a/src/fileseq/filesequence.py b/src/fileseq/filesequence.py
@@ -30,7 +30,6 @@
 from fileseq import utils
 
 
-@futils.python_2_unicode_compatible
 class FileSequence(object):
     """:class:`FileSequence` represents an ordered sequence of files.
 
@@ -628,16 +627,24 @@ def __str__(self):
             str:
         """
         frameSet = utils.asString(self._frameSet or "")
-        return "".join((
+        parts = [
             self._dir,
             self._base,
             frameSet,
             self._pad if frameSet else "",
-            self._ext))
+            self._ext,
+        ]
+
+        if futils.PY2:
+            for i, part in enumerate(parts):
+                if isinstance(part, futils.text_type):
+                    parts[i] = futils.native(part.encode(utils.FILESYSTEM_ENCODING))
+
+        return "".join(parts)
 
     def __repr__(self):
         try:
-            return "<%s: %r>" % (self.__class__.__name__, str(self))
+            return "<%s: %r>" % (self.__class__.__name__, self.__str__())
         except TypeError:
             return super(self.__class__, self).__repr__()
 

diff --git a/src/fileseq/utils.py b/src/fileseq/utils.py
@@ -11,12 +11,16 @@
 import future.utils as futils
 
 import decimal
-import os
 from itertools import chain, count, islice
+import os
+import sys
 
 from fileseq import exceptions
 
 
+FILESYSTEM_ENCODING = sys.getfilesystemencoding() or 'utf-8'
+
+
 def quantize(number, decimal_places, rounding=decimal.ROUND_HALF_EVEN):
     """
     Round a decimal value to given number of decimal places
@@ -309,11 +313,16 @@ def asString(obj):
     Returns:
         str or unicode:
     """
-    if type(obj) in _STR_TYPES:
+    typ = type(obj)
+    # explicit type check as faster path
+    if typ in _STR_TYPES:
+        if not futils.PY2 and typ is futils.binary_type:
+            obj = os.fsdecode(obj)
         return obj
+    # derived type check
     elif isinstance(obj, bytes):
         if not futils.PY2:
-            obj = obj.decode("utf-8")
+            obj = obj.decode(FILESYSTEM_ENCODING)
     else:
         obj = futils.text_type(obj)
     return futils.native(obj)
diff --git a/test/test_unit.py b/test/test_unit.py
@@ -12,7 +12,7 @@
     standard_library.install_aliases()
 
 from builtins import map
-from future.utils import string_types, native_str, integer_types, text_type
+from future.utils import string_types, native_str, integer_types, text_type, PY2
 
 try:
     import cPickle as pickle
@@ -1057,13 +1057,31 @@ def testIgnoreFrameSetStrings(self):
             self.assertEquals(str(fs), "/path/to/file{0}1-1x1#.exr".format(char))
 
     def testStrUnicode(self):
-        """https://github.com/justinfx/fileseq/issues/99"""
-        ret = FileSequence(u'file_カ_Z.01.txt')
-        # make sure none of these raise a unicode exception
-        _ = str(ret)
-        _ = text_type(ret)
-        _ = repr(ret)
-        _ = ret.format()
+        """
+        https://github.com/justinfx/fileseq/issues/99
+        https://github.com/justinfx/fileseq/issues/100
+        """
+        def check(seq):
+            # make sure none of these raise a unicode exception
+            s = str(seq)
+            _ = repr(seq)
+            _ = seq.format()
+
+        utf8 = u'file_カ_Z.01.txt'
+        latin1 = b'/proj/kenny/fil\xe9'
+        latin1_to_utf8 = latin1.decode('latin1').encode(utils.FILESYSTEM_ENCODING)
+
+        check(FileSequence(utf8))
+        check(FileSequence(utf8.encode(utils.FILESYSTEM_ENCODING)))
+        check(FileSequence(latin1_to_utf8))
+        try:
+            check(FileSequence(latin1))
+        except UnicodeDecodeError:
+            # Windows os.fsdecode() uses 'strict' error handling
+            # instead of 'surrogateescape'. So just assume bytes
+            # decoding error is expected for this case.
+            if os.name != 'nt':
+                raise
 
 
 class TestFindSequencesOnDisk(TestBase):