diff --git a/sh_scrapy/compat.py b/sh_scrapy/compat.py index 31275cb..cd1b5fa 100644 --- a/sh_scrapy/compat.py +++ b/sh_scrapy/compat.py @@ -1,62 +1,45 @@ -import sys +import warnings +from scrapy.exceptions import ScrapyDeprecationWarning +from scrapy.utils.decorators import deprecated +from scrapy.utils.python import ( + to_bytes as scrapy_to_bytes, + to_unicode as scrapy_to_unicode, +) -IS_PYTHON2 = sys.version_info < (3,) -if IS_PYTHON2: - STRING_TYPE = basestring - TEXT_TYPE = unicode - BINARY_TYPE = str -else: - STRING_TYPE = str - TEXT_TYPE = str - BINARY_TYPE = bytes - -def is_string(var): - return isinstance(var, STRING_TYPE) +IS_PYTHON2 = False +STRING_TYPE = str +TEXT_TYPE = str +BINARY_TYPE = bytes -def to_unicode(text, encoding=None, errors='strict'): - """Return the unicode representation of `text`. +warnings.warn( + "The sh_scrapy.compat module is deprecated, use the functions in scrapy.utils.python instead", + category=ScrapyDeprecationWarning, + stacklevel=2, +) - If `text` is already a ``unicode`` object, return it as-is. - If `text` is a ``bytes`` object, decode it using `encoding`. - Otherwise, raise an error. - - """ - if isinstance(text, TEXT_TYPE): - return text - if not isinstance(text, BINARY_TYPE): - raise TypeError('to_unicode must receive a bytes, str or unicode ' - 'object, got %s' % type(text).__name__) - if encoding is None: - encoding = 'utf-8' - return text.decode(encoding, errors) +def is_string(var): + warnings.warn( + "is_string(var) is deprecated, please use isinstance(var, str) instead", + category=ScrapyDeprecationWarning, + stacklevel=2, + ) + return isinstance(var, str) +@deprecated("scrapy.utils.python.to_bytes") def to_bytes(text, encoding=None, errors='strict'): - """Return the binary representation of `text`. - - If `text` is already a ``bytes`` object, return it as-is. - If `text` is a ``unicode`` object, encode it using `encoding`. 
- - Otherwise, raise an error.""" - if isinstance(text, BINARY_TYPE): - return text - if not isinstance(text, TEXT_TYPE): - raise TypeError('to_bytes must receive a unicode, str or bytes ' - 'object, got %s' % type(text).__name__) - if encoding is None: - encoding = 'utf-8' - return text.encode(encoding, errors) + return scrapy_to_bytes(text, encoding, errors) +@deprecated("scrapy.utils.python.to_unicode") def to_native_str(text, encoding=None, errors='strict'): - """Return ``str`` representation of `text`. + return scrapy_to_unicode(text, encoding, errors) - ``str`` representation means ``bytes`` in PY2 and ``unicode`` in PY3. - """ - if IS_PYTHON2: - return to_bytes(text, encoding, errors) - return to_unicode(text, encoding, errors) + +@deprecated("scrapy.utils.python.to_unicode") +def to_unicode(text, encoding=None, errors='strict'): + return scrapy_to_unicode(text, encoding, errors) diff --git a/sh_scrapy/env.py b/sh_scrapy/env.py index 29796d0..0c5498d 100644 --- a/sh_scrapy/env.py +++ b/sh_scrapy/env.py @@ -2,7 +2,8 @@ import json import codecs from base64 import b64decode -from sh_scrapy.compat import to_bytes, to_native_str, is_string + +from scrapy.utils.python import to_bytes, to_unicode def _make_scrapy_args(arg, args_dict): @@ -11,7 +12,7 @@ def _make_scrapy_args(arg, args_dict): args = [] for k, v in sorted(dict(args_dict).items()): args += [arg, "{}={}".format( - to_native_str(k), to_native_str(v) if is_string(v) else v)] + to_unicode(k), to_unicode(v) if isinstance(v, str) else v)] return args @@ -36,7 +37,7 @@ def _job_args_and_env(msg): cmd = msg.get('job_cmd') if not isinstance(cmd, list): cmd = [str(cmd)] - return cmd, {to_native_str(k): to_native_str(v) if is_string(v) else v + return cmd, {to_unicode(k): to_unicode(v) if isinstance(v, str) else v for k, v in sorted(dict(env).items())} @@ -51,7 +52,7 @@ def _jobname(msg): def _jobauth(msg): auth_data = to_bytes('{0[key]}:{0[auth]}'.format(msg)) - return to_native_str(codecs.encode(auth_data, 
'hex_codec')) + return to_unicode(codecs.encode(auth_data, 'hex_codec')) def get_args_and_env(msg): diff --git a/sh_scrapy/extension.py b/sh_scrapy/extension.py index f2f1f0d..4d67368 100644 --- a/sh_scrapy/extension.py +++ b/sh_scrapy/extension.py @@ -11,7 +11,6 @@ from scrapy.utils.request import request_fingerprint from sh_scrapy import hsref -from sh_scrapy.compat import IS_PYTHON2 from sh_scrapy.crawl import ignore_warnings from sh_scrapy.exceptions import SHScrapyDeprecationWarning from sh_scrapy.middlewares import HS_PARENT_ID_KEY, request_id_sequence @@ -43,7 +42,7 @@ def __init__(self, crawler): self.logger = logging.getLogger(__name__) self._write_item = self.pipe_writer.write_item # https://github.com/scrapy/scrapy/commit/c76190d491fca9f35b6758bdc06c34d77f5d9be9 - exporter_kwargs = {'binary': False} if not IS_PYTHON2 else {} + exporter_kwargs = {'binary': False} with ignore_warnings(category=ScrapyDeprecationWarning): self.exporter = PythonItemExporter(**exporter_kwargs) diff --git a/sh_scrapy/hsref.py b/sh_scrapy/hsref.py index 9a273ea..a8fec9f 100644 --- a/sh_scrapy/hsref.py +++ b/sh_scrapy/hsref.py @@ -3,7 +3,8 @@ """ import os from codecs import decode -from sh_scrapy.compat import to_native_str + +from scrapy.utils.python import to_unicode class _HubstorageRef(object): @@ -24,7 +25,7 @@ def __init__(self): @property def auth(self): - return to_native_str(decode(os.environ['SHUB_JOBAUTH'], 'hex_codec')) + return to_unicode(decode(os.environ['SHUB_JOBAUTH'], 'hex_codec')) @property def endpoint(self): diff --git a/sh_scrapy/log.py b/sh_scrapy/log.py index d21bb82..2ac225e 100644 --- a/sh_scrapy/log.py +++ b/sh_scrapy/log.py @@ -2,10 +2,10 @@ import sys import warnings -from twisted.python import log as txlog from scrapy import __version__ +from scrapy.utils.python import to_unicode +from twisted.python import log as txlog -from sh_scrapy.compat import to_native_str from sh_scrapy.writer import pipe_writer @@ -120,7 +120,7 @@ def _get_log_item(self, 
ev): msg = ev.get('message') if msg: - msg = to_native_str(msg[0]) + msg = to_unicode(msg[0]) failure = ev.get('failure', None) if failure: @@ -156,7 +156,7 @@ def _logprefixed(self, msg): _logfn(message=self.prefix + msg, level=self.loglevel) def write(self, data): - data = to_native_str(data, self.encoding) + data = to_unicode(data, self.encoding) d = (self.buf + data).split('\n') self.buf = d[-1] @@ -166,5 +166,5 @@ def write(self, data): def writelines(self, lines): for line in lines: - line = to_native_str(line, self.encoding) + line = to_unicode(line, self.encoding) self._logprefixed(line) diff --git a/sh_scrapy/settings.py b/sh_scrapy/settings.py index b170d14..8da1fb7 100644 --- a/sh_scrapy/settings.py +++ b/sh_scrapy/settings.py @@ -2,10 +2,11 @@ import sys import logging import tempfile -from sh_scrapy.compat import to_native_str, is_string + from scrapy.settings import Settings from scrapy.utils.misc import load_object from scrapy.utils.project import get_project_settings +from scrapy.utils.python import to_unicode logger = logging.getLogger(__name__) @@ -54,8 +55,8 @@ def __init__(self): def set(self, name, value, priority='project'): super(EntrypointSettings, self).set( - to_native_str(name), - to_native_str(value) if is_string(value) else value, + to_unicode(name), + value, priority=priority) def copy_to_dict(self): @@ -110,7 +111,7 @@ def _update_old_classpaths(settings): elif not isinstance(setting_value, dict): continue for path in setting_value.keys(): - if not is_string(path): + if not isinstance(path, str): continue updated_path = update_classpath(path) if updated_path != path: diff --git a/tests/conftest.py b/tests/conftest.py index d4cb2e4..3c7d40c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,15 +5,16 @@ import tempfile import pytest +from scrapy.utils.python import to_unicode, to_bytes TEMP_DIR = tempfile.mkdtemp() SHUB_FIFO_PATH = os.path.join(TEMP_DIR, 'scrapinghub')
os.environ['SHUB_FIFO_PATH'] = SHUB_FIFO_PATH -from sh_scrapy.writer import pipe_writer -from sh_scrapy.compat import to_native_str, to_bytes +from sh_scrapy.writer import pipe_writer # should go after setting SHUB_FIFO_PATH -TEST_AUTH = to_native_str(codecs.encode(to_bytes('1/2/3:authstr'), 'hex_codec')) + +TEST_AUTH = to_unicode(codecs.encode(to_bytes('1/2/3:authstr'), 'hex_codec')) @pytest.fixture(scope='session', autouse=True) diff --git a/tests/test_compat.py b/tests/test_compat.py index 8d62409..9f67285 100644 --- a/tests/test_compat.py +++ b/tests/test_compat.py @@ -1,32 +1,82 @@ +import warnings + import pytest -from sh_scrapy.compat import to_bytes -from sh_scrapy.compat import to_unicode +from scrapy.exceptions import ScrapyDeprecationWarning + +from sh_scrapy.compat import is_string, to_bytes, to_unicode, to_native_str + + +# test deprecation messages + +def test_deprecated_is_string(): + with warnings.catch_warnings(record=True) as caught: + assert is_string("foo") + assert not is_string(b"foo") + assert not is_string(1) + assert ( + "is_string(var) is deprecated, please use isinstance(var, str) instead" + == str(caught[0].message) + ) + assert caught[0].category is ScrapyDeprecationWarning + + +def test_deprecated_to_unicode(): + with warnings.catch_warnings(record=True) as caught: + assert to_unicode("foo") == "foo" + assert to_unicode(b"foo") == "foo" + assert ( + "Call to deprecated function to_unicode. Use scrapy.utils.python.to_unicode instead." + == str(caught[0].message) + ) + assert caught[0].category is ScrapyDeprecationWarning + + +def test_deprecated_to_native_str(): + with warnings.catch_warnings(record=True) as caught: + assert to_native_str("foo") == "foo" + assert to_native_str(b"foo") == "foo" + assert ( + "Call to deprecated function to_native_str. Use scrapy.utils.python.to_unicode instead." 
+ == str(caught[0].message) + ) + assert caught[0].category is ScrapyDeprecationWarning + + +def test_deprecated_to_bytes(): + with warnings.catch_warnings(record=True) as caught: + assert to_bytes("foo") == b"foo" + assert to_bytes(b"foo") == b"foo" + assert ( + "Call to deprecated function to_bytes. Use scrapy.utils.python.to_bytes instead." + == str(caught[0].message) + ) + assert caught[0].category is ScrapyDeprecationWarning -# Testing to_unicode conversion +# Testing to_unicode conversion -def test_to_unicode_an_utf8_encoded_string_to_unicode(): +def test_to_str_an_utf8_encoded_string_to_str(): assert to_unicode(b'lel\xc3\xb1e') == u'lel\xf1e' -def test_to_unicode_a_latin_1_encoded_string_to_unicode(): +def test_to_str_a_latin_1_encoded_string_to_str(): assert to_unicode(b'lel\xf1e', 'latin-1') == u'lel\xf1e' -def test_to_unicode_a_unicode_to_unicode_should_return_the_same_object(): +def test_to_str_a_unicode_to_str_should_return_the_same_object(): assert to_unicode(u'\xf1e\xf1e\xf1e') == u'\xf1e\xf1e\xf1e' -def test_to_unicode_a_strange_object_should_raise_TypeError(): +def test_to_str_a_strange_object_should_raise_TypeError(): with pytest.raises(TypeError) as excinfo: to_unicode(123) -def test_to_unicode_errors_argument(): +def test_to_str_errors_argument(): assert to_unicode(b'a\xedb', 'utf-8', errors='replace') == u'a\ufffdb' -# Testing to_unicode conversion +# Testing to_bytes conversion def test_to_bytes_a_unicode_object_to_an_utf_8_encoded_string(): assert to_bytes(u'\xa3 49') == b'\xc2\xa3 49' diff --git a/tests/test_env.py b/tests/test_env.py index b8ba42b..dae7f2f 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -5,7 +5,7 @@ import pytest import tempfile -from sh_scrapy.compat import to_bytes, to_native_str +from scrapy.utils.python import to_bytes, to_unicode from sh_scrapy.env import _jobauth from sh_scrapy.env import _jobname @@ -72,7 +72,7 @@ def test_jobname(): def test_jobauth(): msg = {'key': '1/2/3', 'auth': 'authstring'} expected = 
codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec') - assert _jobauth(msg) == to_native_str(expected) + assert _jobauth(msg) == to_unicode(expected) def test_get_args_and_env_run_spider(): @@ -88,7 +88,7 @@ def test_get_args_and_env_run_spider(): assert result[1] == {'SCRAPY_JOB': '1/2/3', 'SCRAPY_PROJECT_ID': '1', 'SCRAPY_SPIDER': 'test', - 'SHUB_JOBAUTH': to_native_str(expected_auth), + 'SHUB_JOBAUTH': to_unicode(expected_auth), 'SHUB_JOBKEY': '1/2/3', 'SHUB_JOBNAME': 'test', 'SHUB_JOB_TAGS': '', @@ -109,7 +109,7 @@ def test_get_args_and_env_run_script(): assert len(result) == 2 assert result[0] == ['custom.py', 'arg1'] assert result[1] == { - 'SHUB_JOBAUTH': to_native_str(expected_auth), + 'SHUB_JOBAUTH': to_unicode(expected_auth), 'SHUB_JOBKEY': '1/2/3', 'SHUB_JOBNAME': 'custom.py', 'SHUB_JOB_TAGS': ''} diff --git a/tests/test_settings.py b/tests/test_settings.py index dab470c..5f03a2d 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,10 +1,12 @@ - import os import sys import mock + import pytest from scrapy import version_info as scrapy_version from scrapy.settings import Settings +from scrapy.utils.python import to_unicode + from sh_scrapy.settings import EntrypointSettings from sh_scrapy.settings import _enforce_required_settings from sh_scrapy.settings import _maybe_load_autoscraping_project @@ -16,8 +18,6 @@ from sh_scrapy.settings import _update_old_classpaths from sh_scrapy.settings import populate_settings -from sh_scrapy.compat import to_native_str - TEST_ADDON = { 'addon_id': 'test_addon', @@ -91,7 +91,7 @@ def test_update_settings_check_unicode_in_py2_key(): test = EntrypointSettings() test.setdict({'\xf1e\xf1e\xf1e': 'test'}, 10) assert test['\xf1e\xf1e\xf1e'] == 'test' - assert test[to_native_str('\xf1e\xf1e\xf1e')] == 'test' + assert test[to_unicode('\xf1e\xf1e\xf1e')] == 'test' @pytest.mark.skipif(sys.version_info[0] == 3, reason="requires python2") @@ -100,8 +100,8 @@ def 
test_update_settings_check_unicode_in_py2_key_value(): test = EntrypointSettings() test.setdict({'\xf1e\xf1e\xf1e': '\xf1e\xf1e'}, 10) assert test['\xf1e\xf1e\xf1e'] == '\xf1e\xf1e' - native_key = to_native_str('\xf1e\xf1e\xf1e') - assert test[native_key] == to_native_str('\xf1e\xf1e') + native_key = to_unicode('\xf1e\xf1e\xf1e') + assert test[native_key] == to_unicode('\xf1e\xf1e') @pytest.mark.skipif(sys.version_info < (3,), reason="requires python3")