Deprecate sh_scrapy.compat module #67

Merged: 11 commits, Jan 30, 2023
79 changes: 31 additions & 48 deletions sh_scrapy/compat.py
@@ -1,62 +1,45 @@
import sys
import warnings

from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.decorators import deprecated
from scrapy.utils.python import (
to_bytes as scrapy_to_bytes,
to_unicode as scrapy_to_unicode,
)

IS_PYTHON2 = sys.version_info < (3,)
if IS_PYTHON2:
STRING_TYPE = basestring
TEXT_TYPE = unicode
BINARY_TYPE = str
else:
STRING_TYPE = str
TEXT_TYPE = str
BINARY_TYPE = bytes


def is_string(var):
return isinstance(var, STRING_TYPE)
IS_PYTHON2 = False
STRING_TYPE = str
TEXT_TYPE = str
BINARY_TYPE = bytes


def to_unicode(text, encoding=None, errors='strict'):
"""Return the unicode representation of `text`.
warnings.warn(
"The sh_scrapy.compat module is deprecated, use the functions in scrapy.utils.python instead",
category=ScrapyDeprecationWarning,
stacklevel=2,
)

If `text` is already a ``unicode`` object, return it as-is.
If `text` is a ``bytes`` object, decode it using `encoding`.

Otherwise, raise an error.

"""
if isinstance(text, TEXT_TYPE):
return text
if not isinstance(text, BINARY_TYPE):
raise TypeError('to_unicode must receive a bytes, str or unicode '
'object, got %s' % type(text).__name__)
if encoding is None:
encoding = 'utf-8'
return text.decode(encoding, errors)
def is_string(var):
warnings.warn(
"is_string(var) is deprecated, please use isinstance(var, str) instead",
category=ScrapyDeprecationWarning,
stacklevel=2,
)
return isinstance(var, str)


@deprecated("scrapy.utils.python.to_bytes")
def to_bytes(text, encoding=None, errors='strict'):
"""Return the binary representation of `text`.

If `text` is already a ``bytes`` object, return it as-is.
If `text` is a ``unicode`` object, encode it using `encoding`.

Otherwise, raise an error."""
if isinstance(text, BINARY_TYPE):
return text
if not isinstance(text, TEXT_TYPE):
raise TypeError('to_bytes must receive a unicode, str or bytes '
'object, got %s' % type(text).__name__)
if encoding is None:
encoding = 'utf-8'
return text.encode(encoding, errors)
return scrapy_to_bytes(text, encoding, errors)


@deprecated("scrapy.utils.python.to_unicode")
def to_native_str(text, encoding=None, errors='strict'):
"""Return ``str`` representation of `text`.
return scrapy_to_unicode(text, encoding, errors)

``str`` representation means ``bytes`` in PY2 and ``unicode`` in PY3.
"""
if IS_PYTHON2:
return to_bytes(text, encoding, errors)
return to_unicode(text, encoding, errors)

@deprecated("scrapy.utils.python.to_unicode")
def to_unicode(text, encoding=None, errors='strict'):
return scrapy_to_unicode(text, encoding, errors)
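For code that still imports these helpers, the migration is mechanical; a minimal sketch (not part of the diff) using only the scrapy.utils.python functions the new module now delegates to:

    # Before: helpers from the now-deprecated sh_scrapy.compat module
    # from sh_scrapy.compat import is_string, to_bytes, to_native_str, to_unicode

    # After: the Scrapy helpers plus a plain isinstance() check
    from scrapy.utils.python import to_bytes, to_unicode

    value = "spider-arg"
    assert isinstance(value, str)   # replaces is_string(value)
    raw = to_bytes(value)           # b"spider-arg"; replaces compat.to_bytes()
    text = to_unicode(raw)          # "spider-arg"; replaces to_unicode() and to_native_str()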
9 changes: 5 additions & 4 deletions sh_scrapy/env.py
@@ -2,7 +2,8 @@
import json
import codecs
from base64 import b64decode
from sh_scrapy.compat import to_bytes, to_native_str, is_string

from scrapy.utils.python import to_bytes, to_unicode


def _make_scrapy_args(arg, args_dict):
@@ -11,7 +12,7 @@ def _make_scrapy_args(arg, args_dict):
args = []
for k, v in sorted(dict(args_dict).items()):
args += [arg, "{}={}".format(
to_native_str(k), to_native_str(v) if is_string(v) else v)]
to_unicode(k), to_unicode(v) if isinstance(v, str) else v)]
return args


@@ -36,7 +37,7 @@ def _job_args_and_env(msg):
cmd = msg.get('job_cmd')
if not isinstance(cmd, list):
cmd = [str(cmd)]
return cmd, {to_native_str(k): to_native_str(v) if is_string(v) else v
return cmd, {to_unicode(k): to_unicode(v) if isinstance(v, str) else v
for k, v in sorted(dict(env).items())}


@@ -51,7 +52,7 @@ def _jobname(msg):

def _jobauth(msg):
auth_data = to_bytes('{0[key]}:{0[auth]}'.format(msg))
return to_native_str(codecs.encode(auth_data, 'hex_codec'))
return to_unicode(codecs.encode(auth_data, 'hex_codec'))


def get_args_and_env(msg):
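To illustrate what the updated _jobauth() now returns, here is a standalone sketch of the same hex-encoding round trip; the job key and auth token are made-up values in the style of the tests:

    import codecs

    from scrapy.utils.python import to_bytes, to_unicode

    msg = {'key': '1/2/3', 'auth': 'authstr'}
    auth_data = to_bytes('{0[key]}:{0[auth]}'.format(msg))     # b'1/2/3:authstr'
    token = to_unicode(codecs.encode(auth_data, 'hex_codec'))  # hex-encoded credential string
    print(token)  # 312f322f333a61757468737472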
3 changes: 1 addition & 2 deletions sh_scrapy/extension.py
@@ -11,7 +11,6 @@
from scrapy.utils.request import request_fingerprint

from sh_scrapy import hsref
from sh_scrapy.compat import IS_PYTHON2
from sh_scrapy.crawl import ignore_warnings
from sh_scrapy.exceptions import SHScrapyDeprecationWarning
from sh_scrapy.middlewares import HS_PARENT_ID_KEY, request_id_sequence
@@ -43,7 +42,7 @@ def __init__(self, crawler):
self.logger = logging.getLogger(__name__)
self._write_item = self.pipe_writer.write_item
# https://github.com/scrapy/scrapy/commit/c76190d491fca9f35b6758bdc06c34d77f5d9be9
exporter_kwargs = {'binary': False} if not IS_PYTHON2 else {}
exporter_kwargs = {'binary': False}
with ignore_warnings(category=ScrapyDeprecationWarning):
self.exporter = PythonItemExporter(**exporter_kwargs)

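The exporter setup above drops the Python 2 branch and always requests text output. A rough standalone sketch of the equivalent construction, assuming ignore_warnings from sh_scrapy.crawl is the warning-suppressing context manager it appears to be here:

    from scrapy.exceptions import ScrapyDeprecationWarning
    from scrapy.exporters import PythonItemExporter

    from sh_scrapy.crawl import ignore_warnings

    # binary=False keeps exported values as str; the context manager silences
    # any ScrapyDeprecationWarning emitted while the exporter is constructed.
    with ignore_warnings(category=ScrapyDeprecationWarning):
        exporter = PythonItemExporter(binary=False)

    print(exporter.export_item({"title": "example"}))  # {'title': 'example'}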
5 changes: 3 additions & 2 deletions sh_scrapy/hsref.py
@@ -3,7 +3,8 @@
"""
import os
from codecs import decode
from sh_scrapy.compat import to_native_str

from scrapy.utils.python import to_unicode


class _HubstorageRef(object):
@@ -24,7 +25,7 @@ def __init__(self):

@property
def auth(self):
return to_native_str(decode(os.environ['SHUB_JOBAUTH'], 'hex_codec'))
return to_unicode(decode(os.environ['SHUB_JOBAUTH'], 'hex_codec'))

@property
def endpoint(self):
10 changes: 5 additions & 5 deletions sh_scrapy/log.py
@@ -2,10 +2,10 @@
import sys
import warnings

from twisted.python import log as txlog
from scrapy import __version__
from scrapy.utils.python import to_unicode
from twisted.python import log as txlog

from sh_scrapy.compat import to_native_str
from sh_scrapy.writer import pipe_writer


@@ -120,7 +120,7 @@ def _get_log_item(self, ev):

msg = ev.get('message')
if msg:
msg = to_native_str(msg[0])
msg = to_unicode(msg[0])

failure = ev.get('failure', None)
if failure:
@@ -156,7 +156,7 @@ def _logprefixed(self, msg):
_logfn(message=self.prefix + msg, level=self.loglevel)

def write(self, data):
data = to_native_str(data, self.encoding)
data = to_unicode(data, self.encoding)

d = (self.buf + data).split('\n')
self.buf = d[-1]
@@ -166,5 +166,5 @@ def write(self, data):

def writelines(self, lines):
for line in lines:
line = to_native_str(line, self.encoding)
line = to_unicode(line, self.encoding)
self._logprefixed(line)
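The write() and writelines() changes above only swap the decoding helper; for context, a stripped-down sketch of the line-buffering logic they feed, with the real _logprefixed() call replaced by print():

    from scrapy.utils.python import to_unicode

    class LineBufferedLogSketch:
        """Toy stand-in for the stdout/stderr replacement in sh_scrapy.log."""

        def __init__(self, encoding='utf-8'):
            self.encoding = encoding
            self.buf = ''

        def write(self, data):
            data = to_unicode(data, self.encoding)
            parts = (self.buf + data).split('\n')
            self.buf = parts[-1]           # keep the unterminated tail for later
            for line in parts[:-1]:
                print(line)                # the real class calls self._logprefixed(line)

    out = LineBufferedLogSketch()
    out.write(b'first line\nsecond ')
    out.write(b'half\n')                   # prints "first line", then "second half"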
9 changes: 5 additions & 4 deletions sh_scrapy/settings.py
@@ -2,10 +2,11 @@
import sys
import logging
import tempfile
from sh_scrapy.compat import to_native_str, is_string

from scrapy.settings import Settings
from scrapy.utils.misc import load_object
from scrapy.utils.project import get_project_settings
from scrapy.utils.python import to_unicode


logger = logging.getLogger(__name__)
@@ -54,8 +55,8 @@ def __init__(self):

def set(self, name, value, priority='project'):
super(EntrypointSettings, self).set(
to_native_str(name),
to_native_str(value) if is_string(value) else value,
to_unicode(name),
to_unicode(value) if isinstance(value, str) else value,
priority=priority)

def copy_to_dict(self):
@@ -110,7 +111,7 @@ def _update_old_classpaths(settings):
elif not isinstance(setting_value, dict):
continue
for path in setting_value.keys():
if not is_string(path):
if not isinstance(path, str):
continue
updated_path = update_classpath(path)
if updated_path != path:
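For the EntrypointSettings.set() override above, a short usage sketch; the setting name and value here are made up:

    from sh_scrapy.settings import EntrypointSettings

    settings = EntrypointSettings()
    # set() normalises the setting name to str before storing the value
    settings.set('JOBDIR', '/tmp/job-1-2-3')
    assert settings.get('JOBDIR') == '/tmp/job-1-2-3'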
7 changes: 4 additions & 3 deletions tests/conftest.py
@@ -5,15 +5,16 @@
import tempfile

import pytest
from scrapy.utils.python import to_unicode, to_bytes

TEMP_DIR = tempfile.mkdtemp()
SHUB_FIFO_PATH = os.path.join(TEMP_DIR, 'scrapinghub')
os.environ['SHUB_FIFO_PATH'] = SHUB_FIFO_PATH

from sh_scrapy.writer import pipe_writer
from sh_scrapy.compat import to_native_str, to_bytes
from sh_scrapy.writer import pipe_writer # should go after setting SHUB_FIFO_PATH

TEST_AUTH = to_native_str(codecs.encode(to_bytes('1/2/3:authstr'), 'hex_codec'))

TEST_AUTH = to_unicode(codecs.encode(to_bytes('1/2/3:authstr'), 'hex_codec'))


@pytest.fixture(scope='session', autouse=True)
68 changes: 59 additions & 9 deletions tests/test_compat.py
@@ -1,32 +1,82 @@
import warnings

import pytest
from sh_scrapy.compat import to_bytes
from sh_scrapy.compat import to_unicode
from scrapy.exceptions import ScrapyDeprecationWarning

from sh_scrapy.compat import is_string, to_bytes, to_unicode, to_native_str


# test deprecation messages

def test_deprecated_is_string():
with warnings.catch_warnings(record=True) as caught:
assert is_string("foo")
assert not is_string(b"foo")
assert not is_string(1)
assert (
"is_string(var) is deprecated, please use isinstance(var, str) instead"
== str(caught[0].message)
)
assert caught[0].category is ScrapyDeprecationWarning


def test_deprecated_to_unicode():
with warnings.catch_warnings(record=True) as caught:
assert to_unicode("foo") == "foo"
assert to_unicode(b"foo") == "foo"
assert (
"Call to deprecated function to_unicode. Use scrapy.utils.python.to_unicode instead."
== str(caught[0].message)
)
assert caught[0].category is ScrapyDeprecationWarning


def test_deprecated_to_native_str():
with warnings.catch_warnings(record=True) as caught:
assert to_native_str("foo") == "foo"
assert to_native_str(b"foo") == "foo"
assert (
"Call to deprecated function to_native_str. Use scrapy.utils.python.to_unicode instead."
== str(caught[0].message)
)
assert caught[0].category is ScrapyDeprecationWarning


def test_deprecated_to_bytes():
with warnings.catch_warnings(record=True) as caught:
assert to_bytes("foo") == b"foo"
assert to_bytes(b"foo") == b"foo"
assert (
"Call to deprecated function to_bytes. Use scrapy.utils.python.to_bytes instead."
== str(caught[0].message)
)
assert caught[0].category is ScrapyDeprecationWarning

# Testing to_unicode conversion

def test_to_unicode_an_utf8_encoded_string_to_unicode():
def test_to_str_an_utf8_encoded_string_to_str():
assert to_unicode(b'lel\xc3\xb1e') == u'lel\xf1e'


def test_to_unicode_a_latin_1_encoded_string_to_unicode():
def test_to_str_a_latin_1_encoded_string_to_str():
assert to_unicode(b'lel\xf1e', 'latin-1') == u'lel\xf1e'


def test_to_unicode_a_unicode_to_unicode_should_return_the_same_object():
def test_to_str_a_unicode_to_str_should_return_the_same_object():
assert to_unicode(u'\xf1e\xf1e\xf1e') == u'\xf1e\xf1e\xf1e'


def test_to_unicode_a_strange_object_should_raise_TypeError():
def test_to_str_a_strange_object_should_raise_TypeError():
with pytest.raises(TypeError) as excinfo:
to_unicode(123)


def test_to_unicode_errors_argument():
def test_to_str_errors_argument():
assert to_unicode(b'a\xedb', 'utf-8', errors='replace') == u'a\ufffdb'

# Testing to_unicode conversion

# Testing to_bytes conversion

def test_to_bytes_a_unicode_object_to_an_utf_8_encoded_string():
assert to_bytes(u'\xa3 49') == b'\xc2\xa3 49'
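The tests above record warnings manually with warnings.catch_warnings; an equivalent, slightly terser pattern (not used in this PR) is pytest.warns, sketched here for one of the helpers:

    import pytest
    from scrapy.exceptions import ScrapyDeprecationWarning

    from sh_scrapy.compat import to_bytes


    def test_to_bytes_warns_and_still_converts():
        # pytest.warns fails the test if no matching warning is emitted
        with pytest.warns(ScrapyDeprecationWarning, match="to_bytes"):
            assert to_bytes("foo") == b"foo"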
8 changes: 4 additions & 4 deletions tests/test_env.py
@@ -5,7 +5,7 @@
import pytest
import tempfile

from sh_scrapy.compat import to_bytes, to_native_str
from scrapy.utils.python import to_bytes, to_unicode

from sh_scrapy.env import _jobauth
from sh_scrapy.env import _jobname
@@ -72,7 +72,7 @@ def test_jobname():
def test_jobauth():
msg = {'key': '1/2/3', 'auth': 'authstring'}
expected = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
assert _jobauth(msg) == to_native_str(expected)
assert _jobauth(msg) == to_unicode(expected)


def test_get_args_and_env_run_spider():
@@ -88,7 +88,7 @@ def test_get_args_and_env_run_spider():
assert result[1] == {'SCRAPY_JOB': '1/2/3',
'SCRAPY_PROJECT_ID': '1',
'SCRAPY_SPIDER': 'test',
'SHUB_JOBAUTH': to_native_str(expected_auth),
'SHUB_JOBAUTH': to_unicode(expected_auth),
'SHUB_JOBKEY': '1/2/3',
'SHUB_JOBNAME': 'test',
'SHUB_JOB_TAGS': '',
@@ -109,7 +109,7 @@ def test_get_args_and_env_run_script():
assert len(result) == 2
assert result[0] == ['custom.py', 'arg1']
assert result[1] == {
'SHUB_JOBAUTH': to_native_str(expected_auth),
'SHUB_JOBAUTH': to_unicode(expected_auth),
'SHUB_JOBKEY': '1/2/3',
'SHUB_JOBNAME': 'custom.py',
'SHUB_JOB_TAGS': ''}