From 821e5a993c66a4075f4c383c15f09e29b8df0f5f Mon Sep 17 00:00:00 2001 From: "sx.slex@gmail.com" Date: Tue, 1 Sep 2015 12:54:26 -0300 Subject: [PATCH 1/9] add windows support --- rows/localization.py | 2 +- rows/plugins/txt.py | 1 - rows/utils.py | 16 +++++++---- tests/tests_fields.py | 55 +++++++++++++++++++++++++------------ tests/tests_localization.py | 7 ++++- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/rows/localization.py b/rows/localization.py index dfdf3472..118e979c 100644 --- a/rows/localization.py +++ b/rows/localization.py @@ -13,7 +13,7 @@ def locale_context(name, category=locale.LC_ALL): old_name = locale.getlocale(category) - if type(name) is types.UnicodeType: + if isinstance(name, types.UnicodeType) and '.' in name: name = name.split('.') locale.setlocale(category, name) rows.fields.SHOULD_NOT_USE_LOCALE = False diff --git a/rows/plugins/txt.py b/rows/plugins/txt.py index be18ee5e..98c58256 100644 --- a/rows/plugins/txt.py +++ b/rows/plugins/txt.py @@ -20,7 +20,6 @@ from rows.operations import serialize from rows.utils import get_filename_and_fobj - DASH, PLUS, PIPE = '-', '+', '|' def _max_column_sizes(table, encoding, *args, **kwargs): diff --git a/rows/utils.py b/rows/utils.py index 1df75f71..f933db31 100644 --- a/rows/utils.py +++ b/rows/utils.py @@ -21,7 +21,11 @@ from unicodedata import normalize -import magic +try: + import magic +except ImportError: + magic = None + import requests import rows @@ -103,7 +107,6 @@ def create_table(data, meta=None, force_headers=None, fields=None, else: header = make_header(fields.keys()) - # TODO: may reuse max_columns from html max_columns = max(len(row) for row in table_rows) assert len(fields) == max_columns @@ -137,9 +140,12 @@ def download_file(uri): content_type = response.headers['content-type'] plugin_name = content_type.split('/')[-1] except (KeyError, IndexError): - with magic.Magic() as file_type_guesser: - file_type = file_type_guesser.id_buffer(content) - plugin_name = file_type.strip().split()[0] + if magic: + with magic.Magic() as file_type_guesser: + file_type = file_type_guesser.id_buffer(content) + plugin_name = file_type.strip().split()[0] + else: + plugin_name = uri.split('/')[-1].split('.')[-1].lower() tmp = tempfile.NamedTemporaryFile() filename = '{}.{}'.format(tmp.name, plugin_name) diff --git a/tests/tests_fields.py b/tests/tests_fields.py index ef56c96a..e2693b88 100644 --- a/tests/tests_fields.py +++ b/tests/tests_fields.py @@ -25,9 +25,14 @@ from decimal import Decimal import rows +import platform from rows import fields +if platform.system() == 'Windows': + locale_name = str('ptb_bra') +else: + locale_name = 'pt_BR.UTF-8' class FieldsTestCase(unittest.TestCase): @@ -94,7 +99,7 @@ def test_IntegerField(self): types.UnicodeType) self.assertEqual(fields.IntegerField.deserialize(None), None) - with rows.locale_context('pt_BR.UTF-8'): + with rows.locale_context(locale_name): self.assertEqual(fields.IntegerField.serialize(42000), '42000') self.assertIs(type(fields.IntegerField.serialize(42000)), types.UnicodeType) @@ -121,7 +126,7 @@ def test_FloatField(self): self.assertIs(type(fields.FloatField.serialize(42.0)), types.UnicodeType) - with rows.locale_context('pt_BR.UTF-8'): + with rows.locale_context(locale_name): self.assertEqual(fields.FloatField.serialize(42000.0), '42000,000000') self.assertIs(type(fields.FloatField.serialize(42000.0)), @@ -152,9 +157,11 @@ def test_DecimalField(self): Decimal('21.21657469231')) self.assertEqual(fields.DecimalField.deserialize(None), None) - with rows.locale_context('pt_BR.UTF-8'): - self.assertEqual(types.UnicodeType, - type(fields.DecimalField.serialize(deserialized))) + with rows.locale_context(locale_name): + self.assertEqual( + types.UnicodeType, + type(fields.DecimalField.serialize(deserialized)) + ) self.assertEqual(fields.DecimalField.serialize(Decimal('4200')), '4200') self.assertEqual(fields.DecimalField.serialize(Decimal('42.0')), @@ -163,9 +170,13 @@ def test_DecimalField(self): '42000,0') self.assertEqual(fields.DecimalField.deserialize('42.000,00'), Decimal('42000.00')) - self.assertEqual(fields.DecimalField.serialize(Decimal('42000.0'), - grouping=True), - '42.000,0') + self.assertEqual( + fields.DecimalField.serialize( + Decimal('42000.0'), + grouping=True + ), + '42.000,0' + ) def test_PercentField(self): deserialized = Decimal('0.42010') @@ -186,10 +197,11 @@ def test_PercentField(self): self.assertEqual(fields.PercentField.serialize(Decimal('42.010')), '4201.0%') self.assertEqual(fields.PercentField.serialize(Decimal('0.01')), '1%') - - with rows.locale_context('pt_BR.UTF-8'): - self.assertEqual(type(fields.PercentField.serialize(deserialized)), - types.UnicodeType) + with rows.locale_context(locale_name): + self.assertEqual( + type(fields.PercentField.serialize(deserialized)), + types.UnicodeType + ) self.assertEqual(fields.PercentField.serialize(Decimal('42.0')), '4200%') self.assertEqual(fields.PercentField.serialize(Decimal('42000.0')), @@ -256,9 +268,13 @@ def test_UnicodeField(self): types.UnicodeType) self.assertIs(type(fields.UnicodeField.deserialize('test')), fields.UnicodeField.TYPE) - self.assertEqual(fields.UnicodeField.deserialize('Álvaro'.encode('utf-8'), - encoding='utf-8'), - 'Álvaro') + self.assertEqual( + fields.UnicodeField.deserialize( + 'Álvaro'.encode('utf-8'), + encoding='utf-8' + ), + 'Álvaro' + ) self.assertEqual(fields.UnicodeField.deserialize('Álvaro'), 'Álvaro') self.assertIs(fields.UnicodeField.deserialize(None), None) @@ -286,11 +302,14 @@ def setUp(self): 'date_column': fields.DateField, 'datetime_column': fields.DatetimeField, 'unicode_column': fields.UnicodeField, - 'null_column': fields.ByteField,} + 'null_column': fields.ByteField, } def test_detect_types_utf8(self): - result = fields.detect_types(self.fields, self.data, - encoding='utf-8') + result = fields.detect_types( + self.fields, + self.data, + encoding='utf-8' + ) self.assertEqual(type(result), collections.OrderedDict) self.assertEqual(result.keys(), self.fields) self.assertDictEqual(dict(result), self.expected) diff --git a/tests/tests_localization.py b/tests/tests_localization.py index d5a027cb..f7a5923b 100644 --- a/tests/tests_localization.py +++ b/tests/tests_localization.py @@ -18,6 +18,7 @@ from __future__ import unicode_literals import unittest +import platform import rows import rows.fields @@ -33,6 +34,10 @@ def test_locale_context_present_in_main_namespace(self): def test_locale_context(self): self.assertTrue(rows.fields.SHOULD_NOT_USE_LOCALE) - with locale_context('pt_BR.UTF-8'): + if platform.system() == 'Windows': + name = str('ptb_bra') + else: + name = 'pt_BR.UTF-8' + with locale_context(name): self.assertFalse(rows.fields.SHOULD_NOT_USE_LOCALE) self.assertTrue(rows.fields.SHOULD_NOT_USE_LOCALE) From 5a62a26342d60b9402f80361cf2edc10d4731569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Wed, 2 Sep 2015 20:25:50 -0300 Subject: [PATCH 2/9] Fix localization name --- rows/localization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rows/localization.py b/rows/localization.py index 118e979c..0d528fe2 100644 --- a/rows/localization.py +++ b/rows/localization.py @@ -13,8 +13,8 @@ def locale_context(name, category=locale.LC_ALL): old_name = locale.getlocale(category) - if isinstance(name, types.UnicodeType) and '.' in name: - name = name.split('.') + if isinstance(name, types.UnicodeType): + name = str(name) locale.setlocale(category, name) rows.fields.SHOULD_NOT_USE_LOCALE = False try: From 7cfc4b8c690b2c21ad9055bcadef8252b6c479cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Wed, 2 Sep 2015 20:26:05 -0300 Subject: [PATCH 3/9] Force default locales to be POSIX --- rows/cli.py | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/rows/cli.py b/rows/cli.py index 758ebb9f..29f7e1f1 100755 --- a/rows/cli.py +++ b/rows/cli.py @@ -26,22 +26,25 @@ from rows.utils import import_from_uri, export_to_uri +DEFAULT_INPUT_ENCODING = 'utf-8' +DEFAULT_OUTPUT_ENCODING = 'utf-8' +DEFAULT_INPUT_LOCALE = 'POSIX' +DEFAULT_OUTPUT_LOCALE = 'POSIX' + @click.group() def cli(): pass @cli.command(help='Convert table on `source` URI to `destination`') -@click.option('--input-encoding', default='utf-8') -@click.option('--output-encoding', default='utf-8') -@click.option('--input-locale', default='en_US.UTF-8') -@click.option('--output-locale', default='en_US.UTF-8') +@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING) +@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING) +@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE) +@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE) @click.argument('source') @click.argument('destination') def convert(input_encoding, output_encoding, input_locale, output_locale, source, destination): - input_locale = input_locale.split('.') - output_locale = output_locale.split('.') with rows.locale_context(input_locale): table = import_from_uri(source) @@ -51,18 +54,16 @@ def convert(input_encoding, output_encoding, input_locale, output_locale, @cli.command(help='Join tables from `source` URIs using `key(s)` to group rows and save into `destination`') -@click.option('--input-encoding', default='utf-8') -@click.option('--output-encoding', default='utf-8') -@click.option('--input-locale', default='en_US.UTF-8') -@click.option('--output-locale', default='en_US.UTF-8') +@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING) +@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING) +@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE) +@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE) @click.argument('keys') @click.argument('sources', nargs=-1, required=True) @click.argument('destination') def join(input_encoding, output_encoding, input_locale, output_locale, keys, sources, destination): keys = [key.strip() for key in keys.split(',')] - input_locale = input_locale.split('.') - output_locale = output_locale.split('.') with rows.locale_context(input_locale): tables = [import_from_uri(source) for source in sources] @@ -74,17 +75,15 @@ def join(input_encoding, output_encoding, input_locale, output_locale, keys, @cli.command(help='Sort from `source` by `key(s)` and save into `destination`') -@click.option('--input-encoding', default='utf-8') -@click.option('--output-encoding', default='utf-8') -@click.option('--input-locale', default='en_US.UTF-8') -@click.option('--output-locale', default='en_US.UTF-8') +@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING) +@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING) +@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE) +@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE) @click.argument('key') @click.argument('source') @click.argument('destination') def sort(input_encoding, output_encoding, input_locale, output_locale, key, source, destination): - input_locale = input_locale.split('.') - output_locale = output_locale.split('.') key = key.replace('^', '-') with rows.locale_context(input_locale): @@ -96,16 +95,14 @@ def sort(input_encoding, output_encoding, input_locale, output_locale, key, @cli.command(help='Sum tables from `source` URIs and save into `destination`') -@click.option('--input-encoding', default='utf-8') -@click.option('--output-encoding', default='utf-8') -@click.option('--input-locale', default='en_US.UTF-8') -@click.option('--output-locale', default='en_US.UTF-8') +@click.option('--input-encoding', default=DEFAULT_INPUT_ENCODING) +@click.option('--output-encoding', default=DEFAULT_OUTPUT_ENCODING) +@click.option('--input-locale', default=DEFAULT_INPUT_LOCALE) +@click.option('--output-locale', default=DEFAULT_OUTPUT_LOCALE) @click.argument('sources', nargs=-1, required=True) @click.argument('destination') def sum(input_encoding, output_encoding, input_locale, output_locale, sources, destination): - input_locale = input_locale.split('.') - output_locale = output_locale.split('.') with rows.locale_context(input_locale): tables = [import_from_uri(source) for source in sources] From db2f37a32293b5f901853141e4c85ca85d4ecc84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Thu, 3 Sep 2015 06:59:31 -0300 Subject: [PATCH 4/9] Do not set locale if already set --- rows/localization.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rows/localization.py b/rows/localization.py index 0d528fe2..5fe0f3ea 100644 --- a/rows/localization.py +++ b/rows/localization.py @@ -13,12 +13,17 @@ def locale_context(name, category=locale.LC_ALL): old_name = locale.getlocale(category) + if None not in old_name: + old_name = '.'.join(old_name) if isinstance(name, types.UnicodeType): name = str(name) - locale.setlocale(category, name) + + if old_name != name: + locale.setlocale(category, name) rows.fields.SHOULD_NOT_USE_LOCALE = False try: yield finally: - locale.setlocale(category, old_name) + if old_name != name: + locale.setlocale(category, old_name) rows.fields.SHOULD_NOT_USE_LOCALE = True From 61dcc73edde2d40d3706add5c2874cb46b50128a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Thu, 3 Sep 2015 06:59:54 -0300 Subject: [PATCH 5/9] Set 'C' as default locale --- rows/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rows/cli.py b/rows/cli.py index 29f7e1f1..ff54f8c7 100755 --- a/rows/cli.py +++ b/rows/cli.py @@ -28,8 +28,8 @@ DEFAULT_INPUT_ENCODING = 'utf-8' DEFAULT_OUTPUT_ENCODING = 'utf-8' -DEFAULT_INPUT_LOCALE = 'POSIX' -DEFAULT_OUTPUT_LOCALE = 'POSIX' +DEFAULT_INPUT_LOCALE = 'C' +DEFAULT_OUTPUT_LOCALE = 'C' @click.group() def cli(): From f8c258e85cd4877d548beb714b82892b778a4de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Thu, 3 Sep 2015 07:38:38 -0300 Subject: [PATCH 6/9] Remove `filemagic` dependency It was used only in some special cases and the package is not available on Debian so keeping it would delay the rows package creation. --- requirements/production.txt | 1 - rows/utils.py | 13 +++---------- setup.py | 6 +++--- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/requirements/production.txt b/requirements/production.txt index 3cef7c0d..efcf6e4a 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -1,5 +1,4 @@ click -filemagic lxml requests unicodecsv diff --git a/rows/utils.py b/rows/utils.py index f933db31..d83782d9 100644 --- a/rows/utils.py +++ b/rows/utils.py @@ -21,11 +21,6 @@ from unicodedata import normalize -try: - import magic -except ImportError: - magic = None - import requests import rows @@ -140,12 +135,10 @@ def download_file(uri): content_type = response.headers['content-type'] plugin_name = content_type.split('/')[-1] except (KeyError, IndexError): - if magic: - with magic.Magic() as file_type_guesser: - file_type = file_type_guesser.id_buffer(content) - plugin_name = file_type.strip().split()[0] - else: + try: plugin_name = uri.split('/')[-1].split('.')[-1].lower() + except IndexError: + raise RuntimeError('Could not identify file type.') tmp = tempfile.NamedTemporaryFile() filename = '{}.{}'.format(tmp.name, plugin_name) diff --git a/setup.py b/setup.py index 6a9ce41b..1a24f253 100644 --- a/setup.py +++ b/setup.py @@ -11,15 +11,15 @@ author_email='alvarojusten@gmail.com', url='https://github.com/turicas/rows/', packages=['rows', 'rows.plugins'], - install_requires=['unicodecsv', 'click', 'filemagic', 'requests'], + install_requires=['unicodecsv', 'click', 'requests'], extras_require = { 'csv': ['unicodecsv'], 'html': ['lxml'], # apt: libxslt-dev libxml2-dev - 'cli': ['click', 'filemagic', 'requests'], + 'cli': ['click', 'requests'], 'xls': ['xlrd', 'xlwt'], 'all': ['unicodecsv', 'lxml', - 'click', 'filemagic', 'requests', + 'click', 'requests', 'xlrd', 'xlwt'], }, keywords=['tabular', 'table', 'csv', 'xls', 'html', 'rows'], From c46d05467ecd91e511abc340081174e32d430cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Tue, 1 Sep 2015 21:15:07 -0300 Subject: [PATCH 7/9] Add log of changes --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..8e272a81 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,15 @@ +# rows' Log of Changes + +## Version 0.2.0 + +- Add plugin JSON (thanks [@sxslex](https://github.com/sxslex)) +- Add Windows support (thanks [@sxslex](https://github.com/sxslex)) +- Add `samples` parameter to `create_table` +- Support lazy objects on `create_table` +- Refactor `export_to_txt` +- Enhance README + + +## Version 0.1.0 + +- First public release From 05c1087ee6fff739ab3006c3f3e39abb82c65e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Thu, 3 Sep 2015 16:07:09 -0300 Subject: [PATCH 8/9] Add 0.1.0 and 0.1.1 to changelog --- CHANGELOG.md | 56 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e272a81..03cda357 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,55 @@ # rows' Log of Changes -## Version 0.2.0 +## Version `0.2.0` (under development) + +**Released on: (under development)** -- Add plugin JSON (thanks [@sxslex](https://github.com/sxslex)) -- Add Windows support (thanks [@sxslex](https://github.com/sxslex)) -- Add `samples` parameter to `create_table` -- Support lazy objects on `create_table` -- Refactor `export_to_txt` - Enhance README +- Refactor `export_to_txt` +- Support lazy objects on `create_table` +- Add `samples` parameter to `create_table` +- Add plugin JSON (thanks [@sxslex](https://github.com/sxslex)) + + +## Version `0.1.1` + +**Released on: 2015-09-03** + +- Fix code to run on Windows (thanks [@sxslex](https://github.com/sxslex)) +- Fix locale (name, default name etc.) +- Remove `filemagic` dependency (waiting for `python-magic` to be available on + PyPI) +- Write log of changes for `0.1.0` and `0.1.1` + +## Version `0.1.0` -## Version 0.1.0 +**Released on: 2015-08-29** -- First public release +- Implement `Table` and its basic methods +- Implement basic plugin support with many utilities and the following formats: + - `csv` (input/output) + - `html` (input/output) + - `txt` (output) + - `xls` (input/output) +- Implement the following field types - many of them with locale support: + - `ByteField` + - `BoolField` + - `IntegerField` + - `FloatField` + - `DecimalField` + - `PercentField` + - `DateField` + - `DatetimeField` + - `UnicodeField` +- Implement basic `Table` operations: + - `sum` + - `join` + - `transform` + - `serialize` +- Implement a command-line interface with the following subcommands: + - `convert` + - `join` + - `sort` + - `sum` +- Add examples to the repository From 9dfac85ea7e5b9043d864dec042773de2cf35d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?= Date: Thu, 3 Sep 2015 16:16:29 -0300 Subject: [PATCH 9/9] Change version number to 0.1.1 --- rows/__init__.py | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/rows/__init__.py b/rows/__init__.py index 2713d7a1..b95d19db 100644 --- a/rows/__init__.py +++ b/rows/__init__.py @@ -25,3 +25,6 @@ from rows.plugins.html import import_from_html, export_to_html except ImportError: pass + + +__version__ = '0.1.1' diff --git a/setup.py b/setup.py index 1a24f253..d8af051e 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup(name='rows', description='Import and export tabular data easily with Python', long_description='', - version='0.1.0', + version='0.1.1', author=u'Álvaro Justen', author_email='alvarojusten@gmail.com', url='https://github.com/turicas/rows/',