Merge branch 'release/0.2.1'

turicas committed Aug 10, 2016
2 parents f2b7fc3 + 7188f71 commit 0263e3c
Showing 35 changed files with 1,200 additions and 569 deletions.
45 changes: 45 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,50 @@
# rows' Log of Changes

## Version `0.2.1`

**Released on: 2016-08-10**

### Backwards Incompatible Changes

- `rows.utils.export_to_uri` signature now matches `rows.export_to_*`: first
  the `rows.Table` object, then the URI (see the sketch after this list)
- Changed the default table name in `import_from_sqlite` and
  `export_to_sqlite` (from `rows` and `rows_{number}` to `table{number}`)
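
A minimal sketch of both changes (the file names `data.csv`, `data.json` and
`data.sqlite` are hypothetical):

```python
import rows
from rows.utils import export_to_uri

table = rows.import_from_csv('data.csv')  # hypothetical input file

# Before 0.2.1 the URI came first: export_to_uri('data.json', table).
# Now the rows.Table object comes first, matching rows.export_to_*:
export_to_uri(table, 'data.json')

# The SQLite plugin now defaults to table1, table2, ... instead of
# rows / rows_{number}:
rows.export_to_sqlite(table, 'data.sqlite')  # creates table 'table1'
```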


### Bug Fixes

- [#170](https://github.com/turicas/rows/issues/170) (SQLite plugin) Error
  converting `int` and `float` when the value is `None`
- [#168](https://github.com/turicas/rows/issues/168) Use `Field.serialize`
  when the field type is unknown (affects the XLS, XLSX and SQLite plugins)
- [#167](https://github.com/turicas/rows/issues/167) Use more data to detect
  the CSV dialect, restrict the possible delimiters and fall back to the
  `excel` dialect if detection fails
- [#176](https://github.com/turicas/rows/issues/176) Fix problem with quotes
  in the CSV plugin
- [#179](https://github.com/turicas/rows/issues/179) Fix double-underscore
  problem in `rows.utils.slug` (see the sketch after this list)
- [#175](https://github.com/turicas/rows/issues/175) Fix `None`
  serialization/deserialization in all plugins (and also in field types)
- [#172](https://github.com/turicas/rows/issues/172) Expose all tables in
  `rows query` for SQLite databases
- Fix `examples/cli/convert.sh` (missing `-`)
- Avoid SQL injection in table names
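
A minimal sketch of the `slug` fix, assuming (hypothetically) that repeated
separators now collapse into a single underscore:

```python
from rows.plugins.utils import slug  # moved here from rows.utils in 0.2.1

# Before the fix, consecutive separators could leave doubled underscores;
# the input string and expected output below are hypothetical examples
assert slug('Total  de pessoas') == 'total_de_pessoas'
```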


### Enhancements and Refactorings

- Refactor `rows.utils.import_from_uri`
- Encoding and file type are better detected in `rows.utils.import_from_uri`
- Added helper functions to `rows.utils` for encoding and file type/plugin
  detection (see the sketch after this list)
- There's a better description of plugin metadata (accepted MIME types) in
  `rows.utils` (this should be refactored to live inside each plugin)
- Moved the `slug` and `ipartition` functions to `rows.plugins.utils`
- Optimize `rows query` when using only one SQLite source
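
The new helper functions can also be used directly; a minimal sketch, assuming
a local file named `data.csv` (hypothetical):

```python
from rows.utils import detect_source, import_from_source

# detect_source figures out which plugin (and encoding) handles a URI
source = detect_source('data.csv', verify_ssl=True)
print(source.plugin_name)  # e.g. 'csv'

# import_from_source then loads the data using the detected plugin
table = import_from_source(source, 'utf-8')
```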


## Version `0.2.0`

**Released on: 2016-07-15**
4 changes: 2 additions & 2 deletions README.md
@@ -342,10 +342,10 @@ file format you want. Currently we have the following plugins:
installed by default)
- TXT: use `rows.export_to_txt` (no dependencies)
- JSON: use `rows.import_from_json` and `rows.export_to_json` (no dependencies)
- HTML: use `rows.import_from_html` and `rows.export_to_html` (denpendencies
- HTML: use `rows.import_from_html` and `rows.export_to_html` (dependencies
must be installed with `pip install rows[html]`)
- XPath: use `rows.import_from_xpath` passing the following arguments:
`filename_or_fobj`, `rows_xpath` and `fields_xpath` (denpendencies must be
`filename_or_fobj`, `rows_xpath` and `fields_xpath` (dependencies must be
installed with `pip install rows[xpath]`) -- see an example in
`examples/library/ecuador_radiodifusoras.py`.
- Parquet: use `rows.import_from_parquet` passing the filename (dependencies
5 changes: 3 additions & 2 deletions examples/cli/convert.sh
@@ -8,10 +8,11 @@ rows convert --input-locale=$LOCALE --input-encoding=utf-8 $URL $FILENAME.csv
rows convert $FILENAME.csv $FILENAME.html
rows convert $FILENAME.html $FILENAME.xls
rows convert $FILENAME.xls $FILENAME.txt
rows convert $FILENAME.txt $FILENAME.sqlite
rows convert $FILENAME.txt $FILENAME.xlsx
rows convert $FILENAME.xlsx $FILENAME.sqlite
rows convert $FILENAME.sqlite $FILENAME.json
# When converting to JSON we cannot guarantee field order!

# `convert` can also sort the data before saving it into the CSV file
rows convert --input-encoding=utf-8 --input-locale=$LOCALE \
rows convert --input-locale=$LOCALE --input-encoding=utf-8 \
--order-by=^pessoas $URL $FILENAME-sorted.csv
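
A minimal Python sketch of part of this conversion chain, assuming a local
`data.csv` (hypothetical name) and the `html`/`xlsx` extras installed:

```python
import rows

table = rows.import_from_csv('data.csv')  # stands in for $FILENAME.csv
rows.export_to_html(table, 'data.html')
rows.export_to_xlsx(table, 'data.xlsx')
rows.export_to_sqlite(table, 'data.sqlite')  # default table name: 'table1'
```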
2 changes: 1 addition & 1 deletion rows/__init__.py
@@ -66,4 +66,4 @@
pass


__version__ = '0.2.0'
__version__ = '0.2.1'
88 changes: 55 additions & 33 deletions rows/cli.py
@@ -1,6 +1,6 @@
# coding: utf-8

# Copyright 2014-2015 Álvaro Justen <https://github.com/turicas/rows/>
# Copyright 2014-2016 Álvaro Justen <https://github.com/turicas/rows/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -17,11 +17,13 @@

# TODO: define exit codes
# TODO: move default options to base command
# TODO: may move all 'destination' to '--output'
# TODO: test this whole module
# TODO: add option to pass 'create_table' options in command-line (like force
# fields)

import shlex
import sqlite3
import sys

from io import BytesIO
Expand All @@ -31,7 +33,8 @@

import rows

from rows.utils import import_from_uri, export_to_uri
from rows.utils import (detect_source, export_to_uri, import_from_source,
import_from_uri)
from rows.plugins.utils import make_header


@@ -68,13 +71,7 @@ def _get_field_names(field_names, table_field_names, permit_not=False):
click.echo('Table does not have fields: {}'.format(missing), err=True)
sys.exit(1)
else:
result = []
for field_name in table_field_names:
if field_name in new_field_names:
result.append(field_name)
elif '^' + field_name in new_field_names:
result.append('^' + field_name)
return result
return new_field_names


@click.group()
@@ -115,9 +112,9 @@ def convert(input_encoding, output_encoding, input_locale, output_locale,

if output_locale is not None:
with rows.locale_context(output_locale):
export_to_uri(destination, table, encoding=output_encoding)
export_to_uri(table, destination, encoding=output_encoding)
else:
export_to_uri(destination, table, encoding=output_encoding)
export_to_uri(table, destination, encoding=output_encoding)


@cli.command(help='Join tables from `source` URIs using `key(s)` to group '
@@ -159,9 +156,9 @@ def join(input_encoding, output_encoding, input_locale, output_locale,

if output_locale is not None:
with rows.locale_context(output_locale):
export_to_uri(destination, result, encoding=output_encoding)
export_to_uri(result, destination, encoding=output_encoding)
else:
export_to_uri(destination, result, encoding=output_encoding)
export_to_uri(result, destination, encoding=output_encoding)


@cli.command(name='sum',
@@ -201,9 +198,9 @@ def sum_(input_encoding, output_encoding, input_locale, output_locale,

if output_locale is not None:
with rows.locale_context(output_locale):
export_to_uri(destination, result, encoding=output_encoding)
export_to_uri(result, destination, encoding=output_encoding)
else:
export_to_uri(destination, result, encoding=output_encoding)
export_to_uri(result, destination, encoding=output_encoding)


@cli.command(name='print', help='Print a table')
@@ -225,7 +222,6 @@ def print_(input_encoding, output_encoding, input_locale, output_locale,
err=True)
sys.exit(20)

# TODO: may use sys.stdout.encoding if output_file = '-'
output_encoding = output_encoding or sys.stdout.encoding or \
DEFAULT_OUTPUT_ENCODING

@@ -246,6 +242,8 @@ def print_(input_encoding, output_encoding, input_locale, output_locale,
if fields_except is not None:
fields_except = _get_field_names(fields_except, table_field_names)

# TODO: should set `export_fields = None` if `--fields` and
# `--fields-except` are `None`
if fields is not None and fields_except is None:
export_fields = fields
elif fields is not None and fields_except is not None:
@@ -289,35 +287,59 @@ def print_(input_encoding, output_encoding, input_locale, output_locale,
@click.argument('query', required=True)
@click.argument('sources', nargs=-1, required=True)
def query(input_encoding, output_encoding, input_locale, output_locale,
verify_ssl, fields, output, query, sources):
verify_ssl, fields, output, query, sources):

# TODO: may move all 'destination' to '--output'
# TODO: may use sys.stdout.encoding if output_file = '-'
output_encoding = output_encoding or sys.stdout.encoding or \
DEFAULT_OUTPUT_ENCODING

if not query.lower().startswith('select'):
field_names = '*' if fields is None else fields
table_names = ', '.join(['table{}'.format(index)
for index in range(1, len(sources) + 1)])
query = 'SELECT {} FROM {} WHERE {}'.format(field_names, table_names,
query)
if input_locale is not None:
with rows.locale_context(input_locale):

if len(sources) == 1:
source = detect_source(sources[0], verify_ssl=verify_ssl)

if source.plugin_name != 'sqlite':
if input_locale is not None:
with rows.locale_context(input_locale):
table = import_from_source(source, DEFAULT_INPUT_ENCODING)
else:
table = import_from_source(source, DEFAULT_INPUT_ENCODING)

sqlite_connection = sqlite3.Connection(':memory:')
rows.export_to_sqlite(table,
sqlite_connection,
table_name='table1')
result = rows.import_from_sqlite(sqlite_connection, query=query)

else:
# Optimization: query the SQLite database directly
result = import_from_source(source,
DEFAULT_INPUT_ENCODING,
query=query)

else:
if input_locale is not None:
with rows.locale_context(input_locale):
tables = [_import_table(source, encoding=input_encoding,
verify_ssl=verify_ssl)
for source in sources]
else:
tables = [_import_table(source, encoding=input_encoding,
verify_ssl=verify_ssl)
for source in sources]
else:
tables = [_import_table(source, encoding=input_encoding,
verify_ssl=verify_ssl)
for source in sources]
for source in sources]

sqlite_connection = rows.export_to_sqlite(tables[0], ':memory:',
table_name='table1')
for index, table in enumerate(tables[1:], start=2):
rows.export_to_sqlite(table, sqlite_connection,
table_name='table{}'.format(index))
sqlite_connection = sqlite3.Connection(':memory:')
for index, table in enumerate(tables, start=1):
rows.export_to_sqlite(table,
sqlite_connection,
table_name='table{}'.format(index))

result = rows.import_from_sqlite(sqlite_connection, query=query)
result = rows.import_from_sqlite(sqlite_connection, query=query)

if output is None:
fobj = BytesIO()
Expand All @@ -331,9 +353,9 @@ def query(input_encoding, output_encoding, input_locale, output_locale,
else:
if output_locale is not None:
with rows.locale_context(output_locale):
export_to_uri(output, result, encoding=output_encoding)
export_to_uri(result, output, encoding=output_encoding)
else:
export_to_uri(output, result, encoding=output_encoding)
export_to_uri(result, output, encoding=output_encoding)


if __name__ == '__main__':
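
The single-source path introduced above can be reproduced outside the CLI; a
minimal sketch, assuming a local `data.csv` (hypothetical name):

```python
import sqlite3

import rows

# What `rows query` now does for a single non-SQLite source: import it,
# dump it into an in-memory SQLite database and run the query there
table = rows.import_from_csv('data.csv')  # hypothetical source file
connection = sqlite3.Connection(':memory:')
rows.export_to_sqlite(table, connection, table_name='table1')
result = rows.import_from_sqlite(connection,
                                 query='SELECT * FROM table1 LIMIT 10')

# For a single SQLite source the CLI now skips this copy entirely and
# runs the query against the original database
```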
16 changes: 4 additions & 12 deletions rows/fields.py
@@ -226,9 +226,7 @@ def serialize(cls, value, *args, **kwargs):
@classmethod
def deserialize(cls, value, *args, **kwargs):
value = super(DecimalField, cls).deserialize(value)
if is_null(value):
return None
elif isinstance(value, cls.TYPE):
if value is None or isinstance(value, cls.TYPE):
return value
elif type(value) in (int, float):
return Decimal(str(value))
@@ -370,11 +368,7 @@ class TextField(Field):

@classmethod
def deserialize(cls, value, *args, **kwargs):
value = super(TextField, cls).deserialize(value)
if value is None:
return None

if isinstance(value, cls.TYPE):
if value is None or isinstance(value, cls.TYPE):
return value
elif 'encoding' in kwargs:
return as_string(value).decode(kwargs['encoding'])
@@ -428,15 +422,12 @@ def deserialize(cls, value, *args, **kwargs):
if isinstance(value, types.UnicodeType):
value = value.encode('utf-8')

if value is None:
return None
elif isinstance(value, cls.TYPE):
if value is None or isinstance(value, cls.TYPE):
return value
else:
return json.loads(value)



AVAILABLE_FIELD_TYPES = [locals()[element] for element in __all__
if 'Field' in element and element != 'Field']

@@ -472,6 +463,7 @@ def detect_types(field_names, field_values, field_types=AVAILABLE_FIELD_TYPES,
*args, **kwargs):
"""Where the magic happens"""

# TODO: look strategy of csv.Sniffer.has_header
# TODO: may receive 'type hints'
# TODO: should support receiving unicode objects directly
# TODO: should expect data in unicode or will be able to use binary data?
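
The simplified `value is None or isinstance(value, cls.TYPE)` pattern above
makes `None` a first-class value in every field type; a minimal sketch:

```python
from rows import fields

# After the fix, deserializing None returns None instead of raising
assert fields.DecimalField.deserialize(None) is None
assert fields.TextField.deserialize(None) is None

# Already-typed values pass through unchanged
assert fields.TextField.deserialize(u'hello') == u'hello'
```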
22 changes: 17 additions & 5 deletions rows/plugins/_json.py
@@ -21,7 +21,7 @@
import decimal
import json

from rows.fields import DateField, DatetimeField, DecimalField, PercentField
from rows import fields
from rows.plugins.utils import (create_table, export_data,
get_filename_and_fobj, prepare_to_export)

@@ -42,10 +42,21 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):


def _convert(value, field_type, *args, **kwargs):
if field_type in (DateField, DatetimeField, DecimalField, PercentField):
value = field_type.serialize(value, *args, **kwargs)

return value
if value is None or field_type in (
fields.BinaryField,
fields.BoolField,
fields.FloatField,
fields.IntegerField,
fields.JSONField,
fields.TextField,
):
# If the field_type is one of those, the value can be passed directly
# to the JSON encoder
return value
else:
# The field type is not represented natively in JSON, then it needs to
# be serialized (converted to a string)
return field_type.serialize(value, *args, **kwargs)


def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
Expand All @@ -61,6 +72,7 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,

result = json.dumps(data, indent=indent)
if indent is not None:
# clean up empty spaces at the end of lines
result = '\n'.join(line.rstrip() for line in result.splitlines())

return export_data(filename_or_fobj, result)
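
A minimal sketch of how `_convert` routes values (the ISO-formatted date
string below is an assumption about `DateField.serialize`'s output):

```python
import datetime

from rows import fields
from rows.plugins._json import _convert

# Natively JSON-representable values pass through untouched
assert _convert(42, fields.IntegerField) == 42
assert _convert(None, fields.DateField) is None

# Other types are serialized to strings first (assumes ISO date output)
assert _convert(datetime.date(2016, 8, 10), fields.DateField) == '2016-08-10'
```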
14 changes: 10 additions & 4 deletions rows/plugins/csv.py
@@ -24,15 +24,21 @@
from rows.plugins.utils import create_table, get_filename_and_fobj, serialize


def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None, *args,
**kwargs):
sniffer = unicodecsv.Sniffer()

def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
sample_size=8192, *args, **kwargs):
'Import data from a CSV file'

filename, fobj = get_filename_and_fobj(filename_or_fobj)

if dialect is None:
sample = fobj.readline().decode(encoding)
dialect = unicodecsv.Sniffer().sniff(sample)
sample = fobj.read(sample_size)
try:
dialect = sniffer.sniff(sample, delimiters=(',', ';', '\t'))
except unicodecsv.Error:
# Could not detect dialect, fall back to 'excel'
dialect = unicodecsv.excel
fobj.seek(0)

kwargs['encoding'] = encoding
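
The new detection logic can be exercised in isolation; a minimal sketch with a
hypothetical semicolon-separated sample:

```python
import unicodecsv

sniffer = unicodecsv.Sniffer()
sample = 'a;b;c\n1;2;3\n4;5;6\n'  # hypothetical decoded sample

try:
    # only the delimiters the plugin accepts are considered
    dialect = sniffer.sniff(sample, delimiters=(',', ';', '\t'))
except unicodecsv.Error:
    # could not detect dialect, fall back to 'excel'
    dialect = unicodecsv.excel

print(dialect.delimiter)  # ';'
```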