Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return TIMESTAMP columns as native Python datetime objects to improve support for Apache Superset #395

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0212487
return a list instead of a set on get_pk_constraint
Aymaru Feb 22, 2021
f9bd08c
Changed the sql_path
Aymaru Feb 22, 2021
66e7b51
Transform dates from crate to python datetime
Aymaru Feb 22, 2021
3dc5cb7
Datetime conversion implemented using map and generator
Aymaru Mar 2, 2021
8dfcf80
Updated tests
Aymaru Mar 2, 2021
b08cb16
Using generators to work with large datasets
Aymaru Mar 3, 2021
efe8bfb
changed tests
Aymaru Mar 3, 2021
2d1f41f
fix
Aymaru Mar 3, 2021
246d917
test
Aymaru Mar 3, 2021
94b3d1e
updated datetime transformation using generators and updated test cases
Aymaru Mar 4, 2021
85b7c7f
cleaning debug prints
Aymaru Mar 4, 2021
2cf32e2
Passing a generator of the flags instead of passing the list of values
Aymaru Mar 4, 2021
fbef448
Removed tests
Aymaru Mar 4, 2021
3b66626
Merge branch 'master' into datetime-format
Aymaru Mar 4, 2021
57ec090
updated conversion of timestamps
Aymaru Mar 30, 2021
9b82ac2
Added pandas dependency
Aymaru Mar 30, 2021
5b4d5ec
Changed pandas timestamp to python datetime && deleted pandas dependecy
Aymaru Mar 30, 2021
e9788ea
fixed - E226 missing whitespace around arithmetic operator
Aymaru Mar 30, 2021
eebd670
Changed yield value
Aymaru Mar 31, 2021
187ed3f
Adjust badges, bump SQLAlchemy test version and add "workflow_dispatch"
amotl Feb 23, 2021
e3b8d50
updated conversion of timestamps
Aymaru Mar 30, 2021
32a4826
Added pandas dependency
Aymaru Mar 30, 2021
2c9e3aa
Changed pandas timestamp to python datetime && deleted pandas dependecy
Aymaru Mar 30, 2021
2adf833
fixed - E226 missing whitespace around arithmetic operator
Aymaru Mar 30, 2021
9bc5f51
Changed yield value
Aymaru Mar 31, 2021
8e53da5
Merge branch 'master' into datetime-format
Aymaru Mar 31, 2021
5ddced3
Merge remote-tracking branch 'origin/datetime-format' into datetime-f…
Aymaru Mar 31, 2021
aaaf7ee
Validate date type in processors
Aymaru Mar 31, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
name: Tests

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
workflow_dispatch:
schedule:
- cron: '0 2 * * *'


jobs:
test:
name: "Python: ${{ matrix.python-version }}
SQLA: ${{ matrix.sqla-version }}
CrateDB: ${{ matrix.crate-version }}
on ${{ matrix.os }}"
runs-on: ${{ matrix.os }}
strategy:
matrix:
crate-version: [nightly]
os: [ubuntu-latest]
sqla-version: ['1.1.18', '1.2.19', '1.3.23']
python-version: [3.5, 3.6, 3.7, 3.8, 3.9]

steps:
- uses: actions/checkout@master
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python bootstrap.py

# replace SQLAlchemy version
sed -ir 's/SQLAlchemy.*/SQLAlchemy = ${{ matrix.sqla-version }}/g' versions.cfg

# replace CrateDB version
if [ ${{ matrix.crate-version }} = "nightly" ]; then
sed -ir 's/releases/releases\/nightly/g' base.cfg
sed -ir 's/crate_server.*/crate_server = latest/g' versions.cfg
else
sed -ir 's/crate-/crate_/g' base.cfg
sed -ir 's/crate_server.*/crate_server = ${{ matrix.crate-version }}/g' versions.cfg
fi

bin/buildout -n -c base.cfg

- name: Test
run: |
bin/flake8
bin/coverage run bin/test -vv1
40 changes: 39 additions & 1 deletion src/crate/client/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .exceptions import ProgrammingError
from distutils.version import StrictVersion
import warnings
from datetime import datetime

BULK_INSERT_MIN_VERSION = StrictVersion("0.42.0")

Expand Down Expand Up @@ -52,8 +53,45 @@ def execute(self, sql, parameters=None, bulk_parameters=None):

self._result = self.connection.client.sql(sql, parameters,
bulk_parameters)

if "rows" in self._result:
self.rows = iter(self._result["rows"])
transformed_result = False
if "col_types" in self._result:
transformed_result = True
self.rows = self.result_set_transformed()

if not transformed_result:
self.rows = iter(self._result["rows"])

def result_set_transformed(self):
"""
Generator that iterates over each row from the result set
"""
rows_to_convert = [True if col_type == 11 or col_type == 15 else False for col_type in
self._result["col_types"]]
for row in self._result["rows"]:
gen_flags = (flag for flag in rows_to_convert)
yield [t_row for t_row in self._transform_date_columns(row, gen_flags)]

@staticmethod
def _transform_date_columns(row, gen_flags):
"""
Generates iterates over each value from a row and converts timestamps to pandas TIMESTAMP
"""
for value in row:
try:
flag = next(gen_flags)
except StopIteration:
break

if not flag or value is None:
yield value
else:
if value < 0:
yield None
else:
value = datetime.fromtimestamp(value / 1000)
yield value

def executemany(self, sql, seq_of_parameters):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/crate/client/doctests/client.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ supported, all other fields are 'None'::
>>> result = cursor.fetchone()
>>> pprint(result)
['Aldebaran',
1373932800000,
datetime.datetime(2013, 7, 15, 18, 0),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This currently raises an exception on CI, see [1]. The current code expects datetime.datetime(2013, 7, 16, 0, 0) here.

[1] https://github.com/crate/crate-python/runs/2114206441?check_suite_focus=true#step:5:388

None,
None,
None,
Expand Down
3 changes: 2 additions & 1 deletion src/crate/client/doctests/http.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ Issue a select statement against our with test data pre-filled crate instance::
>>> http_client = HttpClient(crate_host)
>>> result = http_client.sql('select name from locations order by name')
>>> pprint(result)
{'cols': ['name'],
{'col_types': [4],
'cols': ['name'],
'duration': ...,
'rowcount': 13,
'rows': [['Aldebaran'],
Expand Down
2 changes: 1 addition & 1 deletion src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ class Client(object):
Crate connection client using CrateDB's HTTP API.
"""

SQL_PATH = '/_sql'
SQL_PATH = '/_sql?types'
"""Crate URI path for issuing SQL statements."""

retry_interval = 30
Expand Down
9 changes: 7 additions & 2 deletions src/crate/client/sqlalchemy/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"smallint": sqltypes.SmallInteger,
"timestamp": sqltypes.TIMESTAMP,
"timestamp with time zone": sqltypes.TIMESTAMP,
"timestamp without time zone": sqltypes.TIMESTAMP,
"object": Object,
"integer": sqltypes.Integer,
"long": sqltypes.NUMERIC,
Expand All @@ -64,6 +65,7 @@
TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SmallInteger)
TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP)
TYPES_MAP["timestamp with time zone_array"] = ARRAY(sqltypes.TIMESTAMP)
TYPES_MAP["timestamp without time zone_array"] = ARRAY(sqltypes.TIMESTAMP)
TYPES_MAP["long_array"] = ARRAY(sqltypes.NUMERIC)
TYPES_MAP["bigint_array"] = ARRAY(sqltypes.NUMERIC)
TYPES_MAP["double_array"] = ARRAY(sqltypes.DECIMAL)
Expand All @@ -75,7 +77,6 @@
except Exception:
pass


log = logging.getLogger(__name__)


Expand All @@ -91,6 +92,8 @@ def result_processor(self, dialect, coltype):
def process(value):
if not value:
return
if isinstance(value, datetime):
return value.date()
try:
return datetime.utcfromtimestamp(value / 1e3).date()
except TypeError:
Expand Down Expand Up @@ -130,6 +133,8 @@ def result_processor(self, dialect, coltype):
def process(value):
if not value:
return
if isinstance(value, datetime):
return value
try:
return datetime.utcfromtimestamp(value / 1e3)
except TypeError:
Expand Down Expand Up @@ -261,7 +266,7 @@ def get_pk_constraint(self, engine, table_name, schema=None, **kw):

def result_fun(result):
rows = result.fetchall()
return set(map(lambda el: el[0], rows))
return list(set(map(lambda el: el[0], rows)))
Comment on lines -264 to +269
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I don't know about the specific background why there has been a set used here, can you elaborate why this should now return a list?

Copy link
Member

@amotl amotl Jun 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dear @Aymaru,

apologies for the late reply. We've split off this amendment into #426. For you, it apparently solved this issue:

Changing the return type from set to list on get_pk_constraint(), solves a compatibility issue from Apache Superset that doesn't show the metadata from CrateDB tables in the SQL Lab.

With the patch at #426, we are observing a regression. Maybe due to the upgrade to SQLAlchemy 1.4, and maybe other changes in Apache Superset, this is not needed anymore?

With kind regards,
Andreas.

else:
query = """SELECT constraint_name
FROM information_schema.table_constraints
Expand Down
4 changes: 2 additions & 2 deletions src/crate/client/sqlalchemy/tests/dialect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_pks_are_retrieved_depending_on_version_set(self):
self.engine.dialect.server_version_info = (0, 54, 0)
fake_cursor.rowcount = 1
fake_cursor.fetchone = MagicMock(return_value=[["id", "id2", "id3"]])
eq_(insp.get_pk_constraint("characters")['constrained_columns'], {"id", "id2", "id3"})
eq_(insp.get_pk_constraint("characters")['constrained_columns'], ["id", "id2", "id3"])
fake_cursor.fetchone.assert_called_once_with()
in_("information_schema.table_constraints", self.executed_statement)

Expand All @@ -76,7 +76,7 @@ def test_pks_are_retrieved_depending_on_version_set(self):
self.engine.dialect.server_version_info = (2, 3, 0)
fake_cursor.rowcount = 3
fake_cursor.fetchall = MagicMock(return_value=[["id"], ["id2"], ["id3"]])
eq_(insp.get_pk_constraint("characters")['constrained_columns'], {"id", "id2", "id3"})
eq_(insp.get_pk_constraint("characters")['constrained_columns'], ["id", "id2", "id3"])
fake_cursor.fetchall.assert_called_once_with()
in_("information_schema.key_column_usage", self.executed_statement)

Expand Down
5 changes: 3 additions & 2 deletions src/crate/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,13 @@ def test_params(self):
client = Client(['127.0.0.1:4200'], error_trace=True)
parsed = urlparse(client.path)
params = parse_qs(parsed.query)
self.assertEqual(params["error_trace"], ["true"])
print(params)
self.assertEqual(params["types?error_trace"], ["true"])
client.close()

def test_no_params(self):
client = Client()
self.assertEqual(client.path, "/_sql")
self.assertEqual(client.path, "/_sql?types")
client.close()


Expand Down