diff --git a/cumulus_library/actions/exporter.py b/cumulus_library/actions/exporter.py index b62000bb..087db566 100644 --- a/cumulus_library/actions/exporter.py +++ b/cumulus_library/actions/exporter.py @@ -1,7 +1,7 @@ import pathlib -import pyarrow -from pyarrow import csv, parquet +import pandas +import rich from rich.progress import track from cumulus_library import base_utils, study_manifest @@ -24,16 +24,6 @@ def reset_counts_exports( file.unlink() -def _write_chunk(writer, chunk, arrow_schema): - writer.write( - pyarrow.Table.from_pandas( - chunk.sort_values(by=list(chunk.columns), ascending=False, na_position="first"), - preserve_index=False, - schema=arrow_schema, - ) - ) - - def export_study( config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, @@ -41,15 +31,16 @@ def export_study( data_path: pathlib.Path, archive: bool, chunksize: int = 1000000, -) -> list: +) -> None: """Exports csvs/parquet extracts of tables listed in export_list :param config: a StudyConfig object :param manifest: a StudyManifest object :keyword data_path: the path to the place on disk to save data :keyword archive: If true, get all study data and zip with timestamp :keyword chunksize: number of rows to export in a single transaction - :returns: a list of queries, (only for unit tests) """ + + skipped_tables = [] reset_counts_exports(manifest) if manifest.get_dedicated_schema(): prefix = f"{manifest.get_dedicated_schema()}." 
@@ -64,34 +55,32 @@ def export_study( table_list.append(study_manifest.ManifestExport(name=row[0], export_type="archive")) else: table_list = manifest.get_export_table_list() - queries = [] path = pathlib.Path(f"{data_path}/{manifest.get_study_prefix()}/") path.mkdir(parents=True, exist_ok=True) for table in track( table_list, description=f"Exporting {manifest.get_study_prefix()} data...", ): - query = f"SELECT * FROM {table.name}" # noqa: S608 - query = base_utils.update_query_if_schema_specified(query, manifest) - dataframe_chunks, db_schema = config.db.execute_as_pandas(query, chunksize=chunksize) - path.mkdir(parents=True, exist_ok=True) - arrow_schema = pyarrow.schema(config.db.col_pyarrow_types_from_sql(db_schema)) - with parquet.ParquetWriter( - f"{path}/{table.name}.{table.export_type}.parquet", arrow_schema - ) as p_writer: - with csv.CSVWriter( - f"{path}/{table.name}.{table.export_type}.csv", - arrow_schema, - write_options=csv.WriteOptions( - # Note that this quoting style is not exactly csv.QUOTE_MINIMAL - # https://github.com/apache/arrow/issues/42032 - quoting_style="needed" - ), - ) as c_writer: - for chunk in dataframe_chunks: - _write_chunk(p_writer, chunk, arrow_schema) # pragma: no cover - _write_chunk(c_writer, chunk, arrow_schema) # pragma: no cover - queries.append(query) + table.name = base_utils.update_query_if_schema_specified(table.name, manifest) + file_name = f"{table.name}.{table.export_type}.parquet" + if config.db.export_table_as_parquet(table.name, file_name, path): + parquet_path = path / file_name + + df = pandas.read_parquet(parquet_path) + df = df.sort_values( + by=list(df.columns), ascending=False, ignore_index=True, na_position="first" + ) + df.to_parquet(parquet_path) + df.to_csv( + (parquet_path).with_suffix(".csv"), + index=False, + ) + else: + skipped_tables.append(table.name) + + if len(skipped_tables) > 0: + rich.print("The following tables were empty and were not exported:") + for table in skipped_tables: + 
rich.print(f" - {table}") if archive: base_utils.zip_dir(path, data_path, manifest.get_study_prefix()) - return queries diff --git a/cumulus_library/databases/athena.py b/cumulus_library/databases/athena.py index 8cf1b5bc..b57f5c2b 100644 --- a/cumulus_library/databases/athena.py +++ b/cumulus_library/databases/athena.py @@ -8,11 +8,11 @@ import os import pathlib +import awswrangler import boto3 import botocore import numpy import pandas -import pyarrow import pyathena from pyathena.common import BaseCursor as AthenaCursor from pyathena.pandas.cursor import PandasCursor as AthenaPandasCursor @@ -33,6 +33,7 @@ def __init__(self, region: str, work_group: str, profile: str, schema_name: str) self.profile = profile self.schema_name = schema_name self.connection = None + self.connect_kwargs = {} def init_errors(self): # pragma: no cover return ["COLUMN_NOT_FOUND", "TABLE_NOT_FOUND"] @@ -40,22 +41,21 @@ def init_errors(self): # pragma: no cover def connect(self): # the profile may not be required, provided the above three AWS env vars # are set. 
If both are present, the env vars take precedence - connect_kwargs = {} if self.profile is not None: - connect_kwargs["profile_name"] = self.profile + self.connect_kwargs["profile_name"] = self.profile for aws_env_name in [ "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN", ]: if aws_env_val := os.environ.get(aws_env_name): - connect_kwargs[aws_env_name.lower()] = aws_env_val + self.connect_kwargs[aws_env_name.lower()] = aws_env_val self.connection = pyathena.connect( region_name=self.region, work_group=self.work_group, schema_name=self.schema_name, - **connect_kwargs, + **self.connect_kwargs, ) def cursor(self) -> AthenaCursor: @@ -96,33 +96,6 @@ def col_parquet_types_from_pandas(self, field_types: list) -> list: ) return output - def col_pyarrow_types_from_sql(self, columns: list[tuple]) -> list: - output = [] - for column in columns: - match column[1]: - case "varchar": - output.append((column[0], pyarrow.string())) - case "bigint": - output.append((column[0], pyarrow.int64())) - case "integer": - output.append((column[0], pyarrow.int64())) - case "double": - output.append((column[0], pyarrow.float64())) - # This is future proofing - we don't see this type currently. 
- case "decimal": - output.append( # pragma: no cover - (column[0], pyarrow.decimal128(column[4], column[5])) - ) - case "boolean": - output.append((column[0], pyarrow.bool_())) - case "date": - output.append((column[0], pyarrow.date64())) - case "timestamp": - output.append((column[0], pyarrow.timestamp("s"))) - case _: - raise errors.CumulusLibraryError(f"Unsupported SQL type '{column[1]}' found.") - return output - def upload_file( self, *, @@ -168,6 +141,45 @@ def upload_file( ) return f"s3://{bucket}/{s3_key}" + def _clean_bucket_path(self, client, bucket, res): + for file in res["Contents"]: + client.delete_object(Bucket=bucket, Key=file["Key"]) + + def export_table_as_parquet( + self, table_name: str, file_name: str, location: pathlib.Path, *args, **kwargs + ) -> bool: + session = boto3.session.Session( + **self.connect_kwargs, + ) + s3_client = session.client("s3") + workgroup = self.connection._client.get_work_group(WorkGroup=self.work_group) + wg_conf = workgroup["WorkGroup"]["Configuration"]["ResultConfiguration"] + s3_path = wg_conf["OutputLocation"] + bucket = "/".join(s3_path.split("/")[2:3]) + output_path = location / f"{file_name}" + s3_path = f"s3://{bucket}/export/{file_name}" + # Cleanup location in case there was an error of some kind + res = s3_client.list_objects_v2(Bucket=bucket, Prefix=f"export/{file_name}") + if "Contents" in res: + self._clean_bucket_path(s3_client, bucket, res) + + self.connection.cursor().execute(f"""UNLOAD + (SELECT * FROM {table_name}) + TO '{s3_path}' + WITH (format='PARQUET', compression='SNAPPY') + """) # noqa: S608 + # UNLOAD is not guaranteed to create a single file. 
AWS Wrangler's read_parquet + # allows us to ignore that wrinkle + try: + df = awswrangler.s3.read_parquet(s3_path, boto3_session=session) + except awswrangler.exceptions.NoFilesFound: + return False + df = df.sort_values(by=list(df.columns), ascending=False, na_position="first") + df.to_parquet(output_path) + res = s3_client.list_objects_v2(Bucket=bucket, Prefix=f"export/{file_name}") + self._clean_bucket_path(s3_client, bucket, res) + return True + def create_schema(self, schema_name) -> None: """Creates a new schema object inside the database""" glue_client = boto3.client("glue") diff --git a/cumulus_library/databases/base.py b/cumulus_library/databases/base.py index 8e0438fa..ecc3501d 100644 --- a/cumulus_library/databases/base.py +++ b/cumulus_library/databases/base.py @@ -190,9 +190,6 @@ def col_parquet_types_from_pandas(self, field_types: list) -> list: # ) return [] - def col_pyarrow_types_from_sql(self, columns: list[tuple]) -> list: - return columns # pragma: no cover - def upload_file( self, *, @@ -208,6 +205,16 @@ def upload_file( have an API for file upload (i.e. cloud databases)""" return None + @abc.abstractmethod + def export_table_as_parquet( + self, table_name: str, file_name: str, location: pathlib.Path, *args, **kwargs + ) -> bool: + """Gets a parquet file from a specified table. + + This is intended as a way to get the most database native parquet export possible, + so we don't have to infer schema information. Only do schema inferring if your + DB engine does not support parquet natively. 
If a table is empty, return False.""" + @abc.abstractmethod def create_schema(self, schema_name): """Creates a new schema object inside the catalog""" diff --git a/cumulus_library/databases/duckdb.py b/cumulus_library/databases/duckdb.py index 84b6bd6a..f3d2d0d4 100644 --- a/cumulus_library/databases/duckdb.py +++ b/cumulus_library/databases/duckdb.py @@ -9,14 +9,13 @@ import collections import datetime +import pathlib import re import duckdb import pandas -import pyarrow import pyarrow.dataset -from cumulus_library import errors from cumulus_library.databases import base @@ -174,30 +173,6 @@ def execute_as_pandas( return iter([result.df().convert_dtypes()]), result.description return result.df().convert_dtypes(), result.description - def col_pyarrow_types_from_sql(self, columns: list[tuple]) -> list: - output = [] - for column in columns: - match column[1]: - case "STRING": - output.append((column[0], pyarrow.string())) - case "INTEGER": - output.append((column[0], pyarrow.int64())) - case "NUMBER": - output.append((column[0], pyarrow.float64())) - case "DOUBLE": - output.append((column[0], pyarrow.float64())) - case "boolean" | "bool": - output.append((column[0], pyarrow.bool_())) - case "Date": - output.append((column[0], pyarrow.date64())) - case "TIMESTAMP" | "DATETIME": - output.append((column[0], pyarrow.timestamp("s"))) - case _: - raise errors.CumulusLibraryError( - f"{column[0], column[1]} does not have a conversion type" - ) - return output - def parser(self) -> base.DatabaseParser: return DuckDbParser() @@ -207,6 +182,21 @@ def operational_errors(self) -> tuple[type[Exception], ...]: duckdb.BinderException, ) + def export_table_as_parquet( + self, table_name: str, file_name: str, location: pathlib.Path, *args, **kwargs + ) -> bool: + parquet_path = location / f"{file_name}" + table_size = self.connection.execute(f"SELECT count(*) FROM {table_name}").fetchone() # noqa: S608 + if table_size[0] == 0: + return False + query = f"""COPY + (SELECT * FROM 
{table_name}) + TO '{parquet_path}' + (FORMAT parquet) + """ # noqa: S608 + self.connection.execute(query) + return True + def create_schema(self, schema_name): """Creates a new schema object inside the database""" schemas = self.connection.sql( diff --git a/pyproject.toml b/pyproject.toml index e826bded..cad2ff04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,7 @@ name = "cumulus-library" requires-python = ">= 3.11" dependencies = [ + "awswrangler >= 3.11, < 4", "cumulus-fhir-support >= 1.3.1", # 1.3.1 fixes a "load all rows into memory" bug "duckdb >= 1.1.3", "Jinja2 > 3", diff --git a/tests/test_athena.py b/tests/test_athena.py index cfc51a09..d6a962ec 100644 --- a/tests/test_athena.py +++ b/tests/test_athena.py @@ -4,7 +4,9 @@ import pathlib from unittest import mock +import awswrangler import botocore +import pandas from cumulus_library import base_utils, databases, study_manifest @@ -107,3 +109,39 @@ def test_dedicated_schema_namespacing(tmp_path): query = "CREATE EXTERNAL TABLE foo.foo__bar" result = base_utils.update_query_if_schema_specified(query, manifest) assert result == "CREATE EXTERNAL TABLE foo.bar" + + +@mock.patch("botocore.client") +@mock.patch("awswrangler.s3") +def test_export_table(mock_wrangler, mock_client, tmp_path): + db = databases.AthenaDatabaseBackend( + region="test", + work_group="test", + profile="test", + schema_name="test", + ) + db.connection = mock.MagicMock() + bucket_info = { + "WorkGroup": { + "Configuration": {"ResultConfiguration": {"OutputLocation": "s3://testbucket/athena"}} + } + } + db.connection._client.get_work_group.side_effect = [bucket_info, bucket_info] + mock_clientobj = mock_client.ClientCreator.return_value.create_client.return_value + mock_clientobj.list_objects_v2.side_effect = [ + # first pass: delete found file and then cleanup + {"Contents": [{"Key": "export/file_to_delete"}]}, + {"Contents": [{"Key": "export/table.flat.parquet"}]}, + # second pass: skip deletion + {}, + ] + # file found + 
mock_wrangler.read_parquet.return_value = pandas.DataFrame({"A": [1, 2], "B": ["x", "y"]}) + res = db.export_table_as_parquet("table", "flat", tmp_path) + assert res is True + assert mock_clientobj.delete_object.call_args[1]["Key"] == "export/table.flat.parquet" + + # file not found + mock_wrangler.read_parquet.side_effect = awswrangler.exceptions.NoFilesFound + res = db.export_table_as_parquet("table", "flat", tmp_path) + assert res is False diff --git a/tests/test_cli.py b/tests/test_cli.py index c74b8569..021761aa 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -283,13 +283,14 @@ def test_clean(tmp_path, args, expected, raises): clear=True, ) @pytest.mark.parametrize( - "build_args,export_args,expected_tables,raises", + "build_args,export_args,expected_tables,raises,expected_missing", [ ( ["build", "-t", "core"], ["export", "-t", "core"], 73, does_not_raise(), + [], ), ( # checking that a study is loaded from a child directory @@ -304,6 +305,10 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/"], 3, does_not_raise(), + [ + "study_valid__table", + "study_valid__table2", + ], ), ( # checking that a study is loaded from a child directory @@ -318,6 +323,10 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/"], 3, does_not_raise(), + [ + "study_valid__table", + "study_valid__table2", + ], ), ( # checking that a study is loaded from the directory of a user-defined @@ -333,6 +342,10 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/study_valid/"], 3, does_not_raise(), + [ + "study_valid__table", + "study_valid__table2", + ], ), ( [ @@ -346,6 +359,11 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/study_valid/"], 3, does_not_raise(), + [ + "study_valid__table", + "study_valid__table2", + "study_valid__table", + ], ), ( [ @@ -360,6 
+378,7 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/study_valid/"], 2, pytest.raises(duckdb.duckdb.CatalogException), + [], ), ( [ @@ -374,6 +393,7 @@ def test_clean(tmp_path, args, expected, raises): ["export", "-t", "study_valid", "-s", "tests/test_data/study_valid/"], 2, pytest.raises(errors.StudyManifestParsingError), + [], ), ( [ @@ -392,6 +412,7 @@ def test_clean(tmp_path, args, expected, raises): ], 4, does_not_raise(), + ["study_dedicated_schema__table_raw_sql"], ), ( [ @@ -410,6 +431,12 @@ def test_clean(tmp_path, args, expected, raises): ], 5, does_not_raise(), + [ + "study_valid_all_exports__table", + "study_valid_all_exports__table2", + "study_valid_all_exports__table3", + "study_valid_all_exports__table4", + ], ), ( [ @@ -428,6 +455,7 @@ def test_clean(tmp_path, args, expected, raises): ], 2, pytest.raises(errors.StudyManifestParsingError), + [], ), ( [ @@ -446,10 +474,13 @@ def test_clean(tmp_path, args, expected, raises): ], 2, pytest.raises(errors.StudyManifestParsingError), + [], ), ], ) -def test_cli_executes_queries(tmp_path, build_args, export_args, expected_tables, raises): +def test_cli_executes_queries( + tmp_path, build_args, export_args, expected_tables, raises, expected_missing +): with raises: build_args = duckdb_args(build_args, tmp_path) cli.main(cli_args=build_args) @@ -483,7 +514,8 @@ def test_cli_executes_queries(tmp_path, build_args, export_args, expected_tables export_config = config["export_config"] for export_list in export_config.values(): for export_table in export_list: - assert any(export_table in x for x in csv_files) + if export_table not in expected_missing: + assert any(export_table in x for x in csv_files) @mock.patch.dict( diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_allergyintolerance_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_allergyintolerance_month.cube.csv index 744d269a..7459f28a 100644 
--- a/tests/test_data/duckdb_data/expected_export/core/core__count_allergyintolerance_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_allergyintolerance_month.cube.csv @@ -1,3 +1,3 @@ -"cnt","category","recordedDate_month","code_display","reaction_manifestation_display" +cnt,category,recordedDate_month,code_display,reaction_manifestation_display 17,,,, -16,,"2018-08-01",, +16,,2018-08-01,, diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_condition_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_condition_month.cube.csv index 4e278870..0128853f 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_condition_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_condition_month.cube.csv @@ -1,3 +1,3 @@ -"cnt","category_code","recordedDate_month","code_display" +cnt,category_code,recordedDate_month,code_display 15,,, -15,"encounter-diagnosis",, +15,encounter-diagnosis,, diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_diagnosticreport_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_diagnosticreport_month.cube.csv index cd3a7949..378a89fc 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_diagnosticreport_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_diagnosticreport_month.cube.csv @@ -1,20 +1,20 @@ -"cnt","category_display","code_display","issued_month" +cnt,category_display,code_display,issued_month 13,,, -13,,"History and physical note", -13,,"Evaluation + Plan note", -13,,"CBC panel - Blood by Automated count", -13,"Laboratory",, -13,"Laboratory","CBC panel - Blood by Automated count", -13,"History and physical note",, -13,"History and physical note","History and physical note", -13,"History and physical note","Evaluation + Plan note", -13,"Evaluation + Plan note",, -13,"Evaluation + Plan note","History and physical 
note", -13,"Evaluation + Plan note","Evaluation + Plan note", -12,"cumulus__none",, -11,,"Generalized anxiety disorder 7 item (GAD-7)", -11,"cumulus__none","Generalized anxiety disorder 7 item (GAD-7)", -10,,"Patient Health Questionnaire 2 item (PHQ-2) [Reported]", -10,,"Alcohol Use Disorder Identification Test - Consumption [AUDIT-C]", -10,"cumulus__none","Patient Health Questionnaire 2 item (PHQ-2) [Reported]", -10,"cumulus__none","Alcohol Use Disorder Identification Test - Consumption [AUDIT-C]", +13,,History and physical note, +13,,Evaluation + Plan note, +13,,CBC panel - Blood by Automated count, +13,Laboratory,, +13,Laboratory,CBC panel - Blood by Automated count, +13,History and physical note,, +13,History and physical note,History and physical note, +13,History and physical note,Evaluation + Plan note, +13,Evaluation + Plan note,, +13,Evaluation + Plan note,History and physical note, +13,Evaluation + Plan note,Evaluation + Plan note, +12,cumulus__none,, +11,,Generalized anxiety disorder 7 item (GAD-7), +11,cumulus__none,Generalized anxiety disorder 7 item (GAD-7), +10,,Patient Health Questionnaire 2 item (PHQ-2) [Reported], +10,,Alcohol Use Disorder Identification Test - Consumption [AUDIT-C], +10,cumulus__none,Patient Health Questionnaire 2 item (PHQ-2) [Reported], +10,cumulus__none,Alcohol Use Disorder Identification Test - Consumption [AUDIT-C], diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_documentreference_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_documentreference_month.cube.csv index 2257390e..42a04ba1 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_documentreference_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_documentreference_month.cube.csv @@ -1,19 +1,19 @@ -"cnt","type_display","author_month","class_display" +cnt,type_display,author_month,class_display 50,,, -50,"Evaluation + Plan note",, -50,"Emergency department 
note",, -46,,,"ambulatory" -46,"Evaluation + Plan note",,"ambulatory" -46,"Emergency department note",,"ambulatory" -26,,"2018-07-01", -26,"Evaluation + Plan note","2018-07-01", -26,"Emergency department note","2018-07-01", -24,,"2018-07-01","ambulatory" -24,,"2018-06-01", -24,"Evaluation + Plan note","2018-07-01","ambulatory" -24,"Evaluation + Plan note","2018-06-01", -24,"Emergency department note","2018-07-01","ambulatory" -24,"Emergency department note","2018-06-01", -22,,"2018-06-01","ambulatory" -22,"Evaluation + Plan note","2018-06-01","ambulatory" -22,"Emergency department note","2018-06-01","ambulatory" +50,Evaluation + Plan note,, +50,Emergency department note,, +46,,,ambulatory +46,Evaluation + Plan note,,ambulatory +46,Emergency department note,,ambulatory +26,,2018-07-01, +26,Evaluation + Plan note,2018-07-01, +26,Emergency department note,2018-07-01, +24,,2018-07-01,ambulatory +24,,2018-06-01, +24,Evaluation + Plan note,2018-07-01,ambulatory +24,Evaluation + Plan note,2018-06-01, +24,Emergency department note,2018-07-01,ambulatory +24,Emergency department note,2018-06-01, +22,,2018-06-01,ambulatory +22,Evaluation + Plan note,2018-06-01,ambulatory +22,Emergency department note,2018-06-01,ambulatory diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types.cube.csv index 2ff56f2c..7928477e 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types.cube.csv @@ -1,9 +1,9 @@ -"cnt","class_display","type_display","serviceType_display","priority_display" +cnt,class_display,type_display,serviceType_display,priority_display 50,,,, -50,,,,"cumulus__none" -50,,,"cumulus__none", -50,,,"cumulus__none","cumulus__none" -46,"ambulatory",,, -46,"ambulatory",,,"cumulus__none" -46,"ambulatory",,"cumulus__none", 
-46,"ambulatory",,"cumulus__none","cumulus__none" +50,,,,cumulus__none +50,,,cumulus__none, +50,,,cumulus__none,cumulus__none +46,ambulatory,,, +46,ambulatory,,,cumulus__none +46,ambulatory,,cumulus__none, +46,ambulatory,,cumulus__none,cumulus__none diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types_month.cube.csv index 324ac11c..749c2aaa 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_all_types_month.cube.csv @@ -1,25 +1,25 @@ -"cnt","class_display","type_display","serviceType_display","priority_display","period_start_month" +cnt,class_display,type_display,serviceType_display,priority_display,period_start_month 50,,,,, -50,,,,"cumulus__none", -50,,,"cumulus__none",, -50,,,"cumulus__none","cumulus__none", -46,"ambulatory",,,, -46,"ambulatory",,,"cumulus__none", -46,"ambulatory",,"cumulus__none",, -46,"ambulatory",,"cumulus__none","cumulus__none", -26,,,,,"2018-07-01" -26,,,,"cumulus__none","2018-07-01" -26,,,"cumulus__none",,"2018-07-01" -26,,,"cumulus__none","cumulus__none","2018-07-01" -24,,,,,"2018-06-01" -24,,,,"cumulus__none","2018-06-01" -24,,,"cumulus__none",,"2018-06-01" -24,,,"cumulus__none","cumulus__none","2018-06-01" -24,"ambulatory",,,,"2018-07-01" -24,"ambulatory",,,"cumulus__none","2018-07-01" -24,"ambulatory",,"cumulus__none",,"2018-07-01" -24,"ambulatory",,"cumulus__none","cumulus__none","2018-07-01" -22,"ambulatory",,,,"2018-06-01" -22,"ambulatory",,,"cumulus__none","2018-06-01" -22,"ambulatory",,"cumulus__none",,"2018-06-01" -22,"ambulatory",,"cumulus__none","cumulus__none","2018-06-01" +50,,,,cumulus__none, +50,,,cumulus__none,, +50,,,cumulus__none,cumulus__none, +46,ambulatory,,,, +46,ambulatory,,,cumulus__none, +46,ambulatory,,cumulus__none,, 
+46,ambulatory,,cumulus__none,cumulus__none, +26,,,,,2018-07-01 +26,,,,cumulus__none,2018-07-01 +26,,,cumulus__none,,2018-07-01 +26,,,cumulus__none,cumulus__none,2018-07-01 +24,,,,,2018-06-01 +24,,,,cumulus__none,2018-06-01 +24,,,cumulus__none,,2018-06-01 +24,,,cumulus__none,cumulus__none,2018-06-01 +24,ambulatory,,,,2018-07-01 +24,ambulatory,,,cumulus__none,2018-07-01 +24,ambulatory,,cumulus__none,,2018-07-01 +24,ambulatory,,cumulus__none,cumulus__none,2018-07-01 +22,ambulatory,,,,2018-06-01 +22,ambulatory,,,cumulus__none,2018-06-01 +22,ambulatory,,cumulus__none,,2018-06-01 +22,ambulatory,,cumulus__none,cumulus__none,2018-06-01 diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_month.cube.csv index 5c8242e6..267d1133 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_month.cube.csv @@ -1,65 +1,65 @@ -"cnt","period_start_month","class_display","age_at_visit","gender","race_display","ethnicity_display" +cnt,period_start_month,class_display,age_at_visit,gender,race_display,ethnicity_display 50,,,,,, -47,,,,,"white", -46,,"ambulatory",,,, -45,,,,,,"not hispanic or latino" -43,,,,,"white","not hispanic or latino" -43,,"ambulatory",,,"white", -42,,"ambulatory",,,,"not hispanic or latino" -40,,"ambulatory",,,"white","not hispanic or latino" -29,,,,"female",, -28,,,,"female","white", -27,,,,"female",,"not hispanic or latino" -27,,"ambulatory",,"female",, -26,,,,"female","white","not hispanic or latino" -26,,"ambulatory",,"female","white", -26,"2018-07-01",,,,, -26,"2018-07-01",,,,"white", -25,,"ambulatory",,"female",,"not hispanic or latino" -24,,"ambulatory",,"female","white","not hispanic or latino" -24,"2018-07-01",,,,,"not hispanic or latino" -24,"2018-07-01",,,,"white","not hispanic or latino" 
-24,"2018-07-01","ambulatory",,,, -24,"2018-07-01","ambulatory",,,"white", -24,"2018-06-01",,,,, -23,"2018-07-01","ambulatory",,,,"not hispanic or latino" -23,"2018-07-01","ambulatory",,,"white","not hispanic or latino" -22,"2018-06-01","ambulatory",,,, -21,,,,"male",, -21,"2018-06-01",,,,,"not hispanic or latino" -21,"2018-06-01",,,,"white", -19,,,,"male","white", -19,,"ambulatory",,"male",, -19,"2018-06-01",,,,"white","not hispanic or latino" -19,"2018-06-01","ambulatory",,,,"not hispanic or latino" -19,"2018-06-01","ambulatory",,,"white", -18,,,,"male",,"not hispanic or latino" -17,,,,"male","white","not hispanic or latino" -17,,"ambulatory",,"male",,"not hispanic or latino" -17,,"ambulatory",,"male","white", -17,"2018-06-01","ambulatory",,,"white","not hispanic or latino" -16,,"ambulatory",,"male","white","not hispanic or latino" -15,"2018-06-01",,,"female",, -14,"2018-07-01",,,"female",, -14,"2018-07-01",,,"female",,"not hispanic or latino" -14,"2018-07-01",,,"female","white", -14,"2018-07-01",,,"female","white","not hispanic or latino" -14,"2018-06-01",,,"female","white", -14,"2018-06-01","ambulatory",,"female",, -13,"2018-07-01","ambulatory",,"female",, -13,"2018-07-01","ambulatory",,"female",,"not hispanic or latino" -13,"2018-07-01","ambulatory",,"female","white", -13,"2018-07-01","ambulatory",,"female","white","not hispanic or latino" -13,"2018-06-01",,,"female",,"not hispanic or latino" -13,"2018-06-01","ambulatory",,"female","white", -12,"2018-07-01",,,"male",, -12,"2018-07-01",,,"male","white", -12,"2018-06-01",,,"female","white","not hispanic or latino" -12,"2018-06-01","ambulatory",,"female",,"not hispanic or latino" -11,"2018-07-01","ambulatory",,"male",, -11,"2018-07-01","ambulatory",,"male","white", -11,"2018-06-01","ambulatory",,"female","white","not hispanic or latino" -10,"2018-07-01",,,"male",,"not hispanic or latino" -10,"2018-07-01",,,"male","white","not hispanic or latino" -10,"2018-07-01","ambulatory",,"male",,"not hispanic or latino" 
-10,"2018-07-01","ambulatory",,"male","white","not hispanic or latino" +47,,,,,white, +46,,ambulatory,,,, +45,,,,,,not hispanic or latino +43,,,,,white,not hispanic or latino +43,,ambulatory,,,white, +42,,ambulatory,,,,not hispanic or latino +40,,ambulatory,,,white,not hispanic or latino +29,,,,female,, +28,,,,female,white, +27,,,,female,,not hispanic or latino +27,,ambulatory,,female,, +26,,,,female,white,not hispanic or latino +26,,ambulatory,,female,white, +26,2018-07-01,,,,, +26,2018-07-01,,,,white, +25,,ambulatory,,female,,not hispanic or latino +24,,ambulatory,,female,white,not hispanic or latino +24,2018-07-01,,,,,not hispanic or latino +24,2018-07-01,,,,white,not hispanic or latino +24,2018-07-01,ambulatory,,,, +24,2018-07-01,ambulatory,,,white, +24,2018-06-01,,,,, +23,2018-07-01,ambulatory,,,,not hispanic or latino +23,2018-07-01,ambulatory,,,white,not hispanic or latino +22,2018-06-01,ambulatory,,,, +21,,,,male,, +21,2018-06-01,,,,,not hispanic or latino +21,2018-06-01,,,,white, +19,,,,male,white, +19,,ambulatory,,male,, +19,2018-06-01,,,,white,not hispanic or latino +19,2018-06-01,ambulatory,,,,not hispanic or latino +19,2018-06-01,ambulatory,,,white, +18,,,,male,,not hispanic or latino +17,,,,male,white,not hispanic or latino +17,,ambulatory,,male,,not hispanic or latino +17,,ambulatory,,male,white, +17,2018-06-01,ambulatory,,,white,not hispanic or latino +16,,ambulatory,,male,white,not hispanic or latino +15,2018-06-01,,,female,, +14,2018-07-01,,,female,, +14,2018-07-01,,,female,,not hispanic or latino +14,2018-07-01,,,female,white, +14,2018-07-01,,,female,white,not hispanic or latino +14,2018-06-01,,,female,white, +14,2018-06-01,ambulatory,,female,, +13,2018-07-01,ambulatory,,female,, +13,2018-07-01,ambulatory,,female,,not hispanic or latino +13,2018-07-01,ambulatory,,female,white, +13,2018-07-01,ambulatory,,female,white,not hispanic or latino +13,2018-06-01,,,female,,not hispanic or latino +13,2018-06-01,ambulatory,,female,white, 
+12,2018-07-01,,,male,, +12,2018-07-01,,,male,white, +12,2018-06-01,,,female,white,not hispanic or latino +12,2018-06-01,ambulatory,,female,,not hispanic or latino +11,2018-07-01,ambulatory,,male,, +11,2018-07-01,ambulatory,,male,white, +11,2018-06-01,ambulatory,,female,white,not hispanic or latino +10,2018-07-01,,,male,,not hispanic or latino +10,2018-07-01,,,male,white,not hispanic or latino +10,2018-07-01,ambulatory,,male,,not hispanic or latino +10,2018-07-01,ambulatory,,male,white,not hispanic or latino diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_priority_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_priority_month.cube.csv index 2b16c7e2..bc2cc2d6 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_priority_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_priority_month.cube.csv @@ -1,13 +1,13 @@ -"cnt","class_display","priority_display","period_start_month" +cnt,class_display,priority_display,period_start_month 50,,, -50,,"cumulus__none", -46,"ambulatory",, -46,"ambulatory","cumulus__none", -26,,,"2018-07-01" -26,,"cumulus__none","2018-07-01" -24,,,"2018-06-01" -24,,"cumulus__none","2018-06-01" -24,"ambulatory",,"2018-07-01" -24,"ambulatory","cumulus__none","2018-07-01" -22,"ambulatory",,"2018-06-01" -22,"ambulatory","cumulus__none","2018-06-01" +50,,cumulus__none, +46,ambulatory,, +46,ambulatory,cumulus__none, +26,,,2018-07-01 +26,,cumulus__none,2018-07-01 +24,,,2018-06-01 +24,,cumulus__none,2018-06-01 +24,ambulatory,,2018-07-01 +24,ambulatory,cumulus__none,2018-07-01 +22,ambulatory,,2018-06-01 +22,ambulatory,cumulus__none,2018-06-01 diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_service_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_service_month.cube.csv index 2dc35115..1ef0dae9 100644 --- 
a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_service_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_service_month.cube.csv @@ -1,13 +1,13 @@ -"cnt","class_display","serviceType_display","period_start_month" +cnt,class_display,serviceType_display,period_start_month 50,,, -50,,"cumulus__none", -46,"ambulatory",, -46,"ambulatory","cumulus__none", -26,,,"2018-07-01" -26,,"cumulus__none","2018-07-01" -24,,,"2018-06-01" -24,,"cumulus__none","2018-06-01" -24,"ambulatory",,"2018-07-01" -24,"ambulatory","cumulus__none","2018-07-01" -22,"ambulatory",,"2018-06-01" -22,"ambulatory","cumulus__none","2018-06-01" +50,,cumulus__none, +46,ambulatory,, +46,ambulatory,cumulus__none, +26,,,2018-07-01 +26,,cumulus__none,2018-07-01 +24,,,2018-06-01 +24,,cumulus__none,2018-06-01 +24,ambulatory,,2018-07-01 +24,ambulatory,cumulus__none,2018-07-01 +22,ambulatory,,2018-06-01 +22,ambulatory,cumulus__none,2018-06-01 diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_type_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_type_month.cube.csv index 02621fa4..bb5afe09 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_type_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_encounter_type_month.cube.csv @@ -1,7 +1,7 @@ -"cnt","class_display","type_display","period_start_month" +cnt,class_display,type_display,period_start_month 50,,, -46,"ambulatory",, -26,,,"2018-07-01" -24,,,"2018-06-01" -24,"ambulatory",,"2018-07-01" -22,"ambulatory",,"2018-06-01" +46,ambulatory,, +26,,,2018-07-01 +24,,,2018-06-01 +24,ambulatory,,2018-07-01 +22,ambulatory,,2018-06-01 diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_medicationrequest_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_medicationrequest_month.cube.csv index b0299974..0c01d794 100644 --- 
a/tests/test_data/duckdb_data/expected_export/core/core__count_medicationrequest_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_medicationrequest_month.cube.csv @@ -1,13 +1,13 @@ -"cnt","status","intent","authoredon_month","medication_display" +cnt,status,intent,authoredon_month,medication_display 27,,,, -27,,"order",, -26,"stopped",,, -26,"stopped","order",, -15,,,"2018-07-01", -15,,"order","2018-07-01", -15,"stopped",,"2018-07-01", -15,"stopped","order","2018-07-01", -12,,,"2018-06-01", -12,,"order","2018-06-01", -11,"stopped",,"2018-06-01", -11,"stopped","order","2018-06-01", +27,,order,, +26,stopped,,, +26,stopped,order,, +15,,,2018-07-01, +15,,order,2018-07-01, +15,stopped,,2018-07-01, +15,stopped,order,2018-07-01, +12,,,2018-06-01, +12,,order,2018-06-01, +11,stopped,,2018-06-01, +11,stopped,order,2018-06-01, diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_observation_lab_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_observation_lab_month.cube.csv index 5555c1ec..51b9e49e 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_observation_lab_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_observation_lab_month.cube.csv @@ -1,15 +1,15 @@ -"cnt","effectiveDateTime_month","observation_code","valueCodeableConcept_display","class_display" +cnt,effectiveDateTime_month,observation_code,valueCodeableConcept_display,class_display 20,,,, -20,,,,"ambulatory" -10,,,"Urine smell ammoniacal (finding)", -10,,,"Urine smell ammoniacal (finding)","ambulatory" -10,,,"Brown color (qualifier value)", -10,,,"Brown color (qualifier value)","ambulatory" -10,,"5778-6",, -10,,"5778-6",,"ambulatory" -10,,"5778-6","Brown color (qualifier value)", -10,,"5778-6","Brown color (qualifier value)","ambulatory" -10,,"34533-0",, -10,,"34533-0",,"ambulatory" -10,,"34533-0","Urine smell ammoniacal (finding)", -10,,"34533-0","Urine smell ammoniacal 
(finding)","ambulatory" +20,,,,ambulatory +10,,,Urine smell ammoniacal (finding), +10,,,Urine smell ammoniacal (finding),ambulatory +10,,,Brown color (qualifier value), +10,,,Brown color (qualifier value),ambulatory +10,,5778-6,, +10,,5778-6,,ambulatory +10,,5778-6,Brown color (qualifier value), +10,,5778-6,Brown color (qualifier value),ambulatory +10,,34533-0,, +10,,34533-0,,ambulatory +10,,34533-0,Urine smell ammoniacal (finding), +10,,34533-0,Urine smell ammoniacal (finding),ambulatory diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_patient.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_patient.cube.csv index c19cc543..d9a72050 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_patient.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_patient.cube.csv @@ -1,13 +1,13 @@ -"cnt","gender","race_display","ethnicity_display" +cnt,gender,race_display,ethnicity_display 50,,, -47,,"white", -45,,,"not hispanic or latino" -43,,"white","not hispanic or latino" -29,"female",, -28,"female","white", -27,"female",,"not hispanic or latino" -26,"female","white","not hispanic or latino" -21,"male",, -19,"male","white", -18,"male",,"not hispanic or latino" -17,"male","white","not hispanic or latino" +47,,white, +45,,,not hispanic or latino +43,,white,not hispanic or latino +29,female,, +28,female,white, +27,female,,not hispanic or latino +26,female,white,not hispanic or latino +21,male,, +19,male,white, +18,male,,not hispanic or latino +17,male,white,not hispanic or latino diff --git a/tests/test_data/duckdb_data/expected_export/core/core__count_procedure_month.cube.csv b/tests/test_data/duckdb_data/expected_export/core/core__count_procedure_month.cube.csv index 449a6ddf..699959e2 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__count_procedure_month.cube.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__count_procedure_month.cube.csv @@ -1,33 +1,33 
@@ -"cnt","category_display","code_display","performedDateTime_month" +cnt,category_display,code_display,performedDateTime_month 13,,, -13,,,"cumulus__none" -13,,"Medication Reconciliation (procedure)", -13,,"Medication Reconciliation (procedure)","cumulus__none" -13,"cumulus__none",, -13,"cumulus__none",,"cumulus__none" -13,"cumulus__none","Medication Reconciliation (procedure)", -13,"cumulus__none","Medication Reconciliation (procedure)","cumulus__none" -11,,"Depression screening (procedure)", -11,,"Depression screening (procedure)","cumulus__none" -11,,"Assessment of substance use (procedure)", -11,,"Assessment of substance use (procedure)","cumulus__none" -11,,"Assessment of anxiety (procedure)", -11,,"Assessment of anxiety (procedure)","cumulus__none" -11,"cumulus__none","Depression screening (procedure)", -11,"cumulus__none","Depression screening (procedure)","cumulus__none" -11,"cumulus__none","Assessment of substance use (procedure)", -11,"cumulus__none","Assessment of substance use (procedure)","cumulus__none" -11,"cumulus__none","Assessment of anxiety (procedure)", -11,"cumulus__none","Assessment of anxiety (procedure)","cumulus__none" -10,,"Depression screening using Patient Health Questionnaire Two-Item score (procedure)", -10,,"Depression screening using Patient Health Questionnaire Two-Item score (procedure)","cumulus__none" -10,,"Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure)", -10,,"Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure)","cumulus__none" -10,,"Assessment of health and social care needs (procedure)", -10,,"Assessment of health and social care needs (procedure)","cumulus__none" -10,"cumulus__none","Depression screening using Patient Health Questionnaire Two-Item score (procedure)", -10,"cumulus__none","Depression screening using Patient Health Questionnaire Two-Item score (procedure)","cumulus__none" -10,"cumulus__none","Assessment using Alcohol Use Disorders 
Identification Test - Consumption (procedure)", -10,"cumulus__none","Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure)","cumulus__none" -10,"cumulus__none","Assessment of health and social care needs (procedure)", -10,"cumulus__none","Assessment of health and social care needs (procedure)","cumulus__none" +13,,,cumulus__none +13,,Medication Reconciliation (procedure), +13,,Medication Reconciliation (procedure),cumulus__none +13,cumulus__none,, +13,cumulus__none,,cumulus__none +13,cumulus__none,Medication Reconciliation (procedure), +13,cumulus__none,Medication Reconciliation (procedure),cumulus__none +11,,Depression screening (procedure), +11,,Depression screening (procedure),cumulus__none +11,,Assessment of substance use (procedure), +11,,Assessment of substance use (procedure),cumulus__none +11,,Assessment of anxiety (procedure), +11,,Assessment of anxiety (procedure),cumulus__none +11,cumulus__none,Depression screening (procedure), +11,cumulus__none,Depression screening (procedure),cumulus__none +11,cumulus__none,Assessment of substance use (procedure), +11,cumulus__none,Assessment of substance use (procedure),cumulus__none +11,cumulus__none,Assessment of anxiety (procedure), +11,cumulus__none,Assessment of anxiety (procedure),cumulus__none +10,,Depression screening using Patient Health Questionnaire Two-Item score (procedure), +10,,Depression screening using Patient Health Questionnaire Two-Item score (procedure),cumulus__none +10,,Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure), +10,,Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure),cumulus__none +10,,Assessment of health and social care needs (procedure), +10,,Assessment of health and social care needs (procedure),cumulus__none +10,cumulus__none,Depression screening using Patient Health Questionnaire Two-Item score (procedure), +10,cumulus__none,Depression screening using Patient Health Questionnaire 
Two-Item score (procedure),cumulus__none +10,cumulus__none,Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure), +10,cumulus__none,Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure),cumulus__none +10,cumulus__none,Assessment of health and social care needs (procedure), +10,cumulus__none,Assessment of health and social care needs (procedure),cumulus__none diff --git a/tests/test_data/duckdb_data/expected_export/core/core__meta_date.meta.csv b/tests/test_data/duckdb_data/expected_export/core/core__meta_date.meta.csv index 2e2c4f61..6d2ad6b7 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__meta_date.meta.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__meta_date.meta.csv @@ -1,2 +1,2 @@ -"min_date","max_date" +min_date,max_date 2018-06-01,2018-07-31 diff --git a/tests/test_data/duckdb_data/expected_export/core/core__meta_version.meta.csv b/tests/test_data/duckdb_data/expected_export/core/core__meta_version.meta.csv index cbd90d88..4d65fc4e 100644 --- a/tests/test_data/duckdb_data/expected_export/core/core__meta_version.meta.csv +++ b/tests/test_data/duckdb_data/expected_export/core/core__meta_version.meta.csv @@ -1,2 +1,2 @@ -"data_package_version" +data_package_version 3 diff --git a/tests/test_data/parser_mock_data.py b/tests/test_data/parser_mock_data.py index cd3b9b12..8174234e 100644 --- a/tests/test_data/parser_mock_data.py +++ b/tests/test_data/parser_mock_data.py @@ -34,5 +34,10 @@ def get_mock_toml(key: str): "sql_config": {"file_names": ["test1.sql", "test2.sql"]}, "export_config": {"export_list": ["wrong__table1", "wrong__table2"]}, }, + "invalid_bad_table_names": { + "study_prefix": "valid", + "sql_config": {"file_names": ["test1.sql", "Robert'); DROP TABLE Students;--"]}, + "export_config": {"export_list": ["wrong__table1", "wrong__table2"]}, + }, "invalid_none": "", } diff --git a/tests/test_databases.py b/tests/test_databases.py index dc4c13b3..7c71e630 
100644 --- a/tests/test_databases.py +++ b/tests/test_databases.py @@ -10,7 +10,6 @@ import duckdb import pandas -import pyarrow import pyathena import pytest @@ -73,147 +72,6 @@ def test_col_types_from_pandas(db, data, expected, raises): assert set(expected) == set(vals) -@pytest.mark.parametrize( - "db,data,expected,raises", - [ - ( - databases.AthenaDatabaseBackend(**ATHENA_KWARGS), - [ - ( - "a", - "varchar", - ), - ( - "b", - "bigint", - ), - ( - "c", - "integer", - ), - ( - "d", - "double", - ), - ( - "e", - "boolean", - ), - ( - "f", - "date", - ), - ("g", "timestamp"), - ], - [ - ( - "a", - pyarrow.string(), - ), - ( - "b", - pyarrow.int64(), - ), - ( - "c", - pyarrow.int64(), - ), - ( - "d", - pyarrow.float64(), - ), - ( - "e", - pyarrow.bool_(), - ), - ( - "f", - pyarrow.date64(), - ), - ("g", pyarrow.timestamp("s")), - ], - does_not_raise(), - ), - ( - databases.AthenaDatabaseBackend(**ATHENA_KWARGS), - [("a", "other_type")], - [], - pytest.raises(errors.CumulusLibraryError), - ), - ( - databases.DuckDatabaseBackend(**DUCKDB_KWARGS), - [ - ( - "a", - "STRING", - ), - ( - "b", - "INTEGER", - ), - ( - "c", - "NUMBER", - ), - ( - "d", - "DOUBLE", - ), - ( - "e", - "boolean", - ), - ( - "f", - "Date", - ), - ("g", "TIMESTAMP"), - ], - [ - ( - "a", - pyarrow.string(), - ), - ( - "b", - pyarrow.int64(), - ), - ( - "c", - pyarrow.float64(), - ), - ( - "d", - pyarrow.float64(), - ), - ( - "e", - pyarrow.bool_(), - ), - ( - "f", - pyarrow.date64(), - ), - ("g", pyarrow.timestamp("s")), - ], - does_not_raise(), - ), - ( - databases.DuckDatabaseBackend(**DUCKDB_KWARGS), - [("a", "other_type")], - [], - pytest.raises(errors.CumulusLibraryError), - ), - ], -) -def test_pyarrow_types_from_sql(db, data, expected, raises): - with raises: - vals = db.col_pyarrow_types_from_sql(data) - assert len(expected) == len(vals) - for index in range(0, len(vals)): - assert vals[index][-1] == expected[index][-1] - - @pytest.mark.parametrize( "args,expected_type, raises", [ diff 
--git a/tests/test_dynamic_manifest.py b/tests/test_dynamic_manifest.py index 6b76a512..f1f07832 100644 --- a/tests/test_dynamic_manifest.py +++ b/tests/test_dynamic_manifest.py @@ -73,8 +73,6 @@ def test_cli_export_with_dynamic_prefix(tmp_path): cli.main(cli_args=duckdb_args(["export", *STUDY_ARGS, "--option=prefix:abc"], tmp_path)) assert set(os.listdir(f"{tmp_path}/export")) == {"abc"} assert set(os.listdir(f"{tmp_path}/export/abc")) == { - "abc__counts.cube.csv", - "abc__counts.cube.parquet", "abc__meta_version.cube.csv", "abc__meta_version.cube.parquet", } diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py index 2c0556c9..919b46af 100644 --- a/tests/test_study_parser.py +++ b/tests/test_study_parser.py @@ -55,6 +55,7 @@ def test_load_manifest(manifest_path, expected, raises): ("valid_null_arrays", does_not_raise()), ("valid_only_prefix", does_not_raise()), ("invalid_bad_export_names", pytest.raises(errors.StudyManifestParsingError)), + ("invalid_bad_table_names", pytest.raises(errors.StudyManifestParsingError)), ("invalid_none", pytest.raises(TypeError)), ], )