From b76af35586285461189eaf9ea141db8e9c1ba5d9 Mon Sep 17 00:00:00 2001 From: Matin Nuhamunada Date: Wed, 31 Jan 2024 08:21:59 +0000 Subject: [PATCH] feat: add exporter script --- .pre-commit-config.yaml | 8 ++++---- README.md | 16 +++++++++++++++ scripts/export_duckdb.py | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 scripts/export_duckdb.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82e0c2b..463727c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.1.13 + rev: v1.5.4 hooks: - id: forbid-crlf - id: remove-crlf @@ -9,7 +9,7 @@ repos: - id: remove-tabs exclude_types: [csv] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -21,7 +21,7 @@ repos: hooks: - id: isort - repo: https://github.com/ambv/black - rev: 22.3.0 + rev: 24.1.1 hooks: - id: black - language_version: python3.8 + language_version: python3.11 diff --git a/README.md b/README.md index b73c71b..f56fc38 100644 --- a/README.md +++ b/README.md @@ -48,5 +48,21 @@ dbt docs generate dbt docs serve ``` +### Exporting to a newer version of DuckDB +Right now, newer versions of DuckDB are not backward compatible. To migrate the data to a newer version, use the script [`export_duckdb.py`](scripts/export_duckdb.py): +```bash +$ python scripts/export_duckdb.py -h +usage: export_duckdb.py [-h] [--database_filename DATABASE_FILENAME] [--export_directory EXPORT_DIRECTORY] + +Export a DuckDB database. + +options: + -h, --help show this help message and exit + --database_filename DATABASE_FILENAME + The filename of the DuckDB database to export. + --export_directory EXPORT_DIRECTORY + The directory to save the exported database. 
+```
+
 # Credits
 This dbt template was inspired adapted from [jaffle_shop_duckdb](https://github.com/dbt-labs/jaffle_shop_duckdb) example.
diff --git a/scripts/export_duckdb.py b/scripts/export_duckdb.py
new file mode 100644
index 0000000..ebebc2b
--- /dev/null
+++ b/scripts/export_duckdb.py
@@ -0,0 +1,60 @@
+import argparse
+import logging
+from pathlib import Path
+
+import duckdb
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+def export_database(database_filename, export_directory):
+    """
+    Export a DuckDB database to a specified directory.
+
+    The connection is always closed, even when the export fails.
+
+    Args:
+        database_filename (str): The filename of the DuckDB database to export.
+        export_directory (str): The directory to save the exported database.
+
+    Raises:
+        FileNotFoundError: If database_filename does not exist.
+    """
+    # duckdb.connect() creates a new, empty database when the file is missing,
+    # which would make the export "succeed" with nothing in it.
+    if not Path(database_filename).is_file():
+        raise FileNotFoundError(f"Database file not found: {database_filename}")
+
+    logging.info("Connecting to database: %s", database_filename)
+    conn = duckdb.connect(database_filename)
+    try:
+        logging.info("Exporting database to directory: %s", export_directory)
+        # The target path is embedded as a SQL string literal, so escape any
+        # single quotes by doubling them.
+        target = str(export_directory).replace("'", "''")
+        conn.execute(f"EXPORT DATABASE '{target}' (FORMAT PARQUET)")
+    finally:
+        # Release the database file even if the export raised.
+        conn.close()
+
+    logging.info("Database export completed successfully")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Export a DuckDB database.")
+    parser.add_argument(
+        "--database_filename",
+        help="The filename of the DuckDB database to export.",
+        default="dbt_bgcflow.duckdb",
+    )
+    parser.add_argument(
+        "--export_directory",
+        help="The directory to save the exported database.",
+        default="./exported_database",
+    )
+    args = parser.parse_args()
+
+    export_database(args.database_filename, args.export_directory)