From ab10e6bc58471ec3ee8870377dc2d2a0f2527406 Mon Sep 17 00:00:00 2001 From: terratrue-daniel <97548386+terratrue-daniel@users.noreply.github.com> Date: Wed, 29 Nov 2023 00:02:26 -0800 Subject: [PATCH] feat(ingest/mssql): enable TLS encryption for SQLServer using pytds (#9256) --- metadata-ingestion/docs/sources/mssql/mssql_recipe.yml | 8 ++++++++ metadata-ingestion/setup.py | 2 +- .../src/datahub/ingestion/source/sql/mssql/source.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml b/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml index 5f1e24ce1e956..93be7a86d72cc 100644 --- a/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml +++ b/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml @@ -9,6 +9,14 @@ source: username: user password: pass + # Options + # Uncomment if you need to use encryption with pytds + # See https://python-tds.readthedocs.io/en/latest/pytds.html#pytds.connect + # options: + # connect_args: + # cafile: server-ca.pem + # validate_host: true + sink: # sink configs diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 2b002164a49b9..8d9892d8e11b1 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -350,7 +350,7 @@ "mlflow": {"mlflow-skinny>=2.3.0"}, "mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, - "mssql": sql_common | {"sqlalchemy-pytds>=0.3"}, + "mssql": sql_common | {"sqlalchemy-pytds>=0.3", "pyOpenSSL"}, "mssql-odbc": sql_common | {"pyodbc"}, "mysql": mysql, # mariadb should have same dependency as mysql diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 6eea5a4c31fa6..2442df595d967 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -155,7 +155,7 @@ class SQLServerSource(SQLAlchemySource): - Metadata for databases, schemas, views and tables - Column types associated with each table/view - Table, row, and column statistics via optional SQL profiling - We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. + We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install. """ def __init__(self, config: SQLServerConfig, ctx: PipelineContext):