From 7ba1eb5565a766b71463da5797bc4888dffeb297 Mon Sep 17 00:00:00 2001
From: Daniel Fitzgibbon Alves Pereira
Date: Mon, 14 Oct 2024 18:53:04 +0100
Subject: [PATCH] Adding Databricks SQL Connector

---
 apps/api/jupyter-requirements.txt | 3 +-
 apps/api/package.json | 1 +
 apps/api/src/auth/token.ts | 6 +
 apps/api/src/datasources/databrickssql.ts | 43 ++
 apps/api/src/datasources/index.ts | 6 +
 apps/api/src/datasources/structure.ts | 30 +
 apps/api/src/python/query/databrickssql.ts | 64 ++
 apps/api/src/python/query/index.ts | 13 +
 apps/api/src/python/query/sqlalchemy.ts | 3 +-
 apps/api/src/python/writeback/index.ts | 2 +
 .../workspace/data-sources/data-source.ts | 42 ++
 .../workspace/data-sources/index.ts | 31 +
 apps/web/public/icons/databrickssql.png | Bin 0 -> 3935 bytes
 apps/web/src/components/DataSourceIcons.tsx | 5 +
 apps/web/src/components/DataSourcesList.tsx | 8 +
 .../src/components/forms/databrickssql.tsx | 245 ++++++++
 .../src/components/v2Editor/CodeEditor/sql.ts | 1 +
 apps/web/src/hooks/useDatasource.ts | 5 +-
 .../data-sources/edit/[dataSourceId].tsx | 10 +
 .../data-sources/new/databrickssql.tsx | 60 ++
 .../[workspaceId]/data-sources/new/index.tsx | 5 +
 .../migration.sql | 24 +
 .../migration.sql | 20 +
 packages/database/prisma/schema.prisma | 146 +++--
 .../database/src/datasources/databrickssql.ts | 118 ++++
 packages/database/src/datasources/index.ts | 28 +
 yarn.lock | 556 ++++++++++++++++--
 27 files changed, 1374 insertions(+), 101 deletions(-)
 create mode 100644 apps/api/src/datasources/databrickssql.ts
 create mode 100644 apps/api/src/python/query/databrickssql.ts
 create mode 100644 apps/web/public/icons/databrickssql.png
 create mode 100644 apps/web/src/components/forms/databrickssql.tsx
 create mode 100644 apps/web/src/pages/workspaces/[workspaceId]/data-sources/new/databrickssql.tsx
 create mode 100644 packages/database/prisma/migrations/20241009130735_introduce_databrickssql_data_source/migration.sql
 create mode 100644 packages/database/prisma/migrations/20241101141416_upgrade_databrickssql_to_use_separate_schema_table/migration.sql
 create mode 100644 packages/database/src/datasources/databrickssql.ts

diff --git a/apps/api/jupyter-requirements.txt b/apps/api/jupyter-requirements.txt
index cf5f84ff..54dccc0e 100644
--- a/apps/api/jupyter-requirements.txt
+++ b/apps/api/jupyter-requirements.txt
@@ -56,4 +56,5 @@ openpyxl==3.1.2
 mysqlclient==2.2.4
 pymongo==4.8.0
 snowflake-connector-python==3.12.2
-snowflake-sqlalchemy==1.6.1
\ No newline at end of file
+snowflake-sqlalchemy==1.6.1
+databricks-sql-connector[sqlalchemy]==3.4.0
diff --git a/apps/api/package.json b/apps/api/package.json
index 8ef056d5..721928aa 100644
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -13,6 +13,7 @@
     "@briefer/database": "*",
     "@briefer/editor": "*",
     "@briefer/types": "*",
+    "@databricks/sql": "^1.8.4",
     "@grpc/grpc-js": "^1.11.1",
     "@jupyterlab/services": "^7.1.1",
     "@kubernetes/client-node": "^0.20.0",
diff --git a/apps/api/src/auth/token.ts b/apps/api/src/auth/token.ts
index e3e14ffd..bcc21249 100644
--- a/apps/api/src/auth/token.ts
+++ b/apps/api/src/auth/token.ts
@@ -318,5 +318,11 @@ export const isAuthorizedForDataSource = async (
       const result = await prisma().snowflakeDataSource.findFirst(query)
       return result !== null
     }
+    case 'databrickssql': {
+      const result = await prisma().databricksSQLDataSource.findFirst(
+        query,
+      )
+      return result !== null
+    }
   }
 }
diff --git a/apps/api/src/datasources/databrickssql.ts b/apps/api/src/datasources/databrickssql.ts
new file mode 100644
index 00000000..98468bdf
--- /dev/null
+++ b/apps/api/src/datasources/databrickssql.ts
@@ -0,0 +1,43 @@
+import { config } from '../config/index.js'
+import prisma, { DatabricksSQLDataSource } from '@briefer/database'
+import { DataSourceStatus } from './index.js'
+import { pingDatabricksSQL } from '../python/query/databrickssql.js'
+
+export async function ping(ds: DatabricksSQLDataSource): Promise<DatabricksSQLDataSource> {
+  const lastConnection = new Date()
+  const err = await pingDatabricksSQL(ds, config().DATASOURCES_ENCRYPTION_KEY)
+
+  if (!err) {
+    return updateConnStatus(ds, {
+      connStatus: 'online',
+      lastConnection,
+    })
+  }
+
+  return updateConnStatus(ds, { connStatus: 'offline', connError: err })
+}
+
+export async function updateConnStatus(
+  ds: DatabricksSQLDataSource,
+  status: DataSourceStatus
+): Promise<DatabricksSQLDataSource> {
+  const newDs = await prisma().databricksSQLDataSource.update({
+    where: { id: ds.id },
+    data: {
+      connStatus: status.connStatus,
+      lastConnection:
+        status.connStatus === 'online' ? status.lastConnection : undefined,
+      connError:
+        status.connStatus === 'offline'
+          ? JSON.stringify(status.connError)
+          : undefined,
+    },
+  })
+
+  return {
+    ...ds,
+    connStatus: newDs.connStatus,
+    lastConnection: newDs.lastConnection?.toISOString() ?? null,
+    connError: newDs.connError,
+  }
+}
diff --git a/apps/api/src/datasources/index.ts b/apps/api/src/datasources/index.ts
index ac27df38..d9c3063a 100644
--- a/apps/api/src/datasources/index.ts
+++ b/apps/api/src/datasources/index.ts
@@ -8,6 +8,7 @@ import * as mysql from './mysql.js'
 import * as trino from './trino.js'
 import * as sqlserver from './sqlserver.js'
 import * as snowflake from './snowflake.js'
+import * as databrickssql from './databrickssql.js'
 import { DataSourceConnectionError } from '@briefer/types'
 import { IOServer } from '../websocket/index.js'
 import { broadcastDataSource } from '../websocket/workspace/data-sources.js'
@@ -39,6 +40,8 @@ export async function ping(
         return trino.ping(ds.config.data)
       case 'snowflake':
         return snowflake.ping(ds.config.data)
+      case 'databrickssql':
+        return databrickssql.ping(ds.config.data)
     }
   })()
   broadcastDataSource(socket, ds)
@@ -89,5 +92,8 @@ export async function updateConnStatus<D extends DataSource>(
     case 'snowflake':
       ds.config.data = await snowflake.updateConnStatus(ds.config.data, status)
       return ds
+    case 'databrickssql':
+      ds.config.data = await databrickssql.updateConnStatus(ds.config.data, status)
+      return ds
   }
 }
diff --git a/apps/api/src/datasources/structure.ts b/apps/api/src/datasources/structure.ts
index b08e9d7f..f56049a4 100644
--- a/apps/api/src/datasources/structure.ts
+++ b/apps/api/src/datasources/structure.ts
@@ -31,6 +31,7 @@ import { getTrinoSchema } from '../python/query/trino.js'
 import { getSnowflakeSchema } from '../python/query/snowflake.js'
 import { getAthenaSchema } from './athena.js'
 import { getMySQLSchema } from './mysql.js'
+import { getDatabricksSQLSchema } from '../python/query/databrickssql.js'
 import { PythonExecutionError } from '../python/index.js'
 import { getSqlServerSchema } from './sqlserver.js'
 import { z } from 'zod'
@@ -134,6 +135,14 @@ async function getV2FromCache(
         })
       ).structure
       break
+    case 'databrickssql':
+      encrypted = (
+        await prisma().databricksSQLDataSource.findUniqueOrThrow({
+          where: { id: dataSourceId },
+          select: { structure: true },
+        })
+      ).structure
+      break
   }

   if (encrypted === null) {
@@ -280,6 +289,12 @@ async function assignDataSourceSchemaId(
         data: { dataSourceSchemaId: dbSchema.id },
       })
       return dbSchema.id
+    case 'databrickssql':
+      await prisma().databricksSQLDataSource.update({
+        where: { id: dataSourceId },
+        data: { dataSourceSchemaId: dbSchema.id },
+      })
+      return dbSchema.id
   }
 }

@@ -364,6 +379,14 @@ async function getFromCache(
         })
       ).dataSourceSchema
       break
+    case 'databrickssql':
+      schema = (
+        await prisma().databricksSQLDataSource.findUniqueOrThrow({
+          where: { id: dataSourceId },
+          select,
+        })
+      ).dataSourceSchema
+      break
   }

   if (schema === null) {
@@ -584,6 +607,13 @@ async function _refreshDataSourceStructure(
         onTable
       )
       break
+    case 'databrickssql':
+      await getDatabricksSQLSchema(
+        dataSource.config.data,
+        config().DATASOURCES_ENCRYPTION_KEY,
+        onTable
+      )
+      break
   }

   await updateQueue.onIdle()
diff --git a/apps/api/src/python/query/databrickssql.ts b/apps/api/src/python/query/databrickssql.ts
new file mode 100644
index 00000000..4afd3674
--- /dev/null
+++ b/apps/api/src/python/query/databrickssql.ts
@@ -0,0 +1,64 @@
+import { v4 as uuidv4 } from 'uuid'
+import { DatabricksSQLDataSource, getDatabaseURL } from '@briefer/database'
+import { RunQueryResult, SuccessRunQueryResult } from '@briefer/types'
+import {
+  getSQLAlchemySchema,
+  makeSQLAlchemyQuery,
+  pingSQLAlchemy,
+} from './sqlalchemy.js'
+import { OnTable } from '../../datasources/structure.js'
+
+export async function makeDatabricksSQLQuery(
+  workspaceId: string,
+  sessionId: string,
+  queryId: string,
+  dataframeName: string,
+  datasource: DatabricksSQLDataSource,
+  encryptionKey: string,
+  sql: string,
+  onProgress: (result: SuccessRunQueryResult) => void
+): Promise<[Promise<RunQueryResult>, () => Promise<void>]> {
+  const databaseUrl = await getDatabaseURL(
+    { type: 'databrickssql', data: datasource },
+    encryptionKey
+  )
+
+  const jobId = uuidv4()
+  const query = `${sql} -- Briefer jobId: ${jobId}`
+
+  return makeSQLAlchemyQuery(
+    workspaceId,
+    sessionId,
+    dataframeName,
+    databaseUrl,
+    'databrickssql',
+    jobId,
+    query,
+    queryId,
+    onProgress
+  )
+}
+
+export function pingDatabricksSQL(
+  ds: DatabricksSQLDataSource,
+  encryptionKey: string
+): Promise {
+  return pingSQLAlchemy(
+    { type: 'databrickssql', data: ds },
+    encryptionKey,
+    null
+  )
+}
+
+export function getDatabricksSQLSchema(
+  ds: DatabricksSQLDataSource,
+  encryptionKey: string,
+  onTable: OnTable
+): Promise {
+  return getSQLAlchemySchema(
+    { type: 'databrickssql', data: ds },
+    encryptionKey,
+    null,
+    onTable
+  )
+}
diff --git a/apps/api/src/python/query/index.ts b/apps/api/src/python/query/index.ts
index aa5d53f3..c1dddb1b 100644
--- a/apps/api/src/python/query/index.ts
+++ b/apps/api/src/python/query/index.ts
@@ -22,6 +22,7 @@ import { makeSnowflakeQuery } from './snowflake.js'
 import { updateConnStatus } from '../../datasources/index.js'
 import { getJupyterManager } from '../../jupyter/index.js'
 import { makeSQLServerQuery } from './sqlserver.js'
+import { makeDatabricksSQLQuery } from './databrickssql.js'

 export async function makeSQLQuery(
   workspaceId: string,
@@ -147,6 +148,18 @@ export async function makeSQLQuery(
         onProgress
       )
       break
+    case 'databrickssql':
+      result = await makeDatabricksSQLQuery(
+        workspaceId,
+        sessionId,
+        queryId,
+        dataframeName,
+        datasource.data,
+        encryptionKey,
+        sql,
+        onProgress
+      )
+      break
   }

   result[0].then(async (r) => {
diff --git a/apps/api/src/python/query/sqlalchemy.ts b/apps/api/src/python/query/sqlalchemy.ts
index f75e447e..f2c256a1 100644
--- a/apps/api/src/python/query/sqlalchemy.ts
+++ b/apps/api/src/python/query/sqlalchemy.ts
@@ -23,7 +23,8 @@ export async function makeSQLAlchemyQuery(
     | 'psql'
     | 'redshift'
     | 'trino'
-    | 'snowflake',
+    | 'snowflake'
+    | 'databrickssql',
   jobId: string,
   query: string,
   queryId: string,
diff --git a/apps/api/src/python/writeback/index.ts b/apps/api/src/python/writeback/index.ts
index 34a38bb0..f19517cf 100644
--- a/apps/api/src/python/writeback/index.ts
+++ b/apps/api/src/python/writeback/index.ts
@@ -56,5 +56,7 @@ export async function writeback(
     case 'snowflake':
     case 'trino':
       throw new Error(`${datasource.type} writeback not implemented`)
+    case 'databrickssql':
+      throw new Error(`${datasource.type} writeback not implemented`)
   }
 }
diff --git a/apps/api/src/v1/workspaces/workspace/data-sources/data-source.ts b/apps/api/src/v1/workspaces/workspace/data-sources/data-source.ts
index f31f2235..736554c7 100644
--- a/apps/api/src/v1/workspaces/workspace/data-sources/data-source.ts
+++ b/apps/api/src/v1/workspaces/workspace/data-sources/data-source.ts
@@ -29,6 +29,9 @@ import {
   getSnowflakeDataSource,
   updateSnowflakeDataSource,
   deleteSnowflakeDataSource,
+  getDatabricksSQLDataSource,
+  updateDatabricksSQLDataSource,
+  deleteDatabricksSQLDataSource,
 } from '@briefer/database'
 import { z } from 'zod'
 import { getParam } from '../../../../utils/express.js'
@@ -139,6 +142,19 @@ const dataSourceRouter = (socketServer: IOServer) => {
        notes: z.string(),
      }),
    }),
+    z.object({
+      type: z.literal('databrickssql'),
+      data: z.object({
+        id: z.string().min(1),
+        name: z.string().min(1),
+        hostname: z.string().min(1),
+        http_path: z.string().min(1),
+        token: z.string().min(1),
+        catalog: z.string(),
+        schema: z.string(),
+        notes: z.string(),
+      }),
+    }),
   ])

   router.put('/', async (req, res) => {
@@ -162,6 +178,7 @@
           getSQLServerDataSource(workspaceId, dataSourceId),
           getTrinoDataSource(workspaceId, dataSourceId),
           getSnowflakeDataSource(workspaceId, dataSourceId),
+          getDatabricksSQLDataSource(workspaceId, dataSourceId),
         ])
       ).find((e) => e !== null)
       if (!existingDb) {
@@ -298,6 +315,17 @@ const dataSourceRouter = (socketServer: IOServer) => {
           )
           break
         }
+        case 'databrickssql': {
+          await updateDatabricksSQLDataSource(
+            {
+              ...data.data,
+              id: dataSourceId,
+              token: data.data.token === '' ? undefined : data.data.token,
+            },
+            config().DATASOURCES_ENCRYPTION_KEY
+          )
+          break
+        }
       }

       const ds = await getDatasource(workspaceId, dataSourceId, data.type)
@@ -444,6 +472,19 @@ const dataSourceRouter = (socketServer: IOServer) => {
      }
    }

+    const targetDatabricksSQLDb = await recoverFromNotFound(
+      deleteDatabricksSQLDataSource(workspaceId, targetId)
+    )
+    if (targetDatabricksSQLDb) {
+      return {
+        status: 200,
+        payload: {
+          type: 'databrickssql',
+          data: targetDatabricksSQLDb,
+        },
+      }
+    }
+
     return { status: 404 }
   }

@@ -468,6 +509,7 @@ const dataSourceRouter = (socketServer: IOServer) => {
       z.literal('trino'),
       z.literal('sqlserver'),
       z.literal('snowflake'),
+      z.literal('databrickssql'),
     ]),
   })
   router.post('/ping', async (req, res) => {
diff --git a/apps/api/src/v1/workspaces/workspace/data-sources/index.ts b/apps/api/src/v1/workspaces/workspace/data-sources/index.ts
index 3f1027a8..ffc25678 100644
--- a/apps/api/src/v1/workspaces/workspace/data-sources/index.ts
+++ b/apps/api/src/v1/workspaces/workspace/data-sources/index.ts
@@ -12,6 +12,7 @@ import {
   createTrinoDataSource,
   createSQLServerDataSource,
   createSnowflakeDataSource,
+  createDatabricksSQLDataSource,
 } from '@briefer/database'
 import { z } from 'zod'
 import { getParam } from '../../../../utils/express.js'
@@ -113,6 +114,18 @@ const dataSourcePayload = z.union([
       notes: z.string(),
     }),
   }),
+  z.object({
+    type: z.literal('databrickssql'),
+    data: z.object({
+      name: z.string().min(1),
+      hostname: z.string().min(1),
+      http_path: z.string().min(1),
+      token: z.string().min(1),
+      catalog: z.string(),
+      schema: z.string(),
+      notes: z.string(),
+    }),
+  }),
 ])

 export type DataSourcePayload = z.infer<typeof dataSourcePayload>
@@ -288,6 +301,22 @@
         dsRes = { type: 'snowflake', data: ds }
         break
       }
+      case 'databrickssql': {
+        const payload = {
+          ...data.data,
+          workspaceId,
+          connStatus: 'offline' as const,
+          connError: JSON.stringify(neverPingedError),
+          lastConnection: null,
+        }
+
+        const ds = await createDatabricksSQLDataSource(
+          payload,
+          config().DATASOURCES_ENCRYPTION_KEY
+        )
+        dsRes = { type: 'databrickssql', data: ds }
+        break
+      }
     }

     return dsRes
@@ -302,6 +331,7 @@
     case 'athena':
     case 'sqlserver':
     case 'snowflake':
+    case 'databrickssql':
     case 'trino':
       return null
     case 'oracle': {
@@ -386,6 +416,7 @@
       getDatasource(workspaceId, dataSourceId, 'sqlserver'),
       getDatasource(workspaceId, dataSourceId, 'trino'),
       getDatasource(workspaceId, dataSourceId, 'snowflake'),
+      getDatasource(workspaceId, dataSourceId, 'databrickssql'),
     ])
   ).find((e) => e !== null)

diff --git a/apps/web/public/icons/databrickssql.png b/apps/web/public/icons/databrickssql.png
new file mode 100644
index 0000000000000000000000000000000000000000..01c738a338b8f0a45660ef1f5fc77bd9df260307
GIT binary patch
literal 3935
[base85-encoded PNG icon data omitted]

literal 0
HcmV?d00001

diff --git a/apps/web/src/components/DataSourceIcons.tsx b/apps/web/src/components/DataSourceIcons.tsx
index 64cd79dd..e4171c9d 100644
--- a/apps/web/src/components/DataSourceIcons.tsx
+++ b/apps/web/src/components/DataSourceIcons.tsx
@@ -61,6 +61,11 @@ export const DataSourceIcons = ({
         icon="/icons/snowflake.png"
         name="Snowflake"
         href={`/workspaces/${workspaceId}/data-sources/new/snowflake`}
+      />
+      <