From 3ad3b2ff7f36bc2369c0d5b95dbe0e321f5f061f Mon Sep 17 00:00:00 2001 From: brokun Date: Sat, 14 Sep 2024 02:44:53 +0800 Subject: [PATCH] feat(sql): sql magic, support postgresql/mysql/sqlite --- libro-sql/.gitignore | 10 +++ libro-sql/.python-version | 1 + libro-sql/README.md | 30 ++++++++ libro-sql/pyproject.toml | 32 +++++++++ libro-sql/src/libro_sql/__init__.py | 7 ++ libro-sql/src/libro_sql/_version.py | 2 + libro-sql/src/libro_sql/database.py | 94 +++++++++++++++++++++++++ libro-sql/src/libro_sql/exception.py | 35 ++++++++++ libro-sql/src/libro_sql/extensions.py | 22 ++++++ libro-sql/src/libro_sql/sql_magic.py | 99 +++++++++++++++++++++++++++ requirements-dev.lock | 22 +++++- requirements.lock | 22 +++++- 12 files changed, 370 insertions(+), 6 deletions(-) create mode 100644 libro-sql/.gitignore create mode 100644 libro-sql/.python-version create mode 100644 libro-sql/README.md create mode 100644 libro-sql/pyproject.toml create mode 100644 libro-sql/src/libro_sql/__init__.py create mode 100644 libro-sql/src/libro_sql/_version.py create mode 100644 libro-sql/src/libro_sql/database.py create mode 100644 libro-sql/src/libro_sql/exception.py create mode 100644 libro-sql/src/libro_sql/extensions.py create mode 100644 libro-sql/src/libro_sql/sql_magic.py diff --git a/libro-sql/.gitignore b/libro-sql/.gitignore new file mode 100644 index 0000000..ae8554d --- /dev/null +++ b/libro-sql/.gitignore @@ -0,0 +1,10 @@ +# python generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# venv +.venv diff --git a/libro-sql/.python-version b/libro-sql/.python-version new file mode 100644 index 0000000..c8cfe39 --- /dev/null +++ b/libro-sql/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/libro-sql/README.md b/libro-sql/README.md new file mode 100644 index 0000000..05f4f6e --- /dev/null +++ b/libro-sql/README.md @@ -0,0 +1,30 @@ +# libro-sql + +# 使用 + +## 加载 + +```shell +%load_ext libro_sql +``` + +# 设置 + +```python +from libro_sql.database import db 
+db.config({ + 'db_type': '', + 'username': '', + 'password': '', + 'host': '', + 'port': 5432, + 'database': '' +}) +``` + +# 执行 + +```python +%%sql +{"result_variable":"a", "sql_script":"select 1"} +``` diff --git a/libro-sql/pyproject.toml b/libro-sql/pyproject.toml new file mode 100644 index 0000000..18eaed6 --- /dev/null +++ b/libro-sql/pyproject.toml @@ -0,0 +1,32 @@ +[project] +name = "libro-sql" +version = "0.1.2" +description = "libro flow" +authors = [ + { name = "brokun", email = "brokun0128@gmail.com" }, + { name = "sunshinesmilelk", email = "1176136681@qq.com" }, +] +dependencies = [ + "ipython>=7.34.0", + "sqlalchemy>=2.0.34", + "pandas>=2.2.2", + "pydantic>=2.9.1", + "psycopg2-binary>=2.9.9", + "pymysql>=1.1.1", +] +dev-dependencies = [] +readme = "README.md" +requires-python = ">= 3.10" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.rye] +managed = true + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/libro_sql"] diff --git a/libro-sql/src/libro_sql/__init__.py b/libro-sql/src/libro_sql/__init__.py new file mode 100644 index 0000000..e4d23d4 --- /dev/null +++ b/libro-sql/src/libro_sql/__init__.py @@ -0,0 +1,7 @@ +from ._version import __version__ + +from .extensions import ( + load_ipython_extension, + unload_ipython_extension, + _load_jupyter_server_extension, +) diff --git a/libro-sql/src/libro_sql/_version.py b/libro-sql/src/libro_sql/_version.py new file mode 100644 index 0000000..bdafc4c --- /dev/null +++ b/libro-sql/src/libro_sql/_version.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +__version__ = "0.1.2" diff --git a/libro-sql/src/libro_sql/database.py b/libro-sql/src/libro_sql/database.py new file mode 100644 index 0000000..2bf44ed --- /dev/null +++ b/libro-sql/src/libro_sql/database.py @@ -0,0 +1,94 @@ + +from typing import Optional +from pydantic import BaseModel +from sqlalchemy import create_engine, text +from sqlalchemy.exc import 
class Database:
    """Wrap a SQLAlchemy engine built from a ``DatabaseConfig``.

    The engine is created eagerly in ``__init__``; ``execute`` runs one SQL
    string per call inside its own transaction.
    """

    config: DatabaseConfig

    # Maps the user-facing ``db_type`` to the SQLAlchemy dialect+driver name.
    _NETWORK_DRIVERS = {
        'postgresql': 'postgresql+psycopg2',
        'mysql': 'mysql+pymysql',
    }

    def __init__(self, config: DatabaseConfig):
        self.config = config
        self.engine = self._create_engine()

    def _create_engine(self):
        """Create the SQLAlchemy engine based on the database type.

        Raises:
            ValueError: if ``config.db_type`` is not postgresql/mysql/sqlite.
        """
        # Function-scope import: keeps this block self-contained without
        # touching the module's import section.
        from sqlalchemy.engine import URL

        config = self.config
        try:
            if config.db_type in self._NETWORK_DRIVERS:
                # URL.create escapes each component, so credentials that
                # contain '@', ':' or '/' no longer corrupt the URL as they
                # did with plain f-string interpolation.
                url = URL.create(
                    self._NETWORK_DRIVERS[config.db_type],
                    username=config.username,
                    password=config.password,
                    host=config.host,
                    port=config.port,
                    database=config.database,
                )
            elif config.db_type == 'sqlite':
                url = f'sqlite:///{config.database}'
            else:
                raise ValueError(
                    f"Unsupported database type: {config.db_type}")
            return create_engine(url)
        except Exception as e:
            print(f"Error creating engine: {e}")
            raise

    def execute(self, query):
        """Execute a SQL statement and return its result.

        Returns:
            A ``pandas.DataFrame`` when the statement returns rows (column
            names are preserved even when there are zero rows); otherwise the
            number of affected rows reported by the driver.

        Raises:
            SQLAlchemyError: re-raised after printing when execution fails.
        """
        # engine.begin() commits on success and rolls back on error, so
        # row-returning writes (e.g. INSERT ... RETURNING) are persisted too.
        with self.engine.begin() as connection:
            try:
                result = connection.execute(text(query))
                if result.returns_rows:
                    # Rows must be fetched before the transaction closes.
                    rows = result.fetchall()
                    return pd.DataFrame(rows, columns=list(result.keys()))
                return result.rowcount
            except SQLAlchemyError as e:
                print(f"Error executing query: {e}")
                raise
def store_exception(shell: InteractiveShell, etype: type, evalue, tb, tb_offset=None):
    """Record the plain-text traceback of an exception in ``user_ns["Err"]``.

    The traceback is rendered with the formatter's colour toggled for the
    duration of the rendering, stored under the current ``execution_count``,
    and the exception is then shown through ``shell.showtraceback()``.

    Returns whatever ``shell.showtraceback()`` returns.
    """
    is_syntax_error = issubclass(etype, SyntaxError)
    formatter = shell.SyntaxTB if is_syntax_error else shell.InteractiveTB

    # Toggle ANSI colouring off while rendering, then back on afterwards.
    formatter.color_toggle()
    if is_syntax_error:
        # A syntax error has no stack trace, so pass an empty traceback.
        structured = formatter.structured_traceback(etype, evalue, [])
    else:
        structured = formatter.structured_traceback(
            etype, evalue, tb, tb_offset=tb_offset
        )
    rendered = formatter.stb2text(structured)
    formatter.color_toggle()

    shown = shell.showtraceback()

    # Keep one entry per prompt number, creating the mapping on first use.
    errors = shell.user_ns.get("Err", {})
    errors[shell.execution_count] = str(rendered)
    shell.user_ns["Err"] = errors

    return shown
def is_ipython() -> bool:
    """Return True when running inside an active IPython shell."""
    try:
        from IPython import get_ipython

        return get_ipython() is not None
    except (ImportError, NameError):
        return False


def _render_sql_script(raw_json, local_ns):
    """Parse the magic's JSON payload and expand ``{{name}}`` placeholders.

    Only public names (not starting with ``_``) from ``local_ns`` are
    substituted into the ``sql_script`` entry. The parsed object is returned
    with the expanded script written back.

    NOTE: values are spliced into the SQL text verbatim (no quoting or
    escaping), so only trusted namespace values should be referenced.
    """
    json_obj = json.loads(raw_json)
    content = json_obj.get("sql_script")
    if content:
        for key, value in local_ns.items():
            if not key or key.startswith("_"):
                continue
            placeholder = "{{" + key + "}}"
            # Stringify only values that are actually referenced: str() on an
            # arbitrary notebook object can be expensive or even raise.
            if placeholder in content:
                content = content.replace(placeholder, str(value))
        json_obj["sql_script"] = content
    return json_obj


def preprocessing_line(line, local_ns):
    """Decode a base64-encoded JSON payload from a line magic and expand it.

    Raises:
        Exception: ``("preprocess error", cause)`` on any decoding or parsing
            failure, chained to the original exception.
    """
    try:
        user_input = str(base64.decodebytes(line.encode()), "utf-8")
        return _render_sql_script(user_input, local_ns)
    except Exception as e:
        raise Exception("preprocess error", e) from e


def preprocessing_cell(cell, local_ns):
    """Parse the JSON payload of a cell magic and expand its placeholders.

    Raises:
        Exception: ``("preprocess error", cause)`` on any parsing failure,
            chained to the original exception.
    """
    try:
        return _render_sql_script(cell, local_ns)
    except Exception as e:
        raise Exception("preprocess error", e) from e
@magics_class
class SQLMagic(Magics):
    """IPython magic that runs a SQL script against the configured database.

    Usage::

        %%sql
        {"result_variable":"custom_variable_name","sql_script":"SELECT 1"}

    As a line magic the same JSON payload is expected base64-encoded.
    """

    def __init__(self, shell=None):
        super().__init__(shell)

    @line_cell_magic
    def sql(self, line="", cell=None):
        """Execute the payload's ``sql_script`` and publish the result.

        The result is stored in the user namespace under ``result_variable``
        when one is given, and is displayed in either case.

        Raises:
            Exception: if the script is missing/empty or ``result_variable``
                is not a valid Python identifier.
        """
        local_ns = self.shell.user_ns  # type: ignore
        if cell is None:
            args = preprocessing_line(line, local_ns)
        else:
            args = preprocessing_cell(cell, local_ns)

        result_variable = args.get("result_variable")
        sql_script = args.get("sql_script")

        if not sql_script:
            raise Exception("Invalid sql script!")

        # Validate the target name BEFORE touching the database so that a bad
        # name cannot fail after a (possibly destructive) statement has run.
        store_result = result_variable is not None and result_variable != ""
        if store_result and not (
            isinstance(result_variable, str) and result_variable.isidentifier()
        ):
            raise Exception(
                'Invalid variable name "{}".'.format(result_variable)
            )

        res = db.execute(sql_script)

        if store_result:
            local_ns[result_variable] = res

        # Display even when no variable was requested; previously the result
        # was silently dropped in that case.
        if is_ipython():
            from IPython.display import display
            display(res)
@@ -254,28 +261,34 @@ pure-eval==0.2.2 # via stack-data pycparser==2.22 # via cffi -pydantic==2.6.4 +pydantic==2.9.1 # via langchain # via langchain-core # via langsmith # via libro-ai # via libro-flow + # via libro-sql # via openai -pydantic-core==2.16.3 +pydantic-core==2.23.3 # via pydantic pygls==1.3.1 # via ruff-lsp pygments==2.17.2 # via ipython # via nbconvert +pymysql==1.1.1 + # via libro-sql pyparsing==3.1.2 # via matplotlib python-dateutil==2.9.0.post0 # via arrow # via jupyter-client # via matplotlib + # via pandas python-json-logger==2.0.7 # via jupyter-events +pytz==2024.2 + # via pandas pyyaml==6.0.1 # via jupyter-events # via langchain @@ -324,9 +337,10 @@ sniffio==1.3.1 # via openai soupsieve==2.5 # via beautifulsoup4 -sqlalchemy==2.0.29 +sqlalchemy==2.0.34 # via langchain # via langchain-community + # via libro-sql stack-data==0.6.3 # via ipython tenacity==8.2.3 @@ -374,6 +388,8 @@ typing-extensions==4.11.0 # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json +tzdata==2024.1 + # via pandas uri-template==1.3.0 # via jsonschema urllib3==2.2.1 diff --git a/requirements.lock b/requirements.lock index c47d859..99ec160 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,6 +10,7 @@ -e file:libro-ai -e file:libro-flow -e file:libro-server +-e file:libro-sql aiohttp==3.9.3 # via langchain # via langchain-community @@ -96,6 +97,7 @@ ipython==8.23.0 # via ipywidgets # via libro # via libro-ai + # via libro-sql ipywidgets==8.1.2 # via libro-flow isoduration==20.11.0 @@ -201,6 +203,7 @@ numpy==1.26.4 # via langchain # via langchain-community # via matplotlib + # via pandas orjson==3.10.0 # via langsmith overrides==7.7.0 @@ -214,6 +217,8 @@ packaging==23.2 # via matplotlib # via nbconvert # via ruff-lsp +pandas==2.2.2 + # via libro-sql pandocfilters==1.5.1 # via nbconvert parso==0.8.4 @@ -230,6 +235,8 @@ prompt-toolkit==3.0.43 # via ipython psutil==5.9.8 # via ipykernel +psycopg2-binary==2.9.9 + # via libro-sql ptyprocess==0.7.0 # via 
pexpect # via terminado @@ -237,27 +244,33 @@ pure-eval==0.2.2 # via stack-data pycparser==2.22 # via cffi -pydantic==2.6.4 +pydantic==2.9.1 # via langchain # via langchain-core # via langsmith # via libro-ai # via libro-flow -pydantic-core==2.16.3 + # via libro-sql +pydantic-core==2.23.3 # via pydantic pygls==1.3.1 # via ruff-lsp pygments==2.17.2 # via ipython # via nbconvert +pymysql==1.1.1 + # via libro-sql pyparsing==3.1.2 # via matplotlib python-dateutil==2.9.0.post0 # via arrow # via jupyter-client # via matplotlib + # via pandas python-json-logger==2.0.7 # via jupyter-events +pytz==2024.2 + # via pandas pyyaml==6.0.1 # via jupyter-events # via langchain @@ -301,9 +314,10 @@ sniffio==1.3.1 # via anyio soupsieve==2.5 # via beautifulsoup4 -sqlalchemy==2.0.29 +sqlalchemy==2.0.34 # via langchain # via langchain-community + # via libro-sql stack-data==0.6.3 # via ipython tenacity==8.2.3 @@ -346,6 +360,8 @@ typing-extensions==4.11.0 # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json +tzdata==2024.1 + # via pandas uri-template==1.3.0 # via jsonschema urllib3==2.2.1