diff --git a/tpch/notebooks/q13/kernel-metadata.json b/tpch/notebooks/q13/kernel-metadata.json new file mode 100644 index 000000000..7626c20d6 --- /dev/null +++ b/tpch/notebooks/q13/kernel-metadata.json @@ -0,0 +1,15 @@ +{ + "id": "marcogorelli/narwhals-tpch-q13-s2", + "title": "Narwhals TPCH Q13 S2", + "code_file": "narwhals-tpch-q13-s2.ipynb", + "language": "python", + "kernel_type": "notebook", + "is_private": "false", + "enable_gpu": "false", + "enable_tpu": "false", + "enable_internet": "true", + "dataset_sources": [], + "competition_sources": [], + "kernel_sources": ["marcogorelli/tpc-h-data-parquet-s-2"], + "model_sources": [] +} \ No newline at end of file diff --git a/tpch/notebooks/q13/narwhals-tpch-q13-s2.ipynb b/tpch/notebooks/q13/narwhals-tpch-q13-s2.ipynb new file mode 100644 index 000000000..27781cf5e --- /dev/null +++ b/tpch/notebooks/q13/narwhals-tpch-q13-s2.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1dbc9557", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:09:49.813423Z", + "iopub.status.busy": "2024-07-25T23:09:49.813055Z", + "iopub.status.idle": "2024-07-25T23:10:36.285987Z", + "shell.execute_reply": "2024-07-25T23:10:36.284597Z" + }, + "papermill": { + "duration": 46.481932, + "end_time": "2024-07-25T23:10:36.288698", + "exception": false, + "start_time": "2024-07-25T23:09:49.806766", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e8353a92", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.305747Z", + "iopub.status.busy": "2024-07-25T23:10:36.305327Z", + "iopub.status.idle": "2024-07-25T23:10:36.816412Z", + "shell.execute_reply": "2024-07-25T23:10:36.815296Z" + }, + "papermill": { + "duration": 0.522877, + "end_time": "2024-07-25T23:10:36.819167", + "exception": 
false, + "start_time": "2024-07-25T23:10:36.296290", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import polars as pl\n", + "\n", + "pd.options.mode.copy_on_write = True\n", + "pd.options.future.infer_string = True" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "86331fa9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.835559Z", + "iopub.status.busy": "2024-07-25T23:10:36.835036Z", + "iopub.status.idle": "2024-07-25T23:10:36.851655Z", + "shell.execute_reply": "2024-07-25T23:10:36.850660Z" + }, + "papermill": { + "duration": 0.02756, + "end_time": "2024-07-25T23:10:36.854110", + "exception": false, + "start_time": "2024-07-25T23:10:36.826550", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import Any\n", + "import narwhals as nw\n", + "\n", + "def q13(\n", + " customer_ds_raw: Any,\n", + " orders_ds_raw: Any\n", + ") -> Any:\n", + "\n", + " customer_ds = nw.from_native(customer_ds_raw)\n", + " orders_ds = nw.from_native(orders_ds_raw)\n", + " \n", + " var1 = \"special\"\n", + " var2 = \"requests\"\n", + "\n", + " orders_ds = orders_ds.filter(~nw.col(\"o_comment\").str.contains(f\"{var1}.*{var2}\"))\n", + " \n", + " result = (\n", + " customer_ds.join(orders_ds, left_on=\"c_custkey\", right_on=\"o_custkey\", how=\"left\")\n", + " .group_by(\"c_custkey\")\n", + " .agg(nw.col(\"o_orderkey\").count().alias(\"c_count\"))\n", + " .group_by(\"c_count\")\n", + " .agg(nw.col(\"c_count\").len().alias(\"len\"))\n", + " .select(nw.col(\"c_count\"), nw.col(\"len\").alias(\"custdist\"))\n", + " .sort(by=[\"custdist\", \"c_count\"], descending=[True, True])\n", + " )\n", + "\n", + " return nw.to_native(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "661cc834", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.870641Z", + "iopub.status.busy": 
"2024-07-25T23:10:36.870107Z", + "iopub.status.idle": "2024-07-25T23:10:36.875494Z", + "shell.execute_reply": "2024-07-25T23:10:36.874307Z" + }, + "papermill": { + "duration": 0.016452, + "end_time": "2024-07-25T23:10:36.878001", + "exception": false, + "start_time": "2024-07-25T23:10:36.861549", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", + "customer = dir_ + 'customer.parquet'\n", + "orders = dir_ + 'orders.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8631fd1d", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.894346Z", + "iopub.status.busy": "2024-07-25T23:10:36.893939Z", + "iopub.status.idle": "2024-07-25T23:10:36.899766Z", + "shell.execute_reply": "2024-07-25T23:10:36.898615Z" + }, + "papermill": { + "duration": 0.016664, + "end_time": "2024-07-25T23:10:36.902043", + "exception": false, + "start_time": "2024-07-25T23:10:36.885379", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "IO_FUNCS = {\n", + " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", + " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", + " 'polars[eager]': lambda x: pl.read_parquet(x),\n", + " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "82b3c815", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.918846Z", + "iopub.status.busy": "2024-07-25T23:10:36.917939Z", + "iopub.status.idle": "2024-07-25T23:10:36.922723Z", + "shell.execute_reply": "2024-07-25T23:10:36.921667Z" + }, + "papermill": { + "duration": 0.015614, + "end_time": "2024-07-25T23:10:36.924894", + "exception": false, + "start_time": "2024-07-25T23:10:36.909280", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "results = {}" + ] + }, + { + "cell_type": 
"markdown", + "id": "d997c627", + "metadata": { + "papermill": { + "duration": 0.006996, + "end_time": "2024-07-25T23:10:36.939302", + "exception": false, + "start_time": "2024-07-25T23:10:36.932306", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## pandas via Narwhals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eab291f2", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:10:36.955324Z", + "iopub.status.busy": "2024-07-25T23:10:36.954974Z", + "iopub.status.idle": "2024-07-25T23:11:21.862720Z", + "shell.execute_reply": "2024-07-25T23:11:21.861590Z" + }, + "papermill": { + "duration": 44.926739, + "end_time": "2024-07-25T23:11:21.873211", + "exception": false, + "start_time": "2024-07-25T23:10:36.946472", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'pandas'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q13(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "297721e0", + "metadata": { + "papermill": { + "duration": 0.006853, + "end_time": "2024-07-25T23:11:21.887504", + "exception": false, + "start_time": "2024-07-25T23:11:21.880651", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## pandas, pyarrow dtypes, via Narwhals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25b4f90f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:11:21.904256Z", + "iopub.status.busy": "2024-07-25T23:11:21.903414Z", + "iopub.status.idle": "2024-07-25T23:12:06.618899Z", + "shell.execute_reply": "2024-07-25T23:12:06.617743Z" + }, + "papermill": { + "duration": 44.733391, + "end_time": "2024-07-25T23:12:06.628137", + "exception": false, + "start_time": "2024-07-25T23:11:21.894746", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'pandas[pyarrow]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o 
q13(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "456df2b0", + "metadata": { + "papermill": { + "duration": 0.006896, + "end_time": "2024-07-25T23:12:06.642200", + "exception": false, + "start_time": "2024-07-25T23:12:06.635304", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Polars read_parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3998ae3e", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:12:06.658219Z", + "iopub.status.busy": "2024-07-25T23:12:06.657876Z", + "iopub.status.idle": "2024-07-25T23:12:20.548334Z", + "shell.execute_reply": "2024-07-25T23:12:20.547138Z" + }, + "papermill": { + "duration": 13.901571, + "end_time": "2024-07-25T23:12:20.550910", + "exception": false, + "start_time": "2024-07-25T23:12:06.649339", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'polars[eager]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q13(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "d153b79c", + "metadata": { + "papermill": { + "duration": 0.0075, + "end_time": "2024-07-25T23:12:20.566105", + "exception": false, + "start_time": "2024-07-25T23:12:20.558605", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Polars scan_parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fc67864", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:12:20.582484Z", + "iopub.status.busy": "2024-07-25T23:12:20.582099Z", + "iopub.status.idle": "2024-07-25T23:12:30.964431Z", + "shell.execute_reply": "2024-07-25T23:12:30.963406Z" + }, + "papermill": { + "duration": 10.393542, + "end_time": "2024-07-25T23:12:30.967063", + "exception": false, + "start_time": "2024-07-25T23:12:20.573521", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ 
+ "tool = 'polars[lazy]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q13(fn(customer), fn(orders)).collect()\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "03aa381e", + "metadata": { + "papermill": { + "duration": 0.007808, + "end_time": "2024-07-25T23:12:30.982613", + "exception": false, + "start_time": "2024-07-25T23:12:30.974805", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Save" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "38ab0d35", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-25T23:12:30.999827Z", + "iopub.status.busy": "2024-07-25T23:12:30.999435Z", + "iopub.status.idle": "2024-07-25T23:12:31.005087Z", + "shell.execute_reply": "2024-07-25T23:12:31.004131Z" + }, + "papermill": { + "duration": 0.017013, + "end_time": "2024-07-25T23:12:31.007464", + "exception": false, + "start_time": "2024-07-25T23:12:30.990451", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "with open('results.json', 'w') as fd:\n", + " json.dump(results, fd)\n" + ] + } + ], + "metadata": { + "kaggle": { + "accelerator": "none", + "dataSources": [ + { + "sourceId": 167796934, + "sourceType": "kernelVersion" + } + ], + "dockerImageVersionId": 30673, + "isGpuEnabled": false, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "papermill": { + "default_parameters": {}, + "duration": 164.526043, + "end_time": "2024-07-25T23:12:31.536428", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + 
"output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2024-07-25T23:09:47.010385", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tpch/notebooks/q22/execute.ipynb b/tpch/notebooks/q22/execute.ipynb new file mode 100644 index 000000000..4680b7275 --- /dev/null +++ b/tpch/notebooks/q22/execute.ipynb @@ -0,0 +1,475 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "10f5b1a7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:49:41.077701Z", + "iopub.status.busy": "2024-07-26T12:49:41.077271Z", + "iopub.status.idle": "2024-07-26T12:50:30.928716Z", + "shell.execute_reply": "2024-07-26T12:50:30.927384Z" + }, + "papermill": { + "duration": 49.860489, + "end_time": "2024-07-26T12:50:30.931456", + "exception": false, + "start_time": "2024-07-26T12:49:41.070967", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d8c5bcf8", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:30.948344Z", + "iopub.status.busy": "2024-07-26T12:50:30.947945Z", + "iopub.status.idle": "2024-07-26T12:50:31.499684Z", + "shell.execute_reply": "2024-07-26T12:50:31.498329Z" + }, + "papermill": { + "duration": 0.563589, + "end_time": "2024-07-26T12:50:31.502451", + "exception": false, + "start_time": "2024-07-26T12:50:30.938862", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import polars as pl\n", + "\n", + "pd.options.mode.copy_on_write = True\n", + "pd.options.future.infer_string = True" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2ddc9a54", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:31.519027Z", + "iopub.status.busy": "2024-07-26T12:50:31.518502Z", + "iopub.status.idle": 
"2024-07-26T12:50:31.538355Z", + "shell.execute_reply": "2024-07-26T12:50:31.537196Z" + }, + "papermill": { + "duration": 0.031352, + "end_time": "2024-07-26T12:50:31.541220", + "exception": false, + "start_time": "2024-07-26T12:50:31.509868", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import Any\n", + "import narwhals as nw\n", + "\n", + "def q22(\n", + " customer_ds_raw: Any,\n", + " orders_ds_raw: Any,\n", + ") -> Any:\n", + "\n", + " customer_ds = nw.from_native(customer_ds_raw)\n", + " orders_ds = nw.from_native(orders_ds_raw)\n", + " \n", + " \n", + " query1 = (\n", + " customer_ds.with_columns(nw.col(\"c_phone\").str.slice(0, 2).alias(\"cntrycode\"))\n", + " .filter(nw.col(\"cntrycode\").str.contains(\"13|31|23|29|30|18|17\"))\n", + " .select(\"c_acctbal\", \"c_custkey\", \"cntrycode\")\n", + " )\n", + "\n", + " query2 = query1.filter(nw.col(\"c_acctbal\") > 0.0).select(\n", + " nw.col(\"c_acctbal\").mean().alias(\"avg_acctbal\")\n", + " )\n", + "\n", + " query3 = orders_ds.select(nw.col(\"o_custkey\").unique()).with_columns(\n", + " nw.col(\"o_custkey\").alias(\"c_custkey\")\n", + " )\n", + "\n", + " final_query = (\n", + " query1.join(query3, left_on=\"c_custkey\", right_on=\"c_custkey\", how=\"left\")\n", + " .filter(nw.col(\"o_custkey\").is_null())\n", + " .join(query2, how=\"cross\")\n", + " .filter(nw.col(\"c_acctbal\") > nw.col(\"avg_acctbal\"))\n", + " .group_by(\"cntrycode\")\n", + " .agg(\n", + " nw.col(\"c_acctbal\").count().alias(\"numcust\"),\n", + " nw.col(\"c_acctbal\").sum().round(2).alias(\"totacctbal\"),\n", + " )\n", + " .sort(\"cntrycode\")\n", + " )\n", + "\n", + " return nw.to_native(final_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0968498d", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:31.557563Z", + "iopub.status.busy": "2024-07-26T12:50:31.557170Z", + "iopub.status.idle": "2024-07-26T12:50:31.562493Z", + 
"shell.execute_reply": "2024-07-26T12:50:31.561227Z" + }, + "papermill": { + "duration": 0.016247, + "end_time": "2024-07-26T12:50:31.564892", + "exception": false, + "start_time": "2024-07-26T12:50:31.548645", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", + "customer = dir_ + 'customer.parquet'\n", + "orders = dir_ + 'orders.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b0b4c398", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:31.581652Z", + "iopub.status.busy": "2024-07-26T12:50:31.580724Z", + "iopub.status.idle": "2024-07-26T12:50:31.587156Z", + "shell.execute_reply": "2024-07-26T12:50:31.585916Z" + }, + "papermill": { + "duration": 0.017416, + "end_time": "2024-07-26T12:50:31.589507", + "exception": false, + "start_time": "2024-07-26T12:50:31.572091", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "IO_FUNCS = {\n", + " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", + " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", + " 'polars[eager]': lambda x: pl.read_parquet(x),\n", + " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f2243418", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:31.605577Z", + "iopub.status.busy": "2024-07-26T12:50:31.605224Z", + "iopub.status.idle": "2024-07-26T12:50:31.610000Z", + "shell.execute_reply": "2024-07-26T12:50:31.608852Z" + }, + "papermill": { + "duration": 0.015402, + "end_time": "2024-07-26T12:50:31.612243", + "exception": false, + "start_time": "2024-07-26T12:50:31.596841", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "results = {}" + ] + }, + { + "cell_type": "markdown", + "id": "5f63c333", + "metadata": { + "papermill": { + "duration": 0.006963, + 
"end_time": "2024-07-26T12:50:31.626579", + "exception": false, + "start_time": "2024-07-26T12:50:31.619616", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## pandas via Narwhals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e07d9351", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:31.643326Z", + "iopub.status.busy": "2024-07-26T12:50:31.642928Z", + "iopub.status.idle": "2024-07-26T12:50:42.190030Z", + "shell.execute_reply": "2024-07-26T12:50:42.188641Z" + }, + "papermill": { + "duration": 10.559142, + "end_time": "2024-07-26T12:50:42.192870", + "exception": false, + "start_time": "2024-07-26T12:50:31.633728", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'pandas'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q22(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "7758c217", + "metadata": { + "papermill": { + "duration": 0.008478, + "end_time": "2024-07-26T12:50:42.209200", + "exception": false, + "start_time": "2024-07-26T12:50:42.200722", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## pandas, pyarrow dtypes, via Narwhals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3e98843", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:42.228915Z", + "iopub.status.busy": "2024-07-26T12:50:42.227947Z", + "iopub.status.idle": "2024-07-26T12:50:49.289566Z", + "shell.execute_reply": "2024-07-26T12:50:49.287955Z" + }, + "papermill": { + "duration": 7.07373, + "end_time": "2024-07-26T12:50:49.292349", + "exception": false, + "start_time": "2024-07-26T12:50:42.218619", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'pandas[pyarrow]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q22(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + 
"cell_type": "markdown", + "id": "0228c0c3", + "metadata": { + "papermill": { + "duration": 0.00828, + "end_time": "2024-07-26T12:50:49.309227", + "exception": false, + "start_time": "2024-07-26T12:50:49.300947", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Polars read_parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c7514c9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:49.328263Z", + "iopub.status.busy": "2024-07-26T12:50:49.327894Z", + "iopub.status.idle": "2024-07-26T12:50:55.518036Z", + "shell.execute_reply": "2024-07-26T12:50:55.516611Z" + }, + "papermill": { + "duration": 6.202911, + "end_time": "2024-07-26T12:50:55.520546", + "exception": false, + "start_time": "2024-07-26T12:50:49.317635", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'polars[eager]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o q22(fn(customer), fn(orders))\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "f2b87124", + "metadata": { + "papermill": { + "duration": 0.008436, + "end_time": "2024-07-26T12:50:55.537904", + "exception": false, + "start_time": "2024-07-26T12:50:55.529468", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Polars scan_parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "444bd29c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:50:55.556976Z", + "iopub.status.busy": "2024-07-26T12:50:55.556249Z", + "iopub.status.idle": "2024-07-26T12:51:08.135016Z", + "shell.execute_reply": "2024-07-26T12:51:08.133598Z" + }, + "papermill": { + "duration": 12.591074, + "end_time": "2024-07-26T12:51:08.137474", + "exception": false, + "start_time": "2024-07-26T12:50:55.546400", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "tool = 'polars[lazy]'\n", + "fn = IO_FUNCS[tool]\n", + "timings = %timeit -o 
q22(fn(customer), fn(orders)).collect()\n", + "results[tool] = timings.all_runs" + ] + }, + { + "cell_type": "markdown", + "id": "24023413", + "metadata": { + "papermill": { + "duration": 0.008232, + "end_time": "2024-07-26T12:51:08.154223", + "exception": false, + "start_time": "2024-07-26T12:51:08.145991", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Save" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "74585c82", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-26T12:51:08.172684Z", + "iopub.status.busy": "2024-07-26T12:51:08.172310Z", + "iopub.status.idle": "2024-07-26T12:51:08.177951Z", + "shell.execute_reply": "2024-07-26T12:51:08.176919Z" + }, + "papermill": { + "duration": 0.017523, + "end_time": "2024-07-26T12:51:08.180173", + "exception": false, + "start_time": "2024-07-26T12:51:08.162650", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "with open('results.json', 'w') as fd:\n", + " json.dump(results, fd)\n" + ] + } + ], + "metadata": { + "kaggle": { + "accelerator": "none", + "dataSources": [ + { + "sourceId": 167796934, + "sourceType": "kernelVersion" + } + ], + "dockerImageVersionId": 30673, + "isGpuEnabled": false, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "papermill": { + "default_parameters": {}, + "duration": 90.628471, + "end_time": "2024-07-26T12:51:08.710737", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": 
"2024-07-26T12:49:38.082266", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tpch/notebooks/q22/kernel-metadata.json b/tpch/notebooks/q22/kernel-metadata.json new file mode 100644 index 000000000..7626c20d6 --- /dev/null +++ b/tpch/notebooks/q22/kernel-metadata.json @@ -0,0 +1,15 @@ +{ + "id": "marcogorelli/narwhals-tpch-q22-s2", + "title": "Narwhals TPCH Q22 S2", + "code_file": "execute.ipynb", + "language": "python", + "kernel_type": "notebook", + "is_private": "false", + "enable_gpu": "false", + "enable_tpu": "false", + "enable_internet": "true", + "dataset_sources": [], + "competition_sources": [], + "kernel_sources": ["marcogorelli/tpc-h-data-parquet-s-2"], + "model_sources": [] +} \ No newline at end of file