forked from narwhals-dev/narwhals
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d1e752f
commit 3af2d14
Showing
2 changed files
with
16 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"metadata":{"kaggle":{"accelerator":"none","dataSources":[{"sourceId":167796934,"sourceType":"kernelVersion"}],"dockerImageVersionId":30673,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"},"papermill":{"default_parameters":{},"duration":458.423327,"end_time":"2024-03-22T17:31:18.077306","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2024-03-22T17:23:39.653979","version":"2.5.0"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals ","metadata":{"papermill":{"duration":33.390992,"end_time":"2024-03-22T17:24:15.601719","exception":false,"start_time":"2024-03-22T17:23:42.210727","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:32:59.927716Z","iopub.execute_input":"2024-07-05T10:32:59.928639Z","iopub.status.idle":"2024-07-05T10:33:14.029566Z","shell.execute_reply.started":"2024-07-05T10:32:59.928593Z","shell.execute_reply":"2024-07-05T10:33:14.028082Z"},"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"\u001b[33mWARNING: Skipping apache-beam as it is not installed.\u001b[0m\u001b[33m\n\u001b[0mRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (2.2.2)\nRequirement already satisfied: polars in /opt/conda/lib/python3.10/site-packages (1.0.0)\nRequirement already satisfied: pyarrow in /opt/conda/lib/python3.10/site-packages (16.1.0)\nRequirement already satisfied: narwhals in /opt/conda/lib/python3.10/site-packages (0.9.28)\nRequirement already satisfied: numpy>=1.22.4 in 
/opt/conda/lib/python3.10/site-packages (from pandas) (1.26.4)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas) (2023.4)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n","output_type":"stream"}]},{"cell_type":"code","source":"import pandas as pd\nimport polars as pl\n\npd.options.mode.copy_on_write = True\npd.options.future.infer_string = True","metadata":{"papermill":{"duration":0.907754,"end_time":"2024-03-22T17:24:39.053873","exception":false,"start_time":"2024-03-22T17:24:38.146119","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:14.032036Z","iopub.execute_input":"2024-07-05T10:33:14.032455Z","iopub.status.idle":"2024-07-05T10:33:14.039074Z","shell.execute_reply.started":"2024-07-05T10:33:14.032409Z","shell.execute_reply":"2024-07-05T10:33:14.037818Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"from typing import Any\nimport narwhals as nw\n\ndef q11(\n partsupp_ds_raw: Any,\n nation_ds_raw: Any,\n supplier_ds_raw: Any,\n) -> Any:\n\n nation_ds = nw.from_native(nation_ds_raw)\n partsupp_ds = nw.from_native(partsupp_ds_raw)\n supplier_ds = nw.from_native(supplier_ds_raw)\n\n \n var1 = \"GERMANY\"\n var2 = 0.0001\n\n q1 = (\n partsupp_ds.join(supplier_ds, left_on=\"ps_suppkey\", right_on=\"s_suppkey\")\n .join(nation_ds, left_on=\"s_nationkey\", right_on=\"n_nationkey\")\n .filter(nw.col(\"n_name\") == var1)\n )\n q2 = q1.select(\n (nw.col(\"ps_supplycost\") * nw.col(\"ps_availqty\")).sum().round(2).alias(\"tmp\")\n * var2\n )\n\n q_final = (\n q1.with_columns(\n (nw.col(\"ps_supplycost\") * 
nw.col(\"ps_availqty\"))\n .alias(\"value\")\n )\n .group_by(\"ps_partkey\")\n .agg(\n nw.sum(\"value\")\n )\n .join(q2, how=\"cross\")\n .filter(nw.col(\"value\") > nw.col(\"tmp\"))\n .select(\"ps_partkey\", \"value\")\n .sort(\"value\", descending=True)\n )\n\n return nw.to_native(q_final)","metadata":{"papermill":{"duration":0.021725,"end_time":"2024-03-22T17:24:39.080999","exception":false,"start_time":"2024-03-22T17:24:39.059274","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:14.040616Z","iopub.execute_input":"2024-07-05T10:33:14.040982Z","iopub.status.idle":"2024-07-05T10:33:14.056075Z","shell.execute_reply.started":"2024-07-05T10:33:14.040950Z","shell.execute_reply":"2024-07-05T10:33:14.054409Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\nnation = dir_ + 'nation.parquet'\nsupplier = dir_ + 'supplier.parquet'\npartsupp = dir_ + 'partsupp.parquet'","metadata":{"papermill":{"duration":0.013325,"end_time":"2024-03-22T17:24:39.099766","exception":false,"start_time":"2024-03-22T17:24:39.086441","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:14.059211Z","iopub.execute_input":"2024-07-05T10:33:14.059609Z","iopub.status.idle":"2024-07-05T10:33:14.068385Z","shell.execute_reply.started":"2024-07-05T10:33:14.059578Z","shell.execute_reply":"2024-07-05T10:33:14.067130Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"IO_FUNCS = {\n 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n 'polars[eager]': lambda x: pl.read_parquet(x),\n 'polars[lazy]': lambda x: 
pl.scan_parquet(x),\n}","metadata":{"papermill":{"duration":0.014284,"end_time":"2024-03-22T17:24:39.119737","exception":false,"start_time":"2024-03-22T17:24:39.105453","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:14.069813Z","iopub.execute_input":"2024-07-05T10:33:14.070174Z","iopub.status.idle":"2024-07-05T10:33:14.085661Z","shell.execute_reply.started":"2024-07-05T10:33:14.070142Z","shell.execute_reply":"2024-07-05T10:33:14.084444Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"results = {}","metadata":{"execution":{"iopub.status.busy":"2024-07-05T10:33:14.086962Z","iopub.execute_input":"2024-07-05T10:33:14.087330Z","iopub.status.idle":"2024-07-05T10:33:14.096595Z","shell.execute_reply.started":"2024-07-05T10:33:14.087298Z","shell.execute_reply":"2024-07-05T10:33:14.095430Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"markdown","source":"## pandas via Narwhals","metadata":{"papermill":{"duration":0.005113,"end_time":"2024-03-22T17:24:39.130472","exception":false,"start_time":"2024-03-22T17:24:39.125359","status":"completed"},"tags":[]}},{"cell_type":"code","source":"tool = 'pandas'\nfn = IO_FUNCS[tool]\ntimings = %timeit -o q11(fn(partsupp), fn(nation), fn(supplier))\nresults[tool] = timings.timings","metadata":{"papermill":{"duration":196.786925,"end_time":"2024-03-22T17:27:55.922832","exception":false,"start_time":"2024-03-22T17:24:39.135907","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:14.098215Z","iopub.execute_input":"2024-07-05T10:33:14.098662Z","iopub.status.idle":"2024-07-05T10:33:27.624855Z","shell.execute_reply.started":"2024-07-05T10:33:14.098622Z","shell.execute_reply":"2024-07-05T10:33:27.623747Z"},"trusted":true},"execution_count":19,"outputs":[{"name":"stdout","text":"1.66 s ± 118 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## pandas, pyarrow dtypes, via Narwhals","metadata":{"papermill":{"duration":0.005184,"end_time":"2024-03-22T17:27:55.933407","exception":false,"start_time":"2024-03-22T17:27:55.928223","status":"completed"},"tags":[]}},{"cell_type":"code","source":"tool = 'pandas[pyarrow]'\nfn = IO_FUNCS[tool]\ntimings = %timeit -o q11(fn(partsupp), fn(nation), fn(supplier))\nresults[tool] = timings.timings","metadata":{"papermill":{"duration":158.748353,"end_time":"2024-03-22T17:30:34.688289","exception":false,"start_time":"2024-03-22T17:27:55.939936","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:27.626162Z","iopub.execute_input":"2024-07-05T10:33:27.626489Z","iopub.status.idle":"2024-07-05T10:33:40.866730Z","shell.execute_reply.started":"2024-07-05T10:33:27.626463Z","shell.execute_reply":"2024-07-05T10:33:40.865296Z"},"trusted":true},"execution_count":20,"outputs":[{"name":"stdout","text":"1.67 s ± 86.1 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Polars read_parquet","metadata":{"papermill":{"duration":0.005773,"end_time":"2024-03-22T17:30:34.7003","exception":false,"start_time":"2024-03-22T17:30:34.694527","status":"completed"},"tags":[]}},{"cell_type":"code","source":"tool = 'polars[eager]'\nfn = IO_FUNCS[tool]\ntimings = %timeit -o q11(fn(partsupp), fn(nation), fn(supplier))\nresults[tool] = timings.timings","metadata":{"papermill":{"duration":37.821116,"end_time":"2024-03-22T17:31:12.527466","exception":false,"start_time":"2024-03-22T17:30:34.70635","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:40.868234Z","iopub.execute_input":"2024-07-05T10:33:40.868584Z","iopub.status.idle":"2024-07-05T10:33:48.072165Z","shell.execute_reply.started":"2024-07-05T10:33:40.868552Z","shell.execute_reply":"2024-07-05T10:33:48.071018Z"},"trusted":true},"execution_count":21,"outputs":[{"name":"stdout","text":"890 ms ± 13.6 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Polars scan_parquet","metadata":{"papermill":{"duration":0.005515,"end_time":"2024-03-22T17:31:12.539068","exception":false,"start_time":"2024-03-22T17:31:12.533553","status":"completed"},"tags":[]}},{"cell_type":"code","source":"tool = 'polars[lazy]'\nfn = IO_FUNCS[tool]\ntimings = %timeit -o q11(fn(partsupp), fn(nation), fn(supplier)).collect()\nresults[tool] = timings.timings","metadata":{"papermill":{"duration":4.800698,"end_time":"2024-03-22T17:31:17.346813","exception":false,"start_time":"2024-03-22T17:31:12.546115","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2024-07-05T10:33:48.075257Z","iopub.execute_input":"2024-07-05T10:33:48.075613Z","iopub.status.idle":"2024-07-05T10:33:56.980125Z","shell.execute_reply.started":"2024-07-05T10:33:48.075582Z","shell.execute_reply":"2024-07-05T10:33:56.979088Z"},"trusted":true},"execution_count":22,"outputs":[{"name":"stdout","text":"110 ms ± 12.7 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Save","metadata":{}},{"cell_type":"code","source":"import json\nwith open('results.json', 'w') as fd:\n json.dump(results, fd)\n","metadata":{"execution":{"iopub.status.busy":"2024-07-05T10:33:56.981599Z","iopub.execute_input":"2024-07-05T10:33:56.982033Z","iopub.status.idle":"2024-07-05T10:33:56.988558Z","shell.execute_reply.started":"2024-07-05T10:33:56.981992Z","shell.execute_reply":"2024-07-05T10:33:56.987342Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"from pprint import pprint\n\npprint(results)","metadata":{"execution":{"iopub.status.busy":"2024-07-05T10:33:56.990445Z","iopub.execute_input":"2024-07-05T10:33:56.990881Z","iopub.status.idle":"2024-07-05T10:33:57.000730Z","shell.execute_reply.started":"2024-07-05T10:33:56.990841Z","shell.execute_reply":"2024-07-05T10:33:56.999458Z"},"trusted":true},"execution_count":24,"outputs":[{"name":"stdout","text":"{'pandas': [1.708278326000027,\n 1.8040552429999934,\n 1.8417796100000032,\n 1.600905629999943,\n 1.6415783779998492,\n 1.5647700059998897,\n 1.493057884000109],\n 'pandas[pyarrow]': [1.6380957989999843,\n 1.5802785819998917,\n 1.5376337459999831,\n 1.7884727590001148,\n 1.7397616020000441,\n 1.7496962650000114,\n 1.6605698180001127],\n 'polars[eager]': [0.9160442119998606,\n 0.8955544509999527,\n 0.8863846530000501,\n 0.8829364579999037,\n 0.8918134509999618,\n 0.8924379529998987,\n 0.8672452630000862],\n 'polars[lazy]': [1.1258213609999075,\n 1.4064464999999018,\n 1.046419743999877,\n 1.0376091739999538,\n 1.043019643999969,\n 1.0296597439999005,\n 1.0383537459999843]}\n","output_type":"stream"}]}]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"id": "uchennaugoh/narwhals-tpch-q11-s2", | ||
"title": "Narwhals TPCH Q11 S2", | ||
"code_file": "execute.ipynb", | ||
"language": "python", | ||
"kernel_type": "notebook", | ||
"is_private": "false", | ||
"enable_gpu": "false", | ||
"enable_tpu": "false", | ||
"enable_internet": "true", | ||
"dataset_sources": [], | ||
"competition_sources": [], | ||
"kernel_sources": ["marcogorelli/tpc-h-data-parquet-s-2"], | ||
"model_sources": [] | ||
} |