diff --git a/404.html b/404.html index dcfbcd2..d9e530a 100644 --- a/404.html +++ b/404.html @@ -451,7 +451,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/examples/example.ddb b/examples/example.ddb index 2f4aa64..ceeba0d 100644 Binary files a/examples/example.ddb and b/examples/example.ddb differ diff --git a/examples/fashion-brand-co2e.ipynb b/examples/fashion-brand-co2e.ipynb index 3dfa2cf..355cbd7 100644 --- a/examples/fashion-brand-co2e.ipynb +++ b/examples/fashion-brand-co2e.ipynb @@ -21,10 +21,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:15.405313Z", - "iopub.status.busy": "2024-09-25T08:40:15.405215Z", - "iopub.status.idle": "2024-09-25T08:40:15.649547Z", - "shell.execute_reply": "2024-09-25T08:40:15.649239Z" + "iopub.execute_input": "2024-09-25T18:17:07.663533Z", + "iopub.status.busy": "2024-09-25T18:17:07.662999Z", + "iopub.status.idle": "2024-09-25T18:17:07.946187Z", + "shell.execute_reply": "2024-09-25T18:17:07.945926Z" } }, "outputs": [ @@ -33,63 +33,63 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 yearcategoryproduct_idfootprintunitsyearcategoryproduct_idfootprintunits
905122022TSHIRTcea264427.62kgCO2e1,486796222022PANTS0c7938bf13.38kgCO2e105
460752022JACKETd17ec41538.43kgCO2e2,254235752021PANTS7693f75b36.50kgCO2e41
518492022PANTSd5531c9b41.55kgCO2e81134172023PANTSc5c5414026.89kgCO2e288
128182021PANTS335f31e313.53kgCO2e4677912022PANTSaed08558106.76kgCO2e301
648702022PANTSe5562fe829.16kgCO2e576490452022PANTSa1cf7d5c35.67kgCO2e925
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 1, @@ -123,10 +123,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:15.666848Z", - "iopub.status.busy": "2024-09-25T08:40:15.666609Z", - "iopub.status.idle": "2024-09-25T08:40:15.688363Z", - "shell.execute_reply": "2024-09-25T08:40:15.688091Z" + "iopub.execute_input": "2024-09-25T18:17:07.963649Z", + "iopub.status.busy": "2024-09-25T18:17:07.963431Z", + "iopub.status.idle": "2024-09-25T18:17:07.983272Z", + "shell.execute_reply": "2024-09-25T18:17:07.983004Z" } }, "outputs": [ @@ -135,12 +135,12 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -150,25 +150,25 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 averagediffaveragediff
year
202121.95kgCO2e202121.95kgCO2e
202221.71kgCO2e-0.24kgCO2e202221.71kgCO2e-0.24kgCO2e
202322.74kgCO2e1.03kgCO2e202322.74kgCO2e1.03kgCO2e
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -208,10 +208,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:15.689716Z", - "iopub.status.busy": "2024-09-25T08:40:15.689591Z", - "iopub.status.idle": "2024-09-25T08:40:16.035496Z", - "shell.execute_reply": "2024-09-25T08:40:16.035164Z" + "iopub.execute_input": "2024-09-25T18:17:07.984631Z", + "iopub.status.busy": "2024-09-25T18:17:07.984530Z", + "iopub.status.idle": "2024-09-25T18:17:08.330730Z", + "shell.execute_reply": "2024-09-25T18:17:08.330457Z" } }, "outputs": [ @@ -220,13 +220,13 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -237,72 +237,72 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
  innermixinnermix
year
2022DRESS0.05kgCO2e-0.14kgCO2e2022DRESS0.05kgCO2e-0.14kgCO2e
JACKET-0.17kgCO2e-0.69kgCO2eJACKET-0.17kgCO2e-0.69kgCO2e
PANTS0.61kgCO2e0.20kgCO2ePANTS0.61kgCO2e0.20kgCO2e
SHIRT-0.02kgCO2e0.00kgCO2eSHIRT-0.02kgCO2e0.00kgCO2e
SWEATER-0.39kgCO2e-0.09kgCO2eSWEATER-0.39kgCO2e-0.09kgCO2e
TSHIRT0.08kgCO2e0.30kgCO2eTSHIRT0.08kgCO2e0.30kgCO2e
2023DRESS-0.08kgCO2e0.51kgCO2e2023DRESS-0.08kgCO2e0.51kgCO2e
JACKET-0.13kgCO2e0.97kgCO2eJACKET-0.13kgCO2e0.97kgCO2e
PANTS-0.22kgCO2e-0.09kgCO2ePANTS-0.22kgCO2e-0.09kgCO2e
SHIRT0.02kgCO2e-0.03kgCO2eSHIRT0.02kgCO2e-0.03kgCO2e
SWEATER-0.06kgCO2e0.36kgCO2eSWEATER-0.06kgCO2e0.36kgCO2e
TSHIRT-0.16kgCO2e-0.06kgCO2eTSHIRT-0.16kgCO2e-0.06kgCO2e
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -343,10 +343,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:16.037078Z", - "iopub.status.busy": "2024-09-25T08:40:16.036965Z", - "iopub.status.idle": "2024-09-25T08:40:16.268245Z", - "shell.execute_reply": "2024-09-25T08:40:16.267977Z" + "iopub.execute_input": "2024-09-25T18:17:08.332085Z", + "iopub.status.busy": "2024-09-25T18:17:08.331986Z", + "iopub.status.idle": "2024-09-25T18:17:08.541018Z", + "shell.execute_reply": "2024-09-25T18:17:08.540737Z" } }, "outputs": [ @@ -355,23 +355,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ diff --git a/examples/fashion-brand-co2e/index.html b/examples/fashion-brand-co2e/index.html index 2cb9b8c..a115db0 100644 --- a/examples/fashion-brand-co2e/index.html +++ b/examples/fashion-brand-co2e/index.html @@ -477,7 +477,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 @@ -545,57 +545,57 @@

Fashion brand CO2e emissions 👟 - +
- - - - - + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + +
 yearcategoryproduct_idfootprintunitsyearcategoryproduct_idfootprintunits
905122022TSHIRTcea264427.62kgCO2e1,486796222022PANTS0c7938bf13.38kgCO2e105
460752022JACKETd17ec41538.43kgCO2e2,254235752021PANTS7693f75b36.50kgCO2e41
518492022PANTSd5531c9b41.55kgCO2e81134172023PANTSc5c5414026.89kgCO2e288
128182021PANTS335f31e313.53kgCO2e4677912022PANTSaed08558106.76kgCO2e301
648702022PANTSe5562fe829.16kgCO2e576490452022PANTSa1cf7d5c35.67kgCO2e925
@@ -613,12 +613,12 @@

Fashion brand CO2e emissions 👟 - +
- - + + @@ -628,19 +628,19 @@

Fashion brand CO2e emissions 👟

- - - + + + - - - + + + - - - + + +
 averagediffaveragediff
year
202121.95kgCO2e202121.95kgCO2e
202221.71kgCO2e-0.24kgCO2e202221.71kgCO2e-0.24kgCO2e
202322.74kgCO2e1.03kgCO2e202322.74kgCO2e1.03kgCO2e
@@ -664,13 +664,13 @@

Fashion brand CO2e emissions 👟 - +
- - + + @@ -681,66 +681,66 @@

Fashion brand CO2e emissions 👟

- - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + +
   innermixinnermix
year
2022DRESS0.05kgCO2e-0.14kgCO2e2022DRESS0.05kgCO2e-0.14kgCO2e
JACKET-0.17kgCO2e-0.69kgCO2eJACKET-0.17kgCO2e-0.69kgCO2e
PANTS0.61kgCO2e0.20kgCO2ePANTS0.61kgCO2e0.20kgCO2e
SHIRT-0.02kgCO2e0.00kgCO2eSHIRT-0.02kgCO2e0.00kgCO2e
SWEATER-0.39kgCO2e-0.09kgCO2eSWEATER-0.39kgCO2e-0.09kgCO2e
TSHIRT0.08kgCO2e0.30kgCO2eTSHIRT0.08kgCO2e0.30kgCO2e
2023DRESS-0.08kgCO2e0.51kgCO2e2023DRESS-0.08kgCO2e0.51kgCO2e
JACKET-0.13kgCO2e0.97kgCO2eJACKET-0.13kgCO2e0.97kgCO2e
PANTS-0.22kgCO2e-0.09kgCO2ePANTS-0.22kgCO2e-0.09kgCO2e
SHIRT0.02kgCO2e-0.03kgCO2eSHIRT0.02kgCO2e-0.03kgCO2e
SWEATER-0.06kgCO2e0.36kgCO2eSWEATER-0.06kgCO2e0.36kgCO2e
TSHIRT-0.16kgCO2e-0.06kgCO2eTSHIRT-0.16kgCO2e-0.06kgCO2e
@@ -754,23 +754,23 @@

Fashion brand CO2e emissions 👟
explainer.plot(products)
 
-
+

This is better than reporting the average footprint and unit produced separately. It's more informative to quantify their contribution to the change in emissions. Here it's good to confirm that the decrease in emissions is mostly due to a reduction in the number of units produced for both years. But it's also good to see that there was an increase due to the average footprint in 2023. Importantly, each one of these effects is calculated, and not just assumed.

diff --git a/examples/ibis.ipynb b/examples/ibis.ipynb index 0c56da7..89b1a87 100644 --- a/examples/ibis.ipynb +++ b/examples/ibis.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Different backend support with Ibis 🐦" + "# Different backend support with Ibis 🪽" ] }, { @@ -19,10 +19,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:17.697850Z", - "iopub.status.busy": "2024-09-25T08:40:17.697492Z", - "iopub.status.idle": "2024-09-25T08:40:18.283974Z", - "shell.execute_reply": "2024-09-25T08:40:18.283643Z" + "iopub.execute_input": "2024-09-25T18:17:09.953693Z", + "iopub.status.busy": "2024-09-25T18:17:09.953258Z", + "iopub.status.idle": "2024-09-25T18:17:10.426438Z", + "shell.execute_reply": "2024-09-25T18:17:10.426171Z" } }, "outputs": [ @@ -67,10 +67,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:18.285677Z", - "iopub.status.busy": "2024-09-25T08:40:18.285576Z", - "iopub.status.idle": "2024-09-25T08:40:18.302537Z", - "shell.execute_reply": "2024-09-25T08:40:18.302247Z" + "iopub.execute_input": "2024-09-25T18:17:10.427787Z", + "iopub.status.busy": "2024-09-25T18:17:10.427672Z", + "iopub.status.idle": "2024-09-25T18:17:10.442879Z", + "shell.execute_reply": "2024-09-25T18:17:10.442634Z" } }, "outputs": [ @@ -95,10 +95,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:18.303900Z", - "iopub.status.busy": "2024-09-25T08:40:18.303802Z", - "iopub.status.idle": "2024-09-25T08:40:18.320391Z", - "shell.execute_reply": "2024-09-25T08:40:18.320127Z" + "iopub.execute_input": "2024-09-25T18:17:10.444029Z", + "iopub.status.busy": "2024-09-25T18:17:10.443927Z", + "iopub.status.idle": "2024-09-25T18:17:10.459534Z", + "shell.execute_reply": "2024-09-25T18:17:10.459262Z" } }, "outputs": [ @@ -148,10 +148,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": 
"2024-09-25T08:40:18.321693Z", - "iopub.status.busy": "2024-09-25T08:40:18.321596Z", - "iopub.status.idle": "2024-09-25T08:40:18.436592Z", - "shell.execute_reply": "2024-09-25T08:40:18.436269Z" + "iopub.execute_input": "2024-09-25T18:17:10.460956Z", + "iopub.status.busy": "2024-09-25T18:17:10.460848Z", + "iopub.status.idle": "2024-09-25T18:17:10.570482Z", + "shell.execute_reply": "2024-09-25T18:17:10.570200Z" } }, "outputs": [ @@ -218,10 +218,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:18.438032Z", - "iopub.status.busy": "2024-09-25T08:40:18.437923Z", - "iopub.status.idle": "2024-09-25T08:40:18.447657Z", - "shell.execute_reply": "2024-09-25T08:40:18.447396Z" + "iopub.execute_input": "2024-09-25T18:17:10.571729Z", + "iopub.status.busy": "2024-09-25T18:17:10.571626Z", + "iopub.status.idle": "2024-09-25T18:17:10.580820Z", + "shell.execute_reply": "2024-09-25T18:17:10.580616Z" } }, "outputs": [ @@ -245,143 +245,118 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:18.448884Z", - "iopub.status.busy": "2024-09-25T08:40:18.448793Z", - "iopub.status.idle": "2024-09-25T08:40:18.477238Z", - "shell.execute_reply": "2024-09-25T08:40:18.476975Z" + "iopub.execute_input": "2024-09-25T18:17:10.582057Z", + "iopub.status.busy": "2024-09-25T18:17:10.581973Z", + "iopub.status.idle": "2024-09-25T18:17:10.632387Z", + "shell.execute_reply": "2024-09-25T18:17:10.632131Z" } }, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", - "\n", + "
\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - "
yearcategoryinnermix
 yearcategoryinnermix
02022DRESS3.931932e+06-1.881370e+0702022DRESS3931932.447552-18813695.617552
12022JACKET-1.510008e+07-9.238617e+0712022JACKET-15100081.026506-92386166.203493
22022PANTS4.002506e+075.295190e+0722022PANTS40025058.51525152951900.074743
32022SHIRT-1.484809e+06-5.791456e+0632022SHIRT-1484809.008950-5791455.871050
42022SWEATER-2.676209e+071.181504e+0742022SWEATER-26762091.19671511815043.806715
52022TSHIRT6.650940e+06-2.311836e+0752022TSHIRT6650940.137541-23118359.127541
62023DRESS-4.078094e+06-1.240339e+0762023DRESS-4078094.357618-12403387.412381
72023JACKET-6.793317e+06-4.924036e+0772023JACKET-6793316.568952-49240364.221049
82023PANTS-1.636299e+07-2.295608e+0882023PANTS-16362993.150075-229560842.809919
92023SHIRT8.920908e+05-4.019144e+0692023SHIRT892090.812023-4019143.772023
102023SWEATER-5.701391e+06-1.130507e+08102023SWEATER-5701391.450394-113050731.029606
112023TSHIRT-1.150391e+07-8.391323e+07112023TSHIRT-11503906.840478-83913226.849522
\n", - "
" + "\n" ], "text/plain": [ - " year category inner mix\n", - "0 2022 DRESS 3.931932e+06 -1.881370e+07\n", - "1 2022 JACKET -1.510008e+07 -9.238617e+07\n", - "2 2022 PANTS 4.002506e+07 5.295190e+07\n", - "3 2022 SHIRT -1.484809e+06 -5.791456e+06\n", - "4 2022 SWEATER -2.676209e+07 1.181504e+07\n", - "5 2022 TSHIRT 6.650940e+06 -2.311836e+07\n", - "6 2023 DRESS -4.078094e+06 -1.240339e+07\n", - "7 2023 JACKET -6.793317e+06 -4.924036e+07\n", - "8 2023 PANTS -1.636299e+07 -2.295608e+08\n", - "9 2023 SHIRT 8.920908e+05 -4.019144e+06\n", - "10 2023 SWEATER -5.701391e+06 -1.130507e+08\n", - "11 2023 TSHIRT -1.150391e+07 -8.391323e+07" + "" ] }, "execution_count": 6, @@ -390,7 +365,7 @@ } ], "source": [ - "explanation.execute()" + "explanation.execute().style.format()" ] }, { @@ -398,10 +373,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:18.478434Z", - "iopub.status.busy": "2024-09-25T08:40:18.478337Z", - "iopub.status.idle": "2024-09-25T08:40:18.510327Z", - "shell.execute_reply": "2024-09-25T08:40:18.510055Z" + "iopub.execute_input": "2024-09-25T18:17:10.633866Z", + "iopub.status.busy": "2024-09-25T18:17:10.633689Z", + "iopub.status.idle": "2024-09-25T18:17:10.666507Z", + "shell.execute_reply": "2024-09-25T18:17:10.666257Z" } }, "outputs": [ diff --git a/examples/ibis/index.html b/examples/ibis/index.html index 74b58f2..775a4a9 100644 --- a/examples/ibis/index.html +++ b/examples/ibis/index.html @@ -18,7 +18,7 @@ - Different backend support with Ibis 🐦 - icanexplain + Different backend support with Ibis 🪽 - icanexplain @@ -102,7 +102,7 @@
- Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽
@@ -474,7 +474,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 @@ -528,7 +528,7 @@ -

Different backend support with Ibis 🐦

+

Different backend support with Ibis 🪽

icanexplain is implemented with Ibis. This means that it is framework agnostic, and can work with different backends. This example shows how to use it with DuckDB.

import ibis
 import icanexplain as ice
@@ -601,120 +601,107 @@ 

Different backend support with Ibis

ibis.expr.types.relations.Table
 
-
explanation.execute()
+
explanation.execute().style.format()
 
-
- - +
- - - - - - + + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + +
yearcategoryinnermix
 yearcategoryinnermix
02022DRESS3.931932e+06-1.881370e+0702022DRESS3931932.447552-18813695.617552
12022JACKET-1.510008e+07-9.238617e+0712022JACKET-15100081.026506-92386166.203493
22022PANTS4.002506e+075.295190e+0722022PANTS40025058.51525152951900.074743
32022SHIRT-1.484809e+06-5.791456e+0632022SHIRT-1484809.008950-5791455.871050
42022SWEATER-2.676209e+071.181504e+0742022SWEATER-26762091.19671511815043.806715
52022TSHIRT6.650940e+06-2.311836e+0752022TSHIRT6650940.137541-23118359.127541
62023DRESS-4.078094e+06-1.240339e+0762023DRESS-4078094.357618-12403387.412381
72023JACKET-6.793317e+06-4.924036e+0772023JACKET-6793316.568952-49240364.221049
82023PANTS-1.636299e+07-2.295608e+0882023PANTS-16362993.150075-229560842.809919
92023SHIRT8.920908e+05-4.019144e+0692023SHIRT892090.812023-4019143.772023
102023SWEATER-5.701391e+06-1.130507e+08102023SWEATER-5701391.450394-113050731.029606
112023TSHIRT-1.150391e+07-8.391323e+07112023TSHIRT-11503906.840478-83913226.849522
-
ibis.to_sql(explanation)
 
diff --git a/examples/iowa-whiskey-sales.ipynb b/examples/iowa-whiskey-sales.ipynb index 8cb4ff2..2aa40a6 100644 --- a/examples/iowa-whiskey-sales.ipynb +++ b/examples/iowa-whiskey-sales.ipynb @@ -19,10 +19,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:20.192056Z", - "iopub.status.busy": "2024-09-25T08:40:20.191922Z", - "iopub.status.idle": "2024-09-25T08:40:20.404476Z", - "shell.execute_reply": "2024-09-25T08:40:20.404182Z" + "iopub.execute_input": "2024-09-25T18:17:12.047320Z", + "iopub.status.busy": "2024-09-25T18:17:12.046926Z", + "iopub.status.idle": "2024-09-25T18:17:12.246438Z", + "shell.execute_reply": "2024-09-25T18:17:12.246181Z" } }, "outputs": [ @@ -31,81 +31,81 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 datecategoryvendorsales_amountprice_per_bottlebottles_soldbottle_volume_mlyeardatecategoryvendorsales_amountprice_per_bottlebottles_soldbottle_volume_mlyear
02012-06-04CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.67000061750201202012-06-04CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
12016-01-05STRAIGHT BOURBON WHISKIESCAMPARI(SKYY)18.7600009.3800002375201612016-01-05STRAIGHT BOURBON WHISKIESCAMPARI(SKYY)18.7600009.38000023752016
22016-05-25CANADIAN WHISKIESDIAGEO AMERICAS11.03000011.0300001300201622016-05-25CANADIAN WHISKIESDIAGEO AMERICAS11.03000011.03000013002016
32016-01-20CANADIAN WHISKIESPHILLIPS BEVERAGE COMPANY33.84000011.2800003750201632016-01-20CANADIAN WHISKIESPHILLIPS BEVERAGE COMPANY33.84000011.28000037502016
42012-03-19CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.67000061750201242012-03-19CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 1, @@ -132,10 +132,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:20.406107Z", - "iopub.status.busy": "2024-09-25T08:40:20.405990Z", - "iopub.status.idle": "2024-09-25T08:40:20.420649Z", - "shell.execute_reply": "2024-09-25T08:40:20.420373Z" + "iopub.execute_input": "2024-09-25T18:17:12.247872Z", + "iopub.status.busy": "2024-09-25T18:17:12.247766Z", + "iopub.status.idle": "2024-09-25T18:17:12.267108Z", + "shell.execute_reply": "2024-09-25T18:17:12.266828Z" } }, "outputs": [ @@ -144,12 +144,12 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -159,25 +159,25 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 sales_amountdiffsales_amountdiff
year
2012$1,842,098.862012$1,842,098.86
2016$2,298,505.88$456,407.022016$2,298,505.88$456,407.02
2020$3,378,164.43$1,079,658.552020$3,378,164.43$1,079,658.55
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -216,10 +216,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:20.422041Z", - "iopub.status.busy": "2024-09-25T08:40:20.421955Z", - "iopub.status.idle": "2024-09-25T08:40:20.610846Z", - "shell.execute_reply": "2024-09-25T08:40:20.610586Z" + "iopub.execute_input": "2024-09-25T18:17:12.268369Z", + "iopub.status.busy": "2024-09-25T18:17:12.268266Z", + "iopub.status.idle": "2024-09-25T18:17:12.458516Z", + "shell.execute_reply": "2024-09-25T18:17:12.458245Z" } }, "outputs": [ @@ -227,17 +227,17 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -248,92 +248,92 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
  innermixinnermix
year
2016BLENDED WHISKIES$17,854.43$7,356.772016BLENDED WHISKIES$17,854.43$7,356.77
CANADIAN WHISKIES$0$225,902.66CANADIAN WHISKIES$0$225,902.66
CORN WHISKIES$0$4,113.90CORN WHISKIES$0$4,113.90
IRISH WHISKIES$22,144.48$75,122.83IRISH WHISKIES$22,144.48$75,122.83
SCOTCH WHISKIES$19,591.97$0SCOTCH WHISKIES$19,591.97$0
SINGLE BARREL BOURBON WHISKIES$1,852.03$6,375.43SINGLE BARREL BOURBON WHISKIES$1,852.03$6,375.43
STRAIGHT BOURBON WHISKIES$107,144.93$97,934.50STRAIGHT BOURBON WHISKIES$107,144.93$97,934.50
STRAIGHT RYE WHISKIES$0$0STRAIGHT RYE WHISKIES$0$0
2020BLENDED WHISKIES$83,342.60$59,768.582020BLENDED WHISKIES$83,342.60$59,768.58
CANADIAN WHISKIES$224,022.62$149,363.35CANADIAN WHISKIES$224,022.62$149,363.35
CORN WHISKIES$1,517.48$1,453.26CORN WHISKIES$1,517.48$1,453.26
IRISH WHISKIES$0$67,344.41IRISH WHISKIES$0$67,344.41
SCOTCH WHISKIES$19,840.48$0SCOTCH WHISKIES$19,840.48$0
SINGLE BARREL BOURBON WHISKIES$11,958.32$3,819.27SINGLE BARREL BOURBON WHISKIES$11,958.32$3,819.27
STRAIGHT BOURBON WHISKIES$167,864.46$268,064.74STRAIGHT BOURBON WHISKIES$167,864.46$268,064.74
STRAIGHT RYE WHISKIES$0$64,056.43STRAIGHT RYE WHISKIES$0$64,056.43
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -378,10 +378,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:20.615379Z", - "iopub.status.busy": "2024-09-25T08:40:20.615204Z", - "iopub.status.idle": "2024-09-25T08:40:20.901358Z", - "shell.execute_reply": "2024-09-25T08:40:20.901095Z" + "iopub.execute_input": "2024-09-25T18:17:12.459975Z", + "iopub.status.busy": "2024-09-25T18:17:12.459870Z", + "iopub.status.idle": "2024-09-25T18:17:12.644977Z", + "shell.execute_reply": "2024-09-25T18:17:12.644671Z" } }, "outputs": [ @@ -390,23 +390,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ diff --git a/examples/iowa-whiskey-sales/index.html b/examples/iowa-whiskey-sales/index.html index 54d75fc..cd198a8 100644 --- a/examples/iowa-whiskey-sales/index.html +++ b/examples/iowa-whiskey-sales/index.html @@ -477,7 +477,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 @@ -539,75 +539,75 @@

Iowa whiskey sales 🥃 - +
- - - - - - - - + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + +
 datecategoryvendorsales_amountprice_per_bottlebottles_soldbottle_volume_mlyeardatecategoryvendorsales_amountprice_per_bottlebottles_soldbottle_volume_mlyear
02012-06-04CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.67000061750201202012-06-04CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
12016-01-05STRAIGHT BOURBON WHISKIESCAMPARI(SKYY)18.7600009.3800002375201612016-01-05STRAIGHT BOURBON WHISKIESCAMPARI(SKYY)18.7600009.38000023752016
22016-05-25CANADIAN WHISKIESDIAGEO AMERICAS11.03000011.0300001300201622016-05-25CANADIAN WHISKIESDIAGEO AMERICAS11.03000011.03000013002016
32016-01-20CANADIAN WHISKIESPHILLIPS BEVERAGE COMPANY33.84000011.2800003750201632016-01-20CANADIAN WHISKIESPHILLIPS BEVERAGE COMPANY33.84000011.28000037502016
42012-03-19CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.67000061750201242012-03-19CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
@@ -629,12 +629,12 @@

Iowa whiskey sales 🥃 - +
- - + + @@ -644,19 +644,19 @@

Iowa whiskey sales 🥃2012 -

- + + + - - - + + + - - - + + +
 sales_amountdiffsales_amountdiff
year$1,842,098.862012$1,842,098.86
2016$2,298,505.88$456,407.022016$2,298,505.88$456,407.02
2020$3,378,164.43$1,079,658.552020$3,378,164.43$1,079,658.55
@@ -681,17 +681,17 @@

Iowa whiskey sales 🥃)

- +
- - + + @@ -702,86 +702,86 @@

Iowa whiskey sales 🥃2016 -

- - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + +
   innermixinnermix
yearBLENDED WHISKIES$17,854.43$7,356.772016BLENDED WHISKIES$17,854.43$7,356.77
CANADIAN WHISKIES$0$225,902.66CANADIAN WHISKIES$0$225,902.66
CORN WHISKIES$0$4,113.90CORN WHISKIES$0$4,113.90
IRISH WHISKIES$22,144.48$75,122.83IRISH WHISKIES$22,144.48$75,122.83
SCOTCH WHISKIES$19,591.97$0SCOTCH WHISKIES$19,591.97$0
SINGLE BARREL BOURBON WHISKIES$1,852.03$6,375.43SINGLE BARREL BOURBON WHISKIES$1,852.03$6,375.43
STRAIGHT BOURBON WHISKIES$107,144.93$97,934.50STRAIGHT BOURBON WHISKIES$107,144.93$97,934.50
STRAIGHT RYE WHISKIES$0$0STRAIGHT RYE WHISKIES$0$0
2020BLENDED WHISKIES$83,342.60$59,768.582020BLENDED WHISKIES$83,342.60$59,768.58
CANADIAN WHISKIES$224,022.62$149,363.35CANADIAN WHISKIES$224,022.62$149,363.35
CORN WHISKIES$1,517.48$1,453.26CORN WHISKIES$1,517.48$1,453.26
IRISH WHISKIES$0$67,344.41IRISH WHISKIES$0$67,344.41
SCOTCH WHISKIES$19,840.48$0SCOTCH WHISKIES$19,840.48$0
SINGLE BARREL BOURBON WHISKIES$11,958.32$3,819.27SINGLE BARREL BOURBON WHISKIES$11,958.32$3,819.27
STRAIGHT BOURBON WHISKIES$167,864.46$268,064.74STRAIGHT BOURBON WHISKIES$167,864.46$268,064.74
STRAIGHT RYE WHISKIES$0$64,056.43STRAIGHT RYE WHISKIES$0$64,056.43
@@ -792,23 +792,23 @@

Iowa whiskey sales 🥃
explainer.plot(sales)
 
-
+
diff --git a/examples/simple-revenue-funnel.ipynb b/examples/simple-revenue-funnel.ipynb index 9239fdd..3c5d281 100644 --- a/examples/simple-revenue-funnel.ipynb +++ b/examples/simple-revenue-funnel.ipynb @@ -19,10 +19,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:22.306476Z", - "iopub.status.busy": "2024-09-25T08:40:22.305631Z", - "iopub.status.idle": "2024-09-25T08:40:22.376497Z", - "shell.execute_reply": "2024-09-25T08:40:22.376158Z" + "iopub.execute_input": "2024-09-25T18:17:14.026252Z", + "iopub.status.busy": "2024-09-25T18:17:14.025915Z", + "iopub.status.idle": "2024-09-25T18:17:14.104707Z", + "shell.execute_reply": "2024-09-25T18:17:14.104426Z" } }, "outputs": [ @@ -31,132 +31,132 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 dategroupimpressionsclicksconversionsrevenuedategroupimpressionsclicksconversionsrevenue
02018-01-01A1000150120$8,600.0002018-01-01A1000150120$8,600.00
12018-01-01B2000150150$9,400.0012018-01-01B2000150150$9,400.00
22018-01-01C2500250125$10,750.0022018-01-01C2500250125$10,750.00
32019-01-01A1000120160$9,055.0032019-01-01A1000120160$9,055.00
42019-01-01B2150200145$8,739.0042019-01-01B2150200145$8,739.00
52019-01-01C2000400166$10,147.0052019-01-01C2000400166$10,147.00
62018-02-01A502010$500.0062018-02-01A502010$500.00
72018-02-01B2000300150$11,400.0072018-02-01B2000300150$11,400.00
82018-02-01C2500250125$8,750.0082018-02-01C2500250125$8,750.00
92019-02-01A25001000500$50,000.0092019-02-01A25001000500$50,000.00
102019-02-01B2150323145$10,739.00102019-02-01B2150323145$10,739.00
112019-02-01C2000320166$12,147.00112019-02-01C2000320166$12,147.00
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 1, @@ -199,10 +199,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:22.378509Z", - "iopub.status.busy": "2024-09-25T08:40:22.378303Z", - "iopub.status.idle": "2024-09-25T08:40:22.398464Z", - "shell.execute_reply": "2024-09-25T08:40:22.398209Z" + "iopub.execute_input": "2024-09-25T18:17:14.106265Z", + "iopub.status.busy": "2024-09-25T18:17:14.106153Z", + "iopub.status.idle": "2024-09-25T18:17:14.124196Z", + "shell.execute_reply": "2024-09-25T18:17:14.123930Z" } }, "outputs": [ @@ -211,15 +211,15 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -232,26 +232,26 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 impressionsclick_rateconversion_rateaverage_spendrevenueimpressionsclick_rateconversion_rateaverage_spendrevenue
year
2018100500.1114430.607143$72.65$49,400.002018100500.1114430.607143$72.65$49,400.00
2019118000.2002540.542531$78.65$100,827.002019118000.2002540.542531$78.65$100,827.00
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -306,10 +306,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:22.399934Z", - "iopub.status.busy": "2024-09-25T08:40:22.399828Z", - "iopub.status.idle": "2024-09-25T08:40:22.728790Z", - "shell.execute_reply": "2024-09-25T08:40:22.728507Z" + "iopub.execute_input": "2024-09-25T18:17:14.125577Z", + "iopub.status.busy": "2024-09-25T18:17:14.125466Z", + "iopub.status.idle": "2024-09-25T18:17:14.430522Z", + "shell.execute_reply": "2024-09-25T18:17:14.430007Z" } }, "outputs": [ @@ -317,20 +317,20 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -344,55 +344,55 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
   impressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contributionimpressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contribution
year
20191A$0.00-$1,720.00$4,586.67-$2,411.6720191A$0.00-$1,720.00$4,586.67-$2,411.67
B$705.00$2,428.33-$3,446.67-$347.67B$705.00$2,428.33-$3,446.67-$347.67
C-$2,150.00$8,600.00-$2,924.00-$4,129.00C-$2,150.00$8,600.00-$2,924.00-$4,129.00
2A$24,500.00$0.00$0.00$25,000.002A$24,500.00$0.00$0.00$25,000.00
B$855.00$19.00-$1,254.00-$281.00B$855.00$19.00-$1,254.00-$281.00
C-$1,750.00$4,200.00$420.00$527.00C-$1,750.00$4,200.00$420.00$527.00
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -435,10 +435,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:22.730483Z", - "iopub.status.busy": "2024-09-25T08:40:22.730381Z", - "iopub.status.idle": "2024-09-25T08:40:22.742096Z", - "shell.execute_reply": "2024-09-25T08:40:22.741750Z" + "iopub.execute_input": "2024-09-25T18:17:14.432095Z", + "iopub.status.busy": "2024-09-25T18:17:14.431996Z", + "iopub.status.idle": "2024-09-25T18:17:14.443043Z", + "shell.execute_reply": "2024-09-25T18:17:14.442721Z" } }, "outputs": [ @@ -447,11 +447,11 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -460,14 +460,14 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
 sumsum
year
2019$51,427.002019$51,427.00
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 4, diff --git a/examples/simple-revenue-funnel/index.html b/examples/simple-revenue-funnel/index.html index dbce3e0..fa6ba85 100644 --- a/examples/simple-revenue-funnel/index.html +++ b/examples/simple-revenue-funnel/index.html @@ -477,7 +477,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 @@ -553,126 +553,126 @@

Simple revenue funnel 🛒 - +
- - - - - - + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + +
 dategroupimpressionsclicksconversionsrevenuedategroupimpressionsclicksconversionsrevenue
02018-01-01A1000150120$8,600.0002018-01-01A1000150120$8,600.00
12018-01-01B2000150150$9,400.0012018-01-01B2000150150$9,400.00
22018-01-01C2500250125$10,750.0022018-01-01C2500250125$10,750.00
32019-01-01A1000120160$9,055.0032019-01-01A1000120160$9,055.00
42019-01-01B2150200145$8,739.0042019-01-01B2150200145$8,739.00
52019-01-01C2000400166$10,147.0052019-01-01C2000400166$10,147.00
62018-02-01A502010$500.0062018-02-01A502010$500.00
72018-02-01B2000300150$11,400.0072018-02-01B2000300150$11,400.00
82018-02-01C2500250125$8,750.0082018-02-01C2500250125$8,750.00
92019-02-01A25001000500$50,000.0092019-02-01A25001000500$50,000.00
102019-02-01B2150323145$10,739.00102019-02-01B2150323145$10,739.00
112019-02-01C2000320166$12,147.00112019-02-01C2000320166$12,147.00
@@ -714,15 +714,15 @@

Simple revenue funnel 🛒 - +
- - - - - + + + + + @@ -735,20 +735,20 @@

Simple revenue funnel 🛒

- - - - - - + + + + + + - - - - - - + + + + + +
 impressionsclick_rateconversion_rateaverage_spendrevenueimpressionsclick_rateconversion_rateaverage_spendrevenue
year
2018100500.1114430.607143$72.65$49,400.002018100500.1114430.607143$72.65$49,400.00
2019118000.2002540.542531$78.65$100,827.002019118000.2002540.542531$78.65$100,827.00
@@ -769,20 +769,20 @@

Simple revenue funnel 🛒explanation.style.format(fmt_currency).set_properties(**{'text-align': 'right'}) - +
- - - - + + + + @@ -796,49 +796,49 @@

Simple revenue funnel 🛒

- - - - - - - + + + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - - + + + + + + - - - - - + + + + + - - - - - + + + + +
     impressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contributionimpressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contribution
year
20191A$0.00-$1,720.00$4,586.67-$2,411.6720191A$0.00-$1,720.00$4,586.67-$2,411.67
B$705.00$2,428.33-$3,446.67-$347.67B$705.00$2,428.33-$3,446.67-$347.67
C-$2,150.00$8,600.00-$2,924.00-$4,129.00C-$2,150.00$8,600.00-$2,924.00-$4,129.00
2A$24,500.00$0.00$0.00$25,000.002A$24,500.00$0.00$0.00$25,000.00
B$855.00$19.00-$1,254.00-$281.00B$855.00$19.00-$1,254.00-$281.00
C-$1,750.00$4,200.00$420.00$527.00C-$1,750.00$4,200.00$420.00$527.00
@@ -854,11 +854,11 @@

Simple revenue funnel 🛒 - +
- + @@ -867,8 +867,8 @@

Simple revenue funnel 🛒

- - + +
 sumsum
year
2019$51,427.002019$51,427.00
diff --git a/index.html b/index.html index cf9dc70..e024c4c 100644 --- a/index.html +++ b/index.html @@ -475,7 +475,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/installation/index.html b/installation/index.html index 5e97837..f9b0dcc 100644 --- a/installation/index.html +++ b/installation/index.html @@ -477,7 +477,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/methods/funnel.ipynb b/methods/funnel.ipynb index bea7516..26c85fb 100644 --- a/methods/funnel.ipynb +++ b/methods/funnel.ipynb @@ -12,10 +12,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-09-25T08:40:25.341828Z", - "iopub.status.busy": "2024-09-25T08:40:25.341418Z", - "iopub.status.idle": "2024-09-25T08:40:25.360667Z", - "shell.execute_reply": "2024-09-25T08:40:25.360217Z" + "iopub.execute_input": "2024-09-25T18:17:17.060195Z", + "iopub.status.busy": "2024-09-25T18:17:17.057969Z", + "iopub.status.idle": "2024-09-25T18:17:17.082563Z", + "shell.execute_reply": "2024-09-25T18:17:17.082056Z" } }, "outputs": [], diff --git a/methods/funnel/index.html b/methods/funnel/index.html index fed215b..e5d928b 100644 --- a/methods/funnel/index.html +++ b/methods/funnel/index.html @@ -456,7 +456,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/methods/ratio/index.html b/methods/ratio/index.html index 87819f9..57379a2 100644 --- a/methods/ratio/index.html +++ b/methods/ratio/index.html @@ -456,7 +456,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/methods/total/index.html b/methods/total/index.html index 7d7453b..ee67b02 100644 --- a/methods/total/index.html +++ b/methods/total/index.html @@ -456,7 +456,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽 diff --git a/search/search_index.json b/search/search_index.json index 899cdb5..f5fd77b 100644 --- 
a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome","text":"

Well met, fellow data analyst!

If you're like me, then you're used to pesky stakeholders, who ask you why a metric changed. These kinds of questions are tricky to answer confidently. It usually ends with you sharing a few other related metrics, giving some context, and providing a weak explanation. All the while hoping the stakeholder will be satisfied (or fed up) and go away \ud83d\ude2e\u200d\ud83d\udca8

This isn't a good situation to be in. But what if you could tell exactly why a metric changed? Wouldn't that be great? \ud83e\udd29

icanexplain is a Python package. It provides a framework to break a metric down into drivers. It attributes the change in a metric to its drivers. Instead of just measuring the evolution of each driver, we can exactly quantify how much of the metric's evolution is due to each driver.

The best way to understand how icanexplain works is to see it in action, by checking out the examples.

icanexplain works with pandas and Polars out of the box. Additionally, it can run against other backends (e.g. SQL) because it is implemented with Ibis. Check out this example for more information.

"},{"location":"installation/","title":"Installation","text":"
pip install icanexplain\n
"},{"location":"examples/fashion-brand-co2e/","title":"Fashion brand CO2e emissions \ud83d\udc5f","text":"

Fashion brands increasingly have to be aware and report on their environmental footprint.

The following dataset comes from a real fashion brand, and has been anonymized. Each row represents a product manufactured in a given year.

import icanexplain as ice\n\ndef fmt_CO2e(kg):\n    if abs(kg) < 1e3:\n        return f'{kg:,.2f}kgCO2e'\n    return f'{kg / 1e6:,.1f}ktCO2e'\n\nproducts = ice.datasets.load_product_footprints()\nproducts.sample(5).style.format({'footprint': fmt_CO2e, 'units': '{:,d}'})\n
year category product_id footprint units 90512 2022 TSHIRT cea26442 7.62kgCO2e 1,486 46075 2022 JACKET d17ec415 38.43kgCO2e 2,254 51849 2022 PANTS d5531c9b 41.55kgCO2e 8 12818 2021 PANTS 335f31e3 13.53kgCO2e 4 64870 2022 PANTS e5562fe8 29.16kgCO2e 576

The footprint column indicates the product's carbon footprint in kgCO2e. The units column corresponds to the number of units produced.

Companies usually report their emissions on a yearly basis. We can do this by multiplying the footprint of each product, with the number of units produced, and summing the results.

(\n    products\n    .groupby('year')\n    .apply(lambda g: (g['footprint'] * g['units']).sum() / g['units'].sum(), include_groups=False)\n    .to_frame('average')\n    .assign(diff=lambda x: x.average.diff())\n    .style.format(fmt_CO2e, na_rep='')\n)\n
average diff year 2021 21.95kgCO2e 2022 21.71kgCO2e -0.24kgCO2e 2023 22.74kgCO2e 1.03kgCO2e

The average footprint went down between 2021 and 2022. It then went back up in 2023. Of course, we want to understand why. When they see this, fashion brands have one word coming out of their mouth: why, why, why?

The overall average footprint can change for two reasons:

  1. The average footprint per product category evolved.
  2. The mix of product categories evolved.

The second reason is called the mix effect. For instance, let's say t-shirts have a lower footprint than jackets. If the share of jackets produced in 2023 is higher than in 2022, the average footprint will go up.

The jackets in 2023 aren't necessarily the same as those of 2022. They could be more sustainable, and have a lower footprint. This is the tricky part: we need to disentangle the mix effect from the evolution of the footprint of each product category. That is the value proposition of this package.

explainer = ice.MeanExplainer(\n    fact='footprint',\n    count='units',\n    period='year',\n    group='category',\n)\nexplanation = explainer(products)\nexplanation.style.format({'inner': fmt_CO2e, 'mix': fmt_CO2e}, na_rep='')\n
inner mix year category 2022 DRESS 0.05kgCO2e -0.14kgCO2e JACKET -0.17kgCO2e -0.69kgCO2e PANTS 0.61kgCO2e 0.20kgCO2e SHIRT -0.02kgCO2e 0.00kgCO2e SWEATER -0.39kgCO2e -0.09kgCO2e TSHIRT 0.08kgCO2e 0.30kgCO2e 2023 DRESS -0.08kgCO2e 0.51kgCO2e JACKET -0.13kgCO2e 0.97kgCO2e PANTS -0.22kgCO2e -0.09kgCO2e SHIRT 0.02kgCO2e -0.03kgCO2e SWEATER -0.06kgCO2e 0.36kgCO2e TSHIRT -0.16kgCO2e -0.06kgCO2e

Here's the meaning of each column:

  • inner is the difference due to the change in the average footprint per unit. A negative inner value means the footprint per unit shifted in a way that reduced emissions. For instance, low emission products seem to have been prioritized in 2022 (-17.5ktCO2e), but not in 2023 (+73.4ktCO2e).
  • mix is the difference due to the change in the number of units produced. A negative mix value means the number of units produced shifted in a way that reduced emissions.

A convenient way to read these values is to use a waterfall chart.

explainer.plot(products)\n

This is better than reporting the average footprint and unit produced separately. It's more informative to quantify their contribution to the change in emissions. Here it's good to confirm that the decrease in emissions is mostly due to a reduction in the number of units produced for both years. But it's also good to see that there was an increase due to the average footprint in 2023. Importantly, each one of these effects is calculated, and not just assumed.

It's natural to want to deepen the analysis. For instance:

  1. Why is there a significant inner contribution for pants in 2022? Is it because the materials are less sustainable? Or because the pants got heavier?
  2. The reduction in 2023 is mainly due to the reduction in the number of units produced. Can this be broken down into marketing segments? For instance, is the reduction mainly driven by online or in-person sales? How does this break down by country?

These questions hint at the interactive aspect of this kind of analysis. Once you break down a metric's evolution along a dimension, the next steps are to break down the metric (question 1) and/or include another dimension (question 2).

"},{"location":"examples/ibis/","title":"Different backend support with Ibis \ud83d\udc26","text":"

icanexplain is implemented with Ibis. This means that it is framework agnostic, and can work with different backends. This example shows how to use it with DuckDB.

import ibis\nimport icanexplain as ice\n\nproducts_df = ice.datasets.load_product_footprints()\ncon = ibis.connect(\"duckdb://example.ddb\")\ncon.create_table(\n    \"products\", products_df, overwrite=True\n)\n
DatabaseTable: example.main.products\n  year       int64\n  category   string\n  product_id string\n  footprint  float64\n  units      int64\n
con = ibis.connect(\"duckdb://example.ddb\")\ncon.list_tables()\n
['products']\n
ibis.options.interactive = True\nproducts = con.table(\"products\")\nproducts.head()\n
\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 year  \u2503 category \u2503 product_id \u2503 footprint \u2503 units \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 string   \u2502 string     \u2502 float64   \u2502 int64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502  2021 \u2502 DRESS    \u2502 848be709   \u2502     96.04 \u2502   803 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 658f92b3   \u2502     58.15 \u2502  3367 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 3a26f323   \u2502     82.94 \u2502   240 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 6221dca6   \u2502     85.94 \u2502   432 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 46864ac5   \u2502     84.99 \u2502   816 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n
explainer = ice.SumExplainer(\n    fact='footprint',\n    count='units',\n    group='category',\n    period='year'\n)\nexplanation = explainer(products)\nexplanation\n
\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 year  \u2503 category \u2503 inner         \u2503 mix           \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 string   \u2502 float64       \u2502 float64       \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502  2022 \u2502 DRESS    \u2502  3.931932e+06 \u2502 -1.881370e+07 \u2502\n\u2502  2022 \u2502 JACKET   \u2502 -1.510008e+07 \u2502 -9.238617e+07 \u2502\n\u2502  2022 \u2502 PANTS    \u2502  4.002506e+07 \u2502  5.295190e+07 \u2502\n\u2502  2022 \u2502 SHIRT    \u2502 -1.484809e+06 \u2502 -5.791456e+06 \u2502\n\u2502  2022 \u2502 SWEATER  \u2502 -2.676209e+07 \u2502  1.181504e+07 \u2502\n\u2502  2022 \u2502 TSHIRT   \u2502  6.650940e+06 \u2502 -2.311836e+07 \u2502\n\u2502  2023 \u2502 DRESS    \u2502 -4.078094e+06 \u2502 -1.240339e+07 \u2502\n\u2502  2023 \u2502 JACKET   \u2502 -6.793317e+06 \u2502 -4.924036e+07 \u2502\n\u2502  2023 \u2502 PANTS    \u2502 -1.636299e+07 \u2502 -2.295608e+08 \u2502\n\u2502  2023 \u2502 SHIRT    \u2502  8.920908e+05 \u2502 -4.019144e+06 \u2502\n\u2502     \u2026 \u2502 \u2026        \u2502             \u2026 \u2502             \u2026 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n
type(explanation)\n
ibis.expr.types.relations.Table\n
explanation.execute()\n
year category inner mix 0 2022 DRESS 3.931932e+06 -1.881370e+07 1 2022 JACKET -1.510008e+07 -9.238617e+07 2 2022 PANTS 4.002506e+07 5.295190e+07 3 2022 SHIRT -1.484809e+06 -5.791456e+06 4 2022 SWEATER -2.676209e+07 1.181504e+07 5 2022 TSHIRT 6.650940e+06 -2.311836e+07 6 2023 DRESS -4.078094e+06 -1.240339e+07 7 2023 JACKET -6.793317e+06 -4.924036e+07 8 2023 PANTS -1.636299e+07 -2.295608e+08 9 2023 SHIRT 8.920908e+05 -4.019144e+06 10 2023 SWEATER -5.701391e+06 -1.130507e+08 11 2023 TSHIRT -1.150391e+07 -8.391323e+07
ibis.to_sql(explanation)\n
SELECT\n  *\nFROM (\n  SELECT\n    \"t9\".\"year\",\n    \"t9\".\"category\",\n    \"t9\".\"count_lag\" * (\n      \"t9\".\"mean\" - \"t9\".\"mean_lag\"\n    ) AS \"inner\",\n    (\n      \"t9\".\"count\" - \"t9\".\"count_lag\"\n    ) * \"t9\".\"mean\" AS \"mix\"\n  FROM (\n    SELECT\n      \"t8\".\"category\",\n      \"t8\".\"year\",\n      \"t8\".\"mean\",\n      \"t8\".\"count\",\n      LAG(\"t8\".\"mean\", 1) OVER (PARTITION BY \"t8\".\"category\" ORDER BY \"t8\".\"year\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"mean_lag\",\n      LAG(\"t8\".\"count\", 1) OVER (PARTITION BY \"t8\".\"category\" ORDER BY \"t8\".\"year\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"count_lag\"\n    FROM (\n      SELECT\n        \"t7\".\"category\",\n        \"t7\".\"year\",\n        COALESCE(\"t7\".\"mean\", 0) AS \"mean\",\n        COALESCE(\"t7\".\"count\", 0) AS \"count\"\n      FROM (\n        SELECT\n          \"t3\".\"category\",\n          \"t4\".\"year\",\n          \"t6\".\"mean\",\n          \"t6\".\"count\"\n        FROM (\n          SELECT DISTINCT\n            \"t0\".\"category\"\n          FROM \"products\" AS \"t0\"\n        ) AS \"t3\"\n        CROSS JOIN (\n          SELECT DISTINCT\n            \"t0\".\"year\"\n          FROM \"products\" AS \"t0\"\n        ) AS \"t4\"\n        LEFT OUTER JOIN (\n          SELECT\n            \"t0\".\"category\",\n            \"t0\".\"year\",\n            SUM(\"t0\".\"footprint\" * \"t0\".\"units\") / SUM(\"t0\".\"units\") AS \"mean\",\n            SUM(\"t0\".\"units\") AS \"count\"\n          FROM \"products\" AS \"t0\"\n          GROUP BY\n            1,\n            2\n        ) AS \"t6\"\n          ON \"t3\".\"category\" = \"t6\".\"category\" AND \"t4\".\"year\" = \"t6\".\"year\"\n      ) AS \"t7\"\n    ) AS \"t8\"\n  ) AS \"t9\"\n  ORDER BY\n    \"t9\".\"year\" ASC,\n    \"t9\".\"category\" ASC\n) AS \"t10\"\nWHERE\n  \"t10\".\"year\" IS NOT NULL\n  AND \"t10\".\"category\" IS 
NOT NULL\n  AND \"t10\".\"inner\" IS NOT NULL\n  AND \"t10\".\"mix\" IS NOT NULL\n
"},{"location":"examples/iowa-whiskey-sales/","title":"Iowa whiskey sales \ud83e\udd43","text":"

Let's look at whiskey sales in Iowa. This is a subset of the data from the Iowa Liquor Sales dataset.

import icanexplain as ice\n\nsales = ice.datasets.load_iowa_whiskey_sales()\nsales.head().style.format()\n
date category vendor sales_amount price_per_bottle bottles_sold bottle_volume_ml year 0 2012-06-04 CANADIAN WHISKIES CONSTELLATION WINE COMPANY, INC. 94.020000 15.670000 6 1750 2012 1 2016-01-05 STRAIGHT BOURBON WHISKIES CAMPARI(SKYY) 18.760000 9.380000 2 375 2016 2 2016-05-25 CANADIAN WHISKIES DIAGEO AMERICAS 11.030000 11.030000 1 300 2016 3 2016-01-20 CANADIAN WHISKIES PHILLIPS BEVERAGE COMPANY 33.840000 11.280000 3 750 2016 4 2012-03-19 CANADIAN WHISKIES CONSTELLATION WINE COMPANY, INC. 94.020000 15.670000 6 1750 2012

The sales_amount column represents the bill a customer paid for a given transaction. We can sum it and group by year to see how the total sales amount evolves over time.

import locale\n\nlocale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8')\ndef fmt_currency(x):\n    return locale.currency(x, grouping=True)\n\n(\n    sales.groupby('year')['sales_amount']\n    .sum()\n    .to_frame()\n    .assign(diff=lambda x: x.diff())\n    .style.format(lambda x: fmt_currency(x) if x > 0 else '')\n)\n
sales_amount diff year 2012 $1,842,098.86 2016 $2,298,505.88 $456,407.02 2020 $3,378,164.43 $1,079,658.55

Ok, but why? Well, we can use icanexplain to break down the evolution into two effects:

  1. The inner effect: how much the average transaction value changed.
  2. The mix effect: how much the number of transactions changed.
import icanexplain as ice\n\nexplainer = ice.SumExplainer(\n    fact='sales_amount',\n    period='year',\n    group='category'\n)\nexplanation = explainer(sales)\n(\n    explanation.style\n    .format(lambda x: fmt_currency(x) if x > 0 else '$0')\n    .set_properties(**{'text-align': 'right'})\n)\n
inner mix year category 2016 BLENDED WHISKIES $17,854.43 $7,356.77 CANADIAN WHISKIES $0 $225,902.66 CORN WHISKIES $0 $4,113.90 IRISH WHISKIES $22,144.48 $75,122.83 SCOTCH WHISKIES $19,591.97 $0 SINGLE BARREL BOURBON WHISKIES $1,852.03 $6,375.43 STRAIGHT BOURBON WHISKIES $107,144.93 $97,934.50 STRAIGHT RYE WHISKIES $0 $0 2020 BLENDED WHISKIES $83,342.60 $59,768.58 CANADIAN WHISKIES $224,022.62 $149,363.35 CORN WHISKIES $1,517.48 $1,453.26 IRISH WHISKIES $0 $67,344.41 SCOTCH WHISKIES $19,840.48 $0 SINGLE BARREL BOURBON WHISKIES $11,958.32 $3,819.27 STRAIGHT BOURBON WHISKIES $167,864.46 $268,064.74 STRAIGHT RYE WHISKIES $0 $64,056.43

For instance, we see that the average transaction amount for blended whiskies contributed to a $17,854 increase in sales from 2012 to 2016. This is the inner effect. The mix effect for blended whiskies, on the other hand, contributed to a $7,356 increase in sales.

Here's another example: the mix effect of Canadian whiskies is $225,902. This value, the mix effect, represents the increase due to the number of extra sales for Canadian whiskies. The inner effect, on the other hand, is $0. This means that the average transaction value for Canadian whiskies did not change between 2012 and 2016, and therefore didn't contribute to the increase in sales.

A visual way to interpret the above table is to use a waterfall chart. The idea is that the contributions sum to the difference between two periods. In this case, the difference in sales from 2012 to 2016 is $456,407. The waterfall chart shows how the inner and mix effects contributed to this difference.

explainer.plot(sales)\n
"},{"location":"examples/simple-revenue-funnel/","title":"Simple revenue funnel \ud83d\uded2","text":"

We look at a toy website funnel in this example. Imagine a fictitious website that sells stuff. Users go to the website, are presented with items, can add them to their cart, and then can buy them.

import pandas as pd\nimport locale\n\nlocale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8')\ndef fmt_currency(x):\n    return locale.currency(x, grouping=True)\n\ntraffic = pd.DataFrame({\n    'date': ['2018-01-01', '2018-01-01', '2018-01-01', '2019-01-01', '2019-01-01', '2019-01-01', '2018-02-01', '2018-02-01', '2018-02-01', '2019-02-01', '2019-02-01', '2019-02-01'],\n    'group': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],\n    'impressions': [1000, 2000, 2500, 1000, 2150, 2000, 50, 2000, 2500, 2500, 2150, 2000],\n    'clicks': [150, 150, 250, 120, 200, 400, 20, 300, 250, 1000, 323, 320],\n    'conversions': [120, 150, 125, 160, 145, 166, 10, 150, 125, 500, 145, 166],\n    'revenue': ['$8,600', '$9,400', '$10,750', '$9,055', '$8,739', '$10,147', '$500', '$11,400', '$8,750', '$50,000', '$10,739', '$12,147'],\n})\ntraffic['date'] = pd.to_datetime(traffic['date'])\ntraffic['revenue'] = traffic['revenue'].str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float)\ntraffic.style.format({'revenue': fmt_currency, 'date': lambda x: x.strftime('%Y-%m-%d')}, na_rep='N/A')\n
date group impressions clicks conversions revenue 0 2018-01-01 A 1000 150 120 $8,600.00 1 2018-01-01 B 2000 150 150 $9,400.00 2 2018-01-01 C 2500 250 125 $10,750.00 3 2019-01-01 A 1000 120 160 $9,055.00 4 2019-01-01 B 2150 200 145 $8,739.00 5 2019-01-01 C 2000 400 166 $10,147.00 6 2018-02-01 A 50 20 10 $500.00 7 2018-02-01 B 2000 300 150 $11,400.00 8 2018-02-01 C 2500 250 125 $8,750.00 9 2019-02-01 A 2500 1000 500 $50,000.00 10 2019-02-01 B 2150 323 145 $10,739.00 11 2019-02-01 C 2000 320 166 $12,147.00

The users are bucketed into 3 groups: A, B, C. We've also bucketed impressions/clicks/conversions/revenue figures by month of the year.

We're interested in understanding how the metrics evolve over time. The basic method is to calculate each metric separately. To keep things simple, we can do this for each year.

pd.DataFrame({\n    'impressions': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .impressions.sum()\n    ),\n    'click_rate': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.clicks.sum() / x.impressions.sum(), include_groups=False)\n    ),\n    'conversion_rate': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.conversions.sum() / x.clicks.sum(), include_groups=False)\n    ),\n    'average_spend': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.revenue.sum() / x.conversions.sum(), include_groups=False)\n    ),\n    'revenue': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .revenue.sum()\n    )\n}).style.format({'average_spend': fmt_currency, 'revenue': fmt_currency}, na_rep='')\n
impressions click_rate conversion_rate average_spend revenue year 2018 10050 0.111443 0.607143 $72.65 $49,400.00 2019 11800 0.200254 0.542531 $78.65 $100,827.00

In and of itself, this is already quite interesting. However, what we really want to know is how the change of each metric contributes to the change in revenue. This is where icanexplain comes in.

import icanexplain as ice\n\nexplainer = ice.FunnelExplainer(\n    funnel=['impressions', 'clicks', 'conversions', 'revenue'],\n    period='year',\n    group=['month', 'group']\n)\ntraffic = traffic.assign(\n    month=traffic.date.dt.month,\n    year=traffic.date.dt.year\n)\nexplanation = explainer(traffic)\nexplanation.style.format(fmt_currency).set_properties(**{'text-align': 'right'})\n
impressions_contribution clicks_over_impressions_contribution conversions_over_clicks_contribution revenue_over_conversions_contribution year month group 2019 1 A $0.00 -$1,720.00 $4,586.67 -$2,411.67 B $705.00 $2,428.33 -$3,446.67 -$347.67 C -$2,150.00 $8,600.00 -$2,924.00 -$4,129.00 2 A $24,500.00 $0.00 $0.00 $25,000.00 B $855.00 $19.00 -$1,254.00 -$281.00 C -$1,750.00 $4,200.00 $420.00 $527.00

This is powerful, because it allows us to understand the drivers of revenue growth. For example, between January 2018 and January 2019, revenue went up by $8,600 due to an increase in clicks for group C. This is more insightful than just saying that their click rate went up.

One thing to keep in mind is that contributions sum up to the overall difference between two periods. This means that it's easy to unit test that the contributions are correct:

(\n    explanation\n    .groupby('year').sum().sum(axis=1)\n    .to_frame('sum')\n    .style.format(fmt_currency)\n)\n
sum year 2019 $51,427.00

Of course, it would be more interesting to apply this methodology to some real data. One example is the Google Analytics dataset sample which is publicly available in BigQuery.

"},{"location":"methods/funnel/","title":"Funnel decomposition","text":"
# Funnel\n
"},{"location":"methods/ratio/","title":"Ratio decomposition","text":""},{"location":"methods/total/","title":"Total decomposition","text":"\\[ \\sum_{i=1}^n \\frac{1}{2} \\]"},{"location":"theme/","title":"Kilsbergen","text":"

A clean MkDocs theme.

This theme is designed for Tako, Pris, and Noblit. It is not flexible on purpose: it supports everything I need, and nothing more.

"},{"location":"theme/#demos","title":"Demos","text":"
  • Musium documentation
  • Noblit documentation
  • Pris documentation
  • RCL documentation
  • Squiller documentation
  • Tako documentation
"},{"location":"theme/#features","title":"Features","text":"
  • Responsive design
  • Zero javascript
"},{"location":"theme/#usage","title":"Usage","text":"

One easy way to use this theme, is to add it as a Git submodule to your docs directory, e.g. at docs/theme. Then add the following in your mkdocs.yml:

theme:\n  name: null\n  custom_dir: docs/theme\n

This theme requires MkDocs 1.1 or later. For earlier versions, delete this README.md to work around this bug.

To enable anchors next to section headings, add the following to your mkdocs.yml:

markdown_extensions:\n  - toc:\n      permalink: true\n      permalink_title: null\n

To enable syntax highlighting, ensure that pygmentize is available, and add the following to your mkdocs.yml:

markdown_extensions:\n  - codehilite\n

See also the python-markdown list of extensions.

"},{"location":"theme/#license","title":"License","text":"

Kilsbergen is licensed under the Apache 2.0 license. In the generated documentation, it is fine to just link to this readme from a comment.

"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome","text":"

Well met, fellow data analyst!

If you're like me, then you're used to pesky stakeholders, who ask you why a metric changed. These kinds of questions are tricky to answer confidently. It usually ends with you sharing a few other related metrics, giving some context, and providing a weak explanation. All the while hoping the stakeholder will be satisfied (or fed up) and go away \ud83d\ude2e\u200d\ud83d\udca8

This isn't a good situation to be in. But what if you could tell exactly why a metric changed? Wouldn't that be great? \ud83e\udd29

icanexplain is a Python package. It provides a framework to break a metric down into drivers. It attributes the change in a metric to its drivers. Instead of just measuring the evolution of each driver, we can exactly quantify how much of the metric's evolution is due to each driver.

The best way to understand how icanexplain works is to see it in action, by checking out the examples.

icanexplain works with pandas and Polars out of the box. Additionally, it can run against other backends (e.g. SQL) because it is implemented with Ibis. Check out this example for more information.

"},{"location":"installation/","title":"Installation","text":"
pip install icanexplain\n
"},{"location":"examples/fashion-brand-co2e/","title":"Fashion brand CO2e emissions \ud83d\udc5f","text":"

Fashion brands increasingly have to be aware and report on their environmental footprint.

The following dataset comes from a real fashion brand, and has been anonymized. Each row represents a product manufactured in a given year.

import icanexplain as ice\n\ndef fmt_CO2e(kg):\n    if abs(kg) < 1e3:\n        return f'{kg:,.2f}kgCO2e'\n    return f'{kg / 1e6:,.1f}ktCO2e'\n\nproducts = ice.datasets.load_product_footprints()\nproducts.sample(5).style.format({'footprint': fmt_CO2e, 'units': '{:,d}'})\n
year category product_id footprint units 79622 2022 PANTS 0c7938bf 13.38kgCO2e 105 23575 2021 PANTS 7693f75b 36.50kgCO2e 41 113417 2023 PANTS c5c54140 26.89kgCO2e 288 67791 2022 PANTS aed08558 106.76kgCO2e 301 49045 2022 PANTS a1cf7d5c 35.67kgCO2e 925

The footprint column indicates the product's carbon footprint in kgCO2e. The units column corresponds to the number of units produced.

Companies usually report their emissions on a yearly basis. We can do this by multiplying the footprint of each product, with the number of units produced, and summing the results.

(\n    products\n    .groupby('year')\n    .apply(lambda g: (g['footprint'] * g['units']).sum() / g['units'].sum(), include_groups=False)\n    .to_frame('average')\n    .assign(diff=lambda x: x.average.diff())\n    .style.format(fmt_CO2e, na_rep='')\n)\n
average diff year 2021 21.95kgCO2e 2022 21.71kgCO2e -0.24kgCO2e 2023 22.74kgCO2e 1.03kgCO2e

The average footprint went down between 2021 and 2022. It then went back up in 2023. Of course, we want to understand why. When they see this, fashion brands have one word coming out of their mouth: why, why, why?

The overall average footprint can change for two reasons:

  1. The average footprint per product category evolved.
  2. The mix of product categories evolved.

The second reason is called the mix effect. For instance, let's say t-shirts have a lower footprint than jackets. If the share of jackets produced in 2023 is higher than in 2022, the average footprint will go up.

The jackets in 2023 aren't necessarily the same as those of 2022. They could be more sustainable, and have a lower footprint. This is the tricky part: we need to disentangle the mix effect from the evolution of the footprint of each product category. That is the value proposition of this package.

explainer = ice.MeanExplainer(\n    fact='footprint',\n    count='units',\n    period='year',\n    group='category',\n)\nexplanation = explainer(products)\nexplanation.style.format({'inner': fmt_CO2e, 'mix': fmt_CO2e}, na_rep='')\n
inner mix year category 2022 DRESS 0.05kgCO2e -0.14kgCO2e JACKET -0.17kgCO2e -0.69kgCO2e PANTS 0.61kgCO2e 0.20kgCO2e SHIRT -0.02kgCO2e 0.00kgCO2e SWEATER -0.39kgCO2e -0.09kgCO2e TSHIRT 0.08kgCO2e 0.30kgCO2e 2023 DRESS -0.08kgCO2e 0.51kgCO2e JACKET -0.13kgCO2e 0.97kgCO2e PANTS -0.22kgCO2e -0.09kgCO2e SHIRT 0.02kgCO2e -0.03kgCO2e SWEATER -0.06kgCO2e 0.36kgCO2e TSHIRT -0.16kgCO2e -0.06kgCO2e

Here's the meaning of each column:

  • inner is the difference due to the change in the average footprint per unit. A negative inner value means the footprint per unit shifted in a way that reduced emissions. For instance, low emission products seem to have been prioritized in 2022 (-17.5ktCO2e), but not in 2023 (+73.4ktCO2e).
  • mix is the difference due to the change in the number of units produced. A negative mix value means the number of units produced shifted in a way that reduced emissions.

A convenient way to read these values is to use a waterfall chart.

explainer.plot(products)\n

This is better than reporting the average footprint and units produced separately. It's more informative to quantify their contribution to the change in emissions. Here it's good to confirm that the decrease in emissions is mostly due to a reduction in the number of units produced for both years. But it's also good to see that there was an increase due to the average footprint in 2023. Importantly, each one of these effects is calculated, and not just assumed.

It's natural to want to deepen the analysis. For instance:

  1. Why is there a significant inner contribution for pants in 2022? Is it because the materials are less sustainable? Or because the pants got heavier?
  2. The reduction in 2023 is mainly due to the reduction in the number of units produced. Can this be broken down into marketing segments? For instance, is the reduction mainly driven by online or in-person sales? How does this break down by country?

These questions hint at the interactive aspect of this kind of analysis. Once you break down a metric's evolution along a dimension, the next steps are to break down the metric (question 1) and/or include another dimension (question 2).

"},{"location":"examples/ibis/","title":"Different backend support with Ibis \ud83e\udebd","text":"

icanexplain is implemented with Ibis. This means that it is framework agnostic, and can work with different backends. This example shows how to use it with DuckDB.

import ibis\nimport icanexplain as ice\n\nproducts_df = ice.datasets.load_product_footprints()\ncon = ibis.connect(\"duckdb://example.ddb\")\ncon.create_table(\n    \"products\", products_df, overwrite=True\n)\n
DatabaseTable: example.main.products\n  year       int64\n  category   string\n  product_id string\n  footprint  float64\n  units      int64\n
con = ibis.connect(\"duckdb://example.ddb\")\ncon.list_tables()\n
['products']\n
ibis.options.interactive = True\nproducts = con.table(\"products\")\nproducts.head()\n
\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 year  \u2503 category \u2503 product_id \u2503 footprint \u2503 units \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 string   \u2502 string     \u2502 float64   \u2502 int64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502  2021 \u2502 DRESS    \u2502 848be709   \u2502     96.04 \u2502   803 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 658f92b3   \u2502     58.15 \u2502  3367 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 3a26f323   \u2502     82.94 \u2502   240 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 6221dca6   \u2502     85.94 \u2502   432 \u2502\n\u2502  2021 \u2502 DRESS    \u2502 46864ac5   \u2502     84.99 \u2502   816 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n
explainer = ice.SumExplainer(\n    fact='footprint',\n    count='units',\n    group='category',\n    period='year'\n)\nexplanation = explainer(products)\nexplanation\n
\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 year  \u2503 category \u2503 inner         \u2503 mix           \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 string   \u2502 float64       \u2502 float64       \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502  2022 \u2502 DRESS    \u2502  3.931932e+06 \u2502 -1.881370e+07 \u2502\n\u2502  2022 \u2502 JACKET   \u2502 -1.510008e+07 \u2502 -9.238617e+07 \u2502\n\u2502  2022 \u2502 PANTS    \u2502  4.002506e+07 \u2502  5.295190e+07 \u2502\n\u2502  2022 \u2502 SHIRT    \u2502 -1.484809e+06 \u2502 -5.791456e+06 \u2502\n\u2502  2022 \u2502 SWEATER  \u2502 -2.676209e+07 \u2502  1.181504e+07 \u2502\n\u2502  2022 \u2502 TSHIRT   \u2502  6.650940e+06 \u2502 -2.311836e+07 \u2502\n\u2502  2023 \u2502 DRESS    \u2502 -4.078094e+06 \u2502 -1.240339e+07 \u2502\n\u2502  2023 \u2502 JACKET   \u2502 -6.793317e+06 \u2502 -4.924036e+07 \u2502\n\u2502  2023 \u2502 PANTS    \u2502 -1.636299e+07 \u2502 -2.295608e+08 \u2502\n\u2502  2023 \u2502 SHIRT    \u2502  8.920908e+05 \u2502 -4.019144e+06 \u2502\n\u2502     \u2026 \u2502 \u2026        \u2502             \u2026 \u2502             \u2026 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n
type(explanation)\n
ibis.expr.types.relations.Table\n
explanation.execute().style.format()\n
year category inner mix 0 2022 DRESS 3931932.447552 -18813695.617552 1 2022 JACKET -15100081.026506 -92386166.203493 2 2022 PANTS 40025058.515251 52951900.074743 3 2022 SHIRT -1484809.008950 -5791455.871050 4 2022 SWEATER -26762091.196715 11815043.806715 5 2022 TSHIRT 6650940.137541 -23118359.127541 6 2023 DRESS -4078094.357618 -12403387.412381 7 2023 JACKET -6793316.568952 -49240364.221049 8 2023 PANTS -16362993.150075 -229560842.809919 9 2023 SHIRT 892090.812023 -4019143.772023 10 2023 SWEATER -5701391.450394 -113050731.029606 11 2023 TSHIRT -11503906.840478 -83913226.849522
ibis.to_sql(explanation)\n
SELECT\n  *\nFROM (\n  SELECT\n    \"t9\".\"year\",\n    \"t9\".\"category\",\n    \"t9\".\"count_lag\" * (\n      \"t9\".\"mean\" - \"t9\".\"mean_lag\"\n    ) AS \"inner\",\n    (\n      \"t9\".\"count\" - \"t9\".\"count_lag\"\n    ) * \"t9\".\"mean\" AS \"mix\"\n  FROM (\n    SELECT\n      \"t8\".\"category\",\n      \"t8\".\"year\",\n      \"t8\".\"mean\",\n      \"t8\".\"count\",\n      LAG(\"t8\".\"mean\", 1) OVER (PARTITION BY \"t8\".\"category\" ORDER BY \"t8\".\"year\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"mean_lag\",\n      LAG(\"t8\".\"count\", 1) OVER (PARTITION BY \"t8\".\"category\" ORDER BY \"t8\".\"year\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"count_lag\"\n    FROM (\n      SELECT\n        \"t7\".\"category\",\n        \"t7\".\"year\",\n        COALESCE(\"t7\".\"mean\", 0) AS \"mean\",\n        COALESCE(\"t7\".\"count\", 0) AS \"count\"\n      FROM (\n        SELECT\n          \"t3\".\"category\",\n          \"t4\".\"year\",\n          \"t6\".\"mean\",\n          \"t6\".\"count\"\n        FROM (\n          SELECT DISTINCT\n            \"t0\".\"category\"\n          FROM \"products\" AS \"t0\"\n        ) AS \"t3\"\n        CROSS JOIN (\n          SELECT DISTINCT\n            \"t0\".\"year\"\n          FROM \"products\" AS \"t0\"\n        ) AS \"t4\"\n        LEFT OUTER JOIN (\n          SELECT\n            \"t0\".\"category\",\n            \"t0\".\"year\",\n            SUM(\"t0\".\"footprint\" * \"t0\".\"units\") / SUM(\"t0\".\"units\") AS \"mean\",\n            SUM(\"t0\".\"units\") AS \"count\"\n          FROM \"products\" AS \"t0\"\n          GROUP BY\n            1,\n            2\n        ) AS \"t6\"\n          ON \"t3\".\"category\" = \"t6\".\"category\" AND \"t4\".\"year\" = \"t6\".\"year\"\n      ) AS \"t7\"\n    ) AS \"t8\"\n  ) AS \"t9\"\n  ORDER BY\n    \"t9\".\"year\" ASC,\n    \"t9\".\"category\" ASC\n) AS \"t10\"\nWHERE\n  \"t10\".\"year\" IS NOT NULL\n  AND \"t10\".\"category\" IS 
NOT NULL\n  AND \"t10\".\"inner\" IS NOT NULL\n  AND \"t10\".\"mix\" IS NOT NULL\n
"},{"location":"examples/iowa-whiskey-sales/","title":"Iowa whiskey sales \ud83e\udd43","text":"

Let's look at whiskey sales in Iowa. This is a subset of the data from the Iowa Liquor Sales dataset.

import icanexplain as ice\n\nsales = ice.datasets.load_iowa_whiskey_sales()\nsales.head().style.format()\n
date category vendor sales_amount price_per_bottle bottles_sold bottle_volume_ml year 0 2012-06-04 CANADIAN WHISKIES CONSTELLATION WINE COMPANY, INC. 94.020000 15.670000 6 1750 2012 1 2016-01-05 STRAIGHT BOURBON WHISKIES CAMPARI(SKYY) 18.760000 9.380000 2 375 2016 2 2016-05-25 CANADIAN WHISKIES DIAGEO AMERICAS 11.030000 11.030000 1 300 2016 3 2016-01-20 CANADIAN WHISKIES PHILLIPS BEVERAGE COMPANY 33.840000 11.280000 3 750 2016 4 2012-03-19 CANADIAN WHISKIES CONSTELLATION WINE COMPANY, INC. 94.020000 15.670000 6 1750 2012

The sales_amount column represents the bill a customer paid for a given transaction. We can sum it and group by year to see how the total sales amount evolves over time.

import locale\n\nlocale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8')\ndef fmt_currency(x):\n    return locale.currency(x, grouping=True)\n\n(\n    sales.groupby('year')['sales_amount']\n    .sum()\n    .to_frame()\n    .assign(diff=lambda x: x.diff())\n    .style.format(lambda x: fmt_currency(x) if x > 0 else '')\n)\n
sales_amount diff year 2012 $1,842,098.86 2016 $2,298,505.88 $456,407.02 2020 $3,378,164.43 $1,079,658.55

Ok, but why? Well, we can use icanexplain to break down the evolution into two effects:

  1. The inner effect: how much the average transaction value changed.
  2. The mix effect: how much the number of transactions changed.
import icanexplain as ice\n\nexplainer = ice.SumExplainer(\n    fact='sales_amount',\n    period='year',\n    group='category'\n)\nexplanation = explainer(sales)\n(\n    explanation.style\n    .format(lambda x: fmt_currency(x) if x > 0 else '$0')\n    .set_properties(**{'text-align': 'right'})\n)\n
inner mix year category 2016 BLENDED WHISKIES $17,854.43 $7,356.77 CANADIAN WHISKIES $0 $225,902.66 CORN WHISKIES $0 $4,113.90 IRISH WHISKIES $22,144.48 $75,122.83 SCOTCH WHISKIES $19,591.97 $0 SINGLE BARREL BOURBON WHISKIES $1,852.03 $6,375.43 STRAIGHT BOURBON WHISKIES $107,144.93 $97,934.50 STRAIGHT RYE WHISKIES $0 $0 2020 BLENDED WHISKIES $83,342.60 $59,768.58 CANADIAN WHISKIES $224,022.62 $149,363.35 CORN WHISKIES $1,517.48 $1,453.26 IRISH WHISKIES $0 $67,344.41 SCOTCH WHISKIES $19,840.48 $0 SINGLE BARREL BOURBON WHISKIES $11,958.32 $3,819.27 STRAIGHT BOURBON WHISKIES $167,864.46 $268,064.74 STRAIGHT RYE WHISKIES $0 $64,056.43

For instance, we see that the average transaction amount for blended whiskies contributed to a $17,854 increase in sales from 2012 to 2016. This is the inner effect. The mix effect for blended whiskies, on the other hand, contributed to a $7,356 increase in sales.

Here's another example: the mix effect of Canadian whiskies is $225,902. This value, the mix effect, represents the increase due to the number of extra sales for Canadian whiskies. The inner effect, on the other hand, is $0. This means that the average transaction value for Canadian whiskies did not change between 2012 and 2016, and therefore didn't contribute to the increase in sales.

A visual way to interpret the above table is to use a waterfall chart. The idea is that the contributions sum to the difference between two periods. In this case, the difference in sales from 2012 to 2016 is $456,407. The waterfall chart shows how the inner and mix effects contributed to this difference.

explainer.plot(sales)\n
"},{"location":"examples/simple-revenue-funnel/","title":"Simple revenue funnel \ud83d\uded2","text":"

We look at a toy website funnel in this example. Imagine a fictitious website that sells stuff. Users go to the website, are presented with items, can add them to their cart, and then can buy them.

import pandas as pd\nimport locale\n\nlocale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8')\ndef fmt_currency(x):\n    return locale.currency(x, grouping=True)\n\ntraffic = pd.DataFrame({\n    'date': ['2018-01-01', '2018-01-01', '2018-01-01', '2019-01-01', '2019-01-01', '2019-01-01', '2018-02-01', '2018-02-01', '2018-02-01', '2019-02-01', '2019-02-01', '2019-02-01'],\n    'group': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],\n    'impressions': [1000, 2000, 2500, 1000, 2150, 2000, 50, 2000, 2500, 2500, 2150, 2000],\n    'clicks': [150, 150, 250, 120, 200, 400, 20, 300, 250, 1000, 323, 320],\n    'conversions': [120, 150, 125, 160, 145, 166, 10, 150, 125, 500, 145, 166],\n    'revenue': ['$8,600', '$9,400', '$10,750', '$9,055', '$8,739', '$10,147', '$500', '$11,400', '$8,750', '$50,000', '$10,739', '$12,147'],\n})\ntraffic['date'] = pd.to_datetime(traffic['date'])\ntraffic['revenue'] = traffic['revenue'].str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float)\ntraffic.style.format({'revenue': fmt_currency, 'date': lambda x: x.strftime('%Y-%m-%d')}, na_rep='N/A')\n
date group impressions clicks conversions revenue 0 2018-01-01 A 1000 150 120 $8,600.00 1 2018-01-01 B 2000 150 150 $9,400.00 2 2018-01-01 C 2500 250 125 $10,750.00 3 2019-01-01 A 1000 120 160 $9,055.00 4 2019-01-01 B 2150 200 145 $8,739.00 5 2019-01-01 C 2000 400 166 $10,147.00 6 2018-02-01 A 50 20 10 $500.00 7 2018-02-01 B 2000 300 150 $11,400.00 8 2018-02-01 C 2500 250 125 $8,750.00 9 2019-02-01 A 2500 1000 500 $50,000.00 10 2019-02-01 B 2150 323 145 $10,739.00 11 2019-02-01 C 2000 320 166 $12,147.00

The users are bucketed into 3 groups: A, B, C. We've also bucketed impressions/clicks/conversions/revenue figures by month of the year.

We're interested in understanding how the metrics evolve over time. The basic method is to calculate each metric separately. To keep things simple, we can do this for each year.

pd.DataFrame({\n    'impressions': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .impressions.sum()\n    ),\n    'click_rate': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.clicks.sum() / x.impressions.sum(), include_groups=False)\n    ),\n    'conversion_rate': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.conversions.sum() / x.clicks.sum(), include_groups=False)\n    ),\n    'average_spend': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .apply(lambda x: x.revenue.sum() / x.conversions.sum(), include_groups=False)\n    ),\n    'revenue': (\n        traffic\n        .assign(year=traffic.date.dt.year)\n        .groupby('year')\n        .revenue.sum()\n    )\n}).style.format({'average_spend': fmt_currency, 'revenue': fmt_currency}, na_rep='')\n
impressions click_rate conversion_rate average_spend revenue year 2018 10050 0.111443 0.607143 $72.65 $49,400.00 2019 11800 0.200254 0.542531 $78.65 $100,827.00

In and of itself, this is already quite interesting. However, what we really want to know is how the change of each metric contributes to the change in revenue. This is where icanexplain comes in.

import icanexplain as ice\n\nexplainer = ice.FunnelExplainer(\n    funnel=['impressions', 'clicks', 'conversions', 'revenue'],\n    period='year',\n    group=['month', 'group']\n)\ntraffic = traffic.assign(\n    month=traffic.date.dt.month,\n    year=traffic.date.dt.year\n)\nexplanation = explainer(traffic)\nexplanation.style.format(fmt_currency).set_properties(**{'text-align': 'right'})\n
impressions_contribution clicks_over_impressions_contribution conversions_over_clicks_contribution revenue_over_conversions_contribution year month group 2019 1 A $0.00 -$1,720.00 $4,586.67 -$2,411.67 B $705.00 $2,428.33 -$3,446.67 -$347.67 C -$2,150.00 $8,600.00 -$2,924.00 -$4,129.00 2 A $24,500.00 $0.00 $0.00 $25,000.00 B $855.00 $19.00 -$1,254.00 -$281.00 C -$1,750.00 $4,200.00 $420.00 $527.00

This is powerful, because it allows us to understand the drivers of revenue growth. For example, between January 2018 and January 2019, revenue went up by $8,600 due to an increase in clicks for group C. This is more insightful than just saying that their click rate went up.

One thing to keep in mind is that contributions sum up to the overall difference between two periods. This means that it's easy to unit test that the contributions are correct:

(\n    explanation\n    .groupby('year').sum().sum(axis=1)\n    .to_frame('sum')\n    .style.format(fmt_currency)\n)\n
sum year 2019 $51,427.00

Of course, it would be more interesting to apply this methodology to some real data. One example is the Google Analytics dataset sample which is publicly available in BigQuery.

"},{"location":"methods/funnel/","title":"Funnel decomposition","text":"
# Funnel\n
"},{"location":"methods/ratio/","title":"Ratio decomposition","text":""},{"location":"methods/total/","title":"Total decomposition","text":"\\[ \\sum_{i=1}^n \\frac{1}{2} \\]"},{"location":"theme/","title":"Kilsbergen","text":"

A clean MkDocs theme.

This theme is designed for Tako, Pris, and Noblit. It is not flexible on purpose: it supports everything I need, and nothing more.

"},{"location":"theme/#demos","title":"Demos","text":"
  • Musium documentation
  • Noblit documentation
  • Pris documentation
  • RCL documentation
  • Squiller documentation
  • Tako documentation
"},{"location":"theme/#features","title":"Features","text":"
  • Responsive design
  • Zero javascript
"},{"location":"theme/#usage","title":"Usage","text":"

One easy way to use this theme, is to add it as a Git submodule to your docs directory, e.g. at docs/theme. Then add the following in your mkdocs.yml:

theme:\n  name: null\n  custom_dir: docs/theme\n

This theme requires MkDocs 1.1 or later. For earlier versions, delete this README.md to work around this bug.

To enable anchors next to section headings, add the following to your mkdocs.yml:

markdown_extensions:\n  - toc:\n      permalink: true\n      permalink_title: null\n

To enable syntax highlighting, ensure that pygmentize is available, and add the following to your mkdocs.yml:

markdown_extensions:\n  - codehilite\n

See also the python-markdown list of extensions.

"},{"location":"theme/#license","title":"License","text":"

Kilsbergen is licensed under the Apache 2.0 license. In the generated documentation, it is fine to just link to this readme from a comment.

"}]} \ No newline at end of file diff --git a/theme/index.html b/theme/index.html index 68586e3..6efade9 100644 --- a/theme/index.html +++ b/theme/index.html @@ -456,7 +456,7 @@ - Different backend support with Ibis 🐦 + Different backend support with Ibis 🪽