From 1f73c9ea25c11a27808bc4ed29eca17474511ec9 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Wed, 19 Jun 2024 10:27:10 +0100
Subject: [PATCH] Add uprating docs

---
 docs/book/uprating.ipynb | 242 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 docs/book/uprating.ipynb
diff --git a/docs/book/uprating.ipynb b/docs/book/uprating.ipynb
new file mode 100644
index 00000000..cd849347
--- /dev/null
+++ b/docs/book/uprating.ipynb
@@ -0,0 +1,242 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Uprating\n",
+    "\n",
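+    "As part of the data generation process, we uprate the 2015 IRS Public Use File (PUF) to match 2021 Statistics of Income (SOI) statistics. The table below shows every variable that is uprated *directly* from SOI aggregates, with its total in 2015 and 2021 (in billions of dollars) and the growth between the two years."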
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>E18500</th>\n",
+       "      <th>E19200</th>\n",
+       "      <th>E26270</th>\n",
+       "      <th>E26270</th>\n",
+       "      <th>E18400</th>\n",
+       "      <th>E18400</th>\n",
+       "      <th>E17500</th>\n",
+       "      <th>E00400</th>\n",
+       "      <th>E00300</th>\n",
+       "      <th>E19800</th>\n",
+       "      <th>E01700</th>\n",
+       "      <th>E00200</th>\n",
+       "      <th>E01500</th>\n",
+       "      <th>E02400</th>\n",
+       "      <th>E00650</th>\n",
+       "      <th>E00600</th>\n",
+       "      <th>E02500</th>\n",
+       "      <th>E01400</th>\n",
+       "      <th>E00900</th>\n",
+       "      <th>E00900</th>\n",
+       "      <th>E01100</th>\n",
+       "      <th>E01000</th>\n",
+       "      <th>E01000</th>\n",
+       "      <th>E02300</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2015 total ($bn)</th>\n",
+       "      <td>188.9</td>\n",
+       "      <td>306.6</td>\n",
+       "      <td>633.1</td>\n",
+       "      <td>633.1</td>\n",
+       "      <td>352.8</td>\n",
+       "      <td>352.8</td>\n",
+       "      <td>132.1</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>97.8</td>\n",
+       "      <td>163.5</td>\n",
+       "      <td>693.0</td>\n",
+       "      <td>7156.3</td>\n",
+       "      <td>1178.9</td>\n",
+       "      <td>604.8</td>\n",
+       "      <td>204.0</td>\n",
+       "      <td>260.9</td>\n",
+       "      <td>277.0</td>\n",
+       "      <td>251.8</td>\n",
+       "      <td>332.4</td>\n",
+       "      <td>332.4</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>701.4</td>\n",
+       "      <td>701.4</td>\n",
+       "      <td>26.7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2021 total ($bn)</th>\n",
+       "      <td>100.1</td>\n",
+       "      <td>164.4</td>\n",
+       "      <td>419.9</td>\n",
+       "      <td>419.9</td>\n",
+       "      <td>254.2</td>\n",
+       "      <td>254.2</td>\n",
+       "      <td>100.6</td>\n",
+       "      <td>55.3</td>\n",
+       "      <td>105.7</td>\n",
+       "      <td>194.0</td>\n",
+       "      <td>861.8</td>\n",
+       "      <td>9078.3</td>\n",
+       "      <td>1519.6</td>\n",
+       "      <td>790.7</td>\n",
+       "      <td>297.1</td>\n",
+       "      <td>388.0</td>\n",
+       "      <td>412.2</td>\n",
+       "      <td>406.1</td>\n",
+       "      <td>560.2</td>\n",
+       "      <td>560.2</td>\n",
+       "      <td>24.3</td>\n",
+       "      <td>2051.5</td>\n",
+       "      <td>2051.5</td>\n",
+       "      <td>204.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Growth (%)</th>\n",
+       "      <td>-47.0</td>\n",
+       "      <td>-46.4</td>\n",
+       "      <td>-33.7</td>\n",
+       "      <td>-33.7</td>\n",
+       "      <td>-28.0</td>\n",
+       "      <td>-28.0</td>\n",
+       "      <td>-23.9</td>\n",
+       "      <td>-10.3</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>18.7</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>26.9</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>30.7</td>\n",
+       "      <td>45.6</td>\n",
+       "      <td>48.7</td>\n",
+       "      <td>48.8</td>\n",
+       "      <td>61.3</td>\n",
+       "      <td>68.5</td>\n",
+       "      <td>68.5</td>\n",
+       "      <td>106.6</td>\n",
+       "      <td>192.5</td>\n",
+       "      <td>192.5</td>\n",
+       "      <td>667.2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  E18500  E19200  E26270  E26270  E18400  E18400  E17500  \\\n",
+       "2015 total ($bn)   188.9   306.6   633.1   633.1   352.8   352.8   132.1   \n",
+       "2021 total ($bn)   100.1   164.4   419.9   419.9   254.2   254.2   100.6   \n",
+       "Growth (%)         -47.0   -46.4   -33.7   -33.7   -28.0   -28.0   -23.9   \n",
+       "\n",
+       "                  E00400  E00300  E19800  E01700  E00200  E01500  E02400  \\\n",
+       "2015 total ($bn)    61.6    97.8   163.5   693.0  7156.3  1178.9   604.8   \n",
+       "2021 total ($bn)    55.3   105.7   194.0   861.8  9078.3  1519.6   790.7   \n",
+       "Growth (%)         -10.3     8.0    18.7    24.4    26.9    28.9    30.7   \n",
+       "\n",
+       "                  E00650  E00600  E02500  E01400  E00900  E00900  E01100  \\\n",
+       "2015 total ($bn)   204.0   260.9   277.0   251.8   332.4   332.4    11.8   \n",
+       "2021 total ($bn)   297.1   388.0   412.2   406.1   560.2   560.2    24.3   \n",
+       "Growth (%)          45.6    48.7    48.8    61.3    68.5    68.5   106.6   \n",
+       "\n",
+       "                  E01000  E01000  E02300  \n",
+       "2015 total ($bn)   701.4   701.4    26.7  \n",
+       "2021 total ($bn)  2051.5  2051.5   204.6  \n",
+       "Growth (%)         192.5   192.5   667.2  "
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
+    "from microdf import MicroDataFrame\n",
+    "from tax_microdata_benchmarking.datasets.uprate_puf import (\n",
+    "    SOI_TO_PUF_STRAIGHT_RENAMES,\n",
+    "    SOI_TO_PUF_NEG_ONLY_RENAMES,\n",
+    "    SOI_TO_PUF_POS_ONLY_RENAMES,\n",
+    ")\n",
+    "\n",
+    "# Lift pandas' column display limit so the wide table below is not truncated.\n",
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "\n",
+    "puf_2015 = pd.read_csv(STORAGE_FOLDER / \"input\" / \"puf_2015.csv\")\n",
+    "puf_2021 = pd.read_csv(STORAGE_FOLDER / \"output\" / \"puf_2021.csv\")\n",
+    "\n",
+    "puf_2015.S006 /= 100  # S006 is rescaled by 1/100 before it is used as the weight\n",
+    "puf_2021.S006 /= 100\n",
+    "puf_2015 = MicroDataFrame(puf_2015, weights=\"S006\")\n",
+    "puf_2021 = MicroDataFrame(puf_2021, weights=\"S006\")\n",
+    "\n",
+    "totals_2015 = puf_2015.sum()  # computed once per year and reused for every column below\n",
+    "totals_2021 = puf_2021.sum()\n",
+    "\n",
+    "uprating_df = pd.DataFrame()\n",
+    "uprating_df[\"2015 total ($bn)\"] = (totals_2015 / 1e9).round(1)\n",
+    "uprating_df[\"2021 total ($bn)\"] = (totals_2021 / 1e9).round(1)\n",
+    "uprating_df[\"Growth (%)\"] = ((totals_2021 / totals_2015 - 1) * 100).round(1)\n",
+    "\n",
+    "# The combined rename maps list some variables twice (e.g. E00900, E01000); keep each variable once.\n",
+    "UPRATED_DIRECTLY_FROM_SOI = sorted(\n",
+    "    set(SOI_TO_PUF_STRAIGHT_RENAMES.values())\n",
+    "    | set(SOI_TO_PUF_NEG_ONLY_RENAMES.values())\n",
+    "    | set(SOI_TO_PUF_POS_ONLY_RENAMES.values())\n",
+    ")\n",
+    "\n",
+    "uprating_df.loc[UPRATED_DIRECTLY_FROM_SOI].sort_values(\"Growth (%)\").T"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

	E18500	E19200	E26270	E26270	E18400	E18400	E17500	E00400	E00300	E19800	E01700	E00200	E01500	E02400	E00650	E00600	E02500	E01400	E00900	E00900	E01100	E01000	E01000	E02300
2015 total ($bn)	188.9	306.6	633.1	633.1	352.8	352.8	132.1	61.6	97.8	163.5	693.0	7156.3	1178.9	604.8	204.0	260.9	277.0	251.8	332.4	332.4	11.8	701.4	701.4	26.7
2021 total ($bn)	100.1	164.4	419.9	419.9	254.2	254.2	100.6	55.3	105.7	194.0	861.8	9078.3	1519.6	790.7	297.1	388.0	412.2	406.1	560.2	560.2	24.3	2051.5	2051.5	204.6
Growth (%)	-47.0	-46.4	-33.7	-33.7	-28.0	-28.0	-23.9	-10.3	8.0	18.7	24.4	26.9	28.9	30.7	45.6	48.7	48.8	61.3	68.5	68.5	106.6	192.5	192.5	667.2