From 1f73c9ea25c11a27808bc4ed29eca17474511ec9 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 19 Jun 2024 10:27:10 +0100 Subject: [PATCH] Add uprating docs --- docs/book/uprating.ipynb | 242 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 docs/book/uprating.ipynb diff --git a/docs/book/uprating.ipynb b/docs/book/uprating.ipynb new file mode 100644 index 00000000..cd849347 --- /dev/null +++ b/docs/book/uprating.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Uprating\n", + "\n", + "As part of the data generation process, we uprate the 2015 PUF to match 2021 SOI statistics. The table below shows all variables which are uprated *directly* from SOI aggregates." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
E18500E19200E26270E26270E18400E18400E17500E00400E00300E19800E01700E00200E01500E02400E00650E00600E02500E01400E00900E00900E01100E01000E01000E02300
2015 total ($bn)188.9306.6633.1633.1352.8352.8132.161.697.8163.5693.07156.31178.9604.8204.0260.9277.0251.8332.4332.411.8701.4701.426.7
2021 total ($bn)100.1164.4419.9419.9254.2254.2100.655.3105.7194.0861.89078.31519.6790.7297.1388.0412.2406.1560.2560.224.32051.52051.5204.6
Growth (%)-47.0-46.4-33.7-33.7-28.0-28.0-23.9-10.38.018.724.426.928.930.745.648.748.861.368.568.5106.6192.5192.5667.2
\n", + "
"
+      ],
+      "text/plain": [
+       " E18500 E19200 E26270 E26270 E18400 E18400 E17500 \\\n",
+       "2015 total ($bn) 188.9 306.6 633.1 633.1 352.8 352.8 132.1 \n",
+       "2021 total ($bn) 100.1 164.4 419.9 419.9 254.2 254.2 100.6 \n",
+       "Growth (%) -47.0 -46.4 -33.7 -33.7 -28.0 -28.0 -23.9 \n",
+       "\n",
+       " E00400 E00300 E19800 E01700 E00200 E01500 E02400 \\\n",
+       "2015 total ($bn) 61.6 97.8 163.5 693.0 7156.3 1178.9 604.8 \n",
+       "2021 total ($bn) 55.3 105.7 194.0 861.8 9078.3 1519.6 790.7 \n",
+       "Growth (%) -10.3 8.0 18.7 24.4 26.9 28.9 30.7 \n",
+       "\n",
+       " E00650 E00600 E02500 E01400 E00900 E00900 E01100 \\\n",
+       "2015 total ($bn) 204.0 260.9 277.0 251.8 332.4 332.4 11.8 \n",
+       "2021 total ($bn) 297.1 388.0 412.2 406.1 560.2 560.2 24.3 \n",
+       "Growth (%) 45.6 48.7 48.8 61.3 68.5 68.5 106.6 \n",
+       "\n",
+       " E01000 E01000 E02300 \n",
+       "2015 total ($bn) 701.4 701.4 26.7 \n",
+       "2021 total ($bn) 2051.5 2051.5 204.6 \n",
+       "Growth (%) 192.5 192.5 667.2 "
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
+    "from microdf import MicroDataFrame\n",
+    "from tax_microdata_benchmarking.datasets.uprate_puf import (\n",
+    "    SOI_TO_PUF_STRAIGHT_RENAMES,\n",
+    "    SOI_TO_PUF_NEG_ONLY_RENAMES,\n",
+    "    SOI_TO_PUF_POS_ONLY_RENAMES,\n",
+    ")\n",
+    "\n",
+    "# Show every column of the wide summary table instead of truncating it.\n",
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "\n",
+    "puf_2015 = pd.read_csv(STORAGE_FOLDER / \"input\" / \"puf_2015.csv\")\n",
+    "puf_2021 = pd.read_csv(STORAGE_FOLDER / \"output\" / \"puf_2021.csv\")\n",
+    "# NOTE(review): S006 is divided by 100 before use as the weight column; presumably it is stored in hundredths -- confirm.\n",
+    "puf_2015.S006 /= 100\n",
+    "puf_2021.S006 /= 100\n",
+    "puf_2015 = MicroDataFrame(puf_2015, weights=\"S006\")\n",
+    "puf_2021 = MicroDataFrame(puf_2021, weights=\"S006\")\n",
+    "\n",
+    "totals_2015 = puf_2015.sum()\n",
+    "totals_2021 = puf_2021.sum()\n",
+    "\n",
+    "uprating_df = pd.DataFrame()\n",
+    "uprating_df[\"2015 total ($bn)\"] = (totals_2015 / 1e9).round(1)\n",
+    "uprating_df[\"2021 total ($bn)\"] = (totals_2021 / 1e9).round(1)\n",
+    "uprating_df[\"Growth (%)\"] = ((totals_2021 / totals_2015 - 1) * 100).round(1)\n",
+    "\n",
+    "# Not de-duplicated: a variable present in more than one map is shown once per occurrence (hence the repeated columns).\n",
+    "UPRATED_DIRECTLY_FROM_SOI = [\n",
+    "    *SOI_TO_PUF_STRAIGHT_RENAMES.values(),\n",
+    "    *SOI_TO_PUF_NEG_ONLY_RENAMES.values(),\n",
+    "    *SOI_TO_PUF_POS_ONLY_RENAMES.values(),\n",
+    "]\n",
+    "\n",
+    "uprating_df.loc[UPRATED_DIRECTLY_FROM_SOI].sort_values(\"Growth (%)\").T"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}