Format

PSLmodels · Jun 19, 2024 · 482e4be
1 parent 1f73c9e
commit 482e4be
Showing 1 changed file with 51 additions and 18 deletions.
diff --git a/docs/book/validation.ipynb b/docs/book/validation.ipynb
@@ -127,9 +127,16 @@
     "puf_2015 = pd.read_csv(INPUTS / \"puf_2015.csv\")\n",
     "tc_puf_2015 = pd.read_csv(OUTPUTS / \"tc_puf_2015.csv\")\n",
     "\n",
-    "soi_from_puf_2015 = compare_soi_replication_to_soi(puf_to_soi(puf_2015, 2015), 2015)\n",
-    "soi_from_pe_puf_2015 = compare_soi_replication_to_soi(pe_to_soi(PUF_2015, 2015), 2015)\n",
-    "soi_from_tc_puf_2015 = compare_soi_replication_to_soi(tc_to_soi(tc_puf_2015, 2015), 2015)\n",
+    "soi_from_puf_2015 = compare_soi_replication_to_soi(\n",
+    "    puf_to_soi(puf_2015, 2015), 2015\n",
+    ")\n",
+    "soi_from_pe_puf_2015 = compare_soi_replication_to_soi(\n",
+    "    pe_to_soi(PUF_2015, 2015), 2015\n",
+    ")\n",
+    "soi_from_tc_puf_2015 = compare_soi_replication_to_soi(\n",
+    "    tc_to_soi(tc_puf_2015, 2015), 2015\n",
+    ")\n",
+    "\n",
     "\n",
     "def soi_statistic_passes_quality_test(df):\n",
     "    # Relative error lower than this => OK\n",
@@ -141,22 +148,37 @@
     "    # Absolute error lower than this for aggregates => OK\n",
     "    AGGREGATE_ABSOLUTE_ERROR_THRESHOLD = 1e9\n",
     "\n",
-    "    relative_error_ok = df[\"Absolute relative error\"] < RELATIVE_ERROR_THRESHOLD\n",
-    "    absolute_error_threshold = np.where(df.Count, COUNT_ABSOLUTE_ERROR_THRESHOLD, AGGREGATE_ABSOLUTE_ERROR_THRESHOLD)\n",
+    "    relative_error_ok = (\n",
+    "        df[\"Absolute relative error\"] < RELATIVE_ERROR_THRESHOLD\n",
+    "    )\n",
+    "    absolute_error_threshold = np.where(\n",
+    "        df.Count,\n",
+    "        COUNT_ABSOLUTE_ERROR_THRESHOLD,\n",
+    "        AGGREGATE_ABSOLUTE_ERROR_THRESHOLD,\n",
+    "    )\n",
     "    absolute_error_ok = df[\"Absolute error\"] < absolute_error_threshold\n",
     "\n",
     "    return relative_error_ok | absolute_error_ok\n",
     "\n",
+    "\n",
     "# 2021 datasets\n",
     "\n",
     "puf_2021 = pd.read_csv(OUTPUTS / \"puf_2021.csv\")\n",
     "tc_puf_2021 = pd.read_csv(OUTPUTS / \"tc_puf_2021.csv\")\n",
     "tmd_2021 = pd.read_csv(OUTPUTS / \"tmd_2021.csv\")\n",
     "\n",
-    "soi_from_puf_2021 = compare_soi_replication_to_soi(puf_to_soi(puf_2021, 2021), 2021)\n",
-    "soi_from_pe_puf_2021 = compare_soi_replication_to_soi(pe_to_soi(PUF_2021, 2021), 2021)\n",
-    "soi_from_tc_puf_2021 = compare_soi_replication_to_soi(tc_to_soi(tc_puf_2021, 2021), 2021)\n",
-    "soi_from_tmd_2021 = compare_soi_replication_to_soi(tc_to_soi(tmd_2021, 2021), 2021)\n",
+    "soi_from_puf_2021 = compare_soi_replication_to_soi(\n",
+    "    puf_to_soi(puf_2021, 2021), 2021\n",
+    ")\n",
+    "soi_from_pe_puf_2021 = compare_soi_replication_to_soi(\n",
+    "    pe_to_soi(PUF_2021, 2021), 2021\n",
+    ")\n",
+    "soi_from_tc_puf_2021 = compare_soi_replication_to_soi(\n",
+    "    tc_to_soi(tc_puf_2021, 2021), 2021\n",
+    ")\n",
+    "soi_from_tmd_2021 = compare_soi_replication_to_soi(\n",
+    "    tc_to_soi(tmd_2021, 2021), 2021\n",
+    ")\n",
     "\n",
     "dataset_soi_comparisons = [\n",
     "    soi_from_puf_2015,\n",
@@ -165,7 +187,7 @@
     "    soi_from_puf_2021,\n",
     "    soi_from_pe_puf_2021,\n",
     "    soi_from_tc_puf_2021,\n",
-    "    soi_from_tmd_2021\n",
+    "    soi_from_tmd_2021,\n",
     "]\n",
     "\n",
     "for dataset in dataset_soi_comparisons:\n",
@@ -178,15 +200,19 @@
     "    \"PUF (2021)\",\n",
     "    \"PE PUF (2021)\",\n",
     "    \"TC PUF (2021)\",\n",
-    "    \"TMD (2021)\"\n",
+    "    \"TMD (2021)\",\n",
     "]\n",
     "\n",
-    "comparison_df = pd.DataFrame({\n",
-    "    \"Dataset\": dataset_names,\n",
-    "    \"SOI match score\": [(df[\"OK\"].mean() * 100).round(1) for df in dataset_soi_comparisons]\n",
-    "})\n",
+    "comparison_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"Dataset\": dataset_names,\n",
+    "        \"SOI match score\": [\n",
+    "            (df[\"OK\"].mean() * 100).round(1) for df in dataset_soi_comparisons\n",
+    "        ],\n",
+    "    }\n",
+    ")\n",
     "\n",
-    "comparison_df\n"
+    "comparison_df"
    ]
   },
   {
@@ -675,8 +701,15 @@
    ],
    "source": [
     "score_by_dataset = pd.DataFrame(\n",
-    "    {dataset_name: (dataset.groupby(\"Variable\").OK.mean() * 100).round(1) for dataset_name, dataset in zip(dataset_names, dataset_soi_comparisons)}\n",
-    ").fillna(100) # Fillna because some variables aren't in the 2021 SOI releases.\n",
+    "    {\n",
+    "        dataset_name: (dataset.groupby(\"Variable\").OK.mean() * 100).round(1)\n",
+    "        for dataset_name, dataset in zip(\n",
+    "            dataset_names, dataset_soi_comparisons\n",
+    "        )\n",
+    "    }\n",
+    ").fillna(\n",
+    "    100\n",
+    ")  # Fill NaNs with 100 because some variables aren't in the 2021 SOI releases.\n",
     "score_by_dataset.sort_values(\"TMD (2021)\")"
    ]
   }