Skip to content

Commit

Permalink
Format
Browse files: browse the repository at this point in the history
  • Loading branch information
nikhilwoodruff committed Jun 19, 2024
1 parent 1f73c9e commit 482e4be
Showing 1 changed file with 51 additions and 18 deletions.
69 changes: 51 additions & 18 deletions docs/book/validation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,16 @@
"puf_2015 = pd.read_csv(INPUTS / \"puf_2015.csv\")\n",
"tc_puf_2015 = pd.read_csv(OUTPUTS / \"tc_puf_2015.csv\")\n",
"\n",
"soi_from_puf_2015 = compare_soi_replication_to_soi(puf_to_soi(puf_2015, 2015), 2015)\n",
"soi_from_pe_puf_2015 = compare_soi_replication_to_soi(pe_to_soi(PUF_2015, 2015), 2015)\n",
"soi_from_tc_puf_2015 = compare_soi_replication_to_soi(tc_to_soi(tc_puf_2015, 2015), 2015)\n",
"soi_from_puf_2015 = compare_soi_replication_to_soi(\n",
" puf_to_soi(puf_2015, 2015), 2015\n",
")\n",
"soi_from_pe_puf_2015 = compare_soi_replication_to_soi(\n",
" pe_to_soi(PUF_2015, 2015), 2015\n",
")\n",
"soi_from_tc_puf_2015 = compare_soi_replication_to_soi(\n",
" tc_to_soi(tc_puf_2015, 2015), 2015\n",
")\n",
"\n",
"\n",
"def soi_statistic_passes_quality_test(df):\n",
" # Relative error lower than this => OK\n",
Expand All @@ -141,22 +148,37 @@
" # Absolute error lower than this for aggregates => OK\n",
" AGGREGATE_ABSOLUTE_ERROR_THRESHOLD = 1e9\n",
"\n",
" relative_error_ok = df[\"Absolute relative error\"] < RELATIVE_ERROR_THRESHOLD\n",
" absolute_error_threshold = np.where(df.Count, COUNT_ABSOLUTE_ERROR_THRESHOLD, AGGREGATE_ABSOLUTE_ERROR_THRESHOLD)\n",
" relative_error_ok = (\n",
" df[\"Absolute relative error\"] < RELATIVE_ERROR_THRESHOLD\n",
" )\n",
" absolute_error_threshold = np.where(\n",
" df.Count,\n",
" COUNT_ABSOLUTE_ERROR_THRESHOLD,\n",
" AGGREGATE_ABSOLUTE_ERROR_THRESHOLD,\n",
" )\n",
" absolute_error_ok = df[\"Absolute error\"] < absolute_error_threshold\n",
"\n",
" return relative_error_ok | absolute_error_ok\n",
"\n",
"\n",
"# 2021 datasets\n",
"\n",
"puf_2021 = pd.read_csv(OUTPUTS / \"puf_2021.csv\")\n",
"tc_puf_2021 = pd.read_csv(OUTPUTS / \"tc_puf_2021.csv\")\n",
"tmd_2021 = pd.read_csv(OUTPUTS / \"tmd_2021.csv\")\n",
"\n",
"soi_from_puf_2021 = compare_soi_replication_to_soi(puf_to_soi(puf_2021, 2021), 2021)\n",
"soi_from_pe_puf_2021 = compare_soi_replication_to_soi(pe_to_soi(PUF_2021, 2021), 2021)\n",
"soi_from_tc_puf_2021 = compare_soi_replication_to_soi(tc_to_soi(tc_puf_2021, 2021), 2021)\n",
"soi_from_tmd_2021 = compare_soi_replication_to_soi(tc_to_soi(tmd_2021, 2021), 2021)\n",
"soi_from_puf_2021 = compare_soi_replication_to_soi(\n",
" puf_to_soi(puf_2021, 2021), 2021\n",
")\n",
"soi_from_pe_puf_2021 = compare_soi_replication_to_soi(\n",
" pe_to_soi(PUF_2021, 2021), 2021\n",
")\n",
"soi_from_tc_puf_2021 = compare_soi_replication_to_soi(\n",
" tc_to_soi(tc_puf_2021, 2021), 2021\n",
")\n",
"soi_from_tmd_2021 = compare_soi_replication_to_soi(\n",
" tc_to_soi(tmd_2021, 2021), 2021\n",
")\n",
"\n",
"dataset_soi_comparisons = [\n",
" soi_from_puf_2015,\n",
Expand All @@ -165,7 +187,7 @@
" soi_from_puf_2021,\n",
" soi_from_pe_puf_2021,\n",
" soi_from_tc_puf_2021,\n",
" soi_from_tmd_2021\n",
" soi_from_tmd_2021,\n",
"]\n",
"\n",
"for dataset in dataset_soi_comparisons:\n",
Expand All @@ -178,15 +200,19 @@
" \"PUF (2021)\",\n",
" \"PE PUF (2021)\",\n",
" \"TC PUF (2021)\",\n",
" \"TMD (2021)\"\n",
" \"TMD (2021)\",\n",
"]\n",
"\n",
"comparison_df = pd.DataFrame({\n",
" \"Dataset\": dataset_names,\n",
" \"SOI match score\": [(df[\"OK\"].mean() * 100).round(1) for df in dataset_soi_comparisons]\n",
"})\n",
"comparison_df = pd.DataFrame(\n",
" {\n",
" \"Dataset\": dataset_names,\n",
" \"SOI match score\": [\n",
" (df[\"OK\"].mean() * 100).round(1) for df in dataset_soi_comparisons\n",
" ],\n",
" }\n",
")\n",
"\n",
"comparison_df\n"
"comparison_df"
]
},
{
Expand Down Expand Up @@ -675,8 +701,15 @@
],
"source": [
"score_by_dataset = pd.DataFrame(\n",
" {dataset_name: (dataset.groupby(\"Variable\").OK.mean() * 100).round(1) for dataset_name, dataset in zip(dataset_names, dataset_soi_comparisons)}\n",
").fillna(100) # Fillna because some variables aren't in the 2021 SOI releases.\n",
" {\n",
" dataset_name: (dataset.groupby(\"Variable\").OK.mean() * 100).round(1)\n",
" for dataset_name, dataset in zip(\n",
" dataset_names, dataset_soi_comparisons\n",
" )\n",
" }\n",
").fillna(\n",
" 100\n",
") # Fillna because some variables aren't in the 2021 SOI releases.\n",
"score_by_dataset.sort_values(\"TMD (2021)\")"
]
}
Expand Down

0 comments on commit 482e4be

Please sign in to comment.