diff --git a/examples/lost_fact_exploration.ipynb b/examples/lost_fact_exploration.ipynb
new file mode 100644
index 0000000..7a444b1
--- /dev/null
+++ b/examples/lost_fact_exploration.ipynb
@@ -0,0 +1,470 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "a6e96483-25c6-488c-a423-558a0f10cfe1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ferc_xbrl_extractor.cli import TAXONOMY_MAP\n",
+    "import pandas as pd\n",
+    "from ferc_xbrl_extractor.xbrl import get_fact_tables\n",
+    "from pathlib import Path\n",
+    "from stringcase import snakecase\n",
+    "from collections import Counter\n",
+    "import itertools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e6d2c82a-457f-4d39-b749-e1e5b9f13bdc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle(\"../lost_facts.pickle\")\n",
+    "\n",
+    "tmp_path=\"./\"\n",
+    "tables = get_fact_tables(\n",
+    "    taxonomy_path=TAXONOMY_MAP[1],\n",
+    "    form_number=1,\n",
+    "    db_path=\"path\",\n",
+    "    metadata_path=Path(tmp_path) / \"metadata.json\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2688defb-09c0-40cb-968c-7bd5c981ec4f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Counter({'C000537': 3990,\n",
+       "         'C000746': 3150,\n",
+       "         'C000532': 2574,\n",
+       "         'C007667': 2550,\n",
+       "         'C000530': 2460,\n",
+       "         'C000041': 2350,\n",
+       "         'C000536': 2160,\n",
+       "         'C001009': 2106,\n",
+       "         'C002196': 1908,\n",
+       "         'C001555': 1885,\n",
+       "         'C000620': 1692,\n",
+       "         'C004995': 1680,\n",
+       "         'C008999': 1664,\n",
+       "         'C000535': 1583,\n",
+       "         'C000533': 1545,\n",
+       "         'C001016': 1346,\n",
+       "         'C000538': 1304,\n",
+       "         'C003184': 1216,\n",
+       "         'C000135': 1135,\n",
+       "         'C001646': 1118,\n",
+       "         'C000136': 1098,\n",
+       "         'C000534': 1076,\n",
+       "         'C001789': 1064,\n",
+       "         'C000913': 1023,\n",
+       "         'C007565': 994,\n",
+       "         'C001436': 962,\n",
+       "         'C000744': 957,\n",
+       "         'C000317': 952,\n",
+       "         'C001132': 918,\n",
+       "         'C000379': 880,\n",
+       "         'C000825': 848,\n",
+       "         'C001218': 847,\n",
+       "         'C000851': 837,\n",
+       "         'C000318': 827,\n",
+       "         'C001257': 813,\n",
+       "         'C000312': 809,\n",
+       "         'C000905': 809,\n",
+       "         'C008998': 780,\n",
+       "         'C000316': 764,\n",
+       "         'C000522': 756,\n",
+       "         'C000314': 745,\n",
+       "         'C000315': 745,\n",
+       "         'C001465': 742,\n",
+       "         'C000313': 741,\n",
+       "         'C000685': 740,\n",
+       "         'C000292': 724,\n",
+       "         'C000319': 712,\n",
+       "         'C007679': 684,\n",
+       "         'C001306': 674,\n",
+       "         'C001554': 672,\n",
+       "         'C001188': 663,\n",
+       "         'C000852': 661,\n",
+       "         'C001017': 649,\n",
+       "         'C000291': 628,\n",
+       "         'C000524': 622,\n",
+       "         'C001111': 621,\n",
+       "         'C001025': 620,\n",
+       "         'C011285': 620,\n",
+       "         'C000196': 616,\n",
+       "         'C002101': 615,\n",
+       "         'C002525': 614,\n",
+       "         'C001030': 611,\n",
+       "         'C002446': 610,\n",
+       "         'C002012': 608,\n",
+       "         'C005475': 605,\n",
+       "         'C002045': 597,\n",
+       "         'C002089': 594,\n",
+       "         'C003138': 587,\n",
+       "         'C004936': 561,\n",
+       "         'C001464': 559,\n",
+       "         'C001330': 558,\n",
+       "         'C000862': 552,\n",
+       "         'C001673': 547,\n",
+       "         'C000199': 545,\n",
+       "         'C001552': 540,\n",
+       "         'C000911': 533,\n",
+       "         'C001466': 533,\n",
+       "         'C001702': 530,\n",
+       "         'C000527': 528,\n",
+       "         'C000465': 520,\n",
+       "         'C000134': 514,\n",
+       "         'C000824': 513,\n",
+       "         'C002827': 507,\n",
+       "         'C000906': 506,\n",
+       "         'C000290': 499,\n",
+       "         'C000523': 494,\n",
+       "         'C000191': 471,\n",
+       "         'C001553': 469,\n",
+       "         'C001143': 467,\n",
+       "         'C001655': 459,\n",
+       "         'C001316': 449,\n",
+       "         'C000525': 446,\n",
+       "         'C001559': 434,\n",
+       "         'C001130': 432,\n",
+       "         'C003849': 428,\n",
+       "         'C001194': 428,\n",
+       "         'C001252': 422,\n",
+       "         'C000823': 419,\n",
+       "         'C000822': 417,\n",
+       "         'C000500': 417,\n",
+       "         'C001181': 413,\n",
+       "         'C000415': 397,\n",
+       "         'C000241': 391,\n",
+       "         'C003483': 390,\n",
+       "         'C001182': 387,\n",
+       "         'C000388': 381,\n",
+       "         'C000615': 379,\n",
+       "         'C001230': 373,\n",
+       "         'C000772': 369,\n",
+       "         'C004872': 368,\n",
+       "         'C000447': 366,\n",
+       "         'C001775': 357,\n",
+       "         'C001745': 357,\n",
+       "         'C000507': 354,\n",
+       "         'C002308': 350,\n",
+       "         'C001298': 349,\n",
+       "         'C001184': 349,\n",
+       "         'C000171': 348,\n",
+       "         'C000201': 348,\n",
+       "         'C001610': 342,\n",
+       "         'C000618': 341,\n",
+       "         'C000617': 340,\n",
+       "         'C000553': 332,\n",
+       "         'C000691': 332,\n",
+       "         'C003194': 324,\n",
+       "         'C004044': 324,\n",
+       "         'C000555': 323,\n",
+       "         'C000542': 322,\n",
+       "         'C001607': 319,\n",
+       "         'C001609': 319,\n",
+       "         'C000289': 315,\n",
+       "         'C000692': 315,\n",
+       "         'C000602': 311,\n",
+       "         'C000120': 310,\n",
+       "         'C001288': 306,\n",
+       "         'C002498': 303,\n",
+       "         'C001486': 303,\n",
+       "         'C001421': 299,\n",
+       "         'C011163': 298,\n",
+       "         'C000526': 296,\n",
+       "         'C001221': 294,\n",
+       "         'C003646': 288,\n",
+       "         'C009068': 287,\n",
+       "         'C001187': 282,\n",
+       "         'C000116': 281,\n",
+       "         'C007581': 278,\n",
+       "         'C001315': 278,\n",
+       "         'C001322': 277,\n",
+       "         'C005443': 269,\n",
+       "         'C001696': 264,\n",
+       "         'C001308': 264,\n",
+       "         'C001309': 263,\n",
+       "         'C001305': 263,\n",
+       "         'C001222': 260,\n",
+       "         'C003836': 258,\n",
+       "         'C010474': 254,\n",
+       "         'C000045': 244,\n",
+       "         'C001153': 243,\n",
+       "         'C001346': 243,\n",
+       "         'C007582': 242,\n",
+       "         'C002115': 242,\n",
+       "         'C005067': 240,\n",
+       "         'C011302': 240,\n",
+       "         'C001656': 237,\n",
+       "         'C005059': 236,\n",
+       "         'C000030': 235,\n",
+       "         'C000509': 235,\n",
+       "         'C011150': 226,\n",
+       "         'C001454': 220,\n",
+       "         'C000622': 220,\n",
+       "         'C011301': 217,\n",
+       "         'C001183': 201,\n",
+       "         'C001654': 201,\n",
+       "         'C001307': 199,\n",
+       "         'C007584': 193,\n",
+       "         'C005519': 191,\n",
+       "         'C003713': 188,\n",
+       "         'C002116': 185,\n",
+       "         'C010464': 181,\n",
+       "         'C000616': 181,\n",
+       "         'C002335': 179,\n",
+       "         'C000501': 178,\n",
+       "         'C001344': 174,\n",
+       "         'C011423': 172,\n",
+       "         'C001731': 171,\n",
+       "         'C010432': 170,\n",
+       "         'C001446': 168,\n",
+       "         'C000502': 166,\n",
+       "         'C001675': 166,\n",
+       "         'C005423': 161,\n",
+       "         'C002083': 160,\n",
+       "         'C004881': 160,\n",
+       "         'C011100': 159,\n",
+       "         'C007624': 155,\n",
+       "         'C011304': 153,\n",
+       "         'C005424': 152,\n",
+       "         'C003435': 139,\n",
+       "         'C001245': 139,\n",
+       "         'C000200': 138,\n",
+       "         'C010845': 132,\n",
+       "         'C000945': 132,\n",
+       "         'C001444': 129,\n",
+       "         'C005444': 125,\n",
+       "         'C010523': 124,\n",
+       "         'C010388': 114,\n",
+       "         'C002336': 111,\n",
+       "         'C002073': 107,\n",
+       "         'C000367': 102,\n",
+       "         'C010151': 96,\n",
+       "         'C003988': 89,\n",
+       "         'C000771': 85,\n",
+       "         'C008947': 81,\n",
+       "         'C002854': 78,\n",
+       "         'C010473': 77,\n",
+       "         'C000029': 76,\n",
+       "         'C000038': 65,\n",
+       "         'C010446': 43})"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Counter(df.entity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d4d559ca-8af1-4ecc-b6ca-860961e97000",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Percent lost facts missing utl type axis: 0.8633052612211787\n"
+     ]
+    }
+   ],
+   "source": [
+    "idx=df.table_candidates.apply(lambda x: any([\"utility_type_axis\" in tables[name].axes for name in x]))\n",
+    "missing_utl_type = df[idx]\n",
+    "\n",
+    "print(f\"Percent lost facts missing utl type axis: {len(missing_utl_type)/len(df)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "a9b8e452-2e6c-41a0-a35a-c4b246e004a1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "119573\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(len(df))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "271aef59-eb11-44a0-8653-baafa99a4888",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_30485/4286940243.py:10: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  missing_utl_type[\"refined_candidates\"] = refined_candidates\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Look for facts that are only missing the utility type axis and limit to those tables\n",
+    "refined_candidates = []\n",
+    "for idx, row in missing_utl_type.iterrows():\n",
+    "    dims = set([snakecase(name) for name, _ in row.dimensions])\n",
+    "    refined_candidates.append([\n",
+    "        name for name in row.table_candidates\n",
+    "        if (set(tables[name].axes) - dims) == set([\"utility_type_axis\"])\n",
+    "        and set(tables[name].axes) > dims\n",
+    "    ])\n",
+    "\n",
+    "missing_utl_type[\"refined_candidates\"] = refined_candidates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9ca6bca8-45b2-40f3-9cc5-c9bab94328e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8029739155160446"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(missing_utl_type.refined_candidates.apply(lambda x: len(x) == 1)) / len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "0e9de8ed-ddd8-46f7-9285-91528ca5fb01",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Counter({'taxes_accrued_prepaid_and_charged_during_year_262_duration': 28809,\n",
+       "         'taxes_accrued_prepaid_and_charged_during_year_262_instant': 22362,\n",
+       "         'statement_of_income_114_duration': 21819,\n",
+       "         'taxes_accrued_prepaid_and_charged_during_year_totals_262_duration': 10054,\n",
+       "         'taxes_accrued_prepaid_and_charged_during_year_totals_262_instant': 9520,\n",
+       "         'summary_of_utility_plant_and_accumulated_provisions_for_depreciation_amortization_and_depletion_200_instant': 2227,\n",
+       "         'accumulated_deferred_income_taxes_other_property_account_282_classified_by_business_activities_274_duration': 968,\n",
+       "         'accumulated_deferred_income_taxes_accelerated_amortization_property_account_281_classified_by_utility_types_272_duration': 155,\n",
+       "         'accumulated_deferred_investment_tax_credits_account_255_total_266_duration': 100})"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Counter(itertools.chain.from_iterable(missing_utl_type.refined_candidates))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "cce7a69a-bd67-4e85-8b95-6dcc526f356f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plant_in_srvce = missing_utl_type[missing_utl_type.refined_candidates.apply(lambda x: 'summary_of_utility_plant_and_accumulated_provisions_for_depreciation_amortization_and_depletion_200_instant' in x)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a69e56fc-84d0-4fb9-aa20-05236d28d732",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['utility_plant_in_service_classified',\n",
+       "       'utility_plant_in_service_property_under_capital_leases',\n",
+       "       'utility_plant_in_service_completed_construction_not_classified',\n",
+       "       'utility_plant_in_service_classified_and_unclassified',\n",
+       "       'utility_plant_held_for_future_use',\n",
+       "       'utility_plant_acquisition_adjustment',\n",
+       "       'depreciation_utility_plant_in_service',\n",
+       "       'amortization_of_other_utility_plant_utility_plant_in_service',\n",
+       "       'depreciation_amortization_and_depletion_utility_plant_in_service',\n",
+       "       'depreciation_utility_plant_held_for_future_use',\n",
+       "       'depreciation_and_amortization_utility_plant_held_for_future_use',\n",
+       "       'amortization_of_plant_acquisition_adjustment',\n",
+       "       'utility_plant_leased_to_others',\n",
+       "       'depreciation_utility_plant_leased_to_others',\n",
+       "       'depreciation_amortization_and_depletion_utility_plant_leased_to_others',\n",
+       "       'utility_plant_in_service_plant_purchased_or_sold',\n",
+       "       'utility_plant_in_service_experimental_plant_unclassified',\n",
+       "       'amortization_and_depletion_of_producing_natural_gas_land_and_land_rightsutility_plant_in_service',\n",
+       "       'amortization_of_underground_storage_land_and_land_rightsutility_plant_in_service',\n",
+       "       'amortization_and_depletion_utility_plant_leased_to_others',\n",
+       "       'amortization_utility_plant_held_for_future_use',\n",
+       "       'abandonment_of_leases'], dtype=object)"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "plant_in_srvce.name.unique()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/ferc_xbrl_extractor/instance.py b/src/ferc_xbrl_extractor/instance.py
index 9acf31e..9a67564 100644
--- a/src/ferc_xbrl_extractor/instance.py
+++ b/src/ferc_xbrl_extractor/instance.py
@@ -193,7 +193,7 @@ class Fact(BaseModel):
 
     name: str
     c_id: str
-    f_id: str
+    f_id: str | None = None
     value: str | None = None
 
     @classmethod
@@ -204,7 +204,7 @@ def from_xml(cls, elem: Element) -> "Fact":
         return cls(
             name=stringcase.snakecase(elem.tag.replace(prefix, "")),  # Strip prefix
             c_id=elem.attrib["contextRef"],
-            f_id=elem.attrib["id"],
+            f_id=elem.attrib.get("id"),
             value=elem.text,
         )
 
diff --git a/tests/integration/lost_facts_test.py b/tests/integration/lost_facts_test.py
index 0e5640c..bbdfe38 100644
--- a/tests/integration/lost_facts_test.py
+++ b/tests/integration/lost_facts_test.py
@@ -3,10 +3,11 @@
 from collections import Counter
 from pathlib import Path
 
+import pandas as pd
 from sqlalchemy import create_engine
 
 from ferc_xbrl_extractor.cli import TAXONOMY_MAP, get_instances
-from ferc_xbrl_extractor.xbrl import extract, process_instance
+from ferc_xbrl_extractor.xbrl import extract, get_fact_tables, process_instance
 
 
 def test_lost_fact_finder(tmp_path):
@@ -18,36 +19,88 @@ def test_lost_fact_finder(tmp_path):
     )
 
     used_ids = extract(
-        instances=instances[:1],
+        instances=instances,
         engine=create_engine("sqlite:///:memory:"),
         taxonomy=TAXONOMY_MAP[1],
         form_number=1,
+        batch_size=50,
         metadata_path=Path(tmp_path) / "metadata.json",
     )
-
-    instance = instances[0].parse()
-    instant_facts = itertools.chain.from_iterable(
-        itertools.chain.from_iterable(
-            context.values() for context in instance.instant_facts.values()
-        )
-    )
-    duration_facts = itertools.chain.from_iterable(
-        itertools.chain.from_iterable(
-            context.values() for context in instance.duration_facts.values()
-        )
+    tables = get_fact_tables(
+        taxonomy_path=TAXONOMY_MAP[1],
+        form_number=1,
+        db_path="path",
+        metadata_path=Path(tmp_path) / "metadata.json",
     )
-    all_facts = list(itertools.chain(instant_facts, duration_facts))
+
+    lost_facts = []
 
     def clean_fact(fact, contexts):
-        return {"name": fact.name, "context": contexts[fact.c_id], "value": fact.value}
+        """Flatten a fact and the context it references into a plain dict."""
+        return {
+            "name": fact.name,
+            "context": contexts[fact.c_id],
+            "value": fact.value,
+            "instant": contexts[fact.c_id].period.instant,
+        }
 
-    lost_facts = [
-        clean_fact(f, instance.contexts)
-        for f in all_facts
-        if f.f_id not in used_ids[instances[0].name]
-    ]
+    num_all_facts = 0
+    for instance_builder in instances:
+        if len(used_ids[instance_builder.name]) < 10:
+            print(f"Skipping: {instance_builder.name}")
+            continue
+
+        instance = instance_builder.parse()
+        instant_facts = itertools.chain.from_iterable(
+            itertools.chain.from_iterable(
+                context.values() for context in instance.instant_facts.values()
+            )
+        )
+        duration_facts = itertools.chain.from_iterable(
+            itertools.chain.from_iterable(
+                context.values() for context in instance.duration_facts.values()
+            )
+        )
+        all_facts = list(itertools.chain(instant_facts, duration_facts))
+        num_all_facts += len(all_facts)
+
+        # Record the filing per fact here: `instance_builder` read after the
+        # loop would tag every row with the last filing only.
+        lost_facts += [
+            {**clean_fact(f, instance.contexts), "filing": instance_builder.name}
+            for f in all_facts
+            if f.f_id not in used_ids[instance_builder.name]
+        ]
 
     lostest_names = Counter(f["name"] for f in lost_facts)
+
+    rows = [
+        {
+            "name": fact["name"],
+            "filing": fact["filing"],
+            "entity": fact["context"].entity.identifier,
+            "start_date": fact["context"].period.start_date,
+            "end_date": fact["context"].period.end_date,
+            "dimensions": [
+                (dim.name, dim.value) for dim in fact["context"].entity.dimensions
+            ],
+            "value": fact["value"],
+            "table_candidates": [
+                key
+                for key, table in tables.items()
+                if (fact["name"] in table.columns)
+                and (fact["instant"] == table.instant)
+            ],
+            "period": "instant" if fact["instant"] else "duration",
+        }
+        for fact in lost_facts
+    ]
+    df = pd.DataFrame(rows)
+    print(f"Of {num_all_facts} facts, {len(df) / num_all_facts:.2%} were lost")
+    # Fact names are snake_cased at parse time, so match "order_number".
+    df.drop(df[df["name"] == "order_number"].index, inplace=True)
+    df.to_pickle("lost_facts.pickle")
+    # NOTE(review): breakpoint() drops into a debugger; remove before running unattended.
     breakpoint()
 
-    assert len(lost_facts) / len(all_facts) < 0.1
+    assert len(lost_facts) / num_all_facts < 0.1