diff --git a/setup.py b/setup.py
index a7330d3f..933d0f84 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,6 @@
         "tensorboard",
         "jupyter-book",
         "furo",
-        "survey_enhance",
+        "scikit-learn",
     ],
 )
diff --git a/tax_microdata_benchmarking/create_taxcalc_input_variables.py b/tax_microdata_benchmarking/create_taxcalc_input_variables.py
index e35a0432..95ef00fb 100644
--- a/tax_microdata_benchmarking/create_taxcalc_input_variables.py
+++ b/tax_microdata_benchmarking/create_taxcalc_input_variables.py
@@ -2,9 +2,22 @@
 Construct tmd.csv, a Tax-Calculator-style input variable file for 2021.
 """
 
+import taxcalc as tc
+from tax_microdata_benchmarking.datasets.tmd import create_tmd_2021
+from tax_microdata_benchmarking.utils.qbi import (
+    add_pt_w2_wages,
+)
+from tax_microdata_benchmarking.imputation_assumptions import (
+    IMPUTATION_RF_RNG_SEED,
+    IMPUTATION_BETA_RNG_SEED,
+    W2_WAGES_SCALE,
+)
+from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+
+
 TAXYEAR = 2021
+INITIAL_W2_WAGES_SCALE = W2_WAGES_SCALE
 DO_REWEIGHTING = True
-INITIAL_W2_WAGES_SCALE = 0.19980
 INCLUDE_ORIGINAL_WEIGHTS = True
 
 
@@ -12,29 +25,26 @@ def create_variable_file(write_file=True):
     """
     Create Tax-Calculator-style input variable file for TAXYEAR.
     """
-    import taxcalc as tc
-    from tax_microdata_benchmarking.datasets.tmd import create_tmd_2021
-    from tax_microdata_benchmarking.utils.qbi import (
-        add_pt_w2_wages,
-    )
-    from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-
     # construct dataframe containing input and output variables
-    print(f"Creating {TAXYEAR} PUF-ECPS file assuming:")
-    print(f"  DO_REWEIGHTING = {DO_REWEIGHTING}")
+    print(f"Creating {TAXYEAR} PUF+CPS file assuming:")
+    print(f"  IMPUTATION_RF_RNG_SEED = {IMPUTATION_RF_RNG_SEED}")
+    print(f"  IMPUTATION_BETA_RNG_SEED = {IMPUTATION_BETA_RNG_SEED}")
     print(f"  INITIAL_W2_WAGES_SCALE = {INITIAL_W2_WAGES_SCALE:.5f}")
+    print(f"  DO_REWEIGHTING = {DO_REWEIGHTING}")
     print(f"  INCLUDE_ORIGINAL_WEIGHTS = {INCLUDE_ORIGINAL_WEIGHTS}")
     vdf = create_tmd_2021()
     vdf.FLPDYR = TAXYEAR
     (vdf, pt_w2_wages_scale) = add_pt_w2_wages(vdf)
     abs_diff = abs(pt_w2_wages_scale - INITIAL_W2_WAGES_SCALE)
-    if abs_diff > 1e-6:
-        msg = (
-            f"\nFINAL vs INITIAL scale diff = {abs_diff:.6f}"
-            f"\n  INITIAL pt_w2_wages_scale = {INITIAL_W2_WAGES_SCALE:.6f}"
-            f"\n    FINAL pt_w2_wages_scale = {pt_w2_wages_scale:.6f}"
-        )
-        raise ValueError(msg)
+    msg = (
+        f"  FINAL vs INITIAL scale diff = {abs_diff:.6f}\n"
+        f"  INITIAL pt_w2_wages_scale = {INITIAL_W2_WAGES_SCALE:.6f}\n"
+        f"    FINAL pt_w2_wages_scale = {pt_w2_wages_scale:.6f}"
+    )
+    print(msg)
+    if abs_diff > 1e-3:
+        emsg = "INITIAL and FINAL scale values are substantially inconsistent"
+        raise ValueError(emsg)
     # streamline dataframe so that it includes only input variables
     rec = tc.Records(
         data=vdf,
@@ -59,6 +69,7 @@ def create_variable_file(write_file=True):
     # write streamlined variables dataframe to CSV-formatted file
     if write_file:
         tmd_csv_fname = STORAGE_FOLDER / "output" / "tmd.csv.gz"
+        print(f"Writing PUF+CPS file... [{tmd_csv_fname}]")
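A compressed sketch of the relaxed consistency check introduced above, with hypothetical values (the 0.19981 stand-in for the re-solved scale is invented; the 1e-3 tolerance and messages are from this diff):

```python
# Hypothetical values illustrating the new report-always, fail-on-drift check.
INITIAL_W2_WAGES_SCALE = 0.19979  # W2_WAGES_SCALE from imputation_assumptions
pt_w2_wages_scale = 0.19981  # pretend value re-solved by add_pt_w2_wages
abs_diff = abs(pt_w2_wages_scale - INITIAL_W2_WAGES_SCALE)
print(f"  FINAL vs INITIAL scale diff = {abs_diff:.6f}")  # always printed now
if abs_diff > 1e-3:  # raise only on substantial inconsistency
    raise ValueError(
        "INITIAL and FINAL scale values are substantially inconsistent"
    )
```

The old check raised on any deviation above 1e-6; the new one always reports the comparison and fails only when the centrally assumed scale has drifted materially from the re-solved value.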
        vdf.to_csv(tmd_csv_fname, index=False, float_format="%.2f")
diff --git a/tax_microdata_benchmarking/datasets/cps.py b/tax_microdata_benchmarking/datasets/cps.py
index d7b99f21..1d8e4ca6 100644
--- a/tax_microdata_benchmarking/datasets/cps.py
+++ b/tax_microdata_benchmarking/datasets/cps.py
@@ -1,6 +1,5 @@
 from io import BytesIO
 from zipfile import ZipFile
-from policyengine_core.data import Dataset
 import pandas as pd
 import requests
 from tqdm import tqdm
@@ -714,13 +713,10 @@ def add_household_variables(cps: h5py.File, household: DataFrame) -> None:
 
 def add_previous_year_income(self, cps: h5py.File) -> None:
     if self.previous_year_raw_cps is None:
-        print(
-            "No previous year data available for this dataset, skipping previous year income imputation."
-        )
+        msg = "Skipping CPS previous year income imputation given lack of data"
+        print(f"{msg}...")
         return
 
-    from survey_enhance.impute import Imputation
-
     cps_current_year_data = self.raw_cps(require=True).load()
     cps_previous_year_data = self.previous_year_raw_cps(require=True).load()
     cps_previous_year = cps_previous_year_data.person.set_index(
diff --git a/tax_microdata_benchmarking/datasets/puf.py b/tax_microdata_benchmarking/datasets/puf.py
index cbf889bb..076de76b 100644
--- a/tax_microdata_benchmarking/datasets/puf.py
+++ b/tax_microdata_benchmarking/datasets/puf.py
@@ -1,14 +1,17 @@
 import pandas as pd
 import numpy as np
 import yaml
-from survey_enhance import Imputation
 from microdf import MicroDataFrame
 from tax_microdata_benchmarking.storage import STORAGE_FOLDER
 from tax_microdata_benchmarking.utils.pension_contributions import (
     impute_pension_contributions_to_puf,
 )
-
-DEFAULT_W2_WAGE_RATE = 0.19824  # Solved for JCT Tax Expenditures in 2021
+from tax_microdata_benchmarking.utils.imputation import Imputation
+from tax_microdata_benchmarking.imputation_assumptions import (
+    IMPUTATION_RF_RNG_SEED,
+    IMPUTATION_BETA_RNG_SEED,
+    W2_WAGES_SCALE,
+)
 
 
 def impute_missing_demographics(
@@ -37,6 +40,9 @@ def impute_missing_demographics(
     ]
 
     demographics_from_puf = Imputation()
+    demographics_from_puf.rf_rng_seed = IMPUTATION_RF_RNG_SEED
+    demographics_from_puf.beta_rng_seed = IMPUTATION_BETA_RNG_SEED
+
     demographics_from_puf.train(
         X=puf_with_demographics[NON_DEMOGRAPHIC_VARIABLES],
         Y=puf_with_demographics[DEMOGRAPHIC_VARIABLES],
@@ -46,7 +52,7 @@
         ~puf.RECID.isin(puf_with_demographics.RECID)
     ].reset_index()
     predicted_demographics = demographics_from_puf.predict(
-        puf_without_demographics
+        X=puf_without_demographics,
     )
     puf_with_imputed_demographics = pd.concat(
         [puf_without_demographics, predicted_demographics], axis=1
@@ -174,8 +180,8 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
     # Ignore f2441 (AMT form attached)
     # Ignore cmbtp (estimate of AMT income not in AGI)
     # Ignore k1bx14s and k1bx14p (partner self-employment income included in partnership and S-corp income)
-    qbi = puf.E00900 + puf.E26270 + puf.E02100 + puf.E27200
-    puf["w2_wages_from_qualified_business"] = qbi * DEFAULT_W2_WAGE_RATE
+    qbi = np.maximum(0, puf.E00900 + puf.E26270 + puf.E02100 + puf.E27200)
+    puf["w2_wages_from_qualified_business"] = qbi * W2_WAGES_SCALE
 
     puf["filing_status"] = puf.MARS.map(
         {
@@ -264,15 +270,14 @@ def generate(self, puf: pd.DataFrame, demographics: pd.DataFrame):
         from tax_microdata_benchmarking.datasets.uprate_puf import uprate_puf
 
         if self.time_period > 2015:
-            print("Uprating PUF...")
             puf = uprate_puf(puf, 2015, self.time_period)
 
-        print("Loading and pre-processing PUF...")
+        print("Pre-processing PUF...")
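The PUF demographics imputation above now runs through the vendored `Imputation` class (added in `utils/imputation.py` below) with both seeds pinned. A self-contained sketch of that chained, multi-output pattern on synthetic data — the `income`/`age`/`dependents` frame is invented; the class, attributes, and seed names are from this diff (note the class uses the legacy `interpolation=` argument of `np.percentile`, so it requires NumPy < 2.0):

```python
import numpy as np
import pandas as pd
from tax_microdata_benchmarking.utils.imputation import Imputation
from tax_microdata_benchmarking.imputation_assumptions import (
    IMPUTATION_RF_RNG_SEED,
    IMPUTATION_BETA_RNG_SEED,
)

rng = np.random.default_rng(0)
X = pd.DataFrame({"income": rng.uniform(0, 1e5, 300)})  # invented data
Y = pd.DataFrame({"age": rng.integers(18, 80, 300).astype(float)})
Y["dependents"] = np.round(rng.uniform(0, 3, 300))

imp = Imputation()
imp.rf_rng_seed = IMPUTATION_RF_RNG_SEED  # pins the random forests
imp.beta_rng_seed = IMPUTATION_BETA_RNG_SEED  # pins the Beta quantile draws
imp.train(X=X, Y=Y)  # one model per Y column; later models see earlier Ys
imputed = imp.predict(X=pd.DataFrame({"income": [40e3]}))
print(imputed)  # columns: age, dependents; stable across runs
```

Because both seeds come from `imputation_assumptions.py`, repeated builds of the flat file impute identical values.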
print("Pre-processing PUF...") original_recid = puf.RECID.values.copy() puf = preprocess_puf(puf) - print("Imputing missing demographics...") + print("Imputing missing PUF demographics...") puf = impute_missing_demographics(puf, demographics) - print("Imputing pension contributions...") + print("Imputing PUF pension contributions...") puf["pre_tax_contributions"] = impute_pension_contributions_to_puf( puf[["employment_income"]] ) diff --git a/tax_microdata_benchmarking/datasets/taxcalc_dataset.py b/tax_microdata_benchmarking/datasets/taxcalc_dataset.py index db02cdb2..8aabe1df 100644 --- a/tax_microdata_benchmarking/datasets/taxcalc_dataset.py +++ b/tax_microdata_benchmarking/datasets/taxcalc_dataset.py @@ -14,6 +14,8 @@ def create_tc_dataset(pe_dataset: Type, year: int = 2015) -> pd.DataFrame: pe_sim = Microsimulation(dataset=pe_dataset) df = pd.DataFrame() + print(f"Creating tc dataset for year {year}...") + is_non_dep = ~pe_sim.calculate("is_tax_unit_dependent").values tax_unit = pe_sim.populations["tax_unit"] diff --git a/tax_microdata_benchmarking/datasets/tmd.py b/tax_microdata_benchmarking/datasets/tmd.py index f419a226..9aad3a99 100644 --- a/tax_microdata_benchmarking/datasets/tmd.py +++ b/tax_microdata_benchmarking/datasets/tmd.py @@ -13,16 +13,18 @@ def create_tmd_2021(): - if not CPS_2021().exists: - # Don't recreate if already exists. - create_cps_2021() - if not PUF_2021().exists: - # Don't recreate if already exists. - create_pe_puf_2021() + # always create CPS_2021 and PUF_2021 + # (because imputation assumptions may have changed) + create_cps_2021() + create_pe_puf_2021() + tc_puf_21 = create_tc_dataset(PUF_2021) tc_cps_21 = create_tc_dataset(CPS_2021) - # Add nonfiler flag to tc_cps_21 with 2022 filing rules (2021 had large changes) + print("Combining PUF and CPS nonfilers...") + + # Add nonfiler flag to tc_cps_21 with 2022 filing rules + # (2021 had large changes) from policyengine_us import Microsimulation sim = Microsimulation(dataset=CPS_2021) @@ -31,8 +33,6 @@ def create_tmd_2021(): combined = pd.concat([tc_puf_21, tc_cps_21], ignore_index=True) - print("Combined PUF and CPS nonfilers.") - # Add Tax-Calculator outputs print("Adding Tax-Calculator outputs...") combined = add_taxcalc_outputs(combined, 2021) @@ -40,8 +40,6 @@ def create_tmd_2021(): print("Reweighting...") combined = reweight(combined, 2021, weight_deviation_penalty=0) - print("Completed.") - return combined diff --git a/tax_microdata_benchmarking/datasets/uprate_puf.py b/tax_microdata_benchmarking/datasets/uprate_puf.py index f824a53e..b1519a35 100644 --- a/tax_microdata_benchmarking/datasets/uprate_puf.py +++ b/tax_microdata_benchmarking/datasets/uprate_puf.py @@ -134,13 +134,13 @@ def get_growth(variable, from_year, to_year): def uprate_puf(puf, from_year, to_year): + print(f"Uprating PUF from {from_year} to {to_year}...") puf = puf.copy() for variable in SOI_TO_PUF_STRAIGHT_RENAMES: growth = get_growth(variable, from_year, to_year) puf[SOI_TO_PUF_STRAIGHT_RENAMES[variable]] *= growth # Positive and negative split variables - for variable in SOI_TO_PUF_POS_ONLY_RENAMES: growth = get_growth(variable, from_year, to_year) puf_variable = SOI_TO_PUF_POS_ONLY_RENAMES[variable] @@ -151,20 +151,18 @@ def uprate_puf(puf, from_year, to_year): puf_variable = SOI_TO_PUF_NEG_ONLY_RENAMES[variable] puf[puf_variable][puf[puf_variable] < 0] *= growth - # Remaining variables, uprate purely by AGI growth (for now, because I'm not sure how to handle the deductions, credits and incomes separately) - + # Remaining 
     for variable in REMAINING_VARIABLES:
         growth = get_growth("adjusted_gross_income", from_year, to_year)
         puf[variable] *= growth
 
     # Uprate the weights
-
     returns_start = get_soi_aggregate("count", from_year, True)
     returns_end = get_soi_aggregate("count", to_year, True)
     puf.S006 *= returns_end / returns_start
 
-    print(f"Uprated PUF from {from_year} to {to_year}")
-
     return puf
diff --git a/tax_microdata_benchmarking/imputation_assumptions.py b/tax_microdata_benchmarking/imputation_assumptions.py
new file mode 100644
index 00000000..def44e66
--- /dev/null
+++ b/tax_microdata_benchmarking/imputation_assumptions.py
@@ -0,0 +1,9 @@
+"""
+Central location for data imputation assumptions.
+"""
+
+IMPUTATION_RF_RNG_SEED = 1928374  # random number seed used by RandomForest
+
+IMPUTATION_BETA_RNG_SEED = 37465  # random number seed used for Beta variates
+
+W2_WAGES_SCALE = 0.19979  # parameter used to impute pass-through W-2 wages
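Why two seeds: `rf_rng_seed` pins the forest's bootstrap sampling at train time, while `beta_rng_seed` pins the Beta(a, 1) quantile draws used at predict time, with a = mean_quantile / (1 - mean_quantile) per `utils/imputation.py` below. A quick determinism check under that assumption (the size-8 draw is illustrative):

```python
import numpy as np

mean_quantile = 0.5
a = mean_quantile / (1 - mean_quantile)  # a = 1 -> uniform quantile draws
draws1 = np.random.default_rng(seed=37465).beta(a, 1, size=8)
draws2 = np.random.default_rng(seed=37465).beta(a, 1, size=8)
assert np.allclose(draws1, draws2)  # same seed, same imputed quantiles
```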
diff --git a/tax_microdata_benchmarking/utils/imputation.py b/tax_microdata_benchmarking/utils/imputation.py
new file mode 100644
index 00000000..332e6fcc
--- /dev/null
+++ b/tax_microdata_benchmarking/utils/imputation.py
@@ -0,0 +1,375 @@
+from pathlib import Path
+from typing import List, Dict
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+from tqdm import tqdm
+
+
+def to_array(values) -> np.ndarray:
+    if isinstance(values, (pd.Series, pd.DataFrame)):
+        return values.values
+    return values
+
+
+def get_category_mapping(values: pd.Series) -> Dict[str, int]:
+    return {category: i for i, category in enumerate(values.unique())}
+
+
+class Imputation:
+    """
+    An `Imputation` represents a learned function
+    f(`input_variables`) -> `output_variables`.
+    """
+
+    models: List["ManyToOneImputation"]
+    """Each column of the output variables is predicted by a separate model,
+    stored in this list."""
+    X_columns: List[str]
+    """The names of the input variables."""
+    Y_columns: List[str]
+    """The names of the output variables."""
+    X_category_mappings: List[Dict[str, int]] = None
+    """The mapping from category names to integers for each input variable."""
+    rf_rng_seed: int = None
+    """The random number generator seed used by RandomForestRegressor."""
+    beta_rng_seed: int = None
+    """Random number generator seed used to generate Beta variates."""
+
+    def encode_categories(self, X: pd.DataFrame) -> pd.DataFrame:
+        if self.X_category_mappings is None:
+            self.X_category_mappings = {
+                i: (
+                    get_category_mapping(X[column])
+                    if X[column].dtype == "object"
+                    else None
+                )
+                for i, column in enumerate(X.columns)
+            }
+        X = X.copy()
+        for i, column in enumerate(X.columns):
+            if self.X_category_mappings.get(i) is not None:
+                X[column] = X[column].map(self.X_category_mappings[i])
+                # If we got NaNs, raise an exception with the offending values.
+                if X[column].isna().any():
+                    emsg = (
+                        f"Column {column} has NaNs after encoding categories. "
+                        "Unknown values: "
+                        f"{X[column][X[column].isnull()].unique()}"
+                    )
+                    raise ValueError(emsg)
+        return X
+
+    def train(
+        self,
+        X: pd.DataFrame,
+        Y: pd.DataFrame,
+        num_trees: int = 100,
+        verbose: bool = False,
+        sample_weight: pd.Series = None,
+    ):
+        """
+        Train a random forest model to predict the output variables
+        from the input variables.
+
+        Args:
+            X (pd.DataFrame): The dataset containing the input variables.
+            Y (pd.DataFrame): The dataset containing the output variables.
+            num_trees (int): The number of trees to use in the random forest.
+            verbose (bool): Whether to print progress.
+        """
+
+        self.X_columns = X.columns
+        self.Y_columns = Y.columns
+
+        X = self.encode_categories(X)
+
+        self.models = []
+        # We train a separate model for each output variable.
+        # For example, if X = [income, age] and Y = [height, weight],
+        # we train two models:
+        # 1. Predict height from income and age.
+        # 2. Predict weight from income, age and (predicted) height.
+
+        if verbose:
+            iterator = tqdm(range(len(Y.columns)), desc="Training models")
+        else:
+            iterator = range(len(Y.columns))
+        for i in iterator:
+            Y_columns = Y.columns[:i]
+            if i == 0:
+                X_ = to_array(X)
+            else:
+                X_ = to_array(pd.concat([X, Y[Y_columns]], axis=1))
+            y_ = to_array(Y[Y.columns[i]])
+            model = ManyToOneImputation()
+            model.rf_rng_seed = self.rf_rng_seed
+            model.beta_rng_seed = self.beta_rng_seed
+            model.encode_categories = self.encode_categories
+            model.train(
+                X_, y_, num_trees=num_trees, sample_weight=sample_weight
+            )
+            self.models.append(model)
+
+    def predict(
+        self,
+        X: pd.DataFrame,
+        mean_quantile: float = 0.5,
+        verbose: bool = False,
+    ) -> pd.DataFrame:
+        """
+        Predict the output variables for the input dataset.
+
+        Args:
+            X (pd.DataFrame): The dataset to predict on.
+            mean_quantile (float): The mean quantile of the Beta draws
+                used in prediction.
+
+        Returns:
+            pd.DataFrame: The predicted dataset.
+        """
+
+        if isinstance(X, list):
+            X = pd.DataFrame(X, columns=self.X_columns)
+        X = pd.DataFrame(X, columns=self.X_columns)
+        X = to_array(self.encode_categories(X))
+        Y = np.zeros((X.shape[0], len(self.models)))
+        if verbose:
+            iterator = tqdm(enumerate(self.models), desc="Predicting")
+        else:
+            iterator = enumerate(self.models)
+        for i, model in iterator:
+            if isinstance(mean_quantile, list):
+                quantile = mean_quantile[i]
+            else:
+                quantile = mean_quantile
+            X_ = np.concatenate([X, Y[:, :i]], axis=1)
+            model.encode_categories = self.encode_categories
+            Y[:, i] = model.predict(X_, quantile)
+        return pd.DataFrame(Y, columns=self.Y_columns)
+
+    def save(self, path: str):
+        """
+        Save the imputation model to disk.
+
+        Args:
+            path (str): The path to save the model to.
+        """
+
+        import pickle
+
+        path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "wb") as f:
+            # Store the models only in a dictionary.
+            data = dict(
+                models=self.models,
+                X_columns=self.X_columns,
+                X_category_mappings=self.X_category_mappings,
+                Y_columns=self.Y_columns,
+            )
+            pickle.dump(data, f)
+
+    @staticmethod
+    def load(path: str) -> "Imputation":
+        """
+        Load the imputation model from disk.
+
+        Args:
+            path (str): The path to load the model from.
+
+        Returns:
+            Imputation: The imputation model.
+        """
+
+        import pickle
+
+        imputation = Imputation()
+        with open(path, "rb") as f:
+            data = pickle.load(f)
+            imputation.models = data["models"]
+            imputation.X_columns = data["X_columns"]
+            imputation.X_category_mappings = data["X_category_mappings"]
+            imputation.Y_columns = data["Y_columns"]
+        for model in imputation.models:
+            model.encode_categories = imputation.encode_categories
+            model.X_category_mappings = imputation.X_category_mappings
+        return imputation
+
+    def solve_for_mean_quantiles(
+        self,
+        targets: list,
+        input_data: pd.DataFrame,
+        weights: pd.Series,
+        max_iterations: int = 10,
+    ):
+        mean_quantiles = []
+        input_data = input_data.copy()
+        for i, model in enumerate(self.models):
+            print(f"Imputing {self.Y_columns[i]}...")
+            mean_quantiles.append(
+                model.solve_for_mean_quantile(
+                    target=targets[i],
+                    input_df=input_data,
+                    weights=weights,
+                    verbose=True,
+                    max_iterations=max_iterations,
+                )
+            )
+            predicted_column = model.predict(input_data, mean_quantiles[-1])
+            input_data[self.Y_columns[i]] = predicted_column
+        return mean_quantiles
+
+
+class ManyToOneImputation:
+    """
+    An `Imputation` consists of a set of `ManyToOneImputation` models,
+    one for each output variable.
+    """
+
+    model: RandomForestRegressor
+    """The random forest model."""
+    is_integer_coded: bool = False
+    """Whether the output variable is integer coded."""
+    rf_rng_seed: int = None
+    """Random number generator seed used by RandomForestRegressor."""
+    beta_rng_seed: int = None
+    """Random number generator seed used to generate Beta variates."""
+
+    def train(
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        sample_weight: pd.Series = None,
+        num_trees: int = 100,
+    ):
+        """
+        Train a random forest model to predict the output variable from
+        the input variables.
+
+        Args:
+            X (pd.DataFrame): The dataset containing the input variables.
+            y (pd.Series): The dataset containing the output variable.
+            sample_weight (pd.Series): The sample weights.
+        """
+
+        X = to_array(X)
+        y = to_array(y)
+        self.model = RandomForestRegressor(
+            n_estimators=num_trees,
+            bootstrap=True,
+            max_samples=0.01,
+            random_state=self.rf_rng_seed,
+        )
+        try:
+            self.is_integer_coded = (
+                isinstance(y[0], str) or (y - y.round()).mean() < 1e-3
+            )
+        except Exception:
+            pass
+        self.model.fit(X, y, sample_weight=sample_weight)
+
+    def predict(
+        self,
+        X: pd.DataFrame,
+        mean_quantile: float = 0.5,
+    ) -> pd.DataFrame:
+        """
+        Predict the output variable for the input dataset.
+
+        Args:
+            X (pd.DataFrame): dataset to predict on.
+            mean_quantile (float): mean quantile under the Beta distribution.
+
+        Returns:
+            pd.Series: The predicted distribution of values for each input row.
+        """
+        if isinstance(X, pd.DataFrame) and any(
+            [X[column].dtype == "O" for column in X.columns]
+        ):
+            X = self.encode_categories(X)
+        X = to_array(X)
+        tree_predictions = [
+            tree.predict(X) for tree in self.model.estimators_
+        ]
+
+        # Get the percentiles of the predictions.
+        tree_predictions = np.array(tree_predictions).transpose()
+        if mean_quantile is None:
+            mean_quantile = 0.5
+        a = mean_quantile / (1 - mean_quantile)
+        random_generator = np.random.default_rng(seed=self.beta_rng_seed)
+        input_quantiles = random_generator.beta(
+            a, 1, size=tree_predictions.shape[0]
+        )
+        x = np.apply_along_axis(
+            lambda x: np.percentile(
+                x[1:], x[0], interpolation="nearest"
+            ),  # Privacy??
+            1,
+            np.concatenate(
+                [
+                    np.array(input_quantiles)[:, np.newaxis] * 100,
+                    tree_predictions,
+                ],
+                axis=1,
+            ),
+        )
+
+        if self.is_integer_coded:
+            x = np.round(x).astype(int)
+
+        return x
+
+    def solve_for_mean_quantile(
+        self,
+        target: float,
+        input_df: pd.DataFrame,
+        weights: np.ndarray,
+        max_iterations: int = 10,
+        verbose: bool = False,
+    ):
+        """
+        Solve for the mean quantile that produces the target value.
+
+        Args:
+            target (float): The target value.
+            input_df (pd.DataFrame): The input dataset.
+            weights (np.ndarray): The sample weights.
+            max_iterations (int, optional): The maximum number of iterations.
+                Defaults to 10.
+            verbose (bool, optional): Whether to print the loss at each
+                iteration. Defaults to False.
+
+        Returns:
+            float: The mean quantile.
+        """
+
+        def loss(mean_quantile):
+            pred_values = self.predict(input_df, mean_quantile)
+            pred_aggregate = (pred_values * weights).sum()
+            msg = (
+                f"PREDICTED: {pred_aggregate/1e9:.1f} "
+                f"(target: {target/1e9:.1f})"
+            )
+            print(msg)
+            return (pred_aggregate - target) ** 2, pred_aggregate
+
+        best_loss = float("inf")
+        min_quantile = 0
+        max_quantile = 1
+
+        # Binary search for the mean quantile.
+        for i in range(max_iterations):
+            mean_quantile = (min_quantile + max_quantile) / 2
+            loss_value, pred_agg = loss(mean_quantile)
+            if verbose:
+                msg = (
+                    f"Iteration {i}: {mean_quantile:.4f} "
+                    f"(loss: {loss_value:.4f})"
+                )
+                print(msg)
+            if loss_value < best_loss:
+                best_loss = loss_value
+            if pred_agg < target:
+                min_quantile = mean_quantile
+            else:
+                max_quantile = mean_quantile
+        return mean_quantile
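`solve_for_mean_quantile` above is a bisection on the mean quantile, relying on the weighted aggregate being monotone in it. A standalone toy with a linear stand-in for the forest's weighted aggregate (the 100.0 response and 62.5 target are invented):

```python
# Toy monotone aggregate standing in for (self.predict(...) * weights).sum().
def toy_aggregate(mean_quantile: float) -> float:
    return 100.0 * mean_quantile

target = 62.5
min_q, max_q = 0.0, 1.0
for _ in range(10):  # mirrors max_iterations=10
    mid = (min_q + max_q) / 2
    if toy_aggregate(mid) < target:
        min_q = mid
    else:
        max_q = mid
print(round(mid, 4))  # converges toward 0.625
```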
diff --git a/tax_microdata_benchmarking/utils/pension_contributions.py b/tax_microdata_benchmarking/utils/pension_contributions.py
index a081f784..94c0b9ff 100644
--- a/tax_microdata_benchmarking/utils/pension_contributions.py
+++ b/tax_microdata_benchmarking/utils/pension_contributions.py
@@ -1,6 +1,11 @@
-from survey_enhance import Imputation
+import numpy as np
 from policyengine_us import Microsimulation
 from tax_microdata_benchmarking.datasets.cps import CPS_2021
+from tax_microdata_benchmarking.utils.imputation import Imputation
+from tax_microdata_benchmarking.imputation_assumptions import (
+    IMPUTATION_RF_RNG_SEED,
+    IMPUTATION_BETA_RNG_SEED,
+)
 
 
 def impute_pension_contributions_to_puf(puf_df):
@@ -11,9 +16,14 @@ def impute_pension_contributions_to_puf(puf_df):
     )
 
     pension_contributions = Imputation()
+    pension_contributions.rf_rng_seed = IMPUTATION_RF_RNG_SEED
+    pension_contributions.beta_rng_seed = IMPUTATION_BETA_RNG_SEED
+
     pension_contributions.train(
         X=cps_df[["employment_income"]],
         Y=cps_df[["pre_tax_contributions"]],
         sample_weight=cps_df["household_weight"],
     )
-    return pension_contributions.predict(X=puf_df[["employment_income"]])
+    return pension_contributions.predict(
+        X=puf_df[["employment_income"]],
+    )
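The pension imputation above is the simplest use of the class: one input column, one output column, weighted training. A synthetic-data sketch (the frame, the 5% contribution rate, and the weights are all invented):

```python
import numpy as np
import pandas as pd
from tax_microdata_benchmarking.utils.imputation import Imputation

rng = np.random.default_rng(0)
cps = pd.DataFrame({"employment_income": rng.uniform(0, 2e5, 500)})
cps["pre_tax_contributions"] = 0.05 * cps.employment_income
weights = pd.Series(rng.uniform(50, 5_000, 500))  # invented household weights

imp = Imputation()
imp.train(
    X=cps[["employment_income"]],
    Y=cps[["pre_tax_contributions"]],
    sample_weight=weights,  # high-weight households count more in the fit
)
# Note: outputs may be rounded by the class's integer-coding heuristic.
print(imp.predict(X=pd.DataFrame({"employment_income": [30e3, 90e3]})))
```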
diff --git a/tax_microdata_benchmarking/utils/qbi.py b/tax_microdata_benchmarking/utils/qbi.py
index d6215916..b156acdd 100644
--- a/tax_microdata_benchmarking/utils/qbi.py
+++ b/tax_microdata_benchmarking/utils/qbi.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 import pandas as pd
-from scipy.optimize import minimize, bisect
+from scipy.optimize import bisect
 import taxcalc as tc
 
@@ -21,7 +21,7 @@ def add_pt_w2_wages(df, verbose: bool = True):
         pt_w2_wages_scale: rounded to five decimal digits
     """
     if verbose:
-        print("Finding scale to use in imputing pass-through W-2 wages")
+        print("Finding scale to use in imputing pass-through W-2 wages...")
     QBID_TOTAL = 205.8  # from IRS SOI P4801 tabulations of 2021 data (in $B)
     qbi = np.maximum(0, df.e00900 + df.e26270 + df.e02100 + df.e27200)
@@ -43,10 +43,10 @@ def deduction_deviation(scale):
         qbided = (sim.array("qbided") * df.s006).sum() / 1e9
         dev = qbided - QBID_TOTAL
         if verbose:
-            print(f"scale: {scale:8.6f}, dev: {dev:6.2f}, tot: {qbided:.2f}")
+            print(f"scale: {scale:8.6f}, dev: {dev:7.3f}, tot: {qbided:.3f}")
         return dev
 
-    scale = bisect(deduction_deviation, 0.1, 0.5, rtol=0.001)
+    scale = bisect(deduction_deviation, 0.1, 0.5, rtol=0.0001)
     rounded_scale = round(scale, 5)
     print(f"Final (rounded) scale: {rounded_scale}")
     df["PT_binc_w2_wages"] = qbi * rounded_scale
diff --git a/tax_microdata_benchmarking/utils/reweight.py b/tax_microdata_benchmarking/utils/reweight.py
index 762e93fb..0b249702 100644
--- a/tax_microdata_benchmarking/utils/reweight.py
+++ b/tax_microdata_benchmarking/utils/reweight.py
@@ -60,6 +60,7 @@ def reweight(
 
     if time_period not in targets.year.unique():
         raise ValueError(f"Year {time_period} not in targets.")
+    print(f"...reweighting for year {time_period}")
 
     def build_loss_matrix(df):
         loss_matrix = pd.DataFrame()
@@ -158,7 +159,7 @@ def build_loss_matrix(df):
                 i,
             )
 
-    print("Finished optimisation")
+    print("...reweighting finished")
     flat_file["s006"] = weights.detach().numpy()
     return flat_file
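For reference, the calibration qbi.py performs: `bisect` drives the simulated QBI deduction total to the SOI target, and tightening `rtol` from 1e-3 to 1e-4 buys roughly one extra significant digit in the solved scale. A toy with a hypothetical linear response in place of the Tax-Calculator run:

```python
from scipy.optimize import bisect

QBID_TOTAL = 205.8  # $B target, as in qbi.py

def deduction_deviation(scale):
    # Hypothetical linear response standing in for the Tax-Calculator run.
    return 1030.0 * scale - QBID_TOTAL

scale = bisect(deduction_deviation, 0.1, 0.5, rtol=0.0001)
print(round(scale, 5))  # ~0.19981 under this made-up response
```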