Skip to content

Commit

Permalink
Merge pull request #199 from PSLmodels/delta-loop
Browse files Browse the repository at this point in the history
Add regularization loop to create_area_weights.py
  • Loading branch information
martinholmer authored Sep 15, 2024
2 parents 02e5b86 + 0938fae commit e32487a
Showing 1 changed file with 74 additions and 31 deletions.
105 changes: 74 additions & 31 deletions tmd/areas/create_area_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,21 @@
GFFILE_PATH = STORAGE_FOLDER / "output" / "tmd_growfactors.csv"
POPFILE_PATH = STORAGE_FOLDER / "input" / "cbo_population_forecast.yaml"

REGULARIZATION_DELTA = 1.0e-9
# target parameters:
TARGET_RATIO_TOLERANCE = 0.0005 # what is considered hitting the target
DUMP_ALL_TARGET_DEVIATIONS = False # set to True only for diagnostic work

# regularization parameters:
DELTA_INIT_VALUE = 1.0e-9
DELTA_MAX_LOOPS = 5
DELTA_LOOP_DECREMENT = DELTA_INIT_VALUE / (DELTA_MAX_LOOPS - 1)

# optimization parameters:
OPTIMIZE_FTOL = 1e-8
OPTIMIZE_GTOL = 1e-8
OPTIMIZE_MAXITER = 5000
OPTIMIZE_IPRINT = 0 # 20 is a good diagnostic value; set to 0 for production
OPTIMIZE_RESULTS = False # set to True to see complete optimization results
DUMP_ALL_TARGET_DEVIATIONS = False # set to True only for diagnostic work


def all_taxcalc_variables():
Expand Down Expand Up @@ -130,7 +138,18 @@ def prepared_data(area: str, vardf: pd.DataFrame):
)


def target_rmse(wght, target_matrix, target_array):
def target_misses(wght, target_matrix, target_array):
"""
Return number of target misses for the specified weight array.
"""
actual = np.dot(wght, target_matrix)
tratio = actual / target_array
lob = 1.0 - TARGET_RATIO_TOLERANCE
hib = 1.0 + TARGET_RATIO_TOLERANCE
return ((tratio < lob) | (tratio >= hib)).sum()


def target_rmse(wght, target_matrix, target_array, delta=None):
"""
Return RMSE of the target deviations given specified arguments.
"""
Expand All @@ -150,8 +169,8 @@ def target_rmse(wght, target_matrix, target_array):
0.8,
0.9,
0.99,
0.9995,
1.0005,
1.0 - TARGET_RATIO_TOLERANCE,
1.0 + TARGET_RATIO_TOLERANCE,
1.01,
1.1,
1.2,
Expand All @@ -164,7 +183,8 @@ def target_rmse(wght, target_matrix, target_array):
]
tot = ratio.size
print(f"DISTRIBUTION OF TARGET ACT/EXP RATIOS (n={tot}):")
print(f" with REGULARIZATION_DELTA= {REGULARIZATION_DELTA:e}")
if delta is not None:
print(f" with REGULARIZATION_DELTA= {delta:e}")
header = (
"low bin ratio high bin ratio"
" bin # cum # bin % cum %"
Expand Down Expand Up @@ -203,7 +223,7 @@ def objective_function(x, *args):
JIT_FVAL_AND_GRAD = jax.jit(jax.value_and_grad(objective_function))


def weight_ratio_distribution(ratio):
def weight_ratio_distribution(ratio, delta):
"""
Print distribution of post-optimized to pre-optimized weight ratios.
"""
Expand Down Expand Up @@ -233,7 +253,7 @@ def weight_ratio_distribution(ratio):
]
tot = ratio.size
print(f"DISTRIBUTION OF AREA/US WEIGHT RATIO (n={tot}):")
print(f" with REGULARIZATION_DELTA= {REGULARIZATION_DELTA:e}")
print(f" with REGULARIZATION_DELTA= {delta:e}")
header = (
"low bin ratio high bin ratio"
" bin # cum # bin % cum %"
Expand Down Expand Up @@ -308,39 +328,62 @@ def create_area_weights_file(area: str, write_file: bool = True):
A = BCOO.from_scipy_sparse(csr_matrix(A_dense)) # A is JAX sparse matrix
b = target_array
print(
f"OPTIMIZE_WEIGHT_RATIOS: target_matrix.shape= {target_matrix.shape}\n"
f"REGULARIZATION_DELTA= {REGULARIZATION_DELTA:e}"
)
time0 = time.time()
res = minimize(
fun=JIT_FVAL_AND_GRAD, # objective function and its gradient
x0=np.ones(num_weights), # initial guess for weight ratios
jac=True, # use gradient from JIT_FVAL_AND_GRAD function
args=(A, b, REGULARIZATION_DELTA), # fixed arguments of objective func
method="L-BFGS-B", # use L-BFGS-B algorithm
bounds=Bounds(0.0, np.inf), # consider only non-negative weight ratios
options={
"maxiter": OPTIMIZE_MAXITER,
"ftol": OPTIMIZE_FTOL,
"gtol": OPTIMIZE_GTOL,
"iprint": OPTIMIZE_IPRINT,
"disp": OPTIMIZE_IPRINT != 0,
},
"OPTIMIZE WEIGHT RATIOS IN A REGULARIZATION LOOP\n"
f" where REGULARIZATION DELTA starts at {DELTA_INIT_VALUE:e}\n"
f" and where target_matrix.shape= {target_matrix.shape}"
)
time1 = time.time()
# ... reduce value of regularization delta if not all targets are hit
loop = 1
delta = DELTA_INIT_VALUE
wght0 = np.ones(num_weights)
while loop <= DELTA_MAX_LOOPS:
time0 = time.time()
res = minimize(
fun=JIT_FVAL_AND_GRAD, # objective function and its gradient
x0=wght0, # initial guess for weight ratios
jac=True, # use gradient from JIT_FVAL_AND_GRAD function
args=(A, b, delta), # fixed arguments of objective function
method="L-BFGS-B", # use L-BFGS-B algorithm
bounds=Bounds(0.0, np.inf), # consider only non-negative weights
options={
"maxiter": OPTIMIZE_MAXITER,
"ftol": OPTIMIZE_FTOL,
"gtol": OPTIMIZE_GTOL,
"iprint": OPTIMIZE_IPRINT,
"disp": OPTIMIZE_IPRINT != 0,
},
)
time1 = time.time()
wght_area = res.x * wght_us
misses = target_misses(wght_area, target_matrix, target_array)
print(
f" ::loop,delta,misses,exectime(secs): {loop}"
f" {delta:e} {misses} {(time1 - time0):.1f}"
)
if misses == 0 or res.success is False:
break # out of regularization delta loop
# prepare for next regularization delta loop
loop += 1
delta -= DELTA_LOOP_DECREMENT
if delta < 1e-20:
delta = 0.0
####wght0 = res.x
# ... show regularization/optimization results
res_summary = (
f">>> optimization execution time: {(time1-time0):.1f} secs"
f">>> final delta loop exectime= {(time1-time0):.1f} secs"
f" iterations={res.nit} success={res.success}\n"
f">>> message: {res.message}\n"
f">>> L-BFGS-B optimized objective function value: {res.fun:.9e}"
)
print(res_summary)
if OPTIMIZE_RESULTS:
print(">>> full optimization results:\n", res)
print(">>> full final delta loop optimization results:\n", res)
wght_area = res.x * wght_us
rmse = target_rmse(wght_area, target_matrix, target_array)
misses = target_misses(wght_area, target_matrix, target_array)
print(f"AREA-OPTIMIZED_TARGET_MISSES= {misses}")
rmse = target_rmse(wght_area, target_matrix, target_array, delta)
print(f"AREA-OPTIMIZED_TARGET_RMSE= {rmse:.9e}")
weight_ratio_distribution(res.x)
weight_ratio_distribution(res.x, delta)

if not write_file:
return rmse
Expand Down

0 comments on commit e32487a

Please sign in to comment.