Skip to content

Commit

Permalink
removed the upload function; added jupyter notebook to replicate appe…
Browse files Browse the repository at this point in the history
…ndix figures
  • Loading branch information
Vedant Nanda committed May 14, 2019
1 parent c5eb2f7 commit 2d221f7
Show file tree
Hide file tree
Showing 12 changed files with 1,960 additions and 65 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ Code for preprocessing the [Student Performance Dataset](http://archive.ics.uci.


** To run Fairness Constraints you need the file ``trained_linregfc_StudentPerf.mat`` in the ``./effort_reward_fairness`` directory. For easy reproduction of results we have included this file; however, it is generated by the MATLAB code in the directory ``./Fairness_constraints`` and can be found in the directory ``./Fairness_constraints/Output`` after you run ``./Fairness_constraints/Social_welfare_constrained_ERM_regularized.m``.


Caution: This is a work in progress; some TODOs (particularly in group_explanations.py) need to be carefully looked at.
4 changes: 2 additions & 2 deletions effort_reward_fairness/act_exp_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ def plot_one_var_vs_other(res_dir, model, x_vals, y_vals_sens, y_vals_nosens, x_
plt.rcParams['axes.linewidth'] = 3
plt.rcParams['xtick.labelsize'] = 18
plt.rcParams['ytick.labelsize'] = 18
plt.rcParams['legend.fontsize'] = 18
plt.rcParams['legend.fontsize'] = 22
plt.rcParams['figure.titlesize'] = 28
plt.rcParams['lines.linewidth'] = 3.0

Expand Down Expand Up @@ -790,7 +790,7 @@ def plot_one_var_vs_other_together(res_dir, sens_dict, nosens_dict, x_label, y_l
plt.rcParams['axes.linewidth'] = 3
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16
plt.rcParams['legend.fontsize'] = 8
plt.rcParams['legend.fontsize'] = 10
plt.rcParams['figure.titlesize'] = 28
plt.rcParams['lines.linewidth'] = 3.0

Expand Down
88 changes: 44 additions & 44 deletions effort_reward_fairness/cost_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,37 +482,37 @@ def get_pre_proc_credit_default_cost_funcs(feature_info, data, sens_group=None,
STUDENT_PERF_DIRS = {
'school': (DIR_BOTH, EXP_LOWER),
'sex': (DIR_IMMUT, None),
# 'age': (DIR_IMMUT, None),
'age': (DIR_IMMUT, None),
'address': (DIR_BOTH, EXP_HIGHER),
# 'famsize': (DIR_IMMUT, None),
# 'Pstatus': (DIR_IMMUT, None),
'famsize': (DIR_IMMUT, None),
'Pstatus': (DIR_IMMUT, None),
'Medu': (DIR_UP, EXP_HIGHER),
'Fedu': (DIR_UP, EXP_HIGHER),
# 'Mjob_at_home': (DIR_IMMUT, None),
# 'Mjob_health': (DIR_IMMUT, None),
# 'Mjob_other': (DIR_IMMUT, None),
# 'Mjob_services': (DIR_IMMUT, None),
# 'Mjob_teacher': (DIR_IMMUT, None),
# 'Fjob_at_home': (DIR_IMMUT, None),
# 'Fjob_health': (DIR_IMMUT, None),
# 'Fjob_other': (DIR_IMMUT, None),
# 'Fjob_services': (DIR_IMMUT, None),
# 'Fjob_teacher': (DIR_IMMUT, None),
# 'reason_course': (DIR_IMMUT, None),
# 'reason_home': (DIR_IMMUT, None),
# 'reason_other': (DIR_IMMUT, None),
# 'reason_reputation': (DIR_IMMUT, None),
# 'guardian_father': (DIR_IMMUT, None),
# 'guardian_mother': (DIR_IMMUT, None),
# 'guardian_other': (DIR_IMMUT, None),
'Mjob_at_home': (DIR_IMMUT, None),
'Mjob_health': (DIR_IMMUT, None),
'Mjob_other': (DIR_IMMUT, None),
'Mjob_services': (DIR_IMMUT, None),
'Mjob_teacher': (DIR_IMMUT, None),
'Fjob_at_home': (DIR_IMMUT, None),
'Fjob_health': (DIR_IMMUT, None),
'Fjob_other': (DIR_IMMUT, None),
'Fjob_services': (DIR_IMMUT, None),
'Fjob_teacher': (DIR_IMMUT, None),
'reason_course': (DIR_IMMUT, None),
'reason_home': (DIR_IMMUT, None),
'reason_other': (DIR_IMMUT, None),
'reason_reputation': (DIR_IMMUT, None),
'guardian_father': (DIR_IMMUT, None),
'guardian_mother': (DIR_IMMUT, None),
'guardian_other': (DIR_IMMUT, None),
'traveltime': (DIR_BOTH, EXP_HIGHER),
'studytime': (DIR_BOTH, EXP_HIGHER),
# 'failures': (DIR_IMMUT, None), # depends on past, can't change it
'failures': (DIR_IMMUT, None), # depends on past, can't change it
'schoolsup': (DIR_BOTH, EXP_HIGHER),
'famsup': (DIR_BOTH, EXP_HIGHER),
'paid': (DIR_BOTH, EXP_HIGHER),
'activities': (DIR_BOTH, EXP_HIGHER),
# 'nursery': (DIR_IMMUT, None), # depends on history, can't change it
'nursery': (DIR_IMMUT, None), # depends on history, can't change it
'higher': (DIR_BOTH, EXP_HIGHER),
'internet': (DIR_BOTH, EXP_HIGHER),
'romantic': (DIR_BOTH, EXP_HIGHER),
Expand All @@ -530,37 +530,37 @@ def get_pre_proc_credit_default_cost_funcs(feature_info, data, sens_group=None,
STUDENT_PERF_DIRS_REV = {
'school': (DIR_BOTH, EXP_LOWER),
'sex': (DIR_IMMUT, None),
# 'age': (DIR_IMMUT, None),
'age': (DIR_IMMUT, None),
'address': (DIR_BOTH, EXP_LOWER),
# 'famsize': (DIR_IMMUT, None),
# 'Pstatus': (DIR_IMMUT, None),
'famsize': (DIR_IMMUT, None),
'Pstatus': (DIR_IMMUT, None),
'Medu': (DIR_UP, EXP_HIGHER),
'Fedu': (DIR_UP, EXP_HIGHER),
# 'Mjob_at_home': (DIR_IMMUT, None),
# 'Mjob_health': (DIR_IMMUT, None),
# 'Mjob_other': (DIR_IMMUT, None),
# 'Mjob_services': (DIR_IMMUT, None),
# 'Mjob_teacher': (DIR_IMMUT, None),
# 'Fjob_at_home': (DIR_IMMUT, None),
# 'Fjob_health': (DIR_IMMUT, None),
# 'Fjob_other': (DIR_IMMUT, None),
# 'Fjob_services': (DIR_IMMUT, None),
# 'Fjob_teacher': (DIR_IMMUT, None),
# 'reason_course': (DIR_IMMUT, None),
# 'reason_home': (DIR_IMMUT, None),
# 'reason_other': (DIR_IMMUT, None),
# 'reason_reputation': (DIR_IMMUT, None),
# 'guardian_father': (DIR_IMMUT, None),
# 'guardian_mother': (DIR_IMMUT, None),
# 'guardian_other': (DIR_IMMUT, None),
'Mjob_at_home': (DIR_IMMUT, None),
'Mjob_health': (DIR_IMMUT, None),
'Mjob_other': (DIR_IMMUT, None),
'Mjob_services': (DIR_IMMUT, None),
'Mjob_teacher': (DIR_IMMUT, None),
'Fjob_at_home': (DIR_IMMUT, None),
'Fjob_health': (DIR_IMMUT, None),
'Fjob_other': (DIR_IMMUT, None),
'Fjob_services': (DIR_IMMUT, None),
'Fjob_teacher': (DIR_IMMUT, None),
'reason_course': (DIR_IMMUT, None),
'reason_home': (DIR_IMMUT, None),
'reason_other': (DIR_IMMUT, None),
'reason_reputation': (DIR_IMMUT, None),
'guardian_father': (DIR_IMMUT, None),
'guardian_mother': (DIR_IMMUT, None),
'guardian_other': (DIR_IMMUT, None),
'traveltime': (DIR_BOTH, EXP_LOWER),
'studytime': (DIR_BOTH, EXP_LOWER),
# 'failures': (DIR_IMMUT, None), # depends on past, can't change it
'failures': (DIR_IMMUT, None), # depends on past, can't change it
'schoolsup': (DIR_BOTH, EXP_LOWER),
'famsup': (DIR_BOTH, EXP_LOWER),
'paid': (DIR_BOTH, EXP_LOWER),
'activities': (DIR_BOTH, EXP_LOWER),
# 'nursery': (DIR_IMMUT, None), # depends on history, can't change it
'nursery': (DIR_IMMUT, None), # depends on history, can't change it
'higher': (DIR_BOTH, EXP_LOWER),
'internet': (DIR_BOTH, EXP_LOWER),
'romantic': (DIR_BOTH, EXP_LOWER),
Expand Down
18 changes: 12 additions & 6 deletions effort_reward_fairness/effort_reward_function_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def run(self, test_or_train):
y_train_pred = model.predict(self.role_model_users).astype(bool if exp.dataset_info[self.dataset]['prediction_task'] == exp.CLASSIFICATION else float)

print ("Model: {}, MAE: {}, MSE: {}".format(model, mean_absolute_error(self.users_gt, y_test_pred), mean_squared_error(self.users_gt, y_test_pred)))
continue
# continue

self.role_model_users_pred = y_train_pred # This should not change
self.users_preds = y_test_pred if test_or_train == 'test' else y_train_pred # change this based on which group's explanations are needed (test or train)
Expand Down Expand Up @@ -341,7 +341,7 @@ def run(self, test_or_train):
assert role_model_utility == role_model_reward - role_model_effort
sens_rewards.append(role_model_reward)
print ("[Sens] Model: {}, Effort threshold: {}, Effort value: {}, Max Reward: {}".format(model, delta,role_model_effort, role_model_reward))
break
# break
# role_model, role_model_effort, role_model_reward, role_model_utility = \
# self.sampling_based_explanations(
# user,
Expand Down Expand Up @@ -409,7 +409,7 @@ def run(self, test_or_train):
assert role_model_utility == role_model_reward - role_model_effort
nosens_rewards.append(role_model_reward)
print ("[Nosens] Model: {}, Effort threshold: {}, Effort value: {}, Max Reward: {}".format(model, delta, role_model_effort, role_model_reward))
break
# break
# role_model, role_model_effort, role_model_reward, role_model_utility = \
# self.sampling_based_explanations(
# user,
Expand Down Expand Up @@ -556,15 +556,21 @@ def run(self, test_or_train):
aeio.plot_one_var_vs_other(self.res_dir, model, self.effort_deltas, sens_reward_with_effort, nosens_reward_with_effort, 'Effort', 'Average Reward'),
aeio.plot_one_var_vs_other(self.res_dir, model, self.reward_deltas, sens_effort_with_reward, nosens_effort_with_reward, 'Reward', 'Average Effort')
))
# model_to_utility_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_utility_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_sens_fc.pkl')
# model_to_utility_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_utility_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_nosens_fc.pkl')
# model_to_reward_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_reward_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_reward_sens_fc.pkl')
# model_to_reward_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_reward_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_reward_nosens_fc.pkl')
# model_to_effort_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_effort_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_effort_nosens_fc.pkl')
# model_to_effort_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_effort_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_effort_sens_fc.pkl')
with open(self.res_file_path, 'a') as res_file:
res_file.write("== All Models in One ==\n\n".format(str(model)))
res_file.write("== All Models in One ==\n\n")
res_file.write("{}\n\n{}\n\n{}\n\n".format(
aeio.plot_one_var_vs_other_together(self.res_dir, model_to_utility_sens, model_to_utility_nosens, 'Effort', 'Average Utility'),
aeio.plot_one_var_vs_other_together(self.res_dir, model_to_reward_sens, model_to_reward_nosens, 'Effort', 'Average Reward'),
aeio.plot_one_var_vs_other_together(self.res_dir, model_to_effort_sens, model_to_effort_nosens, 'Reward', 'Average Effort')
))
out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
# out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
# out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
out.create_dir(self.res_dir + '/plots_pickled_data')
joblib.dump(model_to_utility_sens, self.res_dir + '/plots_pickled_data/model_to_utility_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_sens_fc.pkl')
joblib.dump(model_to_utility_nosens, self.res_dir + '/plots_pickled_data/model_to_utility_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_nosens_fc.pkl')
Expand Down
23 changes: 17 additions & 6 deletions effort_reward_fairness/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

CLASSIFICATION = 'classification'
REGRESSION = 'regression'
# Only set this to True if you have trained a fairness constraints model for the particular dataset you are evaluating and have put the weights file (.mat) in the root folder
FAIRNESS_CONSTRAINTS = False

dataset_info = {
Expand All @@ -32,6 +33,10 @@
'variable_constraints': cf.PRE_PROC_CREDIT_DEFAULT_DIRS, 'prediction_task': CLASSIFICATION},
'StudentPerf': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'),
'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
'StudentPerfMut': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'),
'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
'StudentPerfMutPlusImmut': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'),
'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
'CrimesCommunities': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'MajorityRaceWhite'),
'variable_constraints': cf.CRIMES_DIRS, 'variable_constraints_rev': cf.CRIMES_DIRS_REV, 'prediction_task': REGRESSION}
}
Expand All @@ -43,6 +48,8 @@ def base_exp(return_vars=False, test_or_train=None):
#dataset = "CreditDefault"
#dataset = "PreprocCreditCardDefault"
dataset = "StudentPerf"
# dataset = "StudentPerfMut"
# dataset = "StudentPerfMutPlusImmut"
# dataset = "CrimesCommunities"
if FAIRNESS_CONSTRAINTS:
models = [lm.LinRegFC(11.9, dataset), lm.LinRegFC(12.9, dataset), lm.LinRegFC(13.9, dataset), lm.LinRegFC(14.9, dataset)]
Expand All @@ -51,6 +58,10 @@ def base_exp(return_vars=False, test_or_train=None):
models = [lm.LogReg(), lm.DT(), lm.SVM(), lm.NN()]
elif dataset_info[dataset]['prediction_task'] == REGRESSION:
models = [lm.LinReg(), lm.NNReg(), lm.DTReg()]
if dataset == "StudentPerfMut":
models = [lm.RidgeReg(0.1)]
elif dataset == "StudentPerfMutPlusImmut":
models = [lm.RidgeReg(200)]
if return_vars:
return dataset, models
evaluate_models(dataset, models, test_or_train)
Expand Down Expand Up @@ -358,12 +369,12 @@ def evaluate_models(dataset, models, test_or_train, subsample_size=None, num_inv

if len(disparity_table_heading) <= 1:
heading, formats, values = eval_formula.get_disparity_measures(users_gt, users_preds, users_sens_group,
np.mean(user_utility_sens), np.mean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=True)
np.nanmean(user_utility_sens), np.nanmean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=True)
disparity_table_heading += heading
disparity_table_formats += formats
else:
values = eval_formula.get_disparity_measures(users_gt, users_preds, users_sens_group,
np.mean(user_utility_sens), np.mean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=False)
np.nanmean(user_utility_sens), np.nanmean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=False)
disparity_table_values.append([str(clf)] + values)

with open(group_res_file_path, 'a') as group_res_file:
Expand Down Expand Up @@ -393,10 +404,10 @@ def evaluate_models(dataset, models, test_or_train, subsample_size=None, num_inv
# group_res_file.write("== IGNORE STUFF BELOW THIS FOR NOW ==\n\n")
# group_res_file.write("=== Group Explanations ===\n\n")

out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.png')
out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
# out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.png')
# out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
# out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
# out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')

def get_dataset_statistics_temp(y, sens_group, prediction_task):
if prediction_task == REGRESSION:
Expand Down
6 changes: 4 additions & 2 deletions effort_reward_fairness/group_explanations.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ def get_possible_role_models(self, user, user_ground_truth, user_predicted_val,
## Keeping track of focal points/anchors
indices_of_users = np.array(list(zip(*enumerate(users)))[0])
# Forcing role models to have Higher ground truth label than that of user
mask_higher_gt = np.where(users_predicted_val >= user_predicted_val)[0]
mask_higher_gt = np.where(users_predicted_val >= user_predicted_val)[0] # TODO: In some cases > makes more sense here
users = users[mask_higher_gt]
users_predicted_val = users_predicted_val[mask_higher_gt]
users_ground_truth = users_ground_truth[mask_higher_gt]
indices_of_users = indices_of_users[mask_higher_gt]
assert np.all(users_predicted_val >= user_predicted_val)
assert np.all(users_predicted_val >= user_predicted_val) # TODO: In some cases > makes more sense here

# #### TODO: dirty, make it nicer
if self.dataset != 'CrimesCommunities':
Expand Down Expand Up @@ -197,6 +197,7 @@ def sampling_based_explanations(self, user, users, users_ground_truth, users_pre
right_users, corresponding_gt, corresponding_preds, indices_of_users = self.get_possible_role_models(user.flatten(), user_ground_truth, user_predicted_val,
users, users_ground_truth, users_predicted_val)
if right_users.shape[0] == 0:
# TODO: This should return np.nan
return user[0], 0, 0, user_ground_truth == False and user_predicted_val == True, user_ground_truth, np.nan
efforts = pairwise_distances(user, right_users, metric=self.effort_measure, n_jobs=-1).flatten()
utilities = cf.compute_utility(corresponding_gt, corresponding_preds, np.array([user_ground_truth] * len(right_users)),
Expand All @@ -208,6 +209,7 @@ def sampling_based_explanations(self, user, users, users_ground_truth, users_pre
role_model_utility = utilities[idx]
role_model_effort = efforts[idx]
anchor = indices_of_users[idx]
print (role_model_utility)
if return_only_user:
# used for effort_reward_function_plots.py
return (role_model, role_model_gt, role_model_pred)
Expand Down
4 changes: 2 additions & 2 deletions effort_reward_fairness/learning_env/dec_rule_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def load_data(self, seed=4194, feature_engineering=False):
data['attr_info'] = f_info
elif self.ds_name == 'PreprocCreditCardDefault':
data = credit_card_default_data.load_preproc_credit_card_default_data(normalize_cont_features=False)
elif self.ds_name == 'StudentPerf':
data = load_student_perf_data.load_student_perf_data()
elif self.ds_name == 'StudentPerf' or self.ds_name == "StudentPerfMutPlusImmut" or self.ds_name == "StudentPerfMut":
data = load_student_perf_data.load_student_perf_data(self.ds_name)
elif self.ds_name == 'CrimesCommunities':
data = load_crimes_and_communities.load_crimes_and_communities()
else:
Expand Down
Loading

0 comments on commit 2d221f7

Please sign in to comment.