removed the upload function; added jupyter notebook to replicate appe…

…ndix figures
nvedant07 · May 14, 2019 · 2d221f7 · 2d221f7
1 parent c5eb2f7
commit 2d221f7
Show file tree

Hide file tree

Showing 12 changed files with 1,960 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -65,3 +65,6 @@ Code for preprocessing the [Student Performance Dataset](http://archive.ics.uci.
 
 
 ** To run Fairness Constraints you need the file ``trained_linregfc_StudentPerf.mat`` in the ``./effort_reward_fairness`` directory. For easy reproduction of results we have included this file; it is generated by the MATLAB code in the ``./Fairness_constraints`` directory and can be found in ``./Fairness_constraints/Output`` after you run ``./Fairness_constraints/Social_welfare_constrained_ERM_regularized.m``.
+
+
+Caution: This is a work in progress; some TODOs (particularly in group_explanations.py) need to be carefully looked at.
diff --git a/effort_reward_fairness/act_exp_io.py b/effort_reward_fairness/act_exp_io.py
@@ -750,7 +750,7 @@ def plot_one_var_vs_other(res_dir, model, x_vals, y_vals_sens, y_vals_nosens, x_
     plt.rcParams['axes.linewidth'] = 3
     plt.rcParams['xtick.labelsize'] = 18
     plt.rcParams['ytick.labelsize'] = 18
-    plt.rcParams['legend.fontsize'] = 18
+    plt.rcParams['legend.fontsize'] = 22
     plt.rcParams['figure.titlesize'] = 28
     plt.rcParams['lines.linewidth'] = 3.0
 
@@ -790,7 +790,7 @@ def plot_one_var_vs_other_together(res_dir, sens_dict, nosens_dict, x_label, y_l
     plt.rcParams['axes.linewidth'] = 3
     plt.rcParams['xtick.labelsize'] = 16
     plt.rcParams['ytick.labelsize'] = 16
-    plt.rcParams['legend.fontsize'] = 8
+    plt.rcParams['legend.fontsize'] = 10
     plt.rcParams['figure.titlesize'] = 28
     plt.rcParams['lines.linewidth'] = 3.0
 

diff --git a/effort_reward_fairness/cost_funcs.py b/effort_reward_fairness/cost_funcs.py
@@ -482,37 +482,37 @@ def get_pre_proc_credit_default_cost_funcs(feature_info, data, sens_group=None,
 STUDENT_PERF_DIRS = {
     'school': (DIR_BOTH, EXP_LOWER),
     'sex': (DIR_IMMUT, None),
-    # 'age': (DIR_IMMUT, None),
+    'age': (DIR_IMMUT, None),
     'address': (DIR_BOTH, EXP_HIGHER),
-    # 'famsize': (DIR_IMMUT, None),
-    # 'Pstatus': (DIR_IMMUT, None),
+    'famsize': (DIR_IMMUT, None),
+    'Pstatus': (DIR_IMMUT, None),
     'Medu': (DIR_UP, EXP_HIGHER),
     'Fedu': (DIR_UP, EXP_HIGHER),
-    # 'Mjob_at_home': (DIR_IMMUT, None),
-    # 'Mjob_health': (DIR_IMMUT, None),
-    # 'Mjob_other': (DIR_IMMUT, None),
-    # 'Mjob_services': (DIR_IMMUT, None),
-    # 'Mjob_teacher': (DIR_IMMUT, None),
-    # 'Fjob_at_home': (DIR_IMMUT, None),
-    # 'Fjob_health': (DIR_IMMUT, None),
-    # 'Fjob_other': (DIR_IMMUT, None),
-    # 'Fjob_services': (DIR_IMMUT, None),
-    # 'Fjob_teacher': (DIR_IMMUT, None),
-    # 'reason_course': (DIR_IMMUT, None),
-    # 'reason_home': (DIR_IMMUT, None),
-    # 'reason_other': (DIR_IMMUT, None),
-    # 'reason_reputation': (DIR_IMMUT, None),
-    # 'guardian_father': (DIR_IMMUT, None),
-    # 'guardian_mother': (DIR_IMMUT, None),
-    # 'guardian_other': (DIR_IMMUT, None),
+    'Mjob_at_home': (DIR_IMMUT, None),
+    'Mjob_health': (DIR_IMMUT, None),
+    'Mjob_other': (DIR_IMMUT, None),
+    'Mjob_services': (DIR_IMMUT, None),
+    'Mjob_teacher': (DIR_IMMUT, None),
+    'Fjob_at_home': (DIR_IMMUT, None),
+    'Fjob_health': (DIR_IMMUT, None),
+    'Fjob_other': (DIR_IMMUT, None),
+    'Fjob_services': (DIR_IMMUT, None),
+    'Fjob_teacher': (DIR_IMMUT, None),
+    'reason_course': (DIR_IMMUT, None),
+    'reason_home': (DIR_IMMUT, None),
+    'reason_other': (DIR_IMMUT, None),
+    'reason_reputation': (DIR_IMMUT, None),
+    'guardian_father': (DIR_IMMUT, None),
+    'guardian_mother': (DIR_IMMUT, None),
+    'guardian_other': (DIR_IMMUT, None),
     'traveltime': (DIR_BOTH, EXP_HIGHER),
     'studytime': (DIR_BOTH, EXP_HIGHER),
-    # 'failures': (DIR_IMMUT, None), # depends on past, can't change it
+    'failures': (DIR_IMMUT, None), # depends on past, can't change it
     'schoolsup': (DIR_BOTH, EXP_HIGHER),
     'famsup': (DIR_BOTH, EXP_HIGHER),
     'paid': (DIR_BOTH, EXP_HIGHER),
     'activities': (DIR_BOTH, EXP_HIGHER),
-    # 'nursery': (DIR_IMMUT, None), # depends on history, can't change it
+    'nursery': (DIR_IMMUT, None), # depends on history, can't change it
     'higher': (DIR_BOTH, EXP_HIGHER),
     'internet': (DIR_BOTH, EXP_HIGHER),
     'romantic': (DIR_BOTH, EXP_HIGHER),
@@ -530,37 +530,37 @@ def get_pre_proc_credit_default_cost_funcs(feature_info, data, sens_group=None,
 STUDENT_PERF_DIRS_REV = {
     'school': (DIR_BOTH, EXP_LOWER),
     'sex': (DIR_IMMUT, None),
-    # 'age': (DIR_IMMUT, None),
+    'age': (DIR_IMMUT, None),
     'address': (DIR_BOTH, EXP_LOWER),
-    # 'famsize': (DIR_IMMUT, None),
-    # 'Pstatus': (DIR_IMMUT, None),
+    'famsize': (DIR_IMMUT, None),
+    'Pstatus': (DIR_IMMUT, None),
     'Medu': (DIR_UP, EXP_HIGHER),
     'Fedu': (DIR_UP, EXP_HIGHER),
-    # 'Mjob_at_home': (DIR_IMMUT, None),
-    # 'Mjob_health': (DIR_IMMUT, None),
-    # 'Mjob_other': (DIR_IMMUT, None),
-    # 'Mjob_services': (DIR_IMMUT, None),
-    # 'Mjob_teacher': (DIR_IMMUT, None),
-    # 'Fjob_at_home': (DIR_IMMUT, None),
-    # 'Fjob_health': (DIR_IMMUT, None),
-    # 'Fjob_other': (DIR_IMMUT, None),
-    # 'Fjob_services': (DIR_IMMUT, None),
-    # 'Fjob_teacher': (DIR_IMMUT, None),
-    # 'reason_course': (DIR_IMMUT, None),
-    # 'reason_home': (DIR_IMMUT, None),
-    # 'reason_other': (DIR_IMMUT, None),
-    # 'reason_reputation': (DIR_IMMUT, None),
-    # 'guardian_father': (DIR_IMMUT, None),
-    # 'guardian_mother': (DIR_IMMUT, None),
-    # 'guardian_other': (DIR_IMMUT, None),
+    'Mjob_at_home': (DIR_IMMUT, None),
+    'Mjob_health': (DIR_IMMUT, None),
+    'Mjob_other': (DIR_IMMUT, None),
+    'Mjob_services': (DIR_IMMUT, None),
+    'Mjob_teacher': (DIR_IMMUT, None),
+    'Fjob_at_home': (DIR_IMMUT, None),
+    'Fjob_health': (DIR_IMMUT, None),
+    'Fjob_other': (DIR_IMMUT, None),
+    'Fjob_services': (DIR_IMMUT, None),
+    'Fjob_teacher': (DIR_IMMUT, None),
+    'reason_course': (DIR_IMMUT, None),
+    'reason_home': (DIR_IMMUT, None),
+    'reason_other': (DIR_IMMUT, None),
+    'reason_reputation': (DIR_IMMUT, None),
+    'guardian_father': (DIR_IMMUT, None),
+    'guardian_mother': (DIR_IMMUT, None),
+    'guardian_other': (DIR_IMMUT, None),
     'traveltime': (DIR_BOTH, EXP_LOWER),
     'studytime': (DIR_BOTH, EXP_LOWER),
-    # 'failures': (DIR_IMMUT, None), # depends on past, can't change it
+    'failures': (DIR_IMMUT, None), # depends on past, can't change it
     'schoolsup': (DIR_BOTH, EXP_LOWER),
     'famsup': (DIR_BOTH, EXP_LOWER),
     'paid': (DIR_BOTH, EXP_LOWER),
     'activities': (DIR_BOTH, EXP_LOWER),
-    # 'nursery': (DIR_IMMUT, None), # depends on history, can't change it
+    'nursery': (DIR_IMMUT, None), # depends on history, can't change it
     'higher': (DIR_BOTH, EXP_LOWER),
     'internet': (DIR_BOTH, EXP_LOWER),
     'romantic': (DIR_BOTH, EXP_LOWER),

diff --git a/effort_reward_fairness/effort_reward_function_plots.py b/effort_reward_fairness/effort_reward_function_plots.py
@@ -286,7 +286,7 @@ def run(self, test_or_train):
             y_train_pred = model.predict(self.role_model_users).astype(bool if exp.dataset_info[self.dataset]['prediction_task'] == exp.CLASSIFICATION else float)
 
             print ("Model: {}, MAE: {}, MSE: {}".format(model, mean_absolute_error(self.users_gt, y_test_pred), mean_squared_error(self.users_gt, y_test_pred)))
-            continue
+            # continue
 
             self.role_model_users_pred = y_train_pred # This should not change
             self.users_preds = y_test_pred if test_or_train == 'test' else y_train_pred # change this based on which group's explanations are needed (test or train)
@@ -341,7 +341,7 @@ def run(self, test_or_train):
                     assert role_model_utility == role_model_reward - role_model_effort
                     sens_rewards.append(role_model_reward)
                     print ("[Sens] Model: {}, Effort threshold: {}, Effort value: {}, Max Reward: {}".format(model, delta,role_model_effort, role_model_reward))
-                    break
+                    # break
                     # role_model, role_model_effort, role_model_reward, role_model_utility = \
                     #     self.sampling_based_explanations(
                     #         user, 
@@ -409,7 +409,7 @@ def run(self, test_or_train):
                     assert role_model_utility == role_model_reward - role_model_effort
                     nosens_rewards.append(role_model_reward)
                     print ("[Nosens] Model: {}, Effort threshold: {}, Effort value: {}, Max Reward: {}".format(model, delta, role_model_effort, role_model_reward))
-                    break
+                    # break
                     # role_model, role_model_effort, role_model_reward, role_model_utility = \
                     #     self.sampling_based_explanations(
                     #         user, 
@@ -556,15 +556,21 @@ def run(self, test_or_train):
                     aeio.plot_one_var_vs_other(self.res_dir, model, self.effort_deltas, sens_reward_with_effort, nosens_reward_with_effort, 'Effort', 'Average Reward'),
                     aeio.plot_one_var_vs_other(self.res_dir, model, self.reward_deltas, sens_effort_with_reward, nosens_effort_with_reward, 'Reward', 'Average Effort')
                     ))
+        # model_to_utility_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_utility_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_sens_fc.pkl')
+        # model_to_utility_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_utility_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_nosens_fc.pkl')
+        # model_to_reward_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_reward_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_reward_sens_fc.pkl')
+        # model_to_reward_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_reward_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_reward_nosens_fc.pkl')
+        # model_to_effort_nosens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_effort_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_effort_nosens_fc.pkl')
+        # model_to_effort_sens = joblib.load(self.res_dir + '/plots_pickled_data/model_to_effort_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_effort_sens_fc.pkl')
         with open(self.res_file_path, 'a') as res_file:
-            res_file.write("== All Models in One ==\n\n".format(str(model)))
+            res_file.write("== All Models in One ==\n\n")
             res_file.write("{}\n\n{}\n\n{}\n\n".format(
                 aeio.plot_one_var_vs_other_together(self.res_dir, model_to_utility_sens, model_to_utility_nosens, 'Effort', 'Average Utility'),
                 aeio.plot_one_var_vs_other_together(self.res_dir, model_to_reward_sens, model_to_reward_nosens, 'Effort', 'Average Reward'),
                 aeio.plot_one_var_vs_other_together(self.res_dir, model_to_effort_sens, model_to_effort_nosens, 'Reward', 'Average Effort')
             ))
-        out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
-        out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
+        # out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
+        # out.upload_results([self.res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
         out.create_dir(self.res_dir + '/plots_pickled_data')
         joblib.dump(model_to_utility_sens, self.res_dir + '/plots_pickled_data/model_to_utility_sens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_sens_fc.pkl')
         joblib.dump(model_to_utility_nosens, self.res_dir + '/plots_pickled_data/model_to_utility_nosens.pkl' if not exp.FAIRNESS_CONSTRAINTS else self.res_dir + '/plots_pickled_data/model_to_utility_nosens_fc.pkl')

diff --git a/effort_reward_fairness/experiment.py b/effort_reward_fairness/experiment.py
@@ -22,6 +22,7 @@
 
 CLASSIFICATION = 'classification'
 REGRESSION = 'regression'
+# Only set this to True if you have trained a fairness constraints model for the particular dataset you are evaluating and have put the weights file (.mat) in the root folder
 FAIRNESS_CONSTRAINTS = False
 
 dataset_info = {
@@ -32,6 +33,10 @@
                                 'variable_constraints': cf.PRE_PROC_CREDIT_DEFAULT_DIRS, 'prediction_task': CLASSIFICATION},
     'StudentPerf': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'), 
                     'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
+    'StudentPerfMut': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'), 
+                    'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
+    'StudentPerfMutPlusImmut': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'sex_Male'), 
+                    'variable_constraints': cf.STUDENT_PERF_DIRS, 'variable_constraints_rev': cf.STUDENT_PERF_DIRS_REV, 'prediction_task': REGRESSION},
     'CrimesCommunities': {'cost_funcs': cf.get_student_perf_cost_funcs, 'sens_f': ('0', '1', 'MajorityRaceWhite'), 
                     'variable_constraints': cf.CRIMES_DIRS, 'variable_constraints_rev': cf.CRIMES_DIRS_REV, 'prediction_task': REGRESSION}
 }
@@ -43,6 +48,8 @@ def base_exp(return_vars=False, test_or_train=None):
     #dataset = "CreditDefault"
     #dataset = "PreprocCreditCardDefault"
     dataset = "StudentPerf"
+    # dataset = "StudentPerfMut"
+    # dataset = "StudentPerfMutPlusImmut"
     # dataset = "CrimesCommunities"
     if FAIRNESS_CONSTRAINTS:
         models = [lm.LinRegFC(11.9, dataset), lm.LinRegFC(12.9, dataset), lm.LinRegFC(13.9, dataset), lm.LinRegFC(14.9, dataset)]
@@ -51,6 +58,10 @@ def base_exp(return_vars=False, test_or_train=None):
             models = [lm.LogReg(), lm.DT(), lm.SVM(), lm.NN()]
         elif dataset_info[dataset]['prediction_task'] == REGRESSION:
             models = [lm.LinReg(), lm.NNReg(), lm.DTReg()]
+            if dataset == "StudentPerfMut":
+                models = [lm.RidgeReg(0.1)]
+            elif dataset == "StudentPerfMutPlusImmut":
+                models = [lm.RidgeReg(200)]
     if return_vars:
         return dataset, models
     evaluate_models(dataset, models, test_or_train)
@@ -358,12 +369,12 @@ def evaluate_models(dataset, models, test_or_train, subsample_size=None, num_inv
 
             if len(disparity_table_heading) <= 1:
                 heading, formats, values = eval_formula.get_disparity_measures(users_gt, users_preds, users_sens_group, 
-                                np.mean(user_utility_sens), np.mean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=True)
+                                np.nanmean(user_utility_sens), np.nanmean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=True)
                 disparity_table_heading += heading
                 disparity_table_formats += formats
             else:
                 values = eval_formula.get_disparity_measures(users_gt, users_preds, users_sens_group, 
-                                np.mean(user_utility_sens), np.mean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=False)
+                                np.nanmean(user_utility_sens), np.nanmean(user_utility_nosens), dataset_info[dataset]['prediction_task'], return_heading_and_formats=False)
             disparity_table_values.append([str(clf)] + values)
 
             with open(group_res_file_path, 'a') as group_res_file:
@@ -393,10 +404,10 @@ def evaluate_models(dataset, models, test_or_train, subsample_size=None, num_inv
             # group_res_file.write("== IGNORE STUFF BELOW THIS FOR NOW ==\n\n")
             # group_res_file.write("=== Group Explanations ===\n\n")
 
-    out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.png')
-    out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
-    out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
-    out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
+    # out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.png')
+    # out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.png')
+    # out.upload_results([res_dir], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
+    # out.upload_results([res_dir + '/disparity_plots'], 'results', aeio.SERVER_PROJECT_PATH, '.pdf')
 
 def get_dataset_statistics_temp(y, sens_group, prediction_task):
     if prediction_task == REGRESSION:

diff --git a/effort_reward_fairness/group_explanations.py b/effort_reward_fairness/group_explanations.py
@@ -80,12 +80,12 @@ def get_possible_role_models(self, user, user_ground_truth, user_predicted_val,
         ## Keeping track of focal points/anchors
         indices_of_users = np.array(list(zip(*enumerate(users)))[0])
         # Forcing role models to have Higher ground truth label than that of user
-        mask_higher_gt = np.where(users_predicted_val >= user_predicted_val)[0]
+        mask_higher_gt = np.where(users_predicted_val >= user_predicted_val)[0] # TODO: In some cases > makes more sense here
         users = users[mask_higher_gt]
         users_predicted_val = users_predicted_val[mask_higher_gt]
         users_ground_truth = users_ground_truth[mask_higher_gt]
         indices_of_users = indices_of_users[mask_higher_gt]
-        assert np.all(users_predicted_val >= user_predicted_val)
+        assert np.all(users_predicted_val >= user_predicted_val) # TODO: In some cases > makes more sense here
 
         # #### TODO: dirty, make it nicer
         if self.dataset != 'CrimesCommunities':
@@ -197,6 +197,7 @@ def sampling_based_explanations(self, user, users, users_ground_truth, users_pre
         right_users, corresponding_gt, corresponding_preds, indices_of_users = self.get_possible_role_models(user.flatten(), user_ground_truth, user_predicted_val, 
             users, users_ground_truth, users_predicted_val)
         if right_users.shape[0] == 0:
+            # TODO: This should return np.nan
             return user[0], 0, 0, user_ground_truth == False and user_predicted_val == True, user_ground_truth, np.nan
         efforts = pairwise_distances(user, right_users, metric=self.effort_measure, n_jobs=-1).flatten()
         utilities = cf.compute_utility(corresponding_gt, corresponding_preds, np.array([user_ground_truth] * len(right_users)), 
@@ -208,6 +209,7 @@ def sampling_based_explanations(self, user, users, users_ground_truth, users_pre
         role_model_utility = utilities[idx]
         role_model_effort = efforts[idx]
         anchor = indices_of_users[idx]
+        print (role_model_utility)
         if return_only_user:
             # used for effort_reward_function_plots.py
             return (role_model, role_model_gt, role_model_pred)

diff --git a/effort_reward_fairness/learning_env/dec_rule_env.py b/effort_reward_fairness/learning_env/dec_rule_env.py
@@ -54,8 +54,8 @@ def load_data(self, seed=4194, feature_engineering=False):
                     data['attr_info'] = f_info
             elif self.ds_name == 'PreprocCreditCardDefault':
                 data = credit_card_default_data.load_preproc_credit_card_default_data(normalize_cont_features=False)
-            elif self.ds_name == 'StudentPerf':
-                data = load_student_perf_data.load_student_perf_data()
+            elif self.ds_name == 'StudentPerf' or self.ds_name == "StudentPerfMutPlusImmut" or self.ds_name == "StudentPerfMut":
+                data = load_student_perf_data.load_student_perf_data(self.ds_name)
             elif self.ds_name == 'CrimesCommunities':
                 data = load_crimes_and_communities.load_crimes_and_communities()
             else:
Original file line number	Diff line number	Diff line change
Expand Up		@@ -65,3 +65,6 @@ Code for preprocessing the [Student Performance Dataset](http://archive.ics.uci.


		** To run Fairness Constraints you need the file ``trained_linregfc_StudentPerf.mat`` in the ``./effort_reward_fairness`` directory. For easy reproduction of results we have included this file; it is generated by the MATLAB code in the ``./Fairness_constraints`` directory and can be found in ``./Fairness_constraints/Output`` after you run ``./Fairness_constraints/Social_welfare_constrained_ERM_regularized.m``.


		Caution: This is a work in progress; some TODOs (particularly in group_explanations.py) need to be carefully looked at.