Skip to content

Commit

Permalink
Added lcs_ek (expert knowledge) and lcs_rc (rule compaction) parameters for ExSTraCS
Browse files Browse the repository at this point in the history
  • Loading branch information
raptor419 committed Aug 14, 2024
1 parent 1fe0fab commit f4fd1de
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 17 deletions.
5 changes: 4 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,10 @@ def run(params):
lcs_nu=params['lcs_nu'],
lcs_n=params['lcs_n'],
lcs_iterations=params['lcs_iterations'],
lcs_timeout=params['lcs_timeout'], resubmit=params['model_resubmit'],
lcs_timeout=params['lcs_timeout'],
lcs_ek=params['lcs_ek'],
lcs_rc=params['lcs_rc'],
resubmit=params['model_resubmit'],
random_state=params['random_state'], n_jobs=params['n_jobs'],
run_cluster=params['run_cluster'],
queue=params['queue'],
Expand Down
4 changes: 3 additions & 1 deletion run_configs/upenn.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ top_fi_features = 40
overwrite_cv_feat = True

[modeling - phase 5]
algorithms = ['LR', 'NB', 'DT']
algorithms = ['XGB', 'ExSTraCS']
exclude = ['eLCS', 'XCS']
training_subsample = 0
use_uniform_fi = True
Expand All @@ -87,6 +87,8 @@ lcs_iterations = 200000
lcs_n = 2000
lcs_timeout = 1200
model_resubmit = False
lcs_ek = True
lcs_rc = 'QRF'

[post-analysis - phase 6]
exclude_plots = None
Expand Down
16 changes: 11 additions & 5 deletions streamline/legacy/ModelJobSubmit.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def run_cluster(argv):
lcs_iterations = int(argv[18])
lcs_n = int(argv[19])
lcs_nu = int(argv[20])
lcs_ek = eval(argv[21])
lcs_rc = None if argv[22] == "None" else argv[22]

file = open(output_path + '/' + experiment_name + '/' + "metadata.pickle", 'rb')
metadata = pickle.load(file)
Expand All @@ -52,10 +54,13 @@ def run_cluster(argv):
cv=None, n_jobs=n_jobs)
else:
if algorithm == 'ExSTraCS':
expert_knowledge = get_fi_for_ExSTraCS(output_path, experiment_name,
dataset_directory_path,
class_label, instance_label, cv_count,
filter_poor_features)
if lcs_ek and lcs_ek == "None":
expert_knowledge = get_fi_for_ExSTraCS(output_path, experiment_name,
dataset_directory_path,
class_label, instance_label, cv_count,
filter_poor_features)
else:
expert_knowledge = None
if do_lcs_sweep:
model = model_str_to_obj(algorithm)(cv_folds=3,
scoring_metric=scoring_metric,
Expand All @@ -71,7 +76,8 @@ def run_cluster(argv):
cv=None, n_jobs=n_jobs,
iterations=lcs_iterations,
N=lcs_n, nu=lcs_nu,
expert_knowledge=expert_knowledge)
expert_knowledge=expert_knowledge,
lcs_rc=lcs_rc)
else:
if do_lcs_sweep:
model = model_str_to_obj(algorithm)(cv_folds=3,
Expand Down
4 changes: 2 additions & 2 deletions streamline/models/learning_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class ExSTraCSClassifier(BaseModel, ABC):

def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
metric_direction='maximize', random_state=None, cv=None, n_jobs=None,
iterations=None, N=None, nu=None, expert_knowledge=None):
iterations=None, N=None, nu=None, expert_knowledge=None, lcs_rc='QRF'):
super().__init__(ExSTraCS, "ExSTraCS", cv_folds, scoring_metric, metric_direction, random_state, cv)
self.param_grid = get_parameters(self.model_name)
if iterations:
Expand All @@ -90,7 +90,7 @@ def __init__(self, cv_folds=3, scoring_metric='balanced_accuracy',
self.param_grid['nu'] = [nu, ]
if len(self.param_grid['learning_iterations']) == 1 and len(self.param_grid['N']) == 1 and \
len(self.param_grid['nu']) == 1:
self.param_grid['rule_compaction'] = ['QRF', ]
self.param_grid['rule_compaction'] = [lcs_rc, ]
logging.info("Printing Rule Compaction Parameters")
logging.info(str(self.param_grid['rule_compaction']))
self.param_grid['expert_knowledge'] = expert_knowledge
Expand Down
23 changes: 15 additions & 8 deletions streamline/runners/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, output_path, experiment_name, algorithms=None, exclude=("XCS"
instance_label=None, scoring_metric='balanced_accuracy', metric_direction='maximize',
training_subsample=0, use_uniform_fi=True, n_trials=200,
timeout=900, save_plots=False, do_lcs_sweep=False, lcs_nu=1, lcs_n=2000, lcs_iterations=200000,
lcs_timeout=1200, resubmit=False, random_state=None, n_jobs=None,
lcs_timeout=1200, lcs_ek=True, lcs_rc='QRF', resubmit=False, random_state=None, n_jobs=None,
run_cluster=False,
queue='defq', reserved_memory=4):

Expand Down Expand Up @@ -105,6 +105,8 @@ def __init__(self, output_path, experiment_name, algorithms=None, exclude=("XCS"
self.lcs_n = lcs_n
self.lcs_iterations = lcs_iterations
self.lcs_timeout = lcs_timeout
self.lcs_ek = lcs_ek
self.lcs_rc = lcs_rc

self.resubmit = resubmit
self.random_state = random_state
Expand Down Expand Up @@ -190,17 +192,21 @@ def run(self, run_parallel=False):
cv=None, n_jobs=self.n_jobs)
else:
if algorithm == 'ExSTraCS':
expert_knowledge = get_fi_for_ExSTraCS(self.output_path, self.experiment_name,
dataset_directory_path,
self.class_label, self.instance_label, cv_count,
filter_poor_features)
if self.lcs_ek and self.lcs_ek != "None":
expert_knowledge = get_fi_for_ExSTraCS(self.output_path, self.experiment_name,
dataset_directory_path,
self.class_label, self.instance_label, cv_count,
filter_poor_features)
else:
expert_knowledge = None
if self.do_lcs_sweep:
model = model_str_to_obj(algorithm)(cv_folds=3,
scoring_metric=self.scoring_metric,
metric_direction=self.metric_direction,
random_state=self.random_state,
cv=None, n_jobs=self.n_jobs,
expert_knowledge=copy.deepcopy(expert_knowledge))
expert_knowledge=copy.deepcopy(expert_knowledge),
lcs_rc=self.lcs_rc)
else:
model = model_str_to_obj(algorithm)(cv_folds=3,
scoring_metric=self.scoring_metric,
Expand All @@ -209,7 +215,8 @@ def run(self, run_parallel=False):
cv=None, n_jobs=self.n_jobs,
iterations=self.lcs_iterations,
N=self.lcs_n, nu=self.lcs_nu,
expert_knowledge=copy.deepcopy(expert_knowledge))
expert_knowledge=copy.deepcopy(expert_knowledge),
lcs_rc=self.lcs_rc)
else:
if self.do_lcs_sweep:
model = model_str_to_obj(algorithm)(cv_folds=3,
Expand Down Expand Up @@ -305,7 +312,7 @@ def get_cluster_params(self, full_path, algorithm, cv_count):
self.n_trials, self.timeout, self.training_subsample,
self.uniform_fi, self.save_plots, self.random_state]
cluster_params += [algorithm, self.n_jobs, self.do_lcs_sweep,
self.lcs_iterations, self.lcs_n, self.lcs_nu]
self.lcs_iterations, self.lcs_n, self.lcs_nu, self.lcs_ek, self.lcs_rc]
cluster_params = [str(i) for i in cluster_params]
return cluster_params

Expand Down
4 changes: 4 additions & 0 deletions streamline/utils/parser_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,10 @@ def parse_model(argv, params_dict=None):
default=2000)
parser.add_argument('--lcs-timeout', dest='lcs_timeout', type=int, help='seconds until hyper parameter sweep stops '
'for LCS algorithms', default=1200)
parser.add_argument('--rc', dest='lcs_rc', type=str,
help='do ExSTraCS rule compaction', default='QRF')
parser.add_argument('--ek', dest='lcs_ek', type=str,
help='use MS feature importance as expert knowledge in ExSTraCS', default='QRF')
return update_dict_from_parser(argv, parser, params_dict)


Expand Down

0 comments on commit f4fd1de

Please sign in to comment.