diff --git a/test/test_datasets/test_resampling_strategies.py b/test/test_datasets/test_resampling_strategies.py
index 7f14275a3..473f17182 100644
--- a/test/test_datasets/test_resampling_strategies.py
+++ b/test/test_datasets/test_resampling_strategies.py
@@ -1,6 +1,15 @@
 import numpy as np
 
-from autoPyTorch.datasets.resampling_strategy import CrossValFuncs, HoldOutFuncs
+import pytest
+
+from autoPyTorch.datasets.resampling_strategy import (
+    CrossValFuncs,
+    CrossValTypes,
+    HoldOutFuncs,
+    HoldoutValTypes,
+    NoResamplingStrategyTypes,
+    check_resampling_strategy
+)
 
 
 def test_holdoutfuncs():
@@ -40,3 +49,12 @@ def test_crossvalfuncs():
     splits = split.stratified_k_fold_cross_validation(0, 10, X, stratify=y)
     assert len(splits) == 10
     assert all([0 in y[s[1]] for s in splits])
+
+
+def test_check_resampling_strategy():
+    for rs in (CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes):
+        for rs_func in rs:
+            check_resampling_strategy(rs_func)
+
+    with pytest.raises(ValueError):
+        check_resampling_strategy(None)
diff --git a/test/test_evaluation/test_evaluators.py b/test/test_evaluation/test_evaluators.py
index 449bf8d4a..2371522d8 100644
--- a/test/test_evaluation/test_evaluators.py
+++ b/test/test_evaluation/test_evaluators.py
@@ -143,26 +143,7 @@ def tearDown(self):
         if os.path.exists(self.ev_path):
             shutil.rmtree(self.ev_path)
 
-    def test_evaluate_loss(self):
-        D = get_binary_classification_datamanager()
-        backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch')
-        backend_api.load_datamanager = lambda: D
-        fixed_params_dict = self.fixed_params._asdict()
-        fixed_params_dict.update(backend=backend_api)
-        evaluator = Evaluator(
-            queue=multiprocessing.Queue(),
-            fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict),
-            evaluator_params=self.eval_params
-        )
-        evaluator.splits = None
-        with pytest.raises(ValueError):
-            evaluator.evaluate_loss()
-
-    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
-    def test_holdout(self, pipeline_mock):
-        pipeline_mock.fit_dictionary = {'budget_type': 'epochs', 'epochs': 50}
-        # Binary iris, contains 69 train samples, 31 test samples
-        D = get_binary_classification_datamanager()
+    def _get_evaluator(self, pipeline_mock, data):
         pipeline_mock.predict_proba.side_effect = \
             lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
         pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
@@ -170,7 +151,7 @@ def test_holdout(self, pipeline_mock):
         pipeline_mock.get_additional_run_info.return_value = None
 
         _queue = multiprocessing.Queue()
         backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch')
-        backend_api.load_datamanager = lambda: D
+        backend_api.load_datamanager = lambda: data
 
         fixed_params_dict = self.fixed_params._asdict()
         fixed_params_dict.update(backend=backend_api)
@@ -184,56 +165,72 @@ def test_holdout(self, pipeline_mock):
         evaluator.evaluate_loss()
 
+        return evaluator
+
+    def _check_results(self, evaluator, ans):
         rval = read_queue(evaluator.queue)
         self.assertEqual(len(rval), 1)
         result = rval[0]['loss']
         self.assertEqual(len(rval[0]), 3)
         self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)
-
+        self.assertEqual(result, ans)
         self.assertEqual(evaluator._save_to_backend.call_count, 1)
-        self.assertEqual(result, 0.5652173913043479)
-        self.assertEqual(pipeline_mock.fit.call_count, 1)
-        # 3 calls because of train, holdout and test set
-        self.assertEqual(pipeline_mock.predict_proba.call_count, 3)
-        call_args = evaluator._save_to_backend.call_args
-        self.assertEqual(call_args[0][0].shape[0], len(D.splits[0][1]))
-        self.assertIsNone(call_args[0][1])
-        self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0])
-        self.assertEqual(evaluator.pipelines[0].fit.call_count, 1)
 
-    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
-    def test_cv(self, pipeline_mock):
-        D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation)
+    def _check_whether_save_y_opt_is_correct(self, resampling_strategy, ans):
+        backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch')
+        D = get_binary_classification_datamanager(resampling_strategy)
+        backend_api.load_datamanager = lambda: D
+        fixed_params_dict = self.fixed_params._asdict()
+        fixed_params_dict.update(backend=backend_api, save_y_opt=True)
+        evaluator = Evaluator(
+            queue=multiprocessing.Queue(),
+            fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict),
+            evaluator_params=self.eval_params
+        )
+        assert evaluator.fixed_pipeline_params.save_y_opt == ans
 
-        pipeline_mock.predict_proba.side_effect = \
-            lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
-        pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
-        pipeline_mock.get_additional_run_info.return_value = None
+    def test_whether_save_y_opt_is_correct_for_no_resampling(self):
+        self._check_whether_save_y_opt_is_correct(NoResamplingStrategyTypes.no_resampling, False)
 
-        _queue = multiprocessing.Queue()
+    def test_whether_save_y_opt_is_correct_for_resampling(self):
+        self._check_whether_save_y_opt_is_correct(CrossValTypes.k_fold_cross_validation, True)
+
+    def test_evaluate_loss(self):
+        D = get_binary_classification_datamanager()
         backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch')
         backend_api.load_datamanager = lambda: D
-
         fixed_params_dict = self.fixed_params._asdict()
         fixed_params_dict.update(backend=backend_api)
         evaluator = Evaluator(
-            queue=_queue,
+            queue=multiprocessing.Queue(),
             fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict),
             evaluator_params=self.eval_params
         )
-        evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend)
-        evaluator._save_to_backend.return_value = True
+        evaluator.splits = None
+        with pytest.raises(ValueError):
+            evaluator.evaluate_loss()
 
-        evaluator.evaluate_loss()
+    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
+    def test_holdout(self, pipeline_mock):
+        D = get_binary_classification_datamanager()
+        evaluator = self._get_evaluator(pipeline_mock, D)
+        self._check_results(evaluator, ans=0.5652173913043479)
 
-        rval = read_queue(evaluator.queue)
-        self.assertEqual(len(rval), 1)
-        result = rval[0]['loss']
-        self.assertEqual(len(rval[0]), 3)
-        self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)
+        self.assertEqual(pipeline_mock.fit.call_count, 1)
+        # 3 calls because of train, holdout and test set
+        self.assertEqual(pipeline_mock.predict_proba.call_count, 3)
+        call_args = evaluator._save_to_backend.call_args
+        self.assertEqual(call_args[0][0].shape[0], len(D.splits[0][1]))
+        self.assertIsNone(call_args[0][1])
+        self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0])
+        self.assertEqual(evaluator.pipelines[0].fit.call_count, 1)
+
+    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
+    def test_cv(self, pipeline_mock):
+        D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation)
+        evaluator = self._get_evaluator(pipeline_mock, D)
+        self._check_results(evaluator, ans=0.463768115942029)
 
-        self.assertEqual(evaluator._save_to_backend.call_count, 1)
-        self.assertEqual(result, 0.463768115942029)
         self.assertEqual(pipeline_mock.fit.call_count, 5)
         # 15 calls because of the training, holdout and
         # test set (3 sets x 5 folds = 15)
@@ -251,38 +248,10 @@
     @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
     def test_no_resampling(self, pipeline_mock):
-        pipeline_mock.fit_dictionary = {'budget_type': 'epochs', 'epochs': 10}
-        # Binary iris, contains 69 train samples, 31 test samples
         D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
-        pipeline_mock.predict_proba.side_effect = \
-            lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
-        pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
-        pipeline_mock.get_additional_run_info.return_value = None
-
-        _queue = multiprocessing.Queue()
-        backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch')
-        backend_api.load_datamanager = lambda: D
-
-        fixed_params_dict = self.fixed_params._asdict()
-        fixed_params_dict.update(backend=backend_api)
-        evaluator = Evaluator(
-            queue=_queue,
-            fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict),
-            evaluator_params=self.eval_params
-        )
-        evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend)
-        evaluator._save_to_backend.return_value = True
+        evaluator = self._get_evaluator(pipeline_mock, D)
+        self._check_results(evaluator, ans=0.5806451612903225)
 
-        evaluator.evaluate_loss()
-
-        rval = read_queue(evaluator.queue)
-        self.assertEqual(len(rval), 1)
-        result = rval[0]['loss']
-        self.assertEqual(len(rval[0]), 3)
-        self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)
-
-        self.assertEqual(evaluator._save_to_backend.call_count, 1)
-        self.assertEqual(result, 0.5806451612903225)
         self.assertEqual(pipeline_mock.fit.call_count, 1)
         # 2 calls because of train and test set
         self.assertEqual(pipeline_mock.predict_proba.call_count, 2)
diff --git a/test/test_evaluation/test_tae.py b/test/test_evaluation/test_tae.py
index 0a187f6c2..eaf505ad7 100644
--- a/test/test_evaluation/test_tae.py
+++ b/test/test_evaluation/test_tae.py
@@ -90,6 +90,7 @@ def _create_taq():
         backend=unittest.mock.Mock(),
         seed=1,
         metric=accuracy,
+        multi_objectives=["cost"],
         cost_for_crash=accuracy._cost_of_crash,
         abort_on_first_run_crash=True,
         pynisher_context=unittest.mock.Mock()
@@ -104,6 +105,15 @@ def test_check_run_info(self):
         with pytest.raises(ValueError):
             taq.run_wrapper(run_info)
 
+    def test_check_and_get_default_budget(self):
+        taq = _create_taq()
+        budget = taq._check_and_get_default_budget()
+        assert isinstance(budget, float)
+
+        taq.fixed_pipeline_params = taq.fixed_pipeline_params._replace(budget_type='test')
+        with pytest.raises(ValueError):
+            taq._check_and_get_default_budget()
+
     def test_cutoff_update_in_run_wrapper(self):
         taq = _create_taq()
         run_info = RunInfo(
diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py
index ce1579caa..213671bb8 100644
--- a/test/test_pipeline/test_tabular_classification.py
+++ b/test/test_pipeline/test_tabular_classification.py
@@ -524,8 +524,8 @@ def test_train_pipeline_with_runtime_max_reached(fit_dictionary_tabular_dummy):
 def test_get_pipeline_representation():
     pipeline = TabularClassificationPipeline(
         dataset_properties={
-            'numerical_columns': None,
-            'categorical_columns': None,
+            'numerical_columns': [],
+            'categorical_columns': [],
             'task_type': 'tabular_classification'
         }
     )
diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py
index e21eb961f..8ef8d26bd 100644
--- a/test/test_pipeline/test_tabular_regression.py
+++ b/test/test_pipeline/test_tabular_regression.py
@@ -322,8 +322,8 @@ def test_pipeline_score(fit_dictionary_tabular_dummy):
 def test_get_pipeline_representation():
     pipeline = TabularRegressionPipeline(
         dataset_properties={
-            'numerical_columns': None,
-            'categorical_columns': None,
+            'numerical_columns': [],
+            'categorical_columns': [],
             'task_type': 'tabular_classification'
         }
     )