-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtest_model.py
86 lines (61 loc) · 3.92 KB
/
test_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from unittest import TestCase
from unittest import mock
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDClassifier
from model import BisectingClassifier, DistributionBin
class TestBisectingClassifier(TestCase):
    """Unit tests for BisectingClassifier's delegation to its base models.

    BisectingClassifier wraps one binary base classifier per split point and
    arranges them as a bisecting tree over log-return intervals. These tests
    replace the base models with autospecced SGDClassifier mocks and verify:

    - ``fit`` re-labels and routes training samples to the correct base
      classifiers (each node sees only the samples falling on its side of
      the parent split), and
    - ``predict`` combines the base models' ``predict_proba`` outputs into
      ``DistributionBin`` instances with the expected bounds, means, and
      probabilities.
    """

    def setUp(self):
        # One mocked base classifier per split boundary.
        self.base_models = [mock.create_autospec(SGDClassifier) for _ in range(4)]
        # Split points expressed as log-return thresholds.
        self.splits = [-.2, -.1, .1, .2]
        # Window length 3: each feature vector is the 3 most recent log returns.
        self.model = BisectingClassifier(self.splits, self.base_models, 3)

    def test_fit(self):
        prices = pd.Series([10, 10, 10, 10, 11.5, 11, 9.5])
        # Log returns: log(p_t / p_{t-1}); first entry is NaN from the shift.
        log_prices = np.log(prices / prices.shift(1))

        self.model.fit(prices)

        X2, y2 = self.base_models[2].fit.call_args[0]
        # The third base classifier should be fit on all samples, and samples
        # should be re-labeled with y>.1 (bool).
        np.testing.assert_array_equal(
            X2,
            [np.flip(log_prices[1:4]), np.flip(log_prices[2:5]), np.flip(log_prices[3:6])])
        np.testing.assert_array_equal(y2, log_prices[4:] > .1)

        X3, y3 = self.base_models[3].fit.call_args[0]
        # The fourth base classifier should be fit on only the sample with
        # y>.1, and samples should be re-labeled with y>.2 (bool).
        np.testing.assert_array_equal(X3, [np.flip(log_prices[1:4])])
        np.testing.assert_array_equal(y3, log_prices[4] > .2)

        X1, y1 = self.base_models[1].fit.call_args[0]
        # The second base classifier should be fit on only samples with y<.1,
        # and samples should be re-labeled with y>-.1 (bool).
        np.testing.assert_array_equal(
            X1,
            [np.flip(log_prices[2:5]), np.flip(log_prices[3:6])])
        np.testing.assert_array_equal(y1, log_prices[5:] > -.1)

        X0, y0 = self.base_models[0].fit.call_args[0]
        # The first base classifier should be fit on only samples with y<-.1,
        # and samples should be re-labeled with y>-.2 (bool).
        np.testing.assert_array_equal(X0, [np.flip(log_prices[3:6])])
        np.testing.assert_array_equal(y0, log_prices[6] > -.2)

    def test_predict(self):
        # recent prices
        prices = pd.Series([10, 11.5, 11, 9.5])
        log_prices = np.log(prices / prices.shift(1))

        # base model's predictions are of the form
        # [[probability that price is less than split, probability that it is not]]
        self.base_models[2].predict_proba.return_value = [[1]]
        self.base_models[1].predict_proba.return_value = [[.6, .4]]
        self.base_models[0].predict_proba.return_value = [[.1, .9]]
        for model in self.base_models:
            model.classes_ = [0, 1]
        # The third base model saw only one class during fit, so it reports a
        # single class and a single-column predict_proba.
        self.base_models[2].classes_ = [0]

        # convert splits from [ratio of future price difference to current
        # price] to [future price]
        abs_price_splits = (1 + np.array(self.splits)) * 9.5

        # the model is fit on old training prices
        self.model.fit([1.25, 1.25, 1.25, 1.25, 1.25, 1, .85])

        # A DistributionBin should be included iff there is probability>0 that
        # price will be in that interval.
        # DistributionBin.mean should be [the mean label for samples with
        # corresponding labels] * [current price].
        # DistributionBin.probability should be the product of base model
        # predictions corresponding to adjacent/parent nodes in the split tree.
        expected_pred = [DistributionBin(0, abs_price_splits[0], 1 / 1.25 * 9.5, .6 * .1),
                         DistributionBin(abs_price_splits[0], abs_price_splits[1], .85 * 9.5, .6 * .9),
                         DistributionBin(abs_price_splits[1], abs_price_splits[2], 9.5, .4)]
        self.assertEqual(expected_pred, self.model.predict(prices))

        # The fourth base model should not be fit because there are no samples
        # with labels greater than the third split.
        self.base_models[3].fit.assert_not_called()

        # Base models should be called with the log returns of recent prices.
        np.testing.assert_array_equal(self.base_models[2].predict_proba.call_args[0][0],
                                      [np.flip(log_prices[1:])])