Skip to content

Commit

Permalink
Merge pull request #70 from abess-team/create-pull-request/patch
Browse files Browse the repository at this point in the history
Fixes by format action
  • Loading branch information
Mamba413 authored Dec 28, 2023
2 parents 45bfb06 + 0c6f302 commit e7031f6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 35 deletions.
50 changes: 34 additions & 16 deletions pytest/test_skmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.neural_network import MLPRegressor

# from sksurv.datasets import load_veterans_lung_cancer
# from sksurv.preprocessing import OneHotEncoder
import warnings
Expand All @@ -26,8 +27,10 @@
def test_PortfolioSelection():
# load data
port = PortfolioSelection(sparsity=50, alpha=0.001, random_state=0)
dir = os.path.normpath("/docs/source/gallery/Miscellaneous/data/csi500-2020-2021.csv")
X = pd.read_csv(CURRENT+dir, encoding="gbk")
dir = os.path.normpath(
"/docs/source/gallery/Miscellaneous/data/csi500-2020-2021.csv"
)
X = pd.read_csv(CURRENT + dir, encoding="gbk")
keep_cols = X.columns[(X.isnull().sum() <= 20)]
X = X[keep_cols]
X = X.fillna(0)
Expand All @@ -51,7 +54,8 @@ def test_PortfolioSelection():
grid_search.fit(X)
grid_search.cv_results_
assert grid_search.best_score_ > 0.05
print('PortfolioSelection passed test!')
print("PortfolioSelection passed test!")


test_PortfolioSelection()

Expand Down Expand Up @@ -109,7 +113,7 @@ def test_NonlinearSelection():
# grid_search.fit(X, y)
# grid_search.cv_results_
# assert set(np.nonzero(grid_search.best_estimator_.coef_)[0]) == set(true_support_set)
print('NonlinearSelection passed test!')
print("NonlinearSelection passed test!")


test_NonlinearSelection()
Expand All @@ -135,27 +139,36 @@ def test_RobustRegression():
score = model.score(X, y, sample_weight)
est_support_set = np.nonzero(model.coef_)[0]
assert set(est_support_set) == set(true_support_set)
print('RobustRegression passed test!')
print("RobustRegression passed test!")


test_RobustRegression()


def test_MultivariateFailure():
def make_Clayton2_data(n, theta=15, lambda1=1, lambda2=1, c1=1, c2=1):
u1 = np.random.uniform(0, 1, n)
u2 = np.random.uniform(0, 1, n)
time2 = -np.log(1-u2)/lambda2
time1 = np.log(1-np.power((1-u2),-theta) + np.power((1-u1), -theta/(1+theta))*np.power((1-u2),-theta))/theta/lambda1
time2 = -np.log(1 - u2) / lambda2
time1 = (
np.log(
1
- np.power((1 - u2), -theta)
+ np.power((1 - u1), -theta / (1 + theta)) * np.power((1 - u2), -theta)
)
/ theta
/ lambda1
)
ctime1 = np.random.uniform(0, c1, n)
ctime2 = np.random.uniform(0, c2, n)
ctime2 = np.random.uniform(0, c2, n)
delta1 = (time1 < ctime1) * 1
delta2 = (time2 < ctime2) * 1
censoringrate1 = 1 - sum(delta1) / n
censoringrate2 = 1 - sum(delta2) / n
# print("censoring rate1:" + str(censoringrate1))
# print("censoring rate2:" + str(censoringrate2))
time1 = np.minimum(time1,ctime1)
time2 = np.minimum(time2,ctime2)
time1 = np.minimum(time1, ctime1)
time2 = np.minimum(time2, ctime2)
y = np.hstack((time1.reshape((-1, 1)), time2.reshape((-1, 1))))
delta = np.hstack((delta1.reshape((-1, 1)), delta2.reshape((-1, 1))))
return y, delta
Expand All @@ -165,18 +178,23 @@ def make_Clayton2_data(n, theta=15, lambda1=1, lambda2=1, c1=1, c2=1):
n, p, s, rho = 100, 100, 10, 0.5
beta = np.zeros(p)
beta[:s] = 5
Sigma = np.power(rho, np.abs(np.linspace(1, p, p) - np.linspace(1, p, p).reshape(p, 1)))
Sigma = np.power(
rho, np.abs(np.linspace(1, p, p) - np.linspace(1, p, p).reshape(p, 1))
)
X = np.random.multivariate_normal(mean=np.zeros(p), cov=Sigma, size=(n,))
lambda1 = 1*np.exp(np.matmul(X, beta))
lambda2 = 10*np.exp(np.matmul(X, beta))
lambda1 = 1 * np.exp(np.matmul(X, beta))
lambda2 = 10 * np.exp(np.matmul(X, beta))

y, delta = make_Clayton2_data(n, theta=50, lambda1=lambda1, lambda2=lambda2, c1=5, c2=5)
y, delta = make_Clayton2_data(
n, theta=50, lambda1=lambda1, lambda2=lambda2, c1=5, c2=5
)

model = MultivariateFailure(s)
model = model.fit(X, y, delta)
assert (np.nonzero(model.coef_)[0] == np.nonzero(beta)[0]).all()
pred = model.predict(X)
score = model.score(X, y, delta)
print('MultivariateFailure passed test!')
print("MultivariateFailure passed test!")


test_MultivariateFailure()
test_MultivariateFailure()
41 changes: 26 additions & 15 deletions skscope/skmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,10 @@ class MultivariateFailure(BaseEstimator):
_parameter_constraints: dict = {
"sparsity": [Interval(Integral, 1, None, closed="left")],
}

def __init__(self, sparsity=5):
self.sparsity = sparsity

def fit(self, X, y, delta, sample_weight=None):
r"""
Minimize negative partial log-likelihood with sparsity constraint for provided data.
Expand All @@ -446,13 +447,13 @@ def fit(self, X, y, delta, sample_weight=None):
y : array-like, shape = (n_samples, n_events)
Observed time of multiple events.
delta : array-like, shape = (n_samples, n_events)
Indicator matrix of censoring.
sample_weight : ignored
Not used, present here for API consistency by convention.
Returns
--------
self : object
Expand All @@ -463,18 +464,23 @@ def fit(self, X, y, delta, sample_weight=None):
K = delta.shape[1]
self.n_features_in_ = p
self.n_events = K

def multivariate_failure_objective(params):
Xbeta = jnp.matmul(X, params)
tmp = jnp.ones((n, K))
for i in range(n):
for k in range(K):
tmp = tmp.at[i, k].set(X[i] @ params - jnp.log(jnp.matmul(y[:,k] >= y[i,k], jnp.exp(Xbeta))))
loss = - jnp.mean(tmp * delta)
tmp = tmp.at[i, k].set(
X[i] @ params
- jnp.log(jnp.matmul(y[:, k] >= y[i, k], jnp.exp(Xbeta)))
)
loss = -jnp.mean(tmp * delta)
return loss

solver = ScopeSolver(p, self.sparsity)
self.coef_ = solver.solve(multivariate_failure_objective, jit=True)
return self

def predict(self, X):
r"""
Given the features, predict the hazard function up to some constant independent of the sample.
Expand All @@ -483,19 +489,19 @@ def predict(self, X):
----------
X : array-like, shape(n_samples, n_features)
Feature matrix.
Returns
--------
hazard : array, shape = (n_samples,)
the quantity :math:`e^{\beta^{\top}X_i}` proportional to the harzard function up to
some constant independent of the sample index :math:`i` such that
the quantity :math:`e^{\beta^{\top}X_i}` proportional to the harzard function up to
some constant independent of the sample index :math:`i` such that
:math:`\lambda_k(t;X_{i})=\lambda_{0k}(t)e^{\beta^{\top}X_i}`.
"""
check_is_fitted(self)
X, _, _ = check_data(X)
Xbeta = X @ self.coef_
return np.exp(Xbeta)

def score(self, X, y, delta, sample_weight=None):
r"""
Give test data, and it return the test score of this fitted model.
Expand All @@ -507,7 +513,7 @@ def score(self, X, y, delta, sample_weight=None):
y : array-like, shape = (n_samples, n_events)
Observed time of multiple events.
delta : array-like, shape = (n_samples, n_events)
Indicator matrix of censoring.
Expand All @@ -525,12 +531,17 @@ def score(self, X, y, delta, sample_weight=None):
if p != self.n_features_in_:
raise ValueError("X.shape[1] should be " + str(self.n_features_in_))
if K != self.n_events:
raise ValueError("y.shape[1] and delta.shape[1] should be " + str(self.events))

raise ValueError(
"y.shape[1] and delta.shape[1] should be " + str(self.events)
)

Xbeta = np.matmul(X, self.coef_)
tmp = np.ones((n, K))
for k in range(K):
tmp[:, k] = X @ self.coef_ - np.log(np.matmul(y[:,k].reshape(1,-1) >= y[:,k].reshape(-1,1), np.exp(Xbeta)))
tmp[:, k] = X @ self.coef_ - np.log(
np.matmul(
y[:, k].reshape(1, -1) >= y[:, k].reshape(-1, 1), np.exp(Xbeta)
)
)
score = np.mean(tmp * delta)
return score

17 changes: 13 additions & 4 deletions skscope/utilities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from sklearn.utils import check_array, check_consistent_length


def check_y_survival(y_or_event, *args, allow_all_censored=False):
"""Check that array correctly represents an outcome for survival analysis.
Expand Down Expand Up @@ -30,7 +31,11 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):
if len(args) == 0:
y = y_or_event

if not isinstance(y, np.ndarray) or y.dtype.fields is None or len(y.dtype.fields) != 2:
if (
not isinstance(y, np.ndarray)
or y.dtype.fields is None
or len(y.dtype.fields) != 2
):
raise ValueError(
"y must be a structured array with the first field"
" being a binary class event indicator and the second field"
Expand All @@ -46,7 +51,9 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):

event = check_array(y_event, ensure_2d=False)
if not np.issubdtype(event.dtype, np.bool_):
raise ValueError(f"elements of event indicator must be boolean, but found {event.dtype}")
raise ValueError(
f"elements of event indicator must be boolean, but found {event.dtype}"
)

if not (allow_all_censored or np.any(event)):
raise ValueError("all samples are censored")
Expand All @@ -59,7 +66,9 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):

yt = check_array(yt, ensure_2d=False)
if not np.issubdtype(yt.dtype, np.number):
raise ValueError(f"time must be numeric, but found {yt.dtype} for argument {i + 2}")
raise ValueError(
f"time must be numeric, but found {yt.dtype} for argument {i + 2}"
)

return_val.append(yt)

Expand Down Expand Up @@ -92,4 +101,4 @@ def check_array_survival(X, y):
"""
event, time = check_y_survival(y)
check_consistent_length(X, event, time)
return event, time
return event, time

0 comments on commit e7031f6

Please sign in to comment.