Merge pull request #70 from abess-team/create-pull-request/patch

Fixes by format action
abess-team · Dec 28, 2023 · e7031f6 · e7031f6
2 parents 45bfb06 + 0c6f302
commit e7031f6
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 35 deletions.
diff --git a/pytest/test_skmodel.py b/pytest/test_skmodel.py
@@ -14,6 +14,7 @@
 from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split
 from sklearn.feature_selection import SelectFromModel
 from sklearn.neural_network import MLPRegressor
+
 # from sksurv.datasets import load_veterans_lung_cancer
 # from sksurv.preprocessing import OneHotEncoder
 import warnings
@@ -26,8 +27,10 @@
 def test_PortfolioSelection():
     # load data
     port = PortfolioSelection(sparsity=50, alpha=0.001, random_state=0)
-    dir = os.path.normpath("/docs/source/gallery/Miscellaneous/data/csi500-2020-2021.csv")
-    X = pd.read_csv(CURRENT+dir, encoding="gbk")
+    dir = os.path.normpath(
+        "/docs/source/gallery/Miscellaneous/data/csi500-2020-2021.csv"
+    )
+    X = pd.read_csv(CURRENT + dir, encoding="gbk")
     keep_cols = X.columns[(X.isnull().sum() <= 20)]
     X = X[keep_cols]
     X = X.fillna(0)
@@ -51,7 +54,8 @@ def test_PortfolioSelection():
     grid_search.fit(X)
     grid_search.cv_results_
     assert grid_search.best_score_ > 0.05
-    print('PortfolioSelection passed test!')
+    print("PortfolioSelection passed test!")
+
 
 test_PortfolioSelection()
 
@@ -109,7 +113,7 @@ def test_NonlinearSelection():
     # grid_search.fit(X, y)
     # grid_search.cv_results_
     # assert set(np.nonzero(grid_search.best_estimator_.coef_)[0]) == set(true_support_set)
-    print('NonlinearSelection passed test!')
+    print("NonlinearSelection passed test!")
 
 
 test_NonlinearSelection()
@@ -135,27 +139,36 @@ def test_RobustRegression():
     score = model.score(X, y, sample_weight)
     est_support_set = np.nonzero(model.coef_)[0]
     assert set(est_support_set) == set(true_support_set)
-    print('RobustRegression passed test!')
+    print("RobustRegression passed test!")
 
 
 test_RobustRegression()
 
+
 def test_MultivariateFailure():
     def make_Clayton2_data(n, theta=15, lambda1=1, lambda2=1, c1=1, c2=1):
         u1 = np.random.uniform(0, 1, n)
         u2 = np.random.uniform(0, 1, n)
-        time2 = -np.log(1-u2)/lambda2
-        time1 = np.log(1-np.power((1-u2),-theta) + np.power((1-u1), -theta/(1+theta))*np.power((1-u2),-theta))/theta/lambda1
+        time2 = -np.log(1 - u2) / lambda2
+        time1 = (
+            np.log(
+                1
+                - np.power((1 - u2), -theta)
+                + np.power((1 - u1), -theta / (1 + theta)) * np.power((1 - u2), -theta)
+            )
+            / theta
+            / lambda1
+        )
         ctime1 = np.random.uniform(0, c1, n)
-        ctime2 =  np.random.uniform(0, c2, n)
+        ctime2 = np.random.uniform(0, c2, n)
         delta1 = (time1 < ctime1) * 1
         delta2 = (time2 < ctime2) * 1
         censoringrate1 = 1 - sum(delta1) / n
         censoringrate2 = 1 - sum(delta2) / n
         # print("censoring rate1:" + str(censoringrate1))
         # print("censoring rate2:" + str(censoringrate2))
-        time1 = np.minimum(time1,ctime1)
-        time2 = np.minimum(time2,ctime2)
+        time1 = np.minimum(time1, ctime1)
+        time2 = np.minimum(time2, ctime2)
         y = np.hstack((time1.reshape((-1, 1)), time2.reshape((-1, 1))))
         delta = np.hstack((delta1.reshape((-1, 1)), delta2.reshape((-1, 1))))
         return y, delta
@@ -165,18 +178,23 @@ def make_Clayton2_data(n, theta=15, lambda1=1, lambda2=1, c1=1, c2=1):
     n, p, s, rho = 100, 100, 10, 0.5
     beta = np.zeros(p)
     beta[:s] = 5
-    Sigma = np.power(rho, np.abs(np.linspace(1, p, p) - np.linspace(1, p, p).reshape(p, 1)))
+    Sigma = np.power(
+        rho, np.abs(np.linspace(1, p, p) - np.linspace(1, p, p).reshape(p, 1))
+    )
     X = np.random.multivariate_normal(mean=np.zeros(p), cov=Sigma, size=(n,))
-    lambda1 = 1*np.exp(np.matmul(X, beta))
-    lambda2 = 10*np.exp(np.matmul(X, beta))
+    lambda1 = 1 * np.exp(np.matmul(X, beta))
+    lambda2 = 10 * np.exp(np.matmul(X, beta))
 
-    y, delta = make_Clayton2_data(n, theta=50, lambda1=lambda1, lambda2=lambda2, c1=5, c2=5)
+    y, delta = make_Clayton2_data(
+        n, theta=50, lambda1=lambda1, lambda2=lambda2, c1=5, c2=5
+    )
 
     model = MultivariateFailure(s)
     model = model.fit(X, y, delta)
     assert (np.nonzero(model.coef_)[0] == np.nonzero(beta)[0]).all()
     pred = model.predict(X)
     score = model.score(X, y, delta)
-    print('MultivariateFailure passed test!')
+    print("MultivariateFailure passed test!")
+
 
-test_MultivariateFailure()
+test_MultivariateFailure()
diff --git a/skscope/skmodel.py b/skscope/skmodel.py
@@ -432,9 +432,10 @@ class MultivariateFailure(BaseEstimator):
     _parameter_constraints: dict = {
         "sparsity": [Interval(Integral, 1, None, closed="left")],
     }
+
     def __init__(self, sparsity=5):
         self.sparsity = sparsity
-    
+
     def fit(self, X, y, delta, sample_weight=None):
         r"""
         Minimize negative partial log-likelihood with sparsity constraint for provided data.
@@ -446,13 +447,13 @@ def fit(self, X, y, delta, sample_weight=None):
 
         y : array-like, shape = (n_samples, n_events)
             Observed time of multiple events.
-        
+
         delta : array-like, shape = (n_samples, n_events)
             Indicator matrix of censoring.
 
         sample_weight : ignored
             Not used, present here for API consistency by convention.
-        
+
         Returns
         --------
         self : object
@@ -463,18 +464,23 @@ def fit(self, X, y, delta, sample_weight=None):
         K = delta.shape[1]
         self.n_features_in_ = p
         self.n_events = K
+
         def multivariate_failure_objective(params):
             Xbeta = jnp.matmul(X, params)
             tmp = jnp.ones((n, K))
             for i in range(n):
                 for k in range(K):
-                    tmp = tmp.at[i, k].set(X[i] @ params - jnp.log(jnp.matmul(y[:,k] >= y[i,k], jnp.exp(Xbeta))))
-            loss = - jnp.mean(tmp * delta)
+                    tmp = tmp.at[i, k].set(
+                        X[i] @ params
+                        - jnp.log(jnp.matmul(y[:, k] >= y[i, k], jnp.exp(Xbeta)))
+                    )
+            loss = -jnp.mean(tmp * delta)
             return loss
+
         solver = ScopeSolver(p, self.sparsity)
         self.coef_ = solver.solve(multivariate_failure_objective, jit=True)
         return self
-    
+
     def predict(self, X):
         r"""
         Given the features, predict the hazard function up to some constant independent of the sample.
@@ -483,19 +489,19 @@ def predict(self, X):
         ----------
         X : array-like, shape(n_samples, n_features)
             Feature matrix.
-        
+
         Returns
         --------
         hazard : array, shape = (n_samples,)
-            the quantity :math:`e^{\beta^{\top}X_i}` proportional to the harzard function up to 
-            some constant independent of the sample index :math:`i` such that 
+            the quantity :math:`e^{\beta^{\top}X_i}` proportional to the harzard function up to
+            some constant independent of the sample index :math:`i` such that
             :math:`\lambda_k(t;X_{i})=\lambda_{0k}(t)e^{\beta^{\top}X_i}`.
         """
         check_is_fitted(self)
         X, _, _ = check_data(X)
         Xbeta = X @ self.coef_
         return np.exp(Xbeta)
-    
+
     def score(self, X, y, delta, sample_weight=None):
         r"""
         Give test data, and it return the test score of this fitted model.
@@ -507,7 +513,7 @@ def score(self, X, y, delta, sample_weight=None):
 
         y : array-like, shape = (n_samples, n_events)
             Observed time of multiple events.
-        
+
         delta : array-like, shape = (n_samples, n_events)
             Indicator matrix of censoring.
 
@@ -525,12 +531,17 @@ def score(self, X, y, delta, sample_weight=None):
         if p != self.n_features_in_:
             raise ValueError("X.shape[1] should be " + str(self.n_features_in_))
         if K != self.n_events:
-            raise ValueError("y.shape[1] and delta.shape[1] should be " + str(self.events))
-
+            raise ValueError(
+                "y.shape[1] and delta.shape[1] should be " + str(self.events)
+            )
+
         Xbeta = np.matmul(X, self.coef_)
         tmp = np.ones((n, K))
         for k in range(K):
-            tmp[:, k] = X @ self.coef_ - np.log(np.matmul(y[:,k].reshape(1,-1) >= y[:,k].reshape(-1,1), np.exp(Xbeta)))
+            tmp[:, k] = X @ self.coef_ - np.log(
+                np.matmul(
+                    y[:, k].reshape(1, -1) >= y[:, k].reshape(-1, 1), np.exp(Xbeta)
+                )
+            )
         score = np.mean(tmp * delta)
         return score
-
diff --git a/skscope/utilities.py b/skscope/utilities.py
@@ -1,6 +1,7 @@
 import numpy as np
 from sklearn.utils import check_array, check_consistent_length
 
+
 def check_y_survival(y_or_event, *args, allow_all_censored=False):
     """Check that array correctly represents an outcome for survival analysis.
 
@@ -30,7 +31,11 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):
     if len(args) == 0:
         y = y_or_event
 
-        if not isinstance(y, np.ndarray) or y.dtype.fields is None or len(y.dtype.fields) != 2:
+        if (
+            not isinstance(y, np.ndarray)
+            or y.dtype.fields is None
+            or len(y.dtype.fields) != 2
+        ):
             raise ValueError(
                 "y must be a structured array with the first field"
                 " being a binary class event indicator and the second field"
@@ -46,7 +51,9 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):
 
     event = check_array(y_event, ensure_2d=False)
     if not np.issubdtype(event.dtype, np.bool_):
-        raise ValueError(f"elements of event indicator must be boolean, but found {event.dtype}")
+        raise ValueError(
+            f"elements of event indicator must be boolean, but found {event.dtype}"
+        )
 
     if not (allow_all_censored or np.any(event)):
         raise ValueError("all samples are censored")
@@ -59,7 +66,9 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False):
 
         yt = check_array(yt, ensure_2d=False)
         if not np.issubdtype(yt.dtype, np.number):
-            raise ValueError(f"time must be numeric, but found {yt.dtype} for argument {i + 2}")
+            raise ValueError(
+                f"time must be numeric, but found {yt.dtype} for argument {i + 2}"
+            )
 
         return_val.append(yt)
 
@@ -92,4 +101,4 @@ def check_array_survival(X, y):
     """
     event, time = check_y_survival(y)
     check_consistent_length(X, event, time)
-    return event, time
+    return event, time