# mmodels.py

from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import Normalizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectFwe, f_classif
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.svm import LinearSVC
from tpot.builtins import StackingEstimator, ZeroCount
from copy import copy
from collections import defaultdict
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier
from sklearn.feature_selection import SelectPercentile
import sys

class mmodels():
    """ A list of models generated by tpot. 
    The data was planets from the exoplanet archive

    """
    def __init__(self,model):
        """ The model name must be supplied at __init__ .
        The model is then assigned it's correct classifier
        and a short description

        """
        self.mycollection = [
                ("Name", ["Precision CONFIRMED","Accuracy","Model","Comment"]),
                #                ("DT_vif_cap1", ["0.761","0.815","sklearn.tree.DecisionTreeClassifier", "Found using TPOT light"]),
                #                ("GB_vif_cap2", ["0.83","0.860","sklearn.ensemble.GradientBoostingClassifier","TPOT None: duration=35:59:3 overfitted- overfitted 100%"]),
                ("BernoulliNB", ["0.441","0.555","sklearn.naive_bayes.BernoulliNB","TPOT coerced to use BernoulliNB"]),
                ("DT", ["0.793","0.844","sklearn.tree.DecisionTreeClassifier", "TPOT light - train AUC approx test AUC"]),
                ("DT_vif", ["0.769","0.809","sklearn.tree.DecisionTreeClassifier", "TPOT light - very slight overfit"]),
                ("DT_vif_cap2", ["-","0.804","sklearn.tree.DecisionTreeClassifier","coerced - rerun"]),
                ("GB", ["0.816","0.855","sklearn.ensemble.GradientBoostingClassifier","Best model found Early stop=3 Time=6:7:44 i- overfitted - 100%"]),
                ("GB_vif", ["0.830","0.859","sklearn.ensemble.GradientBoostingClassifier","TK_vif_100g_None Early stop=3 Time=26:32:36 but overfitted - 100%"]),
                ("GB_vif_cap2", ["0.835","0.859","sklearn.ensemble.GradientBoostingClassifier","TPOT None: EARLY_STOP=3 duration=4:52:25 overfitted- overfitted 100%"]),
                ("GBtest", ["0.810","0.863","sklearn.ensemble.GradientBoostingClassifier","GB copy for testing"]),
                ("GaussianNB", ["0.348","0.409","sklearn.naive_bayes.GaussianNB","TPOT coerced to use GaussianNB"]),
                ("KNeighborsClassifier", ["0.416","0.522","sklearn.neighbors.KNeighborsClassifier","TPOT coerced to use KNeighborsClassifier"]),
                ("LR", ["0.804","0.850","sklearn.linear_model.LogisticRegression","TPOT light - train AUC = test AUC"]),
                ("LR_vif_cap2", ["0.791","0.834","sklearn.linear_model.LogisticRegression","TPOT light - train AUC= test AUC"]),
                ("RF", ["0.806","0.855","sklearn.ensemble.RandomForestClassifier","TK_100g_RF Early Stop=3 overfitted 100%"]),
                ("RF_vif", ["0.817","0.855","sklearn.ensemble.RandomForestClassifier","Early Stop=3 - overfit - 100%"]),
                ("RF_vif_cap2", ["0.811","0.859","sklearn.ensemble.RandomForestClassifier","Early Stop=3 - overfit"]),
                ("ET_vif", ["0.835","0.859","sklearn.ensemble.ExtraTreesClassifier","Early Stop=3 - generated when using TPOT_nn?"]),
#                ("LinearSVC", ["-","-","sklearn.svm.SVC","TPOT coerced to use LinearSVC"]),
#                ("XGB", ["0.82","0.850","xgboost.XGBClassifier","-", "Coerced to use XGB - overfitted"]),
#                ("XGB_vif", ["0.83","0.851","xgboost.XGBClassifier","Coerced to use XGB - overfitted"]),
                ("XGB_vif_cap2", ["0.828","0.858","xgboost.XGBClassifier", "Generated by TPOT_nn"]),
                # Drop b it just complicates
                #                ("bDT", ["-","0.876","sklearn.tree.DecisionTreeClassifier", "light early stop=3"]),
                #                ("bDT_vif", ["-","0.881","sklearn.tree.DecisionTreeClassifier","light early stop=3"]),
                #                ("bDT_vif_cap2", ["-","0.880","sklearn.tree.DecisionTreeClassifier","light early stop=3"]),
                ##                ("bGB", ["0.835,"0.909","sklearn.ensemble.GradientBoostingClassifier","TPOT coerced to use Gradient Boosting"]),
                #                ("bGB_vif", ["-","-","sklearn.ensemble.GradientBoostingClassifier","coerced"]),
                #                ("bGB_vif_cap2", ["0.839","0.914","sklearn.ensemble.GradientBoostingClassifier",""]),
                ##                ("bGBtest", ["0.835","0.909","sklearn.ensemble.GradientBoostingClassifier","Copy of bGB for testing"]),
                #                ("bLR", ["0.839","0.932","sklearn.linear_model.LogisticRegression","Found using TPOT light"]),
                #                ("bLR_vif", ["0.834","0.894","sklearn.linear_model.LogisticRegression","Found using TPOT light"]),
                #                ("bLR_vif_cap2", ["0.831","0.903","sklearn.linear_model.LogisticRegression","Found using TPOT light"]),
                #                ("bRF", ["0.84","0.908","sklearn.ensemble.RandomForestClassifier","Early Stop=3"]),
                #                ("bRF_vif", ["0.85","0.905","sklearn.ensemble.RandomForestClassifier","Early Stop=3"]),
                #                ("bRF_vif_cap2", ["-","0.907","sklearn.ensemble.RandomForestClassifier","Early Stop=3"]),
                #                ("bSVC", ["-","0.717","sklearn.svm.SVC", "TPOT coerced to use SVC"]),
                #                ("bXGB", ["-","-","xgboost.XGBClassifier",""]),
                #                ("bXGB_vif", ["0.83","0.859","xgboost.XGBClassifier", "None 100g"]),
                #                ("bXGB_vif_cap2", ["-","-","xgboost.XGBClassifier",""]),
                ]

        # Extract sklearn_name
        self.config = {}
        for k,v in self.mycollection:
            self.config[k]=v[2]

        # Extract Accurracy
        self.accuracy = {}
        for k,v in self.mycollection:
            self.accuracy[k]=v[1]

        # Extract description
#        self.desc = {}
#        for k,v in self.mycollection:
#            self.desc[k]=v[0]+','+v[1]+','+v[2]+','+v[3]

#        for k,v in sorted(self.mycollection, key=lambda x: str(x[1]),reverse=True)
        sortc=sorted(self.mycollection, key=lambda x: str(x[1]),reverse=True)

        self.desc = {}
        for k,v in sortc:
            self.desc[k]=v[0]+','+v[1]+','+v[2]
#            self.desc[k]=v[0]+','+v[1]+','+v[2]+','+v[3]

        self.name=model

        # Get the classifier
#        print(f"model is {model}")
        myclf="self."+model+"()"
        self.clf=eval(myclf)

    def bGB(self):
        """Binary (CONFIRM v REST) gradient-boosting pipeline.

        The leading ``b`` in the method name marks a binary model.
        TPOT: average CV score on the training set was 0.9093943826630808.

        The pipeline is returned as-is; it is not wrapped in
        OneVsRestClassifier.
        """
        # The union concatenates two plain copies of the input, so every
        # feature column reaches the booster twice.
        doubled_features = make_union(
            FunctionTransformer(copy),
            FunctionTransformer(copy),
        )
        booster = GradientBoostingClassifier()
        return make_pipeline(doubled_features, booster)

    def bGBtest(self):
        """Copy of the binary gradient-boosting pipeline, kept for testing.

        ``b`` indicates a binary model, i.e. CONFIRM v REST.
        TPOT: average CV score on the training set was 0.9093943826630808.

        Returns the bare pipeline (no OneVsRestClassifier wrapper).
        """
        steps = [
            # Union of two copies of the input features.
            make_union(FunctionTransformer(copy), FunctionTransformer(copy)),
            # Default-parameter gradient boosting as the final estimator.
            GradientBoostingClassifier(),
        ]
        return make_pipeline(*steps)

    def LR(self):
        """Logistic-regression pipeline, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.8474651723959203.
        """
        stump = DecisionTreeClassifier(criterion="gini", max_depth=1, min_samples_leaf=16, min_samples_split=7)
        depth5_tree = DecisionTreeClassifier(criterion="gini", max_depth=5, min_samples_leaf=16, min_samples_split=10)
        # max_iter was raised by hand; the TPOT export used
        # LogisticRegression(C=10.0, dual=False, penalty="l2").
        final_lr = LogisticRegression(C=10.0, dual=False, penalty="l2", max_iter=12000)

        steps = [
            make_union(FunctionTransformer(copy), Normalizer(norm="l1")),
            StackingEstimator(estimator=stump),
            StandardScaler(),
            StackingEstimator(estimator=GaussianNB()),
            StackingEstimator(estimator=depth5_tree),
            Normalizer(norm="max"),
            final_lr,
        ]
        return OneVsRestClassifier(make_pipeline(*steps))

    def LR_vif_cap2(self):
        """Logistic-regression pipeline for the vif_cap2 data, one-vs-rest.

        TPOT: average CV score on the training set was 0.8474651723959203.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK_vif_cap2.csv, model=Tpot_light,
        Tpot_file=data/TK_vif_cap2_100g_Tpot_light_Tpot.py,
        Tpot_score=0.3229426433915212,
        start 2020-07-22 20:15:56.499910, end 2020-07-22 22:54:03.656962,
        duration=2:38:7.
        """
        stacked_nb = StackingEstimator(estimator=BernoulliNB(alpha=0.001, fit_prior=False))
        selector = SelectFwe(score_func=f_classif, alpha=0.023)
        final_lr = LogisticRegression(C=1.0, dual=False, penalty="l2", max_iter=12000)

        pipeline = make_pipeline(
            RobustScaler(),
            stacked_nb,
            Normalizer(norm="l2"),
            ZeroCount(),
            selector,
            MaxAbsScaler(),
            final_lr,
        )
        return OneVsRestClassifier(pipeline)


    def bLR(self):
        """Binary logistic-regression pipeline, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.9317001744979585.
        ifile=data/TKb.csv, ofile=data/TKb_100g_light.csv, generations=100,
        config_dict=TPOT light; model=light: TPOT light,
        Tpot_file=data/TKb_100g_light_Tpot.py,
        Tpot_score=0.06608478802992519,
        sdata=29/06/2020_13:33:57, edate=30/06/2020_09:03:48.
        """
        tree = DecisionTreeClassifier(criterion="gini", max_depth=6, min_samples_leaf=1, min_samples_split=19)
        # The export stacks the same k-NN configuration twice; each stage
        # gets its own estimator instance.
        knn_first = KNeighborsClassifier(n_neighbors=9, p=1, weights="uniform")
        knn_second = KNeighborsClassifier(n_neighbors=9, p=1, weights="uniform")
        # max_iter added by hand; the export had no max_iter argument.
        final_lr = LogisticRegression(C=15.0, dual=False, penalty="l2", max_iter=1200)

        steps = [
            MinMaxScaler(),
            StackingEstimator(estimator=tree),
            StackingEstimator(estimator=BernoulliNB(alpha=1.0, fit_prior=True)),
            RobustScaler(),
            VarianceThreshold(threshold=0.2),
            StackingEstimator(estimator=knn_first),
            StackingEstimator(estimator=knn_second),
            StackingEstimator(estimator=GaussianNB()),
            Normalizer(norm="l2"),
            final_lr,
        ]
        return OneVsRestClassifier(make_pipeline(*steps))

    def DT(self):
        """Single tuned decision tree, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.8071513448244749.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=1, random_state=42, early_stop=3);
        ifile data/TK.csv, model=Tpot_light,
        Tpot_file=data/TK_100g_Tpot_light_Tpot.py,
        Tpot_score=0.32418952618453867,
        start 2020-07-21 11:04:53.610002, end 2020-07-21 11:05:55.391966,
        duration=0:1:1.
        """
        tree_params = {
            "criterion": "gini",
            "max_depth": 7,
            "min_samples_leaf": 20,
            "min_samples_split": 4,
        }
        return OneVsRestClassifier(DecisionTreeClassifier(**tree_params))

#    def DT(self): 
#        """  Forced tpot to train on Decision Tree
#        TK_tpot_light.py :Average CV score on the training set was: 0.845523714637288
#        """
#        exported_pipeline = make_pipeline(
#                make_union(
#                    make_union(
#                        Normalizer(norm="l1"),
#                        FunctionTransformer(copy)
#                        ),
#                    FunctionTransformer(copy)
#                    ),
#                SelectFwe(score_func=f_classif, alpha=0.012),
#                RobustScaler(),
#                StandardScaler(),
#                StackingEstimator(estimator=DecisionTreeClassifier(criterion="entropy", max_depth=7, min_samples_leaf=8, min_samples_split=7)),
#                Normalizer(norm="max"),
#                StackingEstimator(estimator=BernoulliNB(alpha=0.1, fit_prior=False)),
#                #            LogisticRegression(C=20.0, dual=False, penalty="l2")
#                LogisticRegression(C=20.0, dual=False, penalty="l2",max_iter=1200)
#                )
#        #    clf = OneVsRestClassifier(exported_pipeline)
#        #    return clf
#        #    return(exported_pipeline)
#        clf = OneVsRestClassifier(exported_pipeline)
#        return clf

    def bDT(self):
        """Binary decision tree with default parameters, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.8559145977428525.
        ifile=data/bTK.csv, ofile=data/bTK_100g_bDT.csv, generations=100,
        config_dict={'sklearn.tree.DecisionTreeClassifier': {}}, EARLY_STOP=3;
        model=DT: sklearn.tree.DecisionTreeClassifier,
        Tpot_file=data/bTK_100g_bDT_Tpot.py, Tpot_score=0.8478802992518704,
        sdata=07/07/2020_21:20:22, edate=07/07/2020_21:21:21.
        """
        default_tree = DecisionTreeClassifier()
        return OneVsRestClassifier(default_tree)

    def RF(self):
        """Stacked random-forest pipeline, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.8560568452535211.
        Run: TPOTClassifier(verbosity=2, generations=100,
        config_dict={'sklearn.ensemble.RandomForestClassifier': {}},
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK.csv, model=RF, Tpot_file=data/TK_100g_RF_Tpot.py,
        Tpot_score=0.3104738154613466,
        start 2020-07-24 12:53:16.337163, end 2020-07-24 14:00:53.313894,
        duration=1:7:36.
        """
        # Disabled export, obtained without early_stop:
        #   make_pipeline(
        #       make_union(
        #           make_union(
        #               FunctionTransformer(copy),
        #               StackingEstimator(estimator=make_pipeline(
        #                   make_union(FunctionTransformer(copy), FunctionTransformer(copy)),
        #                   RandomForestClassifier()))),
        #           make_union(FunctionTransformer(copy), FunctionTransformer(copy))),
        #       RandomForestClassifier())
        # Disabled export:
        #   make_pipeline(
        #       MinMaxScaler(),
        #       RandomForestClassifier(bootstrap=True, criterion="gini", max_features=0.2,
        #                              min_samples_leaf=8, min_samples_split=4, n_estimators=100))

        # Nested unions of plain copies: the input features appear three times.
        inner_pair = make_union(FunctionTransformer(copy), FunctionTransformer(copy))
        tripled_features = make_union(inner_pair, FunctionTransformer(copy))

        steps = [
            tripled_features,
            StackingEstimator(estimator=RandomForestClassifier()),
            StackingEstimator(estimator=RandomForestClassifier()),
            RandomForestClassifier(),
        ]
        return OneVsRestClassifier(make_pipeline(*steps))

    def bRF(self):
        """Binary random-forest pipeline with three stacked stages, one-vs-rest.

        TPOT: average CV score on the training set was 0.9443064522011891.
        ifile=data/TKb.csv, ofile=data/TKb_100g_None.csv, generations=100,
        config_dict=None; model=None: None,
        Tpot_file=data/TKb_100g_None_Tpot.py,
        Tpot_score=0.05361596009975062,
        sdata=29/06/2020_13:28:44, edate=06/07/2020_20:46:49.
        """
        # max_iter added by hand; the export's LinearSVC had no max_iter.
        svc = LinearSVC(C=0.0001, dual=True, loss="squared_hinge", penalty="l2", tol=0.01, max_iter=1200000)
        mlp = MLPClassifier(alpha=0.1, learning_rate_init=1.0)
        forest = RandomForestClassifier(
            bootstrap=False,
            criterion="gini",
            max_features=0.15000000000000002,
            min_samples_leaf=2,
            min_samples_split=7,
            n_estimators=100,
        )

        pipeline = make_pipeline(
            StackingEstimator(estimator=svc),
            StackingEstimator(estimator=GaussianNB()),
            StackingEstimator(estimator=mlp),
            forest,
        )
        return OneVsRestClassifier(pipeline)

    def GB(self):
        """Tuned gradient-boosting classifier, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.856887678217318.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK.csv, model=None, Tpot_file=data/TK_100g_None_Tpot.py,
        Tpot_score=0.3254364089775561,
        start 2020-07-23 01:27:45.231167, end 2020-07-23 07:35:29.470305,
        duration=6:7:44.
        """
        booster_params = dict(
            learning_rate=0.1,
            max_depth=9,
            max_features=0.55,
            min_samples_leaf=11,
            min_samples_split=4,
            n_estimators=100,
            subsample=0.8500000000000001,
        )
        booster = GradientBoostingClassifier(**booster_params)
        return OneVsRestClassifier(booster)

    def GBtest(self):
        """Tuned gradient-boosting classifier returned without a wrapper.

        Best solution using 10 generations and default settings:
        test_file=data/TK_10g_test.csv, test_size=0.1,
        tpot_model=data/TK_10g_tpot.csv, generations=10.
        Average CV score on the training set was 0.862841149267742.

        Returns the bare GradientBoostingClassifier (no OneVsRestClassifier).
        """
        return GradientBoostingClassifier(
            learning_rate=0.1,
            max_depth=9,
            max_features=0.55,
            min_samples_leaf=11,
            min_samples_split=4,
            n_estimators=100,
            subsample=0.8500000000000001,
        )

    def GB2(self):
        """Default-parameter gradient-boosting classifier, returned unwrapped.

        Notes carried over with this method (they match the GBtest notes,
        so presumably they describe that run - verify before relying on them):
        best solution using 10 generations and default settings,
        test_file=data/TK_10g_test.csv, test_size=0.1,
        tpot_model=data/TK_10g_tpot.csv, generations=10,
        average CV score on the training set was 0.862841149267742.
        """
        default_booster = GradientBoostingClassifier()
        return default_booster

#    def SVC(self):
#        """ # Average CV score on the training set was: 0.4948739065082556
#            # TPOTClassifier(verbosity=2,generations=100,config_dict={'sklearn.svm.SVC': {}},max_time_mins=None,random_state=42,early_stop=4)# model=SVC: sklearn.svm.SVC,Tpot_file=data/TK_100g_SVC_Tpot.py, Tpot_score=0.2119700748129676, sdata=10/07/2020_13:48:56, edate=10/07/2020_14:05:01 """
#        exported_pipeline = SVC()
#        return(OneVsRestClassifier(exported_pipeline))
#    def LinearSVC(self):
#        print("TODO")

    def bSVC(self):
        """Binary support-vector classifier with default parameters, one-vs-rest.

        TPOT: average CV score on the training set was 0.7166804882871365.
        Run: TPOTClassifier(verbosity=2, generations=100,
        config_dict={'sklearn.svm.SVC': {}}, max_time_mins=None,
        random_state=42, early_stop=4); model=SVC: sklearn.svm.SVC,
        Tpot_file=data/bTK_100g_bSVC_Tpot.py, Tpot_score=0.699501246882793,
        sdata=10/07/2020_14:05:02, edate=10/07/2020_14:14:26.

        Returns
        -------
        OneVsRestClassifier wrapping a default ``SVC``.
        """
        # The module-level imports bring in LinearSVC only, so a bare SVC()
        # raised NameError here.  Import it locally to keep this fix
        # self-contained.
        from sklearn.svm import SVC

        exported_pipeline = SVC()
        return(OneVsRestClassifier(exported_pipeline))

    def BernoulliNB(self):
        """Nested Bernoulli naive-Bayes stacking pipeline, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.5568028288249895.
        """
        # Inside this body the bare name BernoulliNB is the module-level
        # sklearn import, not this method: class attributes are not part of
        # a method's name lookup.
        inner_stack = make_pipeline(
            StackingEstimator(estimator=BernoulliNB()),
            BernoulliNB(),
        )
        features = make_union(
            StackingEstimator(estimator=inner_stack),
            FunctionTransformer(copy),
        )
        pipeline = make_pipeline(features, BernoulliNB())
        return OneVsRestClassifier(pipeline)

    def GaussianNB(self):
        """Default Gaussian naive-Bayes classifier, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.3801595437329509.
        """
        # The bare name below is the module-level sklearn import, not this
        # method (method bodies do not see class attributes by bare name).
        return OneVsRestClassifier(GaussianNB())

    def KNeighborsClassifier(self):
        """Default k-nearest-neighbours classifier, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.48933143669985785.
        """
        # The bare name below is the module-level sklearn import, not this
        # method (method bodies do not see class attributes by bare name).
        neighbours = KNeighborsClassifier()
        return OneVsRestClassifier(neighbours)

    def bXGB_vif_cap1(self):
        """Binary tuned XGBoost classifier, wrapped one-vs-rest.

        TPOT: average CV score on the training set was 0.9141047717225002.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=None, random_state=42, early_stop=3); model=None,
        Tpot_file=data/bTK_vif_cap_100g_bNone_Tpot.py,
        Tpot_score=0.9189526184538653,
        sdata=13/07/2020_01:17:47, edate=13/07/2020_03:31:22.
        """
        xgb_params = dict(
            learning_rate=0.1,
            max_depth=9,
            min_child_weight=6,
            n_estimators=100,
            nthread=1,
            subsample=0.8,
        )
        return OneVsRestClassifier(XGBClassifier(**xgb_params))

    def bLR_vif_cap1(self):
        """Binary logistic-regression pipeline (vif_cap data), one-vs-rest.

        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=None, random_state=42, early_stop=3); model=TPOT_light,
        Tpot_file=data/bTK_vif_cap_100g_bTPOT_light_Tpot.py,
        Tpot_score=0.8977556109725686,
        sdata=13/07/2020_00:36:50, edate=13/07/2020_00:47:38.
        Average CV score on the training set was 0.8969253613297935.
        """
        stacked_tree = StackingEstimator(
            estimator=DecisionTreeClassifier(criterion="entropy", max_depth=9, min_samples_leaf=5, min_samples_split=4)
        )
        # max_iter added by hand; the export was
        # LogisticRegression(C=0.1, dual=False, penalty="l2").
        final_lr = LogisticRegression(C=0.1, dual=False, penalty="l2", max_iter=12000)

        pipeline = make_pipeline(ZeroCount(), stacked_tree, RobustScaler(), final_lr)
        return OneVsRestClassifier(pipeline)

#    def XGB_vif_cap1(self):
#        """ # TPOTClassifier(verbosity=2,generations=100,config_dict=None,max_time_mins=None,random_state=42,early_stop=3)# model=None Tpot_file=data/TK_vif_cap_100g_None_Tpot.py, Tpot_score=0.3316708229426434, sdata=13/07/2020_03:31:24, edate=13/07/2020_07:53:47
#        # Average CV score on the training set was: 0.857164974477994 """
#        exported_pipeline = make_pipeline(
#                SelectPercentile(score_func=f_classif, percentile=82),
#                XGBClassifier(learning_rate=0.1, max_depth=6, min_child_weight=20, n_estimators=100, nthread=1, subsample=0.9000000000000001)
#                )
#        clf = OneVsRestClassifier(exported_pipeline)
#        return clf

    def DT_vif_cap1(self):
        """Decision-tree pipeline (vif_cap data), wrapped one-vs-rest.

        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=None, random_state=42, early_stop=3); model=TPOT_light,
        Tpot_file=data/TK_vif_cap_100g_TPOT_light_Tpot.py,
        Tpot_score=0.3316708229426434,
        sdata=13/07/2020_00:47:39, edate=13/07/2020_01:17:46.
        Average CV score on the training set was 0.8146333527219953.
        """
        # An earlier note here proposed passing max_iter=120000 to the tree;
        # that variant was left disabled and no max_iter is set.
        final_tree = DecisionTreeClassifier(criterion="entropy", max_depth=6, min_samples_leaf=3, min_samples_split=8)

        steps = [
            ZeroCount(),
            MaxAbsScaler(),
            StackingEstimator(estimator=GaussianNB()),
            final_tree,
        ]
        return OneVsRestClassifier(make_pipeline(*steps))


    def bRF_vif(self):
        """Binary min-max-scaled random forest (vif data), one-vs-rest.

        TPOT: average CV score on the training set was 0.9048227857092123.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=1, random_state=42, early_stop=3);
        ifile data/bTK_vif.csv, model=None,
        Tpot_file=data/bTK_vif_100g_None_Tpot.py,
        Tpot_score=0.9114713216957606,
        start 2020-07-21 11:18:27.132279, end 2020-07-21 11:19:44.062352,
        duration=0:1:16.
        """
        scaler = MinMaxScaler()
        forest = RandomForestClassifier(
            bootstrap=True,
            criterion="gini",
            max_features=0.2,
            min_samples_leaf=8,
            min_samples_split=4,
            n_estimators=100,
        )
        wrapped = OneVsRestClassifier(make_pipeline(scaler, forest))
        return wrapped

#    def bRF_vif(self):
#        """ TPOTClassifier(verbosity=2,generations=100,config_dict=None,max_time_mins=None,random_state=42,early_stop=3)# ifile data/bTK_vif.csv  model=None Tpot_file=data/bTK_vif_100g_bNone_Tpot.py, Tpot_score=0.9114713216957606, starttime2020-07-14 02:43:25.364161, endtime=2020-07-14 12:05:53.637406 duration=9:22:28 
#            # Average CV score on the training set was: 0.9150750686761768
#        """
#        exported_pipeline = RandomForestClassifier(bootstrap=False, criterion="entropy", max_features=0.3, min_samples_leaf=2, min_samples_split=5, n_estimators=100)
#        clf = OneVsRestClassifier(exported_pipeline)
#        return clf

    def bGB_vif_cap2(self):
        """Binary gradient-boosting pipeline (vif_cap2 data), one-vs-rest.

        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/bTK_vif_cap2.csv, model=None,
        Tpot_file=data/bTK_vif_cap2_100g_bNone_Tpot.py,
        Tpot_score=0.9177057356608479,
        start 2020-07-13 23:37:04.289265, end 2020-07-14 08:51:47.675034,
        duration=9:14:43.
        Average CV score on the training set was 0.9136890672901753.
        """
        # Union of two plain copies of the input features.
        doubled_features = make_union(FunctionTransformer(copy), FunctionTransformer(copy))
        booster = GradientBoostingClassifier(
            learning_rate=0.1,
            max_depth=9,
            max_features=0.25,
            min_samples_leaf=14,
            min_samples_split=18,
            n_estimators=100,
            subsample=0.7500000000000001,
        )
        return OneVsRestClassifier(make_pipeline(doubled_features, booster))

    def bLR_vif_cap2(self):
        """Binary stacked logistic-regression pipeline (vif_cap2), one-vs-rest.

        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/bTK_vif_cap2.csv, model=TPOT_light,
        Tpot_file=data/bTK_vif_cap2_100g_bTPOT_light_Tpot.py,
        Tpot_score=0.9102244389027432,
        start 2020-07-13 19:43:03.874438, end 2020-07-13 20:59:11.172981,
        duration=1:16:7.
        Average CV score on the training set was 0.9034351526041278.
        """
        # Only the final estimator carries an explicit max_iter; the three
        # stacked regressions are built without one.
        stack_c001 = StackingEstimator(estimator=LogisticRegression(C=0.01, dual=False, penalty="l2"))
        stack_c15 = StackingEstimator(estimator=LogisticRegression(C=15.0, dual=False, penalty="l2"))
        stack_c20 = StackingEstimator(estimator=LogisticRegression(C=20.0, dual=False, penalty="l2"))
        final_lr = LogisticRegression(C=0.1, dual=False, penalty="l2", max_iter=12000)

        pipeline = make_pipeline(
            ZeroCount(),
            stack_c001,
            RobustScaler(),
            stack_c15,
            stack_c20,
            final_lr,
        )
        return OneVsRestClassifier(pipeline)

#    def LR_vif_cap2(self):
#        """ # TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT light,max_time_mins=None,random_state=42,early_stop=3)# ifile data/TK_vif_cap2.csv  model=TPOT_light Tpot_file=data/TK_vif_cap2_100g_TPOT_light_Tpot.py, Tpot_score=0.3229426433915212, starttime2020-07-13 20:59:12.459185, endtime=2020-07-13 23:37:02.960396 duration=2:37:50
#        # Average CV score on the training set was: 0.8474651723959203 """
#        exported_pipeline = make_pipeline(
#                RobustScaler(),
#                StackingEstimator(estimator=BernoulliNB(alpha=0.001, fit_prior=False)),
#                Normalizer(norm="l2"),
#                ZeroCount(),
#                SelectFwe(score_func=f_classif, alpha=0.023),
#                MaxAbsScaler(),
#                LogisticRegression(C=1.0, dual=False, penalty="l2",max_iter=12000))
#        clf = OneVsRestClassifier(exported_pipeline)
#        return clf

    def DT_vif(self):
        """Decision-tree pipeline (vif data), wrapped one-vs-rest.

        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=TPOT light,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK_vif.csv, model=TPOT_light,
        Tpot_file=data/TK_vif_100g_TPOT_light_Tpot.py,
        Tpot_score=0.314214463840399,
        start 2020-07-15 06:56:13.616787, end 2020-07-15 13:10:19.144924,
        duration=6:14:5.
        Average CV score on the training set was 0.8194796543827015.
        """
        stacked_knn = StackingEstimator(
            estimator=KNeighborsClassifier(n_neighbors=24, p=1, weights="uniform")
        )
        selector = SelectPercentile(score_func=f_classif, percentile=70)
        final_tree = DecisionTreeClassifier(criterion="gini", max_depth=6, min_samples_leaf=5, min_samples_split=15)

        steps = [StandardScaler(), stacked_knn, selector, final_tree]
        return OneVsRestClassifier(make_pipeline(*steps))

    def GB_vif(self):
        """Tuned gradient-boosting classifier (vif data), one-vs-rest.

        TPOT: average CV score on the training set was 0.8584123757253952.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK_vif.csv, model=None,
        Tpot_file=data/TK_vif_100g_None_Tpot.py,
        Tpot_score=0.3329177057356609,
        start 2020-07-22 23:57:18.320662, end 2020-07-24 02:29:55.024490,
        duration=26:32:36.
        """
        booster = GradientBoostingClassifier(
            learning_rate=0.5,
            max_depth=8,
            max_features=0.7500000000000001,
            min_samples_leaf=12,
            min_samples_split=15,
            n_estimators=100,
            subsample=0.9500000000000001,
        )
        return OneVsRestClassifier(booster)

    def GB_vif_cap2(self):
        """Tuned gradient-boosting classifier (vif_cap2 data), one-vs-rest.

        TPOT: average CV score on the training set was 0.8584124717088706.
        Run: TPOTClassifier(verbosity=2, generations=100, config_dict=None,
        max_time_mins=None, random_state=42, early_stop=3);
        ifile data/TK_vif_cap2.csv, model=None,
        Tpot_file=data/TK_vif_cap2_100g_None_Tpot.py,
        Tpot_score=0.3117206982543641,
        start 2020-07-24 00:59:15.653017, end 2020-07-25 11:19:50.437555,
        duration=34:20:34.
        """
        # Disabled alternative kept for reference:
        #   GradientBoostingClassifier(learning_rate=0.5, max_depth=9, max_features=0.55,
        #       min_samples_leaf=13, min_samples_split=7, n_estimators=100, subsample=1.0)
        booster_params = {
            "learning_rate": 0.1,
            "max_depth": 9,
            "max_features": 0.4,
            "min_samples_leaf": 9,
            "min_samples_split": 17,
            "n_estimators": 100,
            "subsample": 1.0,
        }
        return OneVsRestClassifier(GradientBoostingClassifier(**booster_params))

#    def GB_vif_cap2(self):
#        """ # TPOTClassifier(verbosity=2,generations=100,config_dict=None,max_time_mins=None,random_state=42,early_stop=3)# ifile data/TK_vif_cap2.csv  model=None Tpot_file=data/TK_vif_cap2_100g_None_Tpot.py, Tpot_score=0.3192019950124688, starttime2020-07-14 13:32:07.050677, endtime=2020-07-16 01:31:10.808011 duration=35:59:3
#        # Average CV score on the training set was: 0.8595200250324904 """
#        exported_pipeline = make_pipeline(
#                StackingEstimator(estimator=MLPClassifier(alpha=0.01, learning_rate_init=0.5)),
#                GradientBoostingClassifier(learning_rate=0.1, max_depth=9, max_features=0.55, min_samples_leaf=13, min_samples_split=7, n_estimators=100, subsample=1.0)
#                )
#        clf = OneVsRestClassifier(exported_pipeline)
#        return clf

    def bLR_vif(self):
        """Return the TPOT-exported logistic regression pipeline, wrapped one-vs-rest.

        The pipeline standardises the features, passes them through a TPOT
        StackingEstimator built on a decision tree, and finishes with a
        logistic regression.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT light,max_time_mins=None,random_state=42,early_stop=3)
            input file   : data/bTK_vif.csv (model=TPOT_light)
            exported to  : data/bTK_vif_100g_bTPOT_light_Tpot.py
            avg CV score : 0.894015046369617 (training set)
            Tpot_score   : 0.9039900249376559
            start / end  : 2020-07-16 16:49:59.084335 / 2020-07-16 17:18:04.305834
            duration     : 0:28:5

        Returns:
            OneVsRestClassifier around the unfitted three-step pipeline.
        """
        scaler = StandardScaler()
        stacked_tree = StackingEstimator(
            estimator=DecisionTreeClassifier(
                criterion="entropy",
                max_depth=7,
                min_samples_leaf=12,
                min_samples_split=20,
            )
        )
        logit = LogisticRegression(C=5.0, dual=False, penalty="l2", max_iter=12000)
        return OneVsRestClassifier(make_pipeline(scaler, stacked_tree, logit))

    def XGB_vif(self):
        """Return the TPOT-exported stacked XGBoost pipeline, wrapped one-vs-rest.

        The pipeline is a TPOT StackingEstimator around a default
        XGBClassifier, followed by a second default XGBClassifier.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict={'xgboost.XGBClassifier': {}},max_time_mins=None,random_state=42,early_stop=3)
            input file   : data/TK_vif_cap2.csv (model=XGB)
            exported to  : data/TK_vif_cap2_100g_XGB_Tpot.py
            avg CV score : 0.8512074721215995 (training set)
            Tpot_score   : 0.3341645885286783
            start / end  : 2020-07-22 20:43:39.517557 / 2020-07-22 21:01:36.369396
            duration     : 0:17:56

        NOTE(review): the recorded input file is TK_vif_cap2.csv although the
        method is named XGB_vif -- confirm which data set this belongs to.

        Returns:
            OneVsRestClassifier around the unfitted two-step pipeline.
        """
        first_stage = StackingEstimator(estimator=XGBClassifier())
        final_stage = XGBClassifier()
        stacked_xgb = make_pipeline(first_stage, final_stage)
        return OneVsRestClassifier(stacked_xgb)


    def XGBdummy(self):
        """Placeholder entry: print a notice instead of building a model.

        No classifier is constructed; the call returns None.
        """
        notice = "NOT TO be done"
        print(notice)
        return None

    def RF_vif_cap2(self):
        """Return the TPOT-exported random forest pipeline, wrapped one-vs-rest.

        The pipeline rescales the features with MinMaxScaler and then fits a
        RandomForestClassifier.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=None,max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/TK_vif_cap2.csv (model=None)
            exported to  : data/TK_vif_cap2_100g_None_Tpot.py
            avg CV score : 0.8498227185207794 (training set)
            Tpot_score   : 0.3204488778054863
            start / end  : 2020-07-20 02:34:14.453438 / 2020-07-20 02:35:54.489856
            duration     : 0:1:40

        Returns:
            OneVsRestClassifier around the unfitted two-step pipeline.
        """
        forest_params = dict(
            bootstrap=True,
            criterion="gini",
            max_features=0.2,
            min_samples_leaf=8,
            min_samples_split=4,
            n_estimators=100,
        )
        scaled_forest = make_pipeline(
            MinMaxScaler(),
            RandomForestClassifier(**forest_params),
        )
        return OneVsRestClassifier(scaled_forest)

    def DT_vif_cap2(self):
        """Return the TPOT-exported decision tree, wrapped one-vs-rest.

        Provenance of the hyperparameters (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT light,max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/TK_vif_cap2.csv (model=Tpot_light)
            exported to  : data/TK_vif_cap2_100g_Tpot_light_Tpot.py
            avg CV score : 0.8042392061782643 (training set)
            Tpot_score   : 0.31546134663341646
            start / end  : 2020-07-20 02:29:16.873318 / 2020-07-20 02:30:18.711808
            duration     : 0:1:1

        Returns:
            OneVsRestClassifier around an unfitted DecisionTreeClassifier.
        """
        tree = DecisionTreeClassifier(
            criterion="entropy",
            max_depth=8,
            min_samples_leaf=11,
            min_samples_split=9,
        )
        return OneVsRestClassifier(tree)

    def bXGB(self):
        """Placeholder entry: print a notice instead of building a model.

        No classifier is constructed; the call returns None.
        """
        notice = "TO be done"
        print(notice)
        return None
    def bXGB_vif(self):
        """Placeholder entry: print a notice instead of building a model.

        No classifier is constructed; the call returns None.
        """
        notice = "TO be done"
        print(notice)
        return None
    def bXGB_vif_cap2(self):
        """Return a default XGBClassifier, wrapped one-vs-rest.

        Provenance (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict={'xgboost.XGBClassifier': {}},max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/TK_vif_cap2.csv (model=XGB)
            exported to  : data/TK_vif_cap2_100g_XGB_Tpot.py
            avg CV score : 0.8498223345868775 (training set)
            Tpot_score   : 0.3341645885286783
            start / end  : 2020-07-21 11:25:09.549731 / 2020-07-21 11:26:45.259745
            duration     : 0:1:35

        NOTE(review): the recorded input file is TK_vif_cap2.csv rather than a
        bTK_* file -- confirm this run belongs to the "b" variant.

        Returns:
            OneVsRestClassifier around an unfitted, default-configured
            XGBClassifier.
        """
        xgb_model = XGBClassifier()
        wrapped = OneVsRestClassifier(xgb_model)
        return wrapped

    def XGB(self):
        """Return a default XGBClassifier, wrapped one-vs-rest.

        Provenance (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict={'xgboost.XGBClassifier': {}},max_time_mins=None,random_state=42,early_stop=3)
            input file   : data/TK.csv (model=XGB)
            exported to  : data/TK_100g_XGB_Tpot.py
            avg CV score : 0.8506532635341498 (training set)
            Tpot_score   : 0.33042394014962595
            start / end  : 2020-07-22 19:59:49.891472 / 2020-07-22 20:22:06.655478
            duration     : 0:22:16

        Returns:
            OneVsRestClassifier around an unfitted, default-configured
            XGBClassifier.
        """
        return OneVsRestClassifier(XGBClassifier())

    def XGB_vif_cap2(self):
        """Return the TPOT-exported scaled XGBoost pipeline, wrapped one-vs-rest.

        The pipeline applies RobustScaler and then a tuned XGBClassifier.

        Provenance (as recorded with the export):
            avg CV score : 0.8586890960852178 (training set)

        Returns:
            OneVsRestClassifier around the unfitted two-step pipeline.
        """
        xgb_params = dict(
            learning_rate=0.1,
            max_depth=9,
            min_child_weight=2,
            n_estimators=100,
            nthread=1,
            subsample=0.6500000000000001,
        )
        scaled_xgb = make_pipeline(RobustScaler(), XGBClassifier(**xgb_params))
        return OneVsRestClassifier(scaled_xgb)
#        return OneVsRestClassifier(exported_pipeline)

    def RF_vif(self):
        """Return the TPOT-exported stacked random forest pipeline, wrapped one-vs-rest.

        The pipeline is a feature union of two branches followed by a final
        default RandomForestClassifier. All forests use default settings.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict={'sklearn.ensemble.RandomForestClassifier': {}},max_time_mins=None,random_state=42,early_stop=3)
            input file   : data/TK_vif.csv (model=RF)
            exported to  : data/TK_vif_100g_RF_Tpot.py
            avg CV score : 0.8543946994085498 (training set)
            Tpot_score   : 0.3192019950124688
            start / end  : 2020-07-24 14:00:54.554259 / 2020-07-24 17:41:10.967508
            duration     : 3:40:16

        Returns:
            OneVsRestClassifier around the unfitted pipeline.
        """
        # Branch 1: a forest stacked on the union of two copies of the input.
        doubled_input = make_union(
            FunctionTransformer(copy),
            FunctionTransformer(copy),
        )
        first_branch = StackingEstimator(
            estimator=make_pipeline(doubled_input, RandomForestClassifier())
        )

        # Branch 2: a stacked forest-on-forest pipeline, unioned with a copy
        # of the input.
        forest_on_forest = make_pipeline(
            StackingEstimator(estimator=RandomForestClassifier()),
            RandomForestClassifier(),
        )
        second_branch = make_union(
            StackingEstimator(estimator=forest_on_forest),
            FunctionTransformer(copy),
        )

        combined_features = make_union(first_branch, second_branch)
        full_pipeline = make_pipeline(combined_features, RandomForestClassifier())
        return OneVsRestClassifier(full_pipeline)

    def bGB_vif(self):
        """Placeholder entry: print a notice instead of building a model.

        No classifier is constructed; the call returns None.
        """
        notice = "TO be done"
        print(notice)
        return None

    def bDT_vif(self):
        """Return the TPOT-exported decision tree, wrapped one-vs-rest.

        Provenance of the hyperparameters (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT light,max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/bTK_vif.csv (model=Tpot_light)
            exported to  : data/bTK_vif_100g_Tpot_light_Tpot.py
            avg CV score : 0.8805749602148495 (training set)
            Tpot_score   : 0.8765586034912718
            start / end  : 2020-07-21 11:09:08.749381 / 2020-07-21 11:10:10.531303
            duration     : 0:1:1

        Returns:
            OneVsRestClassifier around an unfitted DecisionTreeClassifier.
        """
        tree_params = dict(
            criterion="entropy",
            max_depth=9,
            min_samples_leaf=11,
            min_samples_split=14,
        )
        tree = DecisionTreeClassifier(**tree_params)
        wrapped = OneVsRestClassifier(tree)
        return wrapped


    def bDT_vif_cap2(self):
        """Return the TPOT-exported decision tree, wrapped one-vs-rest.

        Provenance of the hyperparameters (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT light,max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/bTK_vif_cap2.csv (model=Tpot_light)
            exported to  : data/bTK_vif_cap2_100g_Tpot_light_Tpot.py
            avg CV score : 0.8800213275282527 (training set)
            Tpot_score   : 0.8753117206982544
            start / end  : 2020-07-21 11:10:11.748008 / 2020-07-21 11:11:14.304263
            duration     : 0:1:2

        Returns:
            OneVsRestClassifier around an unfitted DecisionTreeClassifier.
        """
        tree = DecisionTreeClassifier(
            criterion="entropy",
            max_depth=9,
            min_samples_leaf=11,
            min_samples_split=14,
        )
        wrapped = OneVsRestClassifier(tree)
        return wrapped

    def bRF_vif_cap2(self):
        """Return the TPOT-exported random forest pipeline, wrapped one-vs-rest.

        The pipeline rescales the features with MinMaxScaler and then fits a
        RandomForestClassifier.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=None,max_time_mins=1,random_state=42,early_stop=3)
            input file   : data/bTK_vif_cap2.csv (model=None)
            exported to  : data/bTK_vif_cap2_100g_None_Tpot.py
            avg CV score : 0.907316052468407 (training set)
            Tpot_score   : 0.899002493765586
            start / end  : 2020-07-21 11:19:45.366820 / 2020-07-21 11:21:01.380754
            duration     : 0:1:16

        Returns:
            OneVsRestClassifier around the unfitted two-step pipeline.
        """
        rescaler = MinMaxScaler()
        forest = RandomForestClassifier(
            bootstrap=True,
            criterion="gini",
            max_features=0.2,
            min_samples_leaf=8,
            min_samples_split=4,
            n_estimators=100,
        )
        scaled_forest = make_pipeline(rescaler, forest)
        wrapped = OneVsRestClassifier(scaled_forest)
        return wrapped

    def ET_vif(self):
        """Return the TPOT-exported extra-trees pipeline, wrapped one-vs-rest.

        The pipeline rescales the features with MinMaxScaler, passes them
        through a TPOT StackingEstimator built on gradient boosting, and
        finishes with an ExtraTreesClassifier.

        Provenance of the pipeline (as recorded by the TPOT run):
            TPOTClassifier(verbosity=2,generations=100,config_dict=TPOT NN,max_time_mins=None,random_state=42,early_stop=3)
            input file   : data/TK_vif.csv (model=Tpot_nn)
            exported to  : data/TK_vif_100g_Tpot_nn_Tpot.py
            avg CV score : 0.8620149235107684 (training set)
            Tpot_score   : 0.3266832917705736
            start / end  : 2020-07-25 20:43:26.839161 / 2020-07-29 18:41:46.451535
            duration     : 93:58:19

        Returns:
            OneVsRestClassifier around the unfitted three-step pipeline.
        """
        # Float literals are kept exactly as TPOT exported them.
        boosting_params = dict(
            learning_rate=0.1,
            max_depth=10,
            max_features=0.6000000000000001,
            min_samples_leaf=20,
            min_samples_split=3,
            n_estimators=100,
            subsample=0.9000000000000001,
        )
        extra_trees_params = dict(
            bootstrap=True,
            criterion="entropy",
            max_features=0.5,
            min_samples_leaf=10,
            min_samples_split=14,
            n_estimators=100,
        )
        stacked_boosting = StackingEstimator(
            estimator=GradientBoostingClassifier(**boosting_params)
        )
        extra_trees = ExtraTreesClassifier(**extra_trees_params)
        full_pipeline = make_pipeline(MinMaxScaler(), stacked_boosting, extra_trees)
        return OneVsRestClassifier(full_pipeline)