# Source code for optimalflow.estimatorCV

#!/usr/bin/env python

import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.linear_model import LogisticRegression,LinearRegression,HuberRegressor,RidgeCV,RidgeClassifierCV,LassoCV,SGDRegressor,SGDClassifier
from sklearn.svm import SVC,SVR,LinearSVR,LinearSVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor,AdaBoostRegressor,AdaBoostClassifier,HistGradientBoostingRegressor,HistGradientBoostingClassifier
from sklearn.neural_network import MLPClassifier,MLPRegressor
from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
import xgboost as xgb

import warnings
# Silence noisy FutureWarning/DeprecationWarning chatter — presumably from the
# third-party ML libraries imported above (sklearn/xgboost); TODO confirm.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

import json
import os

# Load the hyperparameter search spaces shipped alongside this module.
# Fix: dropped the redundant `data_file.close()` — the `with` statement
# already closes the file when the block exits.
json_path = os.path.join(os.path.dirname(__file__), 'parameters.json')
with open(json_path, encoding='utf-8') as data_file:
    para_data = json.load(data_file)

def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``; swallows every warning call."""
    return None

class clf_cv:
    """Cross-validated classification estimators.

    Each public method returns an sklearn search object — ``GridSearchCV``,
    or ``RandomizedSearchCV`` when ``fast_flag`` is set — wrapping one
    classifier, with its hyperparameter grid loaded from ``parameters.json``
    (module-level ``para_data``).

    Parameters
    ----------
    cv_val : int, default = None
        Number of folds for cross-validation.
    random_state : int, default = None
        Random state value (stored as a one-element list for the param grid).
    fast_flag : bool, default = False
        When True, use RandomizedSearchCV sampling ``n_comb`` combinations
        instead of an exhaustive GridSearchCV.
    n_comb : int, default = 10
        Number of parameter combinations sampled in fast mode.

    References
    ----------
    None
    """

    def __init__(self, cv_val=None, random_state=None, fast_flag=False, n_comb=10):
        self.cv = cv_val
        # Param grids expect a list of candidate values.
        self.random_state = [random_state]
        self.fast_flag = fast_flag
        self.n_comb = n_comb
        warnings.warn = warn  # globally silence warnings.warn

    def _params(self, key, seed=False):
        """Return a private copy of the ``para_data["cls"][key]`` grid.

        Copying fixes a defect in the original code, which injected
        ``random_state`` into the shared module-level dict and therefore
        leaked state between instances.
        """
        parameters = dict(para_data["cls"][key])
        if seed:
            parameters['random_state'] = self.random_state
        return parameters

    def _search(self, estimator, parameters):
        """Wrap *estimator* in a randomized (fast) or exhaustive grid search."""
        if self.fast_flag:
            return RandomizedSearchCV(estimator, parameters, cv=self.cv,
                                      n_iter=self.n_comb)
        return GridSearchCV(estimator, parameters, cv=self.cv)

    def lgr(self):
        """Logistic regression classifier search."""
        warnings.warn = warn
        return self._search(LogisticRegression(), self._params("lgr", seed=True))

    def svm(self):
        """Support-vector classifier search."""
        warnings.warn = warn
        return self._search(SVC(), self._params("svm"))

    def mlp(self):
        """Multi-layer perceptron classifier search."""
        warnings.warn = warn
        parameters = self._params("mlp", seed=True)
        # JSON cannot encode tuples; wrap plain layer sizes as 1-tuples
        # so MLPClassifier accepts them.
        sizes = parameters['hidden_layer_sizes']
        if not isinstance(sizes[0], tuple):
            parameters['hidden_layer_sizes'] = [(s,) for s in sizes]
        return self._search(MLPClassifier(), parameters)

    def ada(self):
        """AdaBoost classifier search."""
        warnings.warn = warn
        return self._search(AdaBoostClassifier(), self._params("ada", seed=True))

    def rf(self):
        """Random-forest classifier search."""
        warnings.warn = warn
        return self._search(RandomForestClassifier(), self._params("rf", seed=True))

    def gb(self):
        """Gradient-boosting classifier search."""
        warnings.warn = warn
        return self._search(GradientBoostingClassifier(), self._params("gb", seed=True))

    def xgb(self):
        """XGBoost classifier search.

        Bug fix: the original read the "gb" grid here. Use the dedicated
        "xgb" grid when parameters.json provides one, falling back to "gb"
        for backward compatibility with older config files.
        """
        warnings.warn = warn
        grids = para_data["cls"]
        parameters = dict(grids.get("xgb", grids["gb"]))
        return self._search(xgb.XGBClassifier(), parameters)

    # New add on 8/10/2020
    def lsvc(self):
        """Linear support-vector classifier search."""
        warnings.warn = warn
        return self._search(LinearSVC(), self._params("lsvc"))

    def sgd(self):
        """Stochastic-gradient-descent classifier search."""
        warnings.warn = warn
        return self._search(SGDClassifier(), self._params("sgd"))

    def hgboost(self):
        """Histogram-based gradient-boosting classifier search."""
        warnings.warn = warn
        return self._search(HistGradientBoostingClassifier(), self._params("hgboost"))

    def rgcv(self):
        """Ridge classifier (built-in CV) search."""
        warnings.warn = warn
        return self._search(RidgeClassifierCV(), self._params("rgcv"))
class reg_cv:
    """Cross-validated regression estimators.

    Each public method returns an sklearn search object — ``GridSearchCV``,
    or ``RandomizedSearchCV`` when ``fast_flag`` is set — wrapping one
    regressor, with its hyperparameter grid loaded from ``parameters.json``
    (module-level ``para_data``).

    Parameters
    ----------
    cv_val : int, default = None
        Number of folds for cross-validation.
    random_state : int, default = None
        Random state value (stored as a one-element list for the param grid).
    fast_flag : bool, default = False
        When True, use RandomizedSearchCV sampling ``n_comb`` combinations
        instead of an exhaustive GridSearchCV.
    n_comb : int, default = 10
        Number of parameter combinations sampled in fast mode.

    References
    ----------
    None
    """

    def __init__(self, cv_val=None, random_state=None, fast_flag=False, n_comb=10):
        self.cv = cv_val
        # Param grids expect a list of candidate values.
        self.random_state = [random_state]
        self.fast_flag = fast_flag
        self.n_comb = n_comb
        warnings.warn = warn  # globally silence warnings.warn

    def _params(self, key, seed=False):
        """Return a private copy of the ``para_data["reg"][key]`` grid.

        Copying fixes a defect in the original code, which injected
        ``random_state`` into the shared module-level dict and therefore
        leaked state between instances.
        """
        parameters = dict(para_data["reg"][key])
        if seed:
            parameters['random_state'] = self.random_state
        return parameters

    def _search(self, estimator, parameters):
        """Wrap *estimator* in a randomized (fast) or exhaustive grid search."""
        if self.fast_flag:
            return RandomizedSearchCV(estimator, parameters, cv=self.cv,
                                      n_iter=self.n_comb)
        return GridSearchCV(estimator, parameters, cv=self.cv)

    def lr(self):
        """Ordinary least-squares linear regression search."""
        warnings.warn = warn
        return self._search(LinearRegression(), self._params("lr"))

    def knn(self):
        """K-nearest-neighbors regressor search."""
        warnings.warn = warn
        return self._search(KNeighborsRegressor(), self._params("knn"))

    def svm(self):
        """Support-vector regressor search."""
        warnings.warn = warn
        return self._search(SVR(), self._params("svm"))

    def mlp(self):
        """Multi-layer perceptron regressor search."""
        warnings.warn = warn
        parameters = self._params("mlp", seed=True)
        # JSON cannot encode tuples; wrap plain layer sizes as 1-tuples
        # so MLPRegressor accepts them.
        sizes = parameters['hidden_layer_sizes']
        if not isinstance(sizes[0], tuple):
            parameters['hidden_layer_sizes'] = [(s,) for s in sizes]
        return self._search(MLPRegressor(), parameters)

    def rf(self):
        """Random-forest regressor search."""
        warnings.warn = warn
        return self._search(RandomForestRegressor(), self._params("rf"))

    def gb(self):
        """Gradient-boosting regressor search."""
        warnings.warn = warn
        return self._search(GradientBoostingRegressor(), self._params("gb"))

    def tree(self):
        """Decision-tree regressor search."""
        warnings.warn = warn
        return self._search(DecisionTreeRegressor(), self._params("tree"))

    def ada(self):
        """AdaBoost regressor search."""
        warnings.warn = warn
        return self._search(AdaBoostRegressor(), self._params("ada", seed=True))

    def xgb(self):
        """XGBoost regressor search."""
        warnings.warn = warn
        return self._search(xgb.XGBRegressor(), self._params("xgb"))

    # New add August 5, 2020
    def hgboost(self):
        """Histogram-based gradient-boosting regressor search."""
        warnings.warn = warn
        return self._search(HistGradientBoostingRegressor(), self._params("hgboost"))

    def huber(self):
        """Huber (outlier-robust) regressor search."""
        warnings.warn = warn
        return self._search(HuberRegressor(), self._params("huber"))

    def rgcv(self):
        """Ridge regressor (built-in CV) search."""
        warnings.warn = warn
        return self._search(RidgeCV(), self._params("rgcv"))

    def cvlasso(self):
        """Lasso regressor (built-in CV) search."""
        warnings.warn = warn
        return self._search(LassoCV(), self._params("cvlasso"))

    def sgd(self):
        """Stochastic-gradient-descent regressor search."""
        warnings.warn = warn
        return self._search(SGDRegressor(), self._params("sgd"))