Source code for optimalflow.utilis_func

#!/usr/bin/env python

import sys
import time
from IPython.display import clear_output
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
import numpy as np
import os
import json

def delete_old_log_files(directory = None, delete_flag = False, logger = None, extension_list = None, filename_list = None, log_ts = None):
    file_list = os.listdir(directory)
    Test_comment = '-' * 20 * 3
    logger.info("Copyright All Reserved by Tony Dong | e-mail: tonyleidong@gmail.com ")
    logger.info("Official Documentation: https://optimal-flow.readthedocs.io")
    logger.info(Test_comment)
    if delete_flag:
        logger.info("All previous logfiles will be deleted, when DELETE_FLAG is set to True.")

        for item in file_list:
            # Flag files whose names start with any tracked prefix; keep the
            # current run's logs (those containing the log_ts timestamp).
            name_flag = [item.startswith(i) for i in filename_list]
            if any(name_flag) and (log_ts not in item):
                os.remove(os.path.join(directory, item))
                logger.info(f"Deleted file: {item}")

    return None
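# A minimal usage sketch for delete_old_log_files; the logger setup, prefix
# list, and timestamp below are illustrative assumptions, not part of this
# module:
#
#   import logging
#   logging.basicConfig(level = logging.INFO)
#   logger = logging.getLogger(__name__)
#   delete_old_log_files(directory = '.', delete_flag = True, logger = logger,
#                        filename_list = ['temp_log'], log_ts = '2020-08-01')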

def clear():
    # 'cls' clears the console on Windows; fall back to 'clear' on POSIX systems
    os.system('cls' if os.name == 'nt' else 'clear')

def update_progress(progress, clear_flag = False, process_name = None, time_est = None):
    if clear_flag:
        clear()
    bar_length = 20
    # Coerce progress to a float and clamp it to the [0, 1] range
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0.0
    progress = min(max(progress, 0.0), 1.0)
    block = int(round(bar_length * progress))
    clear_output(wait = True)
    bar = "#" * block + "-" * (bar_length - block)
    if time_est is None:
        text = f"Now in Progress - {process_name}: [{bar}] {progress * 100:.1f}%"
    else:
        text = f"Now in Progress - {process_name}: Estimate about {time_est} minutes left [{bar}] {progress * 100:.1f}%"

    print(text)
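# A minimal usage sketch for update_progress (the loop bounds and sleep are
# illustrative stand-ins for real work; clear_output is intended for notebook
# use):
#
#   for i in range(10):
#       time.sleep(0.1)
#       update_progress((i + 1) / 10, process_name = "Demo Step")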

def pipeline_splitting_rule(val_size = 0.2, test_size = 0.2, random_state = 13):
    """Setup percentage of train, validate, and test of each pipeline's dataset in Pipeline Cluster Traversal Experiments.
    
    Parameters
    ----------
    val_size : float, default = None
        Value within [0~1]. Percentage of validate data.
    test_size : float, default = None
        Value within [0~1]. Percentage of test data.
    random_state : int, default = 13
        Random state value.
    Returns
    -------
    Deliver the percentage values to splitting tool function.

    """
    custom_val_size,custom_size,custom_random_state = val_size, test_size, random_state
    return(custom_val_size,custom_size,custom_random_state)

def data_splitting_tool(feature_cols = None, label_col = None, val_size = 0.2, test_size = 0.2, random_state = 13):
    """Split each pipeline's dataset into train, validate, and test parts for Pipeline Cluster Traversal Experiments.

    NOTE: When in_pipeline = "True", this function is called as a built-in step of the autoPipe module, so it needs pipeline_splitting_rule() to set up the splitting rule.

    Parameters
    ----------
    feature_cols : df, default = None
        Feature columns.
    label_col : array/df, default = None
        Column of label.
    val_size : float, default = 0.2
        Value within [0~1]. Percentage of validate data.
        NOTE - When val_size has no input value, X_val & y_val will not be returned.
    test_size : float, default = 0.2
        Value within [0~1]. Percentage of test data.
    random_state : int, default = 13
        Random state value.

    Returns
    -------
    X_train : array
        Train features dataset
    y_train : array
        Train label dataset
    X_val : array
        Validate features dataset
    y_val : array
        Validate label dataset
    X_test : array
        Test features dataset
    y_test : array
        Test label dataset
    """
    if (val_size != ''):
        # First split off the combined validate+test portion, then split that
        # portion into validate and test by their relative ratio.
        total_test_size = val_size + test_size
        test_ratio = test_size / total_test_size
        X_train, X_test, y_train, y_test = train_test_split(feature_cols, label_col, test_size = total_test_size, random_state = random_state)
        X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = test_ratio, random_state = random_state)
        return(X_train, y_train, X_val, y_val, X_test, y_test)
    else:
        X_train, X_test, y_train, y_test = train_test_split(feature_cols, label_col, test_size = test_size, random_state = random_state)
        return(X_train, y_train, X_test, y_test)
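# A minimal usage sketch for data_splitting_tool, assuming a toy DataFrame
# (the column names here are illustrative, not part of this module):
#
#   df = pd.DataFrame({'f1': range(100), 'f2': range(100, 200), 'label': [0, 1] * 50})
#   X_train, y_train, X_val, y_val, X_test, y_test = data_splitting_tool(
#       feature_cols = df[['f1', 'f2']], label_col = df['label'],
#       val_size = 0.2, test_size = 0.2, random_state = 13)
#   # With val_size = 0.2 and test_size = 0.2, roughly 60/20/20 of the rows
#   # land in train/validate/test.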
def reset_parameters():
    """Reset autoCV estimators' hyperparameters and searching ranges to default values.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Example
    -------
    .. [Example]: https://optimal-flow.readthedocs.io/en/latest/demos.html#custom-estimators-parameters-setting-for-for-autocv
    """
    try:
        json_p = os.path.join(os.path.dirname(__file__), 'reset_parameters.json')
        with open(json_p, 'r') as d_file:
            para = json.load(d_file)
        json_p = os.path.join(os.path.dirname(__file__), 'parameters.json')
        # Opening in "w" mode truncates the file, so no explicit truncate is needed
        with open(json_p, 'w', encoding = 'utf-8') as w_file:
            json.dump(para, w_file)
        print('Done with the parameters reset.')
    except Exception:
        print('Failed to reset the parameters.')
def update_parameters(mode = str(None), estimator_name = str(None), **kwargs):
    """Update autoCV estimators' hyperparameters and searching ranges to custom values.

    NOTE: One line of command can only update one estimator.

    Parameters
    ----------
    mode : str, default = None
        Value in ["cls", "reg"]. "cls" will modify classification estimators; "reg" will modify regression estimators.
    estimator_name : str, default = None
        Name of the estimator.
    **kwargs : list, default = None
        Lists of values using comma splitting, e.g. C = [0.1, 0.2], kernel = ["linear"].

    Returns
    -------
    None

    Example
    -------
    .. [Example]: https://optimal-flow.readthedocs.io/en/latest/demos.html#custom-estimators-parameters-setting-for-for-autocv
    """
    try:
        json_p = os.path.join(os.path.dirname(__file__), 'parameters.json')
        with open(json_p, 'r', encoding = 'utf-8') as d_file:
            para = json.load(d_file)
        print(f"Previous Parameters are: {para[mode][estimator_name]}")
        para[mode][estimator_name] = kwargs
        print(f"Current Parameters are updated as: {para[mode][estimator_name]}")
        with open(json_p, 'w', encoding = 'utf-8') as w_file:
            json.dump(para, w_file)
        print('Done with the parameters update.')
    except Exception:
        print('Failed to update the parameters.')
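# A minimal usage sketch for update_parameters, following the docstring's own
# example values (the estimator name and value lists are illustrative):
#
#   update_parameters(mode = "cls", estimator_name = "svm",
#                     C = [0.1, 0.2], kernel = ["linear"])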
def export_parameters():
    """Export current autoCV estimators' hyperparameters and searching ranges to the current working directory.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Example
    -------
    .. [Example]: https://optimal-flow.readthedocs.io/en/latest/demos.html#custom-estimators-parameters-setting-for-for-autocv
    """
    exp_folder = os.path.join(os.getcwd(), 'exported')
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)
    try:
        json_p = os.path.join(os.path.dirname(__file__), 'parameters.json')
        with open(json_p, 'r') as d_file:
            para = json.load(d_file)
        para_pd = pd.json_normalize(para["cls"])
        para_pd.to_csv(os.path.join(exp_folder, "exported_cls_parameters.csv"), index = False)
        para_pd = pd.json_normalize(para["reg"])
        para_pd.to_csv(os.path.join(exp_folder, "exported_reg_parameters.csv"), index = False)
        print('Done with the parameters setting file export.')
    except Exception:
        print('Failed to export the parameters file.')
import pickle

def save_obj(obj, name):
    # Serialize obj to '<name>.pkl' using the highest available pickle protocol
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    # Load and return the object previously stored in '<name>.pkl'
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
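# A minimal round-trip sketch for save_obj/load_obj (the dict and file name
# are illustrative assumptions):
#
#   params = {'C': [0.1, 0.2], 'kernel': ['linear']}
#   save_obj(params, 'svm_params')           # writes svm_params.pkl
#   restored = load_obj('svm_params')
#   assert restored == params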