Tuning Hyperparameters with Optuna
import pandas as pd
import numpy as np
import optuna
from sklearn import compose
from sklearn import impute
from sklearn import metrics
from sklearn import model_selection
from sklearn import pipeline
from sklearn import preprocessing
import xgboost as xgb
import catboost as cat
# Handy constant for turning the GPU on and off. When `False`, the
# notebook ignores the GPU even when one is present.
GPU_ENABLED = True
# Work on a 10% sample of the training data to keep the tuning runs fast.
train = pd.read_csv("train.csv").sample(frac=0.10, random_state=42)
cont_features = [f for f in train.columns.tolist() if f.startswith('cont')]
cat_features = [f for f in train.columns.tolist() if f.startswith('cat')]
y = train.target
# X keeps the full frame; the ColumnTransformer below selects only the
# `cont*` and `cat*` columns, so the target never leaks into the features.
X = train
numerical_preprocessor = pipeline.Pipeline(steps=[
    ("imputer", impute.SimpleImputer(strategy="mean")),
    ("scaler", preprocessing.MinMaxScaler())
])
categorical_preprocessor = pipeline.Pipeline(steps=[
    ("imputer", impute.SimpleImputer(strategy="most_frequent")),
    ("ordinal", preprocessing.OrdinalEncoder())
])
preprocessor = compose.ColumnTransformer(
    transformers=[
        ("numerical_preprocessor", numerical_preprocessor, cont_features),
        ("categorical_preprocessor", categorical_preprocessor, cat_features)
    ]
)
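Before wiring the preprocessor into the training loop, it can be worth a quick sanity check in isolation. A minimal sketch; the expected column count is just the number of `cont*` plus `cat*` features:
# Fit on the sampled frame and confirm one output column per feature.
Xt = preprocessor.fit_transform(X, y)
print(Xt.shape)  # expect (len(X), len(cont_features) + len(cat_features))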
def train_model_for_study(X, y, model):
    """
    Hold out 20% of the data, fit `model` with early stopping on the
    hold-out set, and return the validation RMSE. Works for both
    `XGBRegressor` and `CatBoostRegressor`, whose `fit` methods accept
    the same `early_stopping_rounds`, `eval_set`, and `verbose` arguments.
    """
    X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
        X,
        y,
        test_size=0.20,
        random_state=42
    )
    # Fit the preprocessor on the training split only, so no validation
    # statistics leak into the imputers and scaler.
    X_train = preprocessor.fit_transform(X_train, y_train)
    X_valid = preprocessor.transform(X_valid)
    model.fit(
        X_train,
        y_train,
        early_stopping_rounds=300,
        eval_set=[(X_valid, y_valid)],
        verbose=False
    )
    yhat = model.predict(X_valid)
    # squared=False makes this the root mean squared error.
    return metrics.mean_squared_error(y_valid, yhat, squared=False)
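A quick way to exercise the helper before tuning anything, using an arbitrary small baseline model (the `n_estimators=100` here is purely illustrative):
baseline = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, random_state=42)
print(train_model_for_study(X, y, baseline))  # baseline validation RMSE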
def objective_xgb(trial):
    """
    Objective function to tune an `XGBRegressor` model.
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 1000, 10000),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 100.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 100.0, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0, step=0.1),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 1.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 2, 9),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
    }
    if GPU_ENABLED:
        params["tree_method"] = "gpu_hist"
        params["predictor"] = "gpu_predictor"
    model = xgb.XGBRegressor(
        booster="gbtree",
        objective="reg:squarederror",
        random_state=42,
        **params
    )
    return train_model_for_study(X, y, model)
def objective_cat(trial):
    """
    Objective function to tune a `CatBoostRegressor` model.
    """
    params = {
        "iterations": trial.suggest_int("iterations", 4000, 25000),
        "od_wait": trial.suggest_int("od_wait", 500, 2300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 1),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-5, 100),
        "subsample": trial.suggest_float("subsample", 0, 1),
        "random_strength": trial.suggest_float("random_strength", 10, 50),
        "depth": trial.suggest_int("depth", 1, 15),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 30),
        "leaf_estimation_iterations": trial.suggest_int("leaf_estimation_iterations", 1, 15),
    }
    if GPU_ENABLED:
        params["task_type"] = "GPU"
        # The Poisson bootstrap is only supported on GPU.
        params["bootstrap_type"] = "Poisson"
    model = cat.CatBoostRegressor(
        loss_function="RMSE",
        random_state=42,
        **params,
    )
    return train_model_for_study(X, y, model)
study_xgb = optuna.create_study(direction="minimize")
study_xgb.optimize(objective_xgb, n_trials=5)
study_xgb.best_params
[I 2021-09-01 17:55:42,724] A new study created in memory with name: no-name-0e3cc451-aa00-406c-93f5-ef1f11526c9c
[I 2021-09-01 17:55:48,279] Trial 0 finished with value: 0.738416091458543 and parameters: {'n_estimators': 7992, 'reg_alpha': 1.3133580413265216e-06, 'reg_lambda': 16.290093746736527, 'subsample': 0.6, 'learning_rate': 0.16027908156522835, 'max_depth': 5, 'colsample_bytree': 0.6595439362298875}. Best is trial 0 with value: 0.738416091458543.
[I 2021-09-01 17:55:58,392] Trial 1 finished with value: 0.7373312498627275 and parameters: {'n_estimators': 5838, 'reg_alpha': 4.396167769884513e-08, 'reg_lambda': 5.326525015477008e-05, 'subsample': 0.7, 'learning_rate': 0.06114842392791542, 'max_depth': 7, 'colsample_bytree': 0.42392061015953364}. Best is trial 1 with value: 0.7373312498627275.
[I 2021-09-01 17:56:17,568] Trial 2 finished with value: 0.7902618092751004 and parameters: {'n_estimators': 6637, 'reg_alpha': 1.2146118678935444e-07, 'reg_lambda': 1.0920854782930351e-05, 'subsample': 0.8, 'learning_rate': 0.5532814143906422, 'max_depth': 9, 'colsample_bytree': 0.7532729084007421}. Best is trial 1 with value: 0.7373312498627275.
[I 2021-09-01 17:56:25,627] Trial 3 finished with value: 0.7358492670026711 and parameters: {'n_estimators': 4955, 'reg_alpha': 0.0031430412204915526, 'reg_lambda': 1.1775420703839643, 'subsample': 0.7, 'learning_rate': 0.09235910120865394, 'max_depth': 6, 'colsample_bytree': 0.4168448487051154}. Best is trial 3 with value: 0.7358492670026711.
[I 2021-09-01 17:56:51,362] Trial 4 finished with value: 0.7375163511817688 and parameters: {'n_estimators': 6086, 'reg_alpha': 0.48387553975401604, 'reg_lambda': 0.0072717179065714625, 'subsample': 0.6, 'learning_rate': 0.03840690780111572, 'max_depth': 9, 'colsample_bytree': 0.5856589040681989}. Best is trial 3 with value: 0.7358492670026711.
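With the study finished, the tuned settings can go straight back into a fresh regressor. A minimal sketch, noting that the GPU options were set outside `trial.suggest_*` and therefore do not appear in `best_params`, so they have to be reapplied:
best_params = dict(study_xgb.best_params)
if GPU_ENABLED:
    best_params["tree_method"] = "gpu_hist"
    best_params["predictor"] = "gpu_predictor"
best_xgb = xgb.XGBRegressor(
    booster="gbtree",
    objective="reg:squarederror",
    random_state=42,
    **best_params,
)
print(train_model_for_study(X, y, best_xgb))  # RMSE with the tuned settings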
study_cat = optuna.create_study(direction="minimize")
study_cat.optimize(objective_cat, n_trials=1)
study_cat.best_params
[I 2021-09-01 18:13:34,984] A new study created in memory with name: no-name-da3b86a4-9a22-45ed-af28-c382ae8df078
Custom logger is already specified. Specify more than one logger at same time is not thread safe.
[I 2021-09-01 18:13:55,191] Trial 0 finished with value: 0.7350755084577055 and parameters: {'iterations': 24686, 'od_wait': 1507, 'learning_rate': 0.2910523692193494, 'reg_lambda': 78.53417683107563, 'subsample': 0.803401786024069, 'random_strength': 35.357782114945834, 'depth': 4, 'min_data_in_leaf': 22, 'leaf_estimation_iterations': 11}. Best is trial 0 with value: 0.7350755084577055.
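The same pattern works for the CatBoost study; again the GPU-only settings have to be reapplied by hand. A sketch:
best_cat = cat.CatBoostRegressor(
    loss_function="RMSE",
    random_state=42,
    **study_cat.best_params,
)
if GPU_ENABLED:
    best_cat.set_params(task_type="GPU", bootstrap_type="Poisson")
print(train_model_for_study(X, y, best_cat))  # RMSE with the tuned settings
Optuna's built-in plotting helpers, such as optuna.visualization.plot_optimization_history(study_xgb) and optuna.visualization.plot_param_importances(study_xgb), are a convenient way to inspect either study afterwards.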