

<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

## Hyperparameter Tuning

Peshbeen provides two powerful tools for hyperparameter tuning:
[`hyperopt_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#hyperopt_tune)
and
[`optuna_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#optuna_tune).
These functions optimize the hyperparameters of your forecasting models
using the `hyperopt` and `optuna` libraries, respectively. Both support
cross-validation and work with any of the forecasting models available in
peshbeen; the multivariate examples below use the corresponding
`mv_hyperopt_tune` and `mv_optuna_tune` variants.

### hyperopt_tune example for univariate forecasting using machine learning models

``` python
from peshbeen.datasets import load_wales_admissions
from peshbeen.metrics import RMSE
from lightgbm import LGBMRegressor
from peshbeen.models import ml_forecaster
from peshbeen.model_selection import hyperopt_tune
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(drop='first', sparse_output=False, handle_unknown="ignore")

wales_admissions = load_wales_admissions()
wales_admissions["day_of_week"] = wales_admissions.index.dayofweek
wales_admissions["month"] = wales_admissions.index.month
# split the data into train and test sets
train = wales_admissions[:-30]
test = wales_admissions[-30:]
cat_variables = ["day_of_week", "month"]
# create the forecaster with a LightGBM model and 30 autoregressive lags
ml_model = ml_forecaster(model=LGBMRegressor(verbose=-1),
              target_col='admissions', lags=30,
              cat_variables=cat_variables, categorical_encoder=ohe)
ml_model.fit(train)

# Define the hyperparameter search space for LightGBM
from hyperopt import hp
from hyperopt.pyll import scope
lgb_param_space = {
    'learning_rate': hp.uniform('learning_rate', 0.001, 0.6),
    'num_leaves': scope.int(hp.quniform('num_leaves', 10, 200, 1)),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 18, 1)),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1),
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 1),
    'lambda_l2': hp.uniform('lambda_l2', 0, 10),
    'lambda_l1': hp.uniform('lambda_l1', 0, 10),
    'top_rate': hp.quniform('top_rate', 0.05, 0.4, 0.0001),
    'other_rate': hp.quniform('other_rate', 0.05, 0.3, 0.0001),
    'num_iterations': scope.int(hp.quniform('num_iterations', 30, 700, 1)),
    'lags': hp.choice('lags', [
        [1, 2, 3, 4, 5],
        [1, 4, 7],
        [1, 2, 3, 4, 5, 6, 7],
        [1, 2, 3, 4, 5, 6, 7, 14],
        [1, 2, 3, 4, 5, 6, 7, 14, 21],
        [1, 2, 3],
    ]),
    'seed': 0,  # fixed, not sampled
    'box_cox': hp.uniform('box_cox', 0.0, 4),
    'box_cox_biasadj': hp.choice('box_cox_biasadj', [True, False]),
}

# Run hyperparameter tuning using hyperopt
best_params, best_lags, other_ = hyperopt_tune(model=ml_model, df=train, cv_split=5, step_size=10,
                                        test_size=1, eval_metric=RMSE, eval_num=10,
                                        param_space=lgb_param_space)

print("Best params:", best_params)
print("Best lags:", best_lags)
print("Other info:", other_)
```

    100%|██████████| 10/10 [00:31<00:00,  3.14s/trial, best loss: 49.93727556542508]
    Best params: {'bagging_fraction': 0.5029432263189173, 'feature_fraction': 0.7154093202769038, 'lambda_l1': 6.845195090636827, 'lambda_l2': 0.15567276670751973, 'learning_rate': 0.13824496021517887, 'max_depth': 13, 'num_iterations': 113, 'num_leaves': 80, 'other_rate': 0.19, 'seed': 0, 'top_rate': 0.0906}
    Best lags: [1, 4, 7]
    Other info: {'box_cox': 0.28901356410178414, 'box_cox_biasadj': False}

``` python
# now we can run our model with the best parameters, best lags, and other settings such as box_cox and box_cox_biasadj
best_box_cox = other_["box_cox"]
best_box_cox_biasadj = other_["box_cox_biasadj"]

ml_model = ml_forecaster(model=LGBMRegressor(**best_params, verbose=-1),
              target_col='admissions', lags = list(best_lags), box_cox=best_box_cox, box_cox_biasadj=best_box_cox_biasadj,
              cat_variables=cat_variables, categorical_encoder=ohe)
ml_model.fit(train)
ml_forecasts = ml_model.forecast(H=30, exog=test[cat_variables])
ml_forecasts
```

    array([8907.68714632, 8823.0885695 , 8830.13250324, 8859.95076321,
           8907.68714632, 8922.41827534, 8918.61453165, 8922.41827534,
           8823.0885695 , 8831.40525749, 8859.95076321, 8911.48757609,
           8922.41827534, 8922.41827534, 8922.41827534, 8823.0885695 ,
           8831.40525749, 8859.95076321, 8911.48757609, 8922.41827534,
           8922.41827534, 8922.41827534, 8823.0885695 , 8831.40525749,
           8859.95076321, 8911.48757609, 8922.41827534, 8922.41827534,
           8922.41827534, 8823.0885695 ])
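
With the tuned model refit on the training window, the holdout accuracy can
be checked directly. A minimal sketch, assuming peshbeen's `RMSE` metric is
called as `RMSE(y_true, y_pred)`:

``` python
# score the 30-step holdout forecast against the actual admissions
# (assumes RMSE takes (y_true, y_pred); check the metrics docs if the
#  signature differs)
print("Holdout RMSE:", RMSE(test["admissions"].values, ml_forecasts))
```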

### optuna_tune example for univariate forecasting

``` python
from peshbeen.datasets import load_wales_admissions
from peshbeen.metrics import MAE, RMSE
from lightgbm import LGBMRegressor
from peshbeen.models import ml_forecaster
from peshbeen.model_selection import optuna_tune
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(drop='first', sparse_output=False, handle_unknown="ignore")
wales_admissions = load_wales_admissions()
wales_admissions["day_of_week"] = wales_admissions.index.dayofweek
wales_admissions["month"] = wales_admissions.index.month
# split the data into train and test sets
train = wales_admissions[:-30]
test = wales_admissions[-30:]
cat_variables = ["day_of_week", "month"]
# create the forecaster with a LightGBM model and 30 autoregressive lags
ml_model = ml_forecaster(model=LGBMRegressor(verbose=-1),
              target_col='admissions', lags=30,
              cat_variables=cat_variables, categorical_encoder=ohe)
ml_model.fit(train)
lgb_param_space = {
    "learning_rate":     lambda t: t.suggest_float("learning_rate", 0.001, 0.6),
    "num_leaves":        lambda t: t.suggest_int("num_leaves", 10, 200),
    "max_depth":         lambda t: t.suggest_int("max_depth", 2, 18),
    "bagging_fraction":  lambda t: t.suggest_float("bagging_fraction", 0.5, 1.0),
    "feature_fraction":  lambda t: t.suggest_float("feature_fraction", 0.5, 1.0),
    "lambda_l2":         lambda t: t.suggest_float("lambda_l2", 0.0, 10.0),
    "lambda_l1":         lambda t: t.suggest_float("lambda_l1", 0.0, 10.0),
    "top_rate":          lambda t: t.suggest_float("top_rate", 0.05, 0.4),
    "other_rate":        lambda t: t.suggest_float("other_rate", 0.05, 0.3),
    "num_iterations":    lambda t: t.suggest_int("num_iterations", 30, 700),
    "lags":              lambda t: t.suggest_categorical(
                             "lags", [
                                 [1,2,3,4,5],
                                 [1,4,7],
                                 [1,2,3,4,5,6,7],
                                 [1,2,3,4,5,6,7,14],
                                 [1,2,3,4,5,6,7,14,21],
                                 [1,2,3],
                             ]),
    "seed":              lambda t: 0,   # fixed, not sampled
}

best_params, best_lags, other_ = optuna_tune(
    model=ml_model,
    df=train,
    cv_split=10,
    step_size=10,
    test_size=30,
    eval_metric=RMSE,
    eval_num=10,
    param_space=lgb_param_space, verbose=False
)
print("Best params:", best_params)
print("Best lags:", best_lags)
```

    Best params: {'learning_rate': 0.0931934049554397, 'num_leaves': 77, 'max_depth': 5, 'bagging_fraction': 0.6697979954493452, 'feature_fraction': 0.5137123594504271, 'lambda_l2': 6.647002826173477, 'lambda_l1': 2.039191040991273, 'top_rate': 0.08662205820026357, 'other_rate': 0.13629227826162843, 'num_iterations': 253}
    Best lags: [1, 4, 7]
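
The tuned values can be fed back into a fresh forecaster exactly as in the
hyperopt workflow above. A minimal sketch (Box-Cox was not part of this
search space, so only the parameters and lags are reused):

``` python
# refit with the optuna-selected parameters and forecast the holdout
ml_model = ml_forecaster(model=LGBMRegressor(**best_params, verbose=-1),
              target_col='admissions', lags=list(best_lags),
              cat_variables=cat_variables, categorical_encoder=ohe)
ml_model.fit(train)
optuna_forecasts = ml_model.forecast(H=30, exog=test[cat_variables])
```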

### Selecting the best ETS (Error, Trend, Seasonality) model using [`optuna_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#optuna_tune) or [`hyperopt_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#hyperopt_tune)

``` python
from peshbeen.models import ets

ets_param_space = {
    "smoothing_level":    lambda t: t.suggest_float("smoothing_level", 0.001, 0.99),
    "trend":              lambda t: t.suggest_categorical(
                             "trend", [
                                 "add",
                                 "mul",
                                 None
                             ]),
    "seasonal":           lambda t: t.suggest_categorical(
                             "seasonal", [
                                 "add",
                                 "mul",
                                 None
                             ]),
    "smoothing_trend":    lambda t: t.suggest_float("smoothing_trend", 0.001, 0.99),
    "smoothing_seasonal": lambda t: t.suggest_float("smoothing_seasonal", 0.001, 0.99),
    "seasonal_periods":   lambda t: 7,   # fixed, not sampled
    "box_cox":            lambda t: t.suggest_float("box_cox", 0.0, 4),
    "box_cox_biasadj":    lambda t: t.suggest_categorical("box_cox_biasadj", [True, False])
}

ets_model = ets(target_col='admissions')
best_params, _, other_ = optuna_tune(
    model=ets_model,
    df=train,
    cv_split=4,
    step_size=1,
    test_size=30,
    eval_metric=RMSE,
    eval_num=100,
    param_space=ets_param_space, verbose=False
)
print("Best params:", best_params)
print("Other info:", other_)
```

    Best params: {'smoothing_level': 0.43677960375353486, 'trend': None, 'seasonal': None, 'smoothing_trend': 0.5381861439868231, 'smoothing_seasonal': 0.960836924417792}
    Other info: {'box_cox': 0.005601306732476274, 'box_cox_biasadj': False}

``` python
# now forecast with the best parameters
best_params.update(other_)
ets_model = ets(target_col='admissions', **best_params)
ets_model.fit(train)
ets_forecasts = ets_model.forecast(H=30)
```
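
The same holdout check applies here. A short sketch, again assuming `RMSE`
is called as `RMSE(y_true, y_pred)`:

``` python
# compare the tuned ETS forecasts against the 30-day holdout
print("ETS holdout RMSE:", RMSE(test["admissions"].values, ets_forecasts))
```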

``` python
# get cross validation results with the best parameters
cv_df = ets_model.cross_validate(
    df=train,
    cv_split=5,
        step_size=7,
        test_size=30,
        metrics=[RMSE, MAE])
cv_df.head()
```

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }
&#10;    .dataframe tbody tr th {
        vertical-align: top;
    }
&#10;    .dataframe thead th {
        text-align: right;
    }
</style>

<table class="dataframe" data-quarto-postprocess="true" data-border="1">
<thead>
<tr style="text-align: right;">
<th data-quarto-table-cell-role="th"></th>
<th data-quarto-table-cell-role="th">cutoff</th>
<th data-quarto-table-cell-role="th">index</th>
<th data-quarto-table-cell-role="th">split</th>
<th data-quarto-table-cell-role="th">y_true</th>
<th data-quarto-table-cell-role="th">y_pred</th>
</tr>
</thead>
<tbody>
<tr>
<td data-quarto-table-cell-role="th">0</td>
<td>2022-12-07</td>
<td>2022-12-07</td>
<td>fold_1</td>
<td>8933</td>
<td>8930.750296</td>
</tr>
<tr>
<td data-quarto-table-cell-role="th">1</td>
<td>2022-12-07</td>
<td>2022-12-08</td>
<td>fold_1</td>
<td>9013</td>
<td>8930.750296</td>
</tr>
<tr>
<td data-quarto-table-cell-role="th">2</td>
<td>2022-12-07</td>
<td>2022-12-09</td>
<td>fold_1</td>
<td>9000</td>
<td>8930.750296</td>
</tr>
<tr>
<td data-quarto-table-cell-role="th">3</td>
<td>2022-12-07</td>
<td>2022-12-10</td>
<td>fold_1</td>
<td>8821</td>
<td>8930.750296</td>
</tr>
<tr>
<td data-quarto-table-cell-role="th">4</td>
<td>2022-12-07</td>
<td>2022-12-11</td>
<td>fold_1</td>
<td>8835</td>
<td>8930.750296</td>
</tr>
</tbody>
</table>

</div>
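
Since `cross_validate` returns one row per forecasted point, with `split`,
`y_true`, and `y_pred` columns as shown above, a per-fold error summary is a
short pandas expression. A sketch:

``` python
# per-fold RMSE from the cross-validation frame
fold_rmse = (cv_df.assign(sq_err=(cv_df["y_true"] - cv_df["y_pred"]) ** 2)
                  .groupby("split")["sq_err"].mean() ** 0.5)
print(fold_rmse)
```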

### Selecting the best orders for an ARIMA model using [`optuna_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#optuna_tune) or [`hyperopt_tune`](https://mustafaslanCoto.github.io/peshbeen/modules/model_selection.html#hyperopt_tune)

``` python
from peshbeen.models import arima
from itertools import product
from hyperopt import hp
# Define the hyperparameter search space for ARIMA
p_values = [0, 1, 2, 3]
d_values = [1]
q_values = [0, 1, 2, 3]

# create the list of orders using the product of p, d and q values
orders = list(product(p_values, d_values, q_values))

# Define the hyperparameter search space for seasonal ARIMA
P_values = [0, 1, 2, 3]
D_values = [0, 1]
Q_values = [0, 1, 2, 3]

# create the list of seasonal orders using the product of P, D and Q values
seasonal_orders = list(product(P_values, D_values, Q_values))

# let's define the hyperparameter space for arima using hyperopt
arima_param_space = {
    "order": hp.choice("order", orders),
    "seasonal_order": hp.choice("seasonal_order", seasonal_orders),
    "seasonal_length": 7,
    "box_cox": hp.uniform("box_cox", 0.0, 4),
    "box_cox_biasadj": hp.choice("box_cox_biasadj", [True, False])
}

arima_model = arima(target_col='admissions')
best_params, _, other_ = hyperopt_tune(
    model=arima_model,
    df=train,
    cv_split=10,
    step_size=10,
    test_size=30,
    eval_metric=RMSE,
    eval_num=5,
    param_space=arima_param_space
)
```

    100%|██████████| 5/5 [00:18<00:00,  3.62s/trial, best loss: 127.49023538536387]
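
The tuned orders can then be used to refit and forecast, mirroring the ETS
workflow. A minimal sketch, assuming `best_params` holds the sampled `order`
and `seasonal_order` and `other_` holds the Box-Cox settings; if the fixed
`seasonal_length` is not echoed back, it is set explicitly:

``` python
# refit ARIMA with the tuned orders and Box-Cox settings, then forecast
params = {**best_params, **other_}
params.setdefault("seasonal_length", 7)  # fixed in the search space
arima_model = arima(target_col='admissions', **params)
arima_model.fit(train)
arima_forecasts = arima_model.forecast(H=30)
```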

### mv_hyperopt_tune example for multivariate forecasting

``` python
from peshbeen.datasets import load_admission_calls
from peshbeen.models import ml_mv_forecaster
from peshbeen.metrics import RMSE
from peshbeen.model_selection import mv_hyperopt_tune
from lightgbm import LGBMRegressor
from hyperopt import hp
from hyperopt.pyll import scope
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(drop='first', sparse_output=False, handle_unknown="ignore")
admission_calls = load_admission_calls()

admission_calls["day_of_week"] = admission_calls.index.dayofweek
admission_calls["month"] = admission_calls.index.month
train = admission_calls[:-30]
test = admission_calls[-30:]

cat_variables = ["day_of_week", "month"]
mv_model = ml_mv_forecaster(model=LGBMRegressor(verbose=-1),
              target_cols=['admissions', "calls"], lags = {"admissions": 7, "calls": 7},
                cat_variables=cat_variables,
                difference={"admissions": 1, "calls": 1}, categorical_encoder=ohe)
lgb_param_space = {
    'learning_rate': hp.uniform('learning_rate', 0.001, 0.6),
    'num_leaves': scope.int(hp.quniform('num_leaves', 10, 200, 1)),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 18, 1)),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1),
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 1),
    'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 5, 100, 1)),
    'lambda_l2': hp.uniform('lambda_l2', 0, 10),
    'lambda_l1': hp.uniform('lambda_l1', 0, 10),
    'other_rate': hp.quniform('other_rate', 0.05, 0.3, 0.0001),
    'num_iterations': scope.int(hp.quniform('num_iterations', 30, 700, 1)),
    'top_k': scope.int(hp.quniform('top_k', 8, 30, 1)),
    'seed': 0,  # fixed, not sampled
    'lags': hp.choice('lags', [
        [1, 2, 3, 4, 5],
        [1, 4, 7],
        [1, 2, 3, 4, 5, 6, 7],
        [1, 2, 3, 4, 5, 6, 7, 14],
    ]),
}
best_params, best_lags = mv_hyperopt_tune(model=mv_model, df=train, target_col="admissions",
                                        cv_split=5, step_size=10,
                                        test_size=30, eval_metric=RMSE, eval_num=4,
                                        param_space=lgb_param_space)
print("Best params:", best_params)
print("Best lags:", best_lags)
```

    100%|██████████| 4/4 [00:06<00:00,  1.55s/trial, best loss: 190.09637007259974]
    Best params: {'bagging_fraction': 0.9705115747734178, 'feature_fraction': 0.8988112612770922, 'lambda_l1': 6.121760222044134, 'lambda_l2': 6.042394421745886, 'learning_rate': 0.03210548837285872, 'max_depth': 4, 'min_data_in_leaf': 49, 'num_iterations': 675, 'num_leaves': 54, 'other_rate': 0.1, 'seed': 0, 'top_k': 11}
    Best lags: {'admissions': [1, 2, 3, 4, 5, 6, 7, 14], 'calls': [1, 2, 3, 4, 5, 6, 7, 14]}

``` python
# forecast with the best parameters and best lags
mv_model = ml_mv_forecaster(model=LGBMRegressor(**best_params, verbose=-1),
              target_cols=['admissions', "calls"], lags = best_lags,
                cat_variables=cat_variables, categorical_encoder=ohe,
                difference={"admissions": 1, "calls": 1})
mv_model.fit(train)
mv_forecasts = mv_model.forecast(H=30, exog=test[cat_variables])
mv_forecasts
```

    {'admissions': array([7986.67713017, 8081.88114259, 8059.62557178, 8026.16860976,
            7964.59237369, 7980.88870033, 7829.90524984, 7897.16738121,
            7932.80929262, 7950.46493542, 7889.34678449, 7969.02022076,
            7981.60303702, 7927.15558785, 7917.00669151, 8014.0660108 ,
            8020.89549505, 7991.55652125, 7957.76526547, 7986.70913661,
            7847.40029963, 7865.74700984, 7939.43590946, 7953.73946396,
            7936.13278677, 7876.93131132, 7843.68727293, 7760.8895935 ,
            7768.2549859 , 7828.12875094]),
     'calls': array([1211.51464049, 1251.67831014, 1230.82133764, 1253.46631829,
            1236.54463867, 1264.14612778, 1256.73102323, 1221.33241993,
            1304.18651463, 1244.84812696, 1297.13484614, 1252.49688932,
            1310.57191578, 1265.12348579, 1314.27678184, 1327.8692606 ,
            1303.78121004, 1282.13538468, 1293.06033736, 1347.61724845,
            1315.09032531, 1287.6588799 , 1338.57230067, 1332.15590366,
            1299.84411988, 1332.92774822, 1315.07865572, 1361.31842638,
            1290.35955218, 1389.62638303])}
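
Since the multivariate forecaster returns one array per target, each series
can be scored separately. A sketch assuming `RMSE(y_true, y_pred)`:

``` python
# per-target holdout accuracy for the multivariate model
for col, preds in mv_forecasts.items():
    print(col, "holdout RMSE:", RMSE(test[col].values, preds))
```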

### mv_optuna_tune example for multivariate forecasting

``` python
from peshbeen.model_selection import mv_optuna_tune
lgb_param_space = {
    "learning_rate":     lambda t: t.suggest_float("learning_rate", 0.001, 0.6),
    "num_leaves":        lambda t: t.suggest_int("num_leaves", 10, 200),
    "max_depth":         lambda t: t.suggest_int("max_depth", 2, 18),
    "bagging_fraction":  lambda t: t.suggest_float("bagging_fraction", 0.5, 1.0),
    "feature_fraction":  lambda t: t.suggest_float("feature_fraction", 0.5, 1.0),
    "min_data_in_leaf":  lambda t: t.suggest_int("min_data_in_leaf", 5, 100),
    "lambda_l2":         lambda t: t.suggest_float("lambda_l2", 0.0, 10.0),
    "lambda_l1":         lambda t: t.suggest_float("lambda_l1", 0.0, 10.0),
    "other_rate":        lambda t: t.suggest_float("other_rate", 0.05, 0.3),
    "num_iterations":    lambda t: t.suggest_int("num_iterations", 30, 700),
    "top_k":             lambda t: t.suggest_int("top_k", 8, 30),
    "seed":              lambda t: 0,   # fixed, not sampled
    "lags":              lambda t: t.suggest_categorical(
                             "lags", [
                                 [1,2,3,4,5],
                                 [1,4,7],
                                 [1,2,3,4,5,6,7],
                                 [1,2,3,4,5,6,7,14]
                             ]),
}

mv_model = ml_mv_forecaster(model=LGBMRegressor(verbose=-1),
              target_cols=['admissions', "calls"], lags = {"admissions": 7, "calls": 7},
                cat_variables=cat_variables, categorical_encoder=ohe,
                difference={"admissions": 1, "calls": 1})

best_params, best_lags = mv_optuna_tune(model=mv_model, df=train, target_col="admissions",
                                        cv_split=5, step_size=10,
                                        test_size=30, eval_metric=RMSE, eval_num=4,
                                        param_space=lgb_param_space)
```

``` python
# forecast with the best parameters and best lags from optuna
mv_model = ml_mv_forecaster(model=LGBMRegressor(**best_params, verbose=-1),
              target_cols=['admissions', "calls"], lags = best_lags,
                cat_variables=cat_variables, categorical_encoder=ohe,
                difference={"admissions": 1, "calls": 1})
mv_model.fit(train)
mv_forecasts = mv_model.forecast(H=30, exog=test[cat_variables])
mv_forecasts
```

    {'admissions': array([7940.66762439, 7881.92803438, 7791.77565256, 7866.08124233,
            7938.33726931, 8035.56596449, 8137.09078795, 8126.55638373,
            8091.76400973, 8062.14318175, 8058.55705466, 7969.23427203,
            7942.98847788, 7980.40313447, 7895.51834588, 7933.05839914,
            7956.10839523, 7910.70312569, 7878.1254234 , 7867.55723762,
            7883.6632663 , 7924.3468859 , 7774.13404127, 7864.27817844,
            7981.38997893, 7978.90082111, 7945.56147374, 7982.62647032,
            7883.53545531, 7969.81638343]),
     'calls': array([1212.13583496, 1249.72339759, 1223.73746339, 1239.16825294,
            1293.37298791, 1213.482198  , 1185.23595117, 1185.852094  ,
            1196.29572847, 1187.18768749, 1156.71827123, 1177.44517146,
            1070.39445474, 1113.63041424, 1105.43274939, 1048.56267652,
            1138.91228993, 1123.01080934, 1070.61673101, 1190.19638216,
            1127.52013639, 1155.20637945, 1170.83223436, 1120.05401269,
            1218.72976791, 1160.56592482, 1161.19292567, 1182.08058398,
            1082.49086842, 1116.77134188])}
