from peshbeen.datasets import load_wales_admissions
from peshbeen.metrics import RMSE
from lightgbm import LGBMRegressor
from peshbeen.models import ml_forecaster
from peshbeen.model_selection import hyperopt_tune
from sklearn.preprocessing import OneHotEncoder
# Load the Wales admissions data and derive two calendar features from its index.
wales_admissions = load_wales_admissions()
wales_admissions["day_of_week"] = wales_admissions.index.dayofweek
wales_admissions["month"] = wales_admissions.index.month

# Hold out the final 30 rows as the test set; everything before them is training data.
test_horizon = 30
train, test = wales_admissions[:-test_horizon], wales_admissions[-test_horizon:]

# The calendar columns are declared categorical and handed to a one-hot encoder
# that drops the first level of each feature and returns a dense array.
cat_variables = ["day_of_week", "month"]
ohe = OneHotEncoder(drop="first", sparse_output=False, handle_unknown="ignore")

# Wrap a LightGBM regressor (logging silenced) in the peshbeen forecaster and
# fit it on the training split.
ml_model = ml_forecaster(
    model=LGBMRegressor(verbose=-1),
    target_col="admissions",
    lags=30,
    cat_variables=cat_variables,
    categorical_encoder=ohe,
)
ml_model.fit(train)
# Define the hyperparameter search space for LightGBM.
from hyperopt import hp
from hyperopt.pyll import scope

# Entries built with `hp.*` are searched; plain values (e.g. "seed") are fixed.
# NOTE(review): assumes hyperopt_tune forwards fixed entries to the model the
# same way it does for "seed" -- confirm against the peshbeen documentation.
lgb_param_space = {
    "learning_rate": hp.uniform("learning_rate", 0.001, 0.6),
    # quniform yields floats; scope.int casts them to the integers LightGBM expects.
    "num_leaves": scope.int(hp.quniform("num_leaves", 10, 200, 1)),
    "max_depth": scope.int(hp.quniform("max_depth", 2, 18, 1)),
    # LightGBM only applies bagging_fraction when bagging_freq is non-zero
    # (its default of 0 disables bagging), so bagging is switched on for every
    # iteration here; otherwise this search dimension would have no effect.
    "bagging_fraction": hp.uniform("bagging_fraction", 0.5, 1),
    "bagging_freq": 1,
    "feature_fraction": hp.uniform("feature_fraction", 0.5, 1),
    "lambda_l2": hp.uniform("lambda_l2", 0, 10),
    "lambda_l1": hp.uniform("lambda_l1", 0, 10),
    # NOTE(review): top_rate/other_rate are GOSS sampling parameters; LightGBM
    # documents them as used only with GOSS, which this space never enables.
    # Kept so the keys reported by the tuner stay unchanged -- consider
    # removing them or enabling GOSS (which cannot be combined with bagging).
    "top_rate": hp.quniform("top_rate", 0.05, 0.4, 0.0001),
    "other_rate": hp.quniform("other_rate", 0.05, 0.3, 0.0001),
    "num_iterations": scope.int(hp.quniform("num_iterations", 30, 700, 1)),
    # Candidate lag sets; hp.choice picks one whole list per trial.
    "lags": hp.choice(
        "lags",
        [
            [1, 2, 3, 4, 5],
            [1, 4, 7],
            [1, 2, 3, 4, 5, 6, 7],
            [1, 2, 3, 4, 5, 6, 7, 14],
            [1, 2, 3, 4, 5, 6, 7, 14, 21],
            [1, 2, 3],
        ],
    ),
    # Fixed seed so repeated trials are comparable.
    "seed": 0,
    # Presumably the Box-Cox lambda and whether to bias-adjust the
    # back-transform -- TODO confirm semantics against peshbeen.
    "box_cox": hp.uniform("box_cox", 0.0, 4),
    "box_cox_biasadj": hp.choice("box_cox_biasadj", [True, False]),
}
# Run hyperparameter tuning using hyperopt on the training split, scored by RMSE.
# NOTE(review): cv_split / step_size / test_size / eval_num presumably control
# the cross-validation windows and the number of search evaluations -- confirm
# their exact meaning against the peshbeen documentation.
tuning_output = hyperopt_tune(
    model=ml_model,
    df=train,
    cv_split=5,
    step_size=10,
    test_size=1,
    eval_metric=RMSE,
    eval_num=10,
    param_space=lgb_param_space,
)
best_params, best_lags, other_ = tuning_output

# Report the three values returned by the tuner.
for label, value in (
    ("Best params:", best_params),
    ("Best lags:", best_lags),
    ("Other info:", other_),
):
    print(label, value)