In [1]:
# pip install FLAML

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from flaml import AutoML
from sklearn.metrics import mean_squared_error  

# Read the raw dataset from disk
data = pd.read_csv("data.csv")

# Name of the column to predict. It is written in its *sanitised* form, so it
# must agree with the column renaming performed below.
target_column = 'Day-ahead_prices_Germany/Luxembourg_€/MWh_Original_resolutions'

# Keep numeric columns only and sanitise their names in one pass:
# '[', ']' and '<' are removed, spaces become underscores.
# NOTE(review): presumably needed because some of the estimators reject
# these characters in feature names -- confirm.
_column_name_cleanup = str.maketrans({'[': None, ']': None, '<': None, ' ': '_'})
numeric_data = data.select_dtypes(include='number').rename(
    columns=lambda name: name.translate(_column_name_cleanup)
)

# Positional 80% / 20% split (first rows train, remaining rows test).
# This is only a chronological split if the rows of data.csv are already
# ordered by time -- nothing here sorts them.
train_size = int(0.8 * numeric_data.shape[0])
train_data, test_data = numeric_data.iloc[:train_size], numeric_data.iloc[train_size:]

# Separate predictors from the target for both partitions
features_train, target_train = train_data.drop(columns=target_column), train_data[target_column]
features_test, target_test = test_data.drop(columns=target_column), test_data[target_column]

# Search configuration handed to FLAML
automl_settings = dict(
    time_budget=180,  # overall search budget, in seconds
    metric="rmse",  # metric the search optimises
    task="regression",  # kind of learning problem
    log_file_name="automl.log",  # where FLAML writes its search log
    estimator_list=["catboost", "lgbm", "rf", "xgboost"],  # candidate learners
    n_jobs=-1,  # use every available CPU
)

# Create the AutoML object and run the search on the training partition
automl = AutoML()
automl.fit(X_train=features_train, y_train=target_train, **automl_settings)

# Report the winning estimator and the configuration it was trained with
print("Best Model:", automl.model)
print("Best Model Hyperparameters:", automl.best_config)

# Score the held-out partition with the best model found
predictions = automl.predict(features_test)

# RMSE is the square root of the mean squared error on the test partition
mse = mean_squared_error(target_test, predictions)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")
