# In [None]:
# train_model.py

# 1. Imports
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor 
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split

# 2. Load the Dataset
# Read the training data from a CSV given by a relative path.
# A missing file is fatal: abort with the message on stderr and a non-zero
# exit status so a calling shell or scheduler sees the failure. (The bare
# `exit()` helper is injected by the `site` module for interactive use and
# exits with status 0 when called without an argument.)
try:
    data = pd.read_csv("wind_large_dataset.csv")
except FileNotFoundError:
    raise SystemExit(
        "Error: 'wind_large_dataset.csv' not found. Check the file path."
    ) from None

# 3. Define Features (X) and Targets (Y)
# Columns used as model inputs.
X_columns = [
    "Wind Speed (m/s)",
    "Wind Direction (°)",
    "Temperature (°C)",
    "Altitude (m)",
    "Latitude",
    "Longitude",
]
# Columns the model is trained to predict.
Y_columns = ["Pitch Angle (°)", "Max Output (kW)"]

# Slice the loaded frame into the feature table and the target table.
X = data.loc[:, X_columns]
Y = data.loc[:, Y_columns]

# 4. Split the Data
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# The held-out part is kept (not discarded) so the fitted model can be
# checked on rows it never saw during training.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# 5. Train the Model
# MultiOutputRegressor wraps the base forest so that a separate regressor is
# fitted for each target column in Y_train.
print("Training Multi-Output Random Forest Regressor...")
base_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
multi_target_model = MultiOutputRegressor(base_model)
multi_target_model.fit(X_train, Y_train)
print("Training complete.")

# Report the estimator's own score (R^2) on the held-out rows so a badly
# fitted model is noticed before it is persisted. This does not alter the
# trained model.
test_score = multi_target_model.score(X_test, Y_test)
print(f"Held-out R^2 score: {test_score:.4f}")

# 6. Save the Model
# Serialize the fitted multi-output estimator to MODEL_FILENAME with joblib,
# then confirm on stdout.
MODEL_FILENAME = "multi_output_wind_model.pkl"
joblib.dump(multi_target_model, filename=MODEL_FILENAME)
print(f"✅ Model saved successfully as '{MODEL_FILENAME}'")