In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor
import joblib
import os

# ============================
# 1. Load dataset
# ============================
df = pd.read_csv(r"C:\Users\User\Desktop\solar_code\Task\plant1\Plant1_Merged_Final.csv")

# Ensure datetime format
df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'])

# Time features
df['hour'] = df['DATE_TIME'].dt.hour
df['minute'] = df['DATE_TIME'].dt.minute
df['time_fraction'] = df['hour'] + df['minute']/60

# Cyclical encoding
df['sin_time'] = np.sin(2 * np.pi * df['time_fraction'] / 24)
df['cos_time'] = np.cos(2 * np.pi * df['time_fraction'] / 24)

# âœ… Keep only selected features
selected_features = ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'sin_time', 'cos_time']
X = df[selected_features]
y = df['AC_POWER'].values  # Prediction target

# ============================
# 2. Create sliding windows
# ============================
sequence_length = 6  # past 6 time steps

X_seq, y_seq = [], []
for i in range(len(X) - sequence_length):
    X_seq.append(X.iloc[i:i+sequence_length].values)
    y_seq.append(y[i+sequence_length])

X_seq = np.array(X_seq)   # shape: (samples, timesteps, features)
y_seq = np.array(y_seq)

# ============================
# 3. Scale X only
# ============================
scaler_X = MinMaxScaler()
X_flat = X_seq.reshape(-1, X_seq.shape[-1])  # flatten
X_scaled = scaler_X.fit_transform(X_flat).reshape(X_seq.shape)

# ============================
# 4. Train/Test Split
# ============================
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_seq, test_size=0.2, random_state=42)

# Flatten for XGBoost (2D input)
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# ============================
# 5. Train XGBoost
# ============================
xgb_model = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=42
)

print("ðŸš€ Training XGBoost Model...")
xgb_model.fit(X_train_flat, y_train)

# ============================
# 6. Evaluate Model
# ============================
y_pred = xgb_model.predict(X_test_flat)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("\nðŸ“Š Evaluation Metrics (XGBoost-Only):")
print(f"MSE  : {mse:.2f}")
print(f"RMSE : {rmse:.2f}")
print(f"RÂ²   : {r2:.4f}")
print(f"MAE  : {mae:.2f}")

# ============================
# 7. Save the Model and Scaler
# ============================
save_dir = r"C:\Users\User\Desktop\solar_code\Task\plant1\modxg_simple"
os.makedirs(save_dir, exist_ok=True)

xgb_model.save_model(os.path.join(save_dir, "xgb_model_simple.json"))
joblib.dump(scaler_X, os.path.join(save_dir, "scaler_X.pkl"))
print("âœ… Model and Scaler saved successfully!")


ðŸš€ Training XGBoost Model...

ðŸ“Š Evaluation Metrics (XGBoost-Only):
MSE  : 5547406.88
RMSE : 2355.29
RÂ²   : 0.9223
MAE  : 1157.17
âœ… Model and Scaler saved successfully!


In [15]:
import pandas as pd
import numpy as np
import joblib
from xgboost import XGBRegressor

# ============================
# 1. Load Saved Model and Scaler
# ============================
model_path = r"C:\Users\User\Desktop\solar_code\Task\plant1\modxg_simple\xgb_model_simple.json"
scaler_path = r"C:\Users\User\Desktop\solar_code\Task\plant1\modxg_simple\scaler_X.pkl"

xgb_model = XGBRegressor()
xgb_model.load_model(model_path)
scaler_X = joblib.load(scaler_path)

# ============================
# 2. Preprocessing Function
# ============================
def preprocess_input_for_inference(df_input, sequence_length=6):
    """
    df_input must include columns:
    [DATE_TIME, AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, IRRADIATION]
    """

    df_input['DATE_TIME'] = pd.to_datetime(df_input['DATE_TIME'])

    # Extract time features
    df_input['hour'] = df_input['DATE_TIME'].dt.hour
    df_input['minute'] = df_input['DATE_TIME'].dt.minute
    df_input['time_fraction'] = df_input['hour'] + df_input['minute'] / 60.0

    # Cyclical time encoding
    df_input['sin_time'] = np.sin(2 * np.pi * df_input['time_fraction'] / 24)
    df_input['cos_time'] = np.cos(2 * np.pi * df_input['time_fraction'] / 24)

    # Use only the selected 5 simplified features
    selected_cols = ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'sin_time', 'cos_time']
    df_input = df_input[selected_cols]

    # Ensure we have enough rows
    if len(df_input) < sequence_length:
        raise ValueError(f"Need at least {sequence_length} rows of data for prediction")

    # Create sliding window (last sequence_length rows)
    X_seq = [df_input.iloc[-sequence_length:].values]
    X_seq = np.array(X_seq)  # shape: (1, timesteps, features)

    # Scale
    X_flat = X_seq.reshape(-1, X_seq.shape[-1])
    X_scaled = scaler_X.transform(X_flat).reshape(X_seq.shape)

    # Flatten to 2D for XGBoost
    X_scaled_flat = X_scaled.reshape(X_scaled.shape[0], -1)

    return X_scaled_flat

# ============================
# 3. Prediction Function
# ============================
def predict_next_15min_ac_power(new_data):
    """
    new_data: pandas DataFrame with latest 6 rows
    """
    X_processed = preprocess_input_for_inference(new_data)
    prediction = xgb_model.predict(X_processed)
    return prediction[0]  # return single value

# ============================
# 4. Example Usage
# ============================
sample_data = pd.DataFrame({
    "DATE_TIME": [
        "2020-05-15 06:00:00",
        "2020-05-15 06:15:00",
        "2020-05-15 06:30:00",
        "2020-05-15 06:45:00",
        "2020-05-15 07:00:00",
        "2020-05-15 07:15:00",
    ],
    "AMBIENT_TEMPERATURE": [24.08, 24.01, 23.97, 24.21, 24.53, 24.82],
    "MODULE_TEMPERATURE": [22.20, 22.35, 22.89, 24.44, 27.19, 28.88],
    "IRRADIATION": [0.0058, 0.0223, 0.0494, 0.0954, 0.1419, 0.1547]
})

predicted_power = predict_next_15min_ac_power(sample_data)
print(f"ðŸ”® Predicted AC Power for Next 15 Minutes: {predicted_power:.2f} kW")


ðŸ”® Predicted AC Power for Next 15 Minutes: 4825.71 kW
