In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import r2_score, mean_absolute_error
from lightgbm import LGBMRegressor, early_stopping, log_evaluation
import joblib

# ========== STEP 1: LOAD DATA ==========
df = pd.read_csv("battery_discharge.csv")

# ========== STEP 2: FEATURE ENGINEERING ==========
df['Cycle_squared'] = df['Cycle']**2
df['Cycle_sqrt'] = np.sqrt(df['Cycle'])
df['Voltage_Current'] = df['Voltage_V'] * df['Current_A']
df['Temp_Current'] = df['Temperature_C'] * df['Current_A']

X = df[['Cycle', 'Cycle_squared', 'Cycle_sqrt', 'Current_A', 'Voltage_V',
        'Temperature_C', 'Voltage_Current', 'Temp_Current']]
y = df['Discharge_Capacity_mAh']

# Save features to CSV
X.to_csv("ajaytestaa.csv", index=False)
print("Feature CSV saved successfully!")

# ========== STEP 3: SPLIT DATA ==========
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ========== STEP 4: SCALE & FEATURE SELECTION ==========
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

selector = SelectKBest(score_func=f_regression, k='all')  # keep all features
X_train_sel = selector.fit_transform(X_train_scaled, y_train)
X_test_sel = selector.transform(X_test_scaled)

# ========== STEP 5: TRAIN LIGHTGBM ==========
lgb = LGBMRegressor(
    n_estimators=3000,
    learning_rate=0.01,
    max_depth=10,
    num_leaves=64,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1
)

lgb.fit(
    X_train_sel, y_train,
    callbacks=[early_stopping(stopping_rounds=100), log_evaluation(0)],
    eval_set=[(X_test_sel, y_test)]
)

# ========== STEP 6: SAVE MODEL ==========
joblib.dump(lgb, "battery_model_new.pkl")
print("Model saved successfully as 'battery_model_new.pkl'")

# ========== STEP 7: EVALUATE ==========
y_pred_train = lgb.predict(X_train_sel)
y_pred_test = lgb.predict(X_test_sel)

print("Train R²:", r2_score(y_train, y_pred_train))
print("Test R²:", r2_score(y_test, y_pred_test))
print("Train MAE:", mean_absolute_error(y_train, y_pred_train))
print("Test MAE:", mean_absolute_error(y_test, y_pred_test))

# ========== STEP 8: MANUAL CHECK ==========
row_index = 10  # choose any row
X_manual = X_test_sel[row_index].reshape(1, -1)  # reshape for single prediction
y_actual = y_test.iloc[row_index]

y_pred_manual = lgb.predict(X_manual)

print("Manual Check X_manual:", X_manual)
print(f"Actual Capacity: {y_actual:.2f} mAh")
print(f"Predicted Capacity: {y_pred_manual[0]:.2f} mAh")

# ========== STEP 9: LOAD MODEL (if needed later) ==========
model = joblib.load("battery_model_new.pkl")
y_pred_loaded_model = model.predict(X_test_sel)
print("Prediction from loaded model successful!")


Feature CSV saved successfully!
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000362 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 8
[LightGBM] [Info] Start training from score 1111.072116
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[409]	valid_0's l2: 22887.6
Model saved successfully as 'battery_model_new.pkl'
Train R²: 0.9391516532554025
Test R²: 0.9198174902428672
Train MAE: 111.14388459199697
Test MAE: 126.18576364000357
Manual Check X_manual: [[-0.70203042 -0.82232002 -0.52074382  0.62645384  1.1098124   0.79754007
   0.75781969  1.02351565]]
Actual Capacity: 1422.54 mAh
Predicted Capacity: 1446.41 mAh
Prediction from loaded model successful!


