# Loading the dataset

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the packaging-material dataset from the working directory
df = pd.read_csv('EcoPackAI_FinalDataset.csv')

# Predictor columns (X) and the regression target (y)
X = df[
    [
        'Strength',
        'Weight_Capacity',
        'Cost_Per_Unit_INR',
        'Biodegradability_Score',
        'Recyclability',
    ]
]
y = df['CO2_Score']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Data loaded  successfully.")


Data loaded  successfully.


**Scaling the dataset**

In [17]:
# Feature scaling: learn mean/std on the training split only, then apply the
# same transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sanity check: each scaled training feature should have mean ~0 and std ~1
print("Mean of scaled training features:", np.mean(X_train_scaled, axis=0))
print("Std of scaled training features:", np.std(X_train_scaled, axis=0))
print("Data scaled successfully.")

Mean of scaled training features: [ 0.00000000e+00  2.35106052e-16  1.17553026e-16 -6.96610525e-17
  2.61228947e-17]
Std of scaled training features: [1. 1. 1. 1. 1.]
Data scaled successfully.


**Random Forest for CO2 Score Prediction**

In [18]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Build a 100-tree forest with depth capped at 10 and fit it on the scaled training split
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
)
rf_model.fit(X_train_scaled, y_train)

# Score the held-out split
rf_preds = rf_model.predict(X_test_scaled)

print("--- Random Forest Evaluation ---")
print(f"R2 Score: {r2_score(y_test, rf_preds):.4f}")
print(f"MAE:      {mean_absolute_error(y_test, rf_preds):.4f}")
# RMSE is the square root of the mean squared error
print(f"RMSE:     {np.sqrt(mean_squared_error(y_test, rf_preds)):.4f}")

--- Random Forest Evaluation ---
R2 Score: 0.8487
MAE:      0.7769
RMSE:     0.9482


# XGBoost Training for CO2 Score Prediction

In [19]:
from xgboost import XGBRegressor

# Build and fit the gradient-boosted regressor.
# The learning rate is kept low (0.05); the original author's stated aim is an RMSE below 1.3.
xgb_model = XGBRegressor(
    n_estimators=150,
    learning_rate=0.05,
    max_depth=5,
    random_state=42,
)
xgb_model.fit(X_train_scaled, y_train)

# Score the held-out split
xgb_preds = xgb_model.predict(X_test_scaled)

print("--- XGBoost Evaluation ---")
print(f"R2 Score: {r2_score(y_test, xgb_preds):.4f}")
print(f"MAE:      {mean_absolute_error(y_test, xgb_preds):.4f}")
# RMSE is the square root of the mean squared error
print(f"RMSE:     {np.sqrt(mean_squared_error(y_test, xgb_preds)):.4f}")

--- XGBoost Evaluation ---
R2 Score: 0.8342
MAE:      0.7749
RMSE:     0.9924


# Normalization & Suitability Score

In [20]:
# 1. Define your Min-Max function
def minmax(series):
    """Rescale a numeric pandas Series linearly so its values span [0, 1].

    Parameters
    ----------
    series : pandas.Series
        Numeric values to normalise.

    Returns
    -------
    pandas.Series
        ``(series - min) / (max - min)``, on the same index as the input.
        When every value is identical (max == min) the result is all zeros
        instead of the NaNs that a 0/0 division would produce.
    """
    low = series.min()
    span = series.max() - low
    if span == 0:
        # Constant column: there is no spread to scale by. Subtracting the
        # minimum yields zeros and keeps the index (and any NaN entries) intact.
        return series - low
    return (series - low) / span

# 2. Min-max normalise the three inputs of the suitability formula.
# Note: the source column names must match the CSV headers exactly (e.g. 'Strength', not 'strength').
for norm_col, source_col in (
    ("strength_norm", "Strength"),
    ("recyclability_norm", "Recyclability"),
    ("biodegradability_norm", "Biodegradability_Score"),
):
    df[norm_col] = minmax(df[source_col])

# 3. Suitability score: weighted sum of the normalised columns
#    (0.4 strength, 0.3 recyclability, 0.3 biodegradability)
df["suitability_score"] = (
    0.4 * df["strength_norm"]
    + 0.3 * df["recyclability_norm"]
    + 0.3 * df["biodegradability_norm"]
)

print("Suitability score calculated successfully using your formula.")
print(df[["Strength", "Recyclability", "Biodegradability_Score", "suitability_score"]].head())

Suitability score calculated successfully using your formula.
   Strength  Recyclability  Biodegradability_Score  suitability_score
0         1             88                      10           0.600000
1         2             85                       9           0.756202
2         3             82                       8           0.912403
3         1             80                       7           0.472093
4         2             78                       6           0.631783


In [None]:
# Min-max normalise unit cost and CO2 score
for source_col, norm_col in (
    ("Cost_Per_Unit_INR", "cost_norm"),
    ("CO2_Score", "co2_norm"),
):
    df[norm_col] = minmax(df[source_col])

# Invert the normalised values so that the lowest cost / CO2 value maps to a score of 1
for norm_col, score_col in (
    ("cost_norm", "cost_score"),
    ("co2_norm", "co2_score_final"),
):
    df[score_col] = 1 - df[norm_col]


# Final Score

In [22]:
# Overall score: 40% cost score, 40% CO2 score, 20% suitability score
df["final score"] = (
    0.40 * df["cost_score"]
    + 0.40 * df["co2_score_final"]
    + 0.20 * df["suitability_score"]
)
print("Final score calculated successfully using your formula.")

Final score calculated successfully using your formula.


# Ranking the Materials

In [23]:
# Order the rows from highest to lowest final score; df itself is left unsorted
ranked_materials = df.sort_values("final score", ascending=False)
print("Materials ranked successfully based on the final score.")

Materials ranked successfully based on the final score.


In [24]:
# Show the ten best-ranked materials with the component scores behind each final score
print(
    ranked_materials[
        [
            "Material_Type",
            "cost_score",
            "co2_score_final",
            "suitability_score",
            "final score",
        ]
    ].head(10)
)

            Material_Type  cost_score  co2_score_final  suitability_score  \
18       Areca Leaf Plate    1.000000         1.000000           0.467442   
19   Water Hyacinth Fiber    0.850103         0.888889           0.627132   
1       Seaweed Packaging    0.778234         0.888889           0.756202   
29        Tea Waste Fiber    0.891170         0.777778           0.724031   
3     Palm Leaf Packaging    0.987680         0.777778           0.472093   
2           Mycelium Foam    0.716632         0.777778           0.912403   
55   Magnesium Alloy Pack    0.837782         0.777778           0.644574   
28  Coffee Husk Composite    0.870637         0.777778           0.564341   
20       Reed Fiber Board    0.737166         0.777778           0.786822   
13      Sugarcane Bagasse    0.952772         0.666667           0.568992   

    final score  
18     0.893488  
19     0.821023  
1      0.818090  
29     0.812385  
3      0.800602  
2      0.780245  
55     0.775139  
28     0

# Saving the Ranked Materials as CSV

In [25]:
# Write the ranked table to disk without the row index column
ranked_materials.to_csv(
    'ranked_materials.csv',
    index=False,
)
print("Ranked materials saved to 'ranked_materials.csv' successfully.")

Ranked materials saved to 'ranked_materials.csv' successfully.


In [26]:
import joblib

# Persist both fitted models and the fitted scaler, one joblib file each
for artifact, file_name in (
    (rf_model, "rf_model.pkl"),
    (xgb_model, "xgb_model.pkl"),
    (scaler, "scaler.pkl"),
):
    joblib.dump(artifact, file_name)

print("✅ Models and scaler saved successfully.")


✅ Models and scaler saved successfully.


In [27]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# Function to display accuracy metrics
def check_model_accuracy(model_name, y_actual, y_predicted):
    """Print R2, MAE, RMSE and a MAPE-based "accuracy" percentage for one model.

    Parameters
    ----------
    model_name : str
        Label shown in the report header.
    y_actual : array-like
        Ground-truth target values.
    y_predicted : array-like
        Predictions, in the same order as ``y_actual``.

    Returns
    -------
    None
        The report is written to stdout.

    Notes
    -----
    "Estimated Accuracy" is ``(1 - MAPE) * 100``. Relative error is undefined
    where the actual value is 0, so those rows are left out of the MAPE term.
    If every actual value is 0, the accuracy is reported as ``nan``.
    """
    r2 = r2_score(y_actual, y_predicted)
    mae = mean_absolute_error(y_actual, y_predicted)
    rmse = np.sqrt(mean_squared_error(y_actual, y_predicted))

    # Calculate a "Percentage Accuracy" equivalent (1 - Relative Error).
    # Plain float arrays make the comparison positional (no pandas index
    # alignment) and allow zero targets to be masked before dividing.
    actual = np.asarray(y_actual, dtype=float).ravel()
    predicted = np.asarray(y_predicted, dtype=float).ravel()
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero]))
        accuracy_pct = (1 - mape) * 100
    else:
        # No row has a non-zero actual value, so there is nothing to divide by.
        accuracy_pct = float("nan")

    print(f"--- {model_name} Accuracy Metrics ---")
    print(f"R2 Score (Fitness):    {r2:.4f} (Goal: Closer to 1.0)")
    print(f"Mean Absolute Error:   {mae:.4f}")
    print(f"RMSE (Root Mean Sq):   {rmse:.4f}")
    print(f"Estimated Accuracy:    {accuracy_pct:.2f}%")
    print("-" * 35)

# Report the same metric set for each trained model against the held-out targets
for label, predictions in (
    ("Random Forest", rf_preds),
    ("XGBoost", xgb_preds),
):
    check_model_accuracy(label, y_test, predictions)

--- Random Forest Accuracy Metrics ---
R2 Score (Fitness):    0.8487 (Goal: Closer to 1.0)
Mean Absolute Error:   0.7769
RMSE (Root Mean Sq):   0.9482
Estimated Accuracy:    86.14%
-----------------------------------
--- XGBoost Accuracy Metrics ---
R2 Score (Fitness):    0.8342 (Goal: Closer to 1.0)
Mean Absolute Error:   0.7749
RMSE (Root Mean Sq):   0.9924
Estimated Accuracy:    85.49%
-----------------------------------


In [29]:
import joblib

# 1. Show the class of the trained model (xgb_model) that is about to be saved
print(f"Model type being saved: {type(xgb_model)}")

# 2. Save the model as 'material_recommender.pkl' (the file name the original note ties to app.py)
# 3. Save the fitted scaler alongside it, since inputs must be scaled the same way before prediction
for artifact, file_name in (
    (xgb_model, 'material_recommender.pkl'),
    (scaler, 'scaler.pkl'),
):
    joblib.dump(artifact, file_name)

print("✅ 'material_recommender.pkl' and 'scaler.pkl' saved successfully!")

Model type being saved: <class 'xgboost.sklearn.XGBRegressor'>
✅ 'material_recommender.pkl' and 'scaler.pkl' saved successfully!
