<a href="https://colab.research.google.com/github/vamckrishnaaa01/final-year-project/blob/main/final_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================================================
# RANDOM FOREST + POWER-LAW REGRESSION + GENETIC ALGORITHM
# Optimization of Welding Parameters for Target UTS Range
# ================================================================

import numpy as np
import pandas as pd
import random, math

# ---------- Load dataset ----------
# Inside Google Colab, ask the user to upload the workbook. Outside Colab
# (google.colab is not importable), or when the upload dialog returns no
# file, fall back to a workbook in the current working directory.
try:
    from google.colab import files
    uploaded = files.upload()  # Upload your dataset: UTS_150_experiments.xlsx
    file_name = list(uploaded.keys())[0]
except (ImportError, IndexError):
    # Only the two expected failures are handled here; a bare `except:`
    # would also swallow KeyboardInterrupt and genuine upload errors.
    file_name = "UTS_150_experiments.xlsx"

# Shorten the spreadsheet column names to the symbols used in the rest of
# the script.
df = pd.read_excel(file_name).rename(columns={
    'PeakPower_kW': 'PP',
    'Frequency_Hz': 'CF',
    'Speed_mm_s': 'CS',
    'Predicted_UTS_MPa': 'UTS'
})

# Keep only the four modelling columns and drop incomplete rows, then drop
# rows containing any non-positive value.
df = df[['PP','CF','CS','UTS']].dropna()
df = df[(df > 0).all(axis=1)].reset_index(drop=True)

# ---------- Random Forest Model ----------
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

# Features are the three process parameters; the target is UTS.
X = df[['PP','CF','CS']]
y = df['UTS']

# Hold out 25% of the rows for evaluation; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

rf = RandomForestRegressor(n_estimators=600, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

# Predict on the hold-out rows once and reuse the result for both metrics
# instead of running the 600-tree forest twice.
y_test_pred = rf.predict(X_test)
rf_r2 = r2_score(y_test, y_test_pred)
rf_mae = mean_absolute_error(y_test, y_test_pred)

print("\n[RANDOM FOREST PERFORMANCE]")
print(f"R² Score  = {rf_r2:.4f}")
print(f"MAE       = {rf_mae:.3f} MPa")

print("\n[MODEL INTERPRETATION]")
print("Expected: R² ≈ 0.90–0.95 → Model explains 90–95% variation in UTS.")
print(f"Your Model R²: {rf_r2:.4f}")
# 0.90 is the lower end of the expected range printed above.
if rf_r2 >= 0.90:
    print("✅ Interpretation: Excellent predictive model.")
else:
    print("⚠️ Model may require parameter tuning or additional features.")

# ---------- Power-Law Fit (From RF Predictions) ----------
from sklearn.linear_model import LinearRegression

# A power law UTS = C * PP^a * CF^b * CS^c becomes linear after taking logs:
#   log(UTS) = log(C) + a*log(PP) + b*log(CF) + c*log(CS)
# so an ordinary linear regression in log space recovers the exponents.
# The regression target is the random forest's prediction on every row,
# not the measured UTS column.
rf_pred_all = rf.predict(X)
LY = np.log(rf_pred_all)
LX = np.log(df[['PP','CF','CS']])

lin = LinearRegression()
lin.fit(LX, LY)

# The intercept is log(C); the coefficients are the exponents in column order.
C = np.exp(lin.intercept_)
a, b, c = lin.coef_

print("\n[POWER-LAW REGRESSION EQUATION]")
print(f"UTS ≈ {C:.3f} × (PP)^{a:.3f} × (CF)^{b:.3f} × (CS)^{c:.3f}")

def uts_powerlaw(PP, CF, CS):
    """Evaluate the fitted power law C * PP**a * CF**b * CS**c.

    The coefficient ``C`` and the exponents ``a``, ``b`` and ``c`` are read
    from module-level names set by the power-law regression.
    """
    pp_term = PP ** a
    cf_term = CF ** b
    cs_term = CS ** c
    return C * pp_term * cf_term * cs_term

# ---------- GA Constraints ----------
# The accepted UTS window and the PP window are fixed by hand.
UTS_MIN = 620
UTS_MAX = 660
PP_MIN = 1.650
PP_MAX = 1.800

# The CF and CS search ranges span the extremes present in the dataset.
CF_MIN = df['CF'].min()
CF_MAX = df['CF'].max()
CS_MIN = df['CS'].min()
CS_MAX = df['CS'].max()

# One (low, high) pair per gene, in the order PP, CF, CS.
bounds = [
    (PP_MIN, PP_MAX),
    (CF_MIN, CF_MAX),
    (CS_MIN, CS_MAX),
]

def clip(ind):
    """Return a new list with each of the three genes forced into its range.

    Gene ``i`` is limited to the ``(low, high)`` pair stored in the
    module-level ``bounds[i]``.
    """
    clipped = []
    for i in range(3):
        low, high = bounds[i]
        clipped.append(min(max(ind[i], low), high))
    return clipped

def fitness(ind):
    """Score an individual ``[PP, CF, CS]`` for the genetic algorithm.

    Returns the power-law UTS when PP lies in [PP_MIN, PP_MAX] and the UTS
    lies in [UTS_MIN, UTS_MAX]; otherwise returns the penalty -1e12 so the
    candidate sorts below every feasible one.
    """
    penalty = -1e12
    PP, CF, CS = ind

    # Reject candidates whose peak power is outside its window.
    if PP > PP_MAX or PP < PP_MIN:
        return penalty

    uts = uts_powerlaw(PP, CF, CS)
    if UTS_MIN <= uts <= UTS_MAX:
        return uts
    return penalty

# ---------- GA Initialization ----------
# POP is the number of individuals per generation, GEN the number of
# generations; seeding `random` makes the run repeatable.
POP, GEN = 80, 120
random.seed(42)

def rand_ind():
    """Draw one random individual: a uniform sample per gene within ``bounds``."""
    genes = []
    for i in range(3):
        low, high = bounds[i]
        genes.append(random.uniform(low, high))
    return genes

# Start the search from POP independently drawn random individuals.
pop = []
for _ in range(POP):
    pop.append(rand_ind())

def crossover(p1,p2):
    """Produce a child whose genes are the average of the parents' genes.

    The averaged genes are passed through ``clip`` before being returned.
    """
    midpoint = []
    for i in range(3):
        midpoint.append((p1[i] + p2[i]) / 2)
    return clip(midpoint)

def mutate(ind, rate=0.12):
    """Return a mutated, clipped copy of ``ind``; ``ind`` itself is not modified.

    Each gene is perturbed with probability 0.5 by Gaussian noise whose
    standard deviation is ``rate`` times the width of that gene's range.
    """
    spans = (PP_MAX - PP_MIN, CF_MAX - CF_MIN, CS_MAX - CS_MIN)
    mutated = ind[:]
    for i, span in enumerate(spans):
        # The coin flip is drawn for every gene, mutated or not.
        if random.random() >= 0.5:
            continue
        mutated[i] = mutated[i] + random.gauss(0, span * rate)
    return clip(mutated)

# ---------- Run GA ----------
# Parents are always drawn from the `top_k` best individuals. The value
# depends only on POP, so it is computed once before the loop.
top_k = max(10, POP//3)

for g in range(GEN):
    # Rank the current population from best to worst fitness.
    scored = sorted(
        ((fitness(ind), ind) for ind in pop),
        key=lambda x: x[0],
        reverse=True,
    )
    best_fit, best_ind = scored[0]
    parent_pool = [ind for _, ind in scored[:top_k]]

    # Elitism: the best individual is carried over unchanged.
    new_pop = [best_ind]

    # Fill the rest of the next generation with mutated blends of two
    # parents picked at random (with replacement) from the parent pool.
    while len(new_pop) < POP:
        p1 = random.choice(parent_pool)
        p2 = random.choice(parent_pool)
        child = mutate(crossover(p1, p2))
        new_pop.append(child)

    pop = new_pop

    # Progress report every tenth generation.
    if g % 10 == 0:
        print(f"Gen {g:3d} | Best UTS = {best_fit:.3f} MPa")

# ---------- Final Top-1 Solution ----------
# Re-score the population produced by the last generation. The `scored`
# list left over from the loop ranks the population *before* the final
# replacement, so children created in the last generation would otherwise
# never be evaluated.
scored = [(fitness(ind), ind) for ind in pop]
scored.sort(reverse=True, key=lambda x: x[0])
best_fit, (PP_opt, CF_opt, CS_opt) = scored[0]

# fitness() returns a large negative penalty for infeasible candidates, so
# check the window explicitly rather than assume the winner is feasible.
best_is_feasible = UTS_MIN <= best_fit <= UTS_MAX

print("\n================== OPTIMAL WELDING PARAMETERS ==================")
if best_is_feasible:
    print(f"Optimal PP = {PP_opt:.4f} kW")
    print(f"Optimal CF = {CF_opt:.4f} Hz")
    print(f"Optimal CS = {CS_opt:.4f} mm/s")
    print(f"Predicted UTS = {best_fit:.3f} MPa (within {UTS_MIN}–{UTS_MAX} range ✅)")
else:
    print(f"⚠️ No candidate with predicted UTS within {UTS_MIN}–{UTS_MAX} MPa was found.")
print("================================================================")

Saving UTS_150_experiments.xlsx to UTS_150_experiments.xlsx

[RANDOM FOREST PERFORMANCE]
R² Score  = 0.9240
MAE       = 6.574 MPa

[MODEL INTERPRETATION]
Expected: R² ≈ 0.90–0.95 → Model explains 90–95% variation in UTS.
Your Model R²: 0.9240
✅ Interpretation: Excellent predictive model.

[POWER-LAW REGRESSION EQUATION]
UTS ≈ 315.767 × (PP)^0.370 × (CF)^0.078 × (CS)^0.066
Gen   0 | Best UTS = 659.853 MPa
Gen  10 | Best UTS = 659.991 MPa
Gen  20 | Best UTS = 659.991 MPa
Gen  30 | Best UTS = 659.991 MPa
Gen  40 | Best UTS = 659.991 MPa
Gen  50 | Best UTS = 659.995 MPa
Gen  60 | Best UTS = 659.995 MPa
Gen  70 | Best UTS = 659.995 MPa
Gen  80 | Best UTS = 659.995 MPa
Gen  90 | Best UTS = 660.000 MPa
Gen 100 | Best UTS = 660.000 MPa
Gen 110 | Best UTS = 660.000 MPa

Optimal PP = 1.6802 kW
Optimal CF = 120.1275 Hz
Optimal CS = 13.3957 mm/s
Predicted UTS = 660.000 MPa (within 620–660 range ✅)


In [None]:
# ================================================================
# RANDOM FOREST + POWER-LAW REGRESSION + GENETIC ALGORITHM
# Final Output: TWO BEST DISTINCT OPTIMAL SOLUTIONS
# ================================================================

import numpy as np
import pandas as pd
import random

# ---------- Load dataset ----------
# Inside Google Colab, ask the user to upload the workbook. Outside Colab
# (google.colab is not importable), or when the upload dialog returns no
# file, fall back to a workbook in the current working directory.
try:
    from google.colab import files
    uploaded = files.upload()
    file_name = list(uploaded.keys())[0]
except (ImportError, IndexError):
    # Only the two expected failures are handled here; a bare `except:`
    # would also swallow KeyboardInterrupt and genuine upload errors.
    file_name = "UTS_150_experiments.xlsx"

# Shorten the spreadsheet column names to the symbols used in the rest of
# the script.
df = pd.read_excel(file_name).rename(columns={
    'PeakPower_kW': 'PP',
    'Frequency_Hz': 'CF',
    'Speed_mm_s': 'CS',
    'Predicted_UTS_MPa': 'UTS'
})

# Keep only the four modelling columns and drop incomplete rows, then drop
# rows containing any non-positive value.
df = df[['PP','CF','CS','UTS']].dropna()
df = df[(df > 0).all(axis=1)].reset_index(drop=True)

# ---------- RANDOM FOREST ----------
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

# Features are the three process parameters; the target is UTS.
X = df[['PP','CF','CS']]
y = df['UTS']

# Hold out 25% of the rows for evaluation; the seed is fixed so the split
# is the same on every run.
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=42)

rf = RandomForestRegressor(n_estimators=600, random_state=42, n_jobs=-1)
rf.fit(Xtr, ytr)

# Predict on the hold-out rows once and reuse the result for both metrics
# instead of running the 600-tree forest twice.
yte_pred = rf.predict(Xte)
rf_r2 = r2_score(yte, yte_pred)
rf_mae = mean_absolute_error(yte, yte_pred)

print("\n[RANDOM FOREST PERFORMANCE]")
print(f"R² Score = {rf_r2:.4f}")
print("Expected: R² ≈ 0.90–0.95 → 90–95% variance explained")
print(f"MAE = {rf_mae:.2f} MPa")

# 0.90 is the lower end of the expected range printed above.
if rf_r2 >= 0.90:
    print("✅ Model quality: Excellent")
else:
    print("⚠️ Model quality: Needs tuning")

# ---------- POWER-LAW REGRESSION ----------
from sklearn.linear_model import LinearRegression

# Taking logs turns UTS = C * PP^a * CF^b * CS^c into a linear model:
#   log(UTS) = log(C) + a*log(PP) + b*log(CF) + c*log(CS)
# The regression target is the random forest's prediction on every row,
# not the measured UTS column.
rf_pred = rf.predict(X)
LY = np.log(rf_pred)
LX = np.log(df[['PP','CF','CS']])

lin = LinearRegression()
lin.fit(LX, LY)

# The intercept is log(C); the coefficients are the exponents in column order.
C = np.exp(lin.intercept_)
a, b, c = lin.coef_

print("\n[POWER-LAW REGRESSION EQUATION]")
print(f"UTS ≈ {C:.2f} × (PP)^{a:.3f} × (CF)^{b:.3f} × (CS)^{c:.3f}")

def uts_powerlaw(PP, CF, CS):
    """Return the power-law estimate C * PP**a * CF**b * CS**c.

    ``C``, ``a``, ``b`` and ``c`` are module-level values produced by the
    power-law regression.
    """
    estimate = C * (PP ** a)
    estimate = estimate * (CF ** b)
    estimate = estimate * (CS ** c)
    return estimate

# ---------- GA CONSTRAINTS ----------
# The accepted UTS window and the PP window are fixed by hand.
UTS_MIN = 620
UTS_MAX = 660
PP_MIN = 1.650
PP_MAX = 1.800

# The CF and CS search ranges span the extremes present in the dataset.
CF_MIN = df['CF'].min()
CF_MAX = df['CF'].max()
CS_MIN = df['CS'].min()
CS_MAX = df['CS'].max()

# One (low, high) pair per gene, in the order PP, CF, CS.
bounds = [
    (PP_MIN, PP_MAX),
    (CF_MIN, CF_MAX),
    (CS_MIN, CS_MAX),
]

def clip(ind):
    """Return a new ``[PP, CF, CS]`` list with each gene limited to its range.

    The limits are the module-level ``*_MIN`` / ``*_MAX`` constants.
    """
    limits = ((PP_MIN, PP_MAX), (CF_MIN, CF_MAX), (CS_MIN, CS_MAX))
    return [min(max(ind[i], low), high) for i, (low, high) in enumerate(limits)]

def fitness(ind):
    """Score an individual for the genetic algorithm.

    Returns the power-law UTS when it lies in [UTS_MIN, UTS_MAX]; otherwise
    returns the penalty -1e12 so the candidate ranks below every feasible one.
    """
    uts = uts_powerlaw(*ind)
    if UTS_MIN <= uts <= UTS_MAX:
        return uts
    return -1e12

# ---------- GA OPERATORS ----------
# Individuals per generation and number of generations.
POP = 80
GEN = 120
# A fixed seed makes the run repeatable.
random.seed(42)

def rand_ind():
    """Draw one random individual: a uniform sample per gene within ``bounds``."""
    genes = []
    for i in range(3):
        low, high = bounds[i]
        genes.append(random.uniform(low, high))
    return genes

def crossover(p1,p2):
    """Blend two parents by averaging gene by gene, then clip the result."""
    blended = []
    for i in range(3):
        gene_sum = p1[i] + p2[i]
        blended.append(gene_sum / 2)
    return clip(blended)

def mutate(ind, rate=0.12):
    """Return a mutated, clipped copy of ``ind``; ``ind`` itself is not modified.

    Each gene is perturbed with probability 0.5 by Gaussian noise whose
    standard deviation is ``rate`` times the width of that gene's range.
    """
    widths = (PP_MAX - PP_MIN, CF_MAX - CF_MIN, CS_MAX - CS_MIN)
    mutated = ind[:]
    for i, width in enumerate(widths):
        # The coin flip is drawn for every gene, mutated or not.
        if random.random() >= 0.5:
            continue
        mutated[i] = mutated[i] + random.gauss(0, width * rate)
    return clip(mutated)

# ---------- RUN GA ----------
# Start from POP independently drawn random individuals.
pop = []
for _ in range(POP):
    pop.append(rand_ind())

# Parents are always drawn from the `top_k` best individuals. The value
# depends only on POP, so it is computed once before the loop.
top_k = max(10, POP//3)

for g in range(GEN):
    # Rank the current population from best to worst fitness.
    scored = sorted(
        ((fitness(ind), ind) for ind in pop),
        key=lambda x: x[0],
        reverse=True,
    )
    best = scored[0]
    parent_pool = [ind for _, ind in scored[:top_k]]

    # Elitism: the best individual is carried over unchanged.
    new_pop = [best[1]]

    # Fill the rest of the next generation with mutated blends of two
    # parents picked at random (with replacement) from the parent pool.
    while len(new_pop) < POP:
        p1 = random.choice(parent_pool)
        p2 = random.choice(parent_pool)
        new_pop.append(mutate(crossover(p1, p2)))

    pop = new_pop

# ---------- SELECT TWO BEST DISTINCT SOLUTIONS ----------
# Rank the population produced by the last generation, best first.
scored = sorted(
    ((fitness(ind), ind) for ind in pop),
    key=lambda x: x[0],
    reverse=True,
)

solutions = []
# Minimum Euclidean distance (in raw gene units) from the best solution for
# a candidate to count as "distinct".
# NOTE(review): the distance is not scaled per gene. The PP range is only
# 0.15 wide, below this threshold, so a PP difference alone can never make
# two solutions distinct. Confirm this is intended.
DIST_THRESHOLD = 0.5

for fit, ind in scored:
    # fitness() yields either an in-window UTS (positive) or the -1e12
    # penalty, so a non-negative score means the candidate is feasible.
    if fit >= 0:
        if solutions:
            gap = np.linalg.norm(np.array(ind) - np.array(solutions[0][1]))
            if gap > DIST_THRESHOLD:
                solutions.append((fit, ind))
        else:
            # The best feasible candidate is always accepted.
            solutions.append((fit, ind))
    if len(solutions) == 2:
        break

# ---------- FINAL OUTPUT ----------
# `solutions` holds (UTS, [PP, CF, CS]) pairs, best first.
print("\n================== FINAL TWO DISTINCT OPTIMAL SOLUTIONS ==================")
for i, (uts,(pp,cf,cs)) in enumerate(solutions,1):
    print(f"{i}) UTS = {uts:.3f} MPa | PP={pp:.4f} kW | CF={cf:.2f} Hz | CS={cs:.2f} mm/s")
# The header promises two solutions; say so explicitly when the selection
# step produced fewer (or none) rather than printing a short or empty list.
if len(solutions) < 2:
    print(f"⚠️ Only {len(solutions)} distinct feasible solution(s) found.")
print("============================================================================")


Saving UTS_150_experiments.xlsx to UTS_150_experiments.xlsx

[RANDOM FOREST PERFORMANCE]
R² Score = 0.9240
Expected: R² ≈ 0.90–0.95 → 90–95% variance explained
MAE = 6.57 MPa
✅ Model quality: Excellent

[POWER-LAW REGRESSION EQUATION]
UTS ≈ 315.77 × (PP)^0.370 × (CF)^0.078 × (CS)^0.066

1) UTS = 660.000 MPa | PP=1.6802 kW | CF=120.13 Hz | CS=13.40 mm/s
2) UTS = 659.830 MPa | PP=1.6641 kW | CF=114.23 Hz | CS=14.94 mm/s
