In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib


In [None]:
# Location of the mock-test CSV that the rest of the notebook works from.
dataset_path = "/content/Synthetic_JEE_Mock_Dataset.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Bare last expression: renders the loaded DataFrame as this cell's output.
df

Unnamed: 0,physics_score,chemistry_score,maths_score,total_score,attempted_questions,correct_questions,wrong_questions,unattempted_questions,negative_marks,accuracy_percent,...,time_chemistry_sec,time_maths_sec,easy_accuracy_percent,medium_accuracy_percent,hard_accuracy_percent,attempt_consistency_score,guessing_tendency_score,speed_variation_score,approx_percentile,approx_rank
0,45,50,50,145,75,44,31,15,31,58.666667,...,3674.571127,3678.732050,73.333333,58.823529,18.181818,97.851526,41.333333,11.600828,63.136426,582444
1,40,0,15,55,75,24,51,15,51,32.000000,...,3374.280442,4197.517399,43.333333,32.352941,0.000000,91.024859,68.000000,16.732951,43.813775,887742
2,0,65,0,65,75,23,52,15,52,30.666667,...,3645.136279,3872.453307,40.000000,26.470588,18.181818,94.536640,69.333333,16.328132,47.694952,826419
3,65,25,15,105,75,36,39,15,39,48.000000,...,3231.687958,4242.056148,46.666667,52.941176,36.363636,90.644739,52.000000,18.298336,56.339175,689841
4,55,0,70,125,75,36,39,15,39,48.000000,...,3695.042127,3764.763057,53.333333,44.117647,45.454545,96.068813,52.000000,11.951774,58.436026,656710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,60,0,15,75,75,30,45,15,45,40.000000,...,3446.539373,3853.955068,26.666667,47.058824,54.545455,96.227632,60.000000,12.410091,48.606198,812022
9996,35,75,85,195,75,54,21,15,21,72.000000,...,3410.979279,4056.486452,70.000000,76.470588,63.636364,93.296739,28.000000,18.177380,70.732758,462422
9997,0,80,65,145,75,40,35,15,35,53.333333,...,3460.315699,3647.334319,50.000000,58.823529,45.454545,97.851534,46.666667,23.627092,62.169909,597715
9998,25,10,65,100,75,35,40,15,40,46.666667,...,3590.616050,3947.619041,46.666667,50.000000,36.363636,93.649499,53.333333,10.670802,51.349697,768674


In [None]:
# Columns of `df` that are fed to the regressors, in the order the scaler and
# models see them. Built from three thematic groups; the concatenated order is
# what matters downstream.
feature_cols = (
    # Per-subject marks and their total.
    ['physics_score', 'chemistry_score', 'maths_score', 'total_score']
    # Question-count breakdown and the negative-marking penalty.
    + ['correct_questions', 'wrong_questions', 'unattempted_questions',
       'attempted_questions', 'negative_marks']
    # Timing information.
    + ['total_time_seconds', 'avg_time_per_question_sec']
)

In [None]:
# Feature matrix (all rows, only the selected feature columns) and the two
# regression targets taken from the same frame.
X = df.loc[:, feature_cols]
y_p, y_r = df["approx_percentile"], df["approx_rank"]


In [None]:
# Hold out 20% of the rows for evaluation. Both targets go through the same
# call so the percentile and rank labels stay aligned with the same feature
# rows; the fixed random_state makes the split repeatable.
(
    X_train, X_test,
    y_train_p, y_test_p,
    y_train_r, y_test_r,
) = train_test_split(X, y_p, y_r, random_state=42, test_size=0.2)

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# one fitted transform to both splits so the test rows never influence it.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Gradient-boosted regressor for the percentile target, trained on the scaled
# training features. `fit` returns the estimator itself, so it can be chained.
percentile_model = GradientBoostingRegressor(random_state=42).fit(
    X_train_scaled, y_train_p
)
percentile_model  # last expression, so the fitted estimator is still displayed


In [None]:
# Gradient-boosted regressor for the rank target, trained on the same scaled
# training features as the percentile model.
rank_model = GradientBoostingRegressor(random_state=42).fit(
    X_train_scaled, y_train_r
)
rank_model  # last expression, so the fitted estimator is still displayed


In [None]:
# Score the held-out rows with each fitted model (percentile first, then rank).
p_pred, r_pred = (
    fitted.predict(X_test_scaled) for fitted in (percentile_model, rank_model)
)


In [None]:
# Print MAE, RMSE and R2 on the held-out split for each model. RMSE is taken
# as the square root of the mean squared error.
for header, y_true, y_hat in (
    ("\n===== Percentile Model Performance (Final) =====", y_test_p, p_pred),
    ("\n===== Rank Model Performance (Final) =====", y_test_r, r_pred),
):
    print(header)
    print("MAE :", mean_absolute_error(y_true, y_hat))
    print("RMSE:", mean_squared_error(y_true, y_hat)**0.5)
    print("R2  :", r2_score(y_true, y_hat))



===== Percentile Model Performance (Final) =====
MAE : 1.2368306392300983
RMSE: 1.5369315667571515
R2  : 0.9837888444794409

===== Rank Model Performance (Final) =====
MAE : 19541.91387676373
RMSE: 24283.51032796988
R2  : 0.9837888538164642


In [None]:
# Persist both fitted models and the fitted scaler next to the notebook
# (paths are relative to the current working directory).
for artifact, filename in (
    (percentile_model, "Model2_Percentile.pkl"),
    (rank_model, "Model2_Rank.pkl"),
    (scaler, "Model2_Scaler.pkl"),
):
    joblib.dump(artifact, filename)

# One print call with a newline separator emits the same four lines as four
# separate prints would.
print(
    "\nSaved:",
    " - Model2_Percentile.pkl",
    " - Model2_Rank.pkl",
    " - Model2_Scaler.pkl",
    sep="\n",
)


Saved:
 - Model2_Percentile.pkl
 - Model2_Rank.pkl
 - Model2_Scaler.pkl


In [None]:
import joblib
import pandas as pd

In [None]:
# Reload the persisted models and scaler for inference.
#
# The artifacts are written with joblib.dump to working-directory-relative
# filenames, so they are read back from those same relative names. Loading
# from a hard-coded "/content/..." location only worked when the working
# directory happened to be /content, and could otherwise fail or silently pick
# up stale files from an earlier run.
#
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load artifact files that this notebook (or another trusted source)
# produced.
percentile_model = joblib.load("Model2_Percentile.pkl")
rank_model = joblib.load("Model2_Rank.pkl")
scaler = joblib.load("Model2_Scaler.pkl")

In [None]:
# Feature order expected at inference time; the single-row frame below is
# laid out in exactly this column order.
feature_cols = [
    'physics_score', 'chemistry_score', 'maths_score', 'total_score',
    'correct_questions', 'wrong_questions', 'unattempted_questions',
    'attempted_questions', 'negative_marks',
    'total_time_seconds', 'avg_time_per_question_sec',
]

# One hand-written example attempt, keyed by feature name.
test_input = {
    # Marks.
    'physics_score': 52, 'chemistry_score': 63, 'maths_score': 48,
    'total_score': 163,
    # Question counts and penalty.
    'correct_questions': 51, 'wrong_questions': 19,
    'unattempted_questions': 30, 'attempted_questions': 70,
    'negative_marks': 19,
    # Timing.
    'total_time_seconds': 10150, 'avg_time_per_question_sec': 145,
}

# Build a one-row frame from the record, then select the columns in feature
# order (a missing feature key raises KeyError here).
input_df = pd.DataFrame([test_input])[feature_cols]

# Apply the reloaded scaler to the single-row frame; the models below predict
# from this scaled array rather than from the raw values.
input_scaled = scaler.transform(input_df)


In [None]:
# Each model returns one prediction per input row; the input has a single
# row, so take element 0 from each result (percentile first, then rank).
predicted_percentile, predicted_rank = (
    fitted.predict(input_scaled)[0] for fitted in (percentile_model, rank_model)
)


In [None]:
# Report the two predictions for the example input, one per line.
for label, value in (
    (" Predicted Percentile:", predicted_percentile),
    (" Predicted Rank:", predicted_rank),
):
    print(label, value)


 Predicted Percentile: 70.35816363063878
 Predicted Rank: 458440.21985226567
