In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from catboost import CatBoostRegressor
import joblib

In [None]:
def engineer(df):
    """Return a copy of ``df`` with five engineered feature columns appended.

    The input frame is not modified. It must provide the columns
    ``institute``, ``academic_program_name``, ``quota``, ``year`` and
    ``round``.

    Columns added, in this order:

    * ``institute_tier`` -- 1..4, from the first matching institute-name
      substring (case-sensitive); 4 when nothing matches.
    * ``branch_demand`` -- 1..4, from the first matching keyword group in the
      lower-cased programme name; 4 when nothing matches.
    * ``home_advantage`` -- 1 where ``quota`` equals ``"HS"``, otherwise 0.
    * ``year_norm`` -- ``year`` min-max scaled over the rows of this frame.
    * ``round_norm`` -- ``round`` divided by the largest ``round`` in this frame.

    Both normalised columns depend on which rows are present in ``df``. When
    every row has the same year the denominator is zero, so ``year_norm`` is
    NaN rather than a number.
    """
    out = df.copy()

    # Ordered (substring, tier) pairs: the first substring found wins.
    institute_rules = (
        ("Indian Institute of Technology", 1),
        ("National Institute of Technology", 2),
        ("Indian Institute of Information Technology", 3),
    )

    def _institute_tier(name):
        for needle, tier in institute_rules:
            if needle in name:
                return tier
        return 4

    # Ordered (keywords, tier) pairs, matched as plain substrings.
    # NOTE(review): "ai" is a bare substring test, so it also fires inside
    # words such as "sustainable" and does not fire on a spelled-out
    # "artificial intelligence". Kept as-is because these columns are
    # presumably inputs to an already-trained model -- confirm before changing.
    branch_rules = (
        (("computer", "ai", "data"), 1),
        (("elect",), 2),
        (("mechanical", "civil", "chemical"), 3),
    )

    def _branch_tier(program):
        lowered = program.lower()
        for keywords, tier in branch_rules:
            if any(keyword in lowered for keyword in keywords):
                return tier
        return 4

    out["institute_tier"] = out["institute"].apply(_institute_tier)
    out["branch_demand"] = out["academic_program_name"].apply(_branch_tier)
    out["home_advantage"] = (out["quota"] == "HS").astype(int)

    years = out["year"]
    first_year, last_year = years.min(), years.max()
    out["year_norm"] = (years - first_year) / (last_year - first_year)

    rounds = out["round"]
    out["round_norm"] = rounds / rounds.max()

    return out


In [None]:
# Restore the saved artefact; it unpacks into the fitted model, the list of
# feature columns passed to `model.predict`, and the categorical feature
# indices (the last one is not used in the cells shown here).
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code -- only load artefacts from a trusted source.
(model, feature_cols, categorical_idx) = joblib.load("/content/COLLEGE_MODEL (1).pkl")

# Read the cut-off table and append the engineered columns in one step.
df = engineer(pd.read_csv("/content/JoSAA_2019_2024_CLEAN_FINAL.csv"))


In [None]:
# `model`, `feature_cols` and `df` are created once by the loading cell above;
# the recommender below reads them as module-level globals, so the artefact is
# not unpickled a second time here.

def recommend_colleges_with_ML(
    user_rank,
    seat_type="OPEN",
    gender="Gender-Neutral",
    quota="AI",
    year=2024,
    round_no=5,
    *,
    frame=None,
    predictor=None,
    features=None,
):
    """Recommend non-IIT seats whose predicted closing rank admits ``user_rank``.

    Parameters
    ----------
    user_rank : number
        The candidate's rank; a seat is eligible when its predicted closing
        rank is greater than or equal to this value.
    seat_type, gender, quota, year, round_no
        Exact-match filters on the ``seat_type``, ``gender``, ``quota``,
        ``year`` and ``round`` columns.
    frame : pandas.DataFrame, optional
        Table to search. Defaults to the notebook-level ``df``.
    predictor : object with ``predict(DataFrame)``, optional
        Defaults to the notebook-level ``model``.
    features : list of column names, optional
        Columns handed to ``predictor.predict``. Defaults to the
        notebook-level ``feature_cols``.

    Returns
    -------
    pandas.DataFrame
        * If at least one seat is eligible: all eligible rows, sorted by
          ``predicted_closing_rank`` ascending.
        * If rows match the filters but none is eligible: the 20 rows whose
          prediction is closest to ``user_rank``, with an extra ``rank_diff``
          column, sorted by ``rank_diff`` ascending.
        * If no row matches the filters: an empty frame that still carries a
          ``predicted_closing_rank`` column (the model is not called).

    The source table is never modified.
    """
    source = df if frame is None else frame
    estimator = model if predictor is None else predictor
    columns = feature_cols if features is None else features

    # Build one boolean mask so only the matching rows are copied, instead of
    # duplicating the whole table first. `regex=False` makes the IIT test a
    # literal substring match and `na=False` treats a missing institute name
    # as "not an IIT" rather than letting NaN break the `~` negation.
    is_iit = source["institute"].str.contains(
        "Indian Institute of Technology", regex=False, na=False
    )
    keep = (
        (source["year"] == year)
        & (source["round"] == round_no)
        & (source["gender"] == gender)
        & (source["seat_type"] == seat_type)
        & (source["quota"] == quota)
        & ~is_iit
    )

    # Explicit copy: the columns added below land on an independent frame, so
    # pandas does not emit SettingWithCopyWarning and `source` stays untouched.
    candidates = source.loc[keep].copy()

    if candidates.empty:
        # Nothing matched the filters; skip the model call on an empty frame
        # and hand back an empty result with the expected column present.
        candidates["predicted_closing_rank"] = pd.Series(dtype="float64")
        return candidates

    candidates["predicted_closing_rank"] = estimator.predict(candidates[columns])

    eligible = candidates[candidates["predicted_closing_rank"] >= user_rank]
    if eligible.empty:
        # No seat is predicted to be reachable: fall back to the nearest ones.
        candidates["rank_diff"] = (
            candidates["predicted_closing_rank"] - user_rank
        ).abs()
        return candidates.sort_values("rank_diff").head(20)

    return eligible.sort_values("predicted_closing_rank")

In [None]:
# Example query: rank 10000, OPEN / Gender-Neutral / All-India quota seats,
# 2024 round 5.
result = recommend_colleges_with_ML(
    user_rank=10000,
    seat_type="OPEN",
    gender="Gender-Neutral",
    quota="AI",
    year=2024,
    round_no=5,
)

# A trailing expression lets the notebook render the frame with its rich
# table display instead of the plain-text dump that print() produces.
result.head(20)

                                               institute  \
80534  Atal Bihari Vajpayee Indian Institute of Infor...   
80825  Indian Institute of Information Technology, Al...   
81928                         Mizoram University, Aizawl   
81084  Indian Institute of Information Technology Luc...   
81047  Indian Institute of Information Technology Luc...   
82020  Shri Mata Vaishno Devi University, Katra, Jamm...   
81058  Indian Institute of Information Technology Luc...   
81071  Indian Institute of Information Technology Luc...   
80794  Indian Institute of Information Technology, Al...   
80559  Atal Bihari Vajpayee Indian Institute of Infor...   
80862  Indian Institute of Information Technology, De...   
81323  Indian Institute of Information Technology (II...   
80585  Atal Bihari Vajpayee Indian Institute of Infor...   
80940  Pt. Dwarka Prasad Mishra Indian Institute of I...   
80572  Atal Bihari Vajpayee Indian Institute of Infor...   
80889  Indian Institute of Information T