In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [13]:
df = pd.read_csv('college_admission_prediction.csv') #Reading sample dataset to train the modal.
df.head()
df.columns

Index(['Year', '10th Marks', '12th Marks', '12th Division', 'AIEEE Rank',
       'College'],
      dtype='object')

In [14]:
#Checking null values in dataset.
df.isnull().sum()
df.info

<bound method DataFrame.info of       Year  10th Marks  12th Marks  12th Division  AIEEE Rank  \
0     2019          90          89              3          98   
1     2015          95          92              2         100   
2     2018          91          80              6         260   
3     2017          88          85              2         222   
4     2016          89          84              1         600   
...    ...         ...         ...            ...         ...   
999   2016          86          87              6        1533   
1000  2018          80          89              6         854   
1001  2018          86          87              8        1232   
1002  2019          78          85             11        1730   
1003  2017          85          81              5        1757   

               College  
0           IIT Bombay  
1            IIT delhi  
2           IIT kanpur  
3        IIT kharagpur  
4         IIT guwahati  
...                ...  
999         

In [15]:
df.shape

(1004, 6)

In [17]:
X = df.drop(columns=["College"])  # Features (independent variables)
y = df["College"]  # Target (dependent variable)

In [18]:
#Convert categorical target variable into numerical labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y)  #Convert College names to numbers

In [19]:
from sklearn.model_selection import train_test_split

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [20]:
from sklearn.ensemble import RandomForestClassifier

#Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

#Make predictions
y_pred = model.predict(X_test)


In [21]:
from sklearn.metrics import accuracy_score

#Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.93


In [23]:
import numpy as np

# Function to take user input
def get_user_input():
    year = int(input("Enter Year of Admission: "))
    tenth_marks = float(input("Enter 10th Marks (out of 100): "))
    twelfth_marks = float(input("Enter 12th Marks (out of 100): "))
    twelfth_div = int(input("Enter 12th Division (1 for First, 2 for Second, etc.): "))
    aieee_rank = int(input("Enter AIEEE Rank: "))

    return [[year, tenth_marks, twelfth_marks, twelfth_div, aieee_rank]]

# Load saved model
import joblib
model = joblib.load("admission_model.pkl")
le = joblib.load("label_encoder.pkl")

# Get user input
user_data = get_user_input()

# Predict the college
predicted_college_index = model.predict(user_data)
predicted_college = le.inverse_transform(predicted_college_index)

print(f"\nPredicted College: {predicted_college[0]}")


Enter Year of Admission:  2019
Enter 10th Marks (out of 100):  80
Enter 12th Marks (out of 100):  71
Enter 12th Division (1 for First, 2 for Second, etc.):  4
Enter AIEEE Rank:  2000



Predicted College: BITS pilani


