In [24]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight

In [25]:
# The dataset location can be overridden through the CAREER_DATA_CSV environment
# variable so the notebook is not tied to a single machine. When the variable is
# unset the original absolute path is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "CAREER_DATA_CSV",
    r"C:\Users\visha\OneDrive\Desktop\project_sem_VI\CareerPath_Navigator\student_career_counseling.csv",
)
df = pd.read_csv(DATA_PATH)

In [26]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [27]:
# Remove rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()

In [28]:
# Replace the career-stream labels with integer codes. The fitted encoder is
# kept so that model predictions can be mapped back to label text later.
label_encoder = LabelEncoder()
label_encoder.fit(df["Recommended_Career_Stream"])
df["Recommended_Career_Stream"] = label_encoder.transform(df["Recommended_Career_Stream"])


In [29]:
# Feature matrix: every column except the row identifier and the target.
X = df.drop(
    labels=["Student_ID", "Recommended_Career_Stream"],
    axis="columns",
)

In [30]:
# Target vector: the career-stream column (already label-encoded above).
y = df.loc[:, "Recommended_Career_Stream"]

In [31]:
# Standardise the features. The scaler's mean/variance are learned from the
# training rows only, so no statistics of the held-out test rows leak into
# preprocessing. The call below reproduces the row partition of the notebook's
# train/test split: it MUST use the same test_size and random_state as that
# split (0.2 and 42), otherwise the two partitions will no longer coincide.
X_fit, _X_holdout = train_test_split(X, test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(X_fit)

# All rows are transformed with the training-only statistics; the result is
# split into train/test afterwards.
X_scaled = scaler.transform(X)

In [32]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the partition
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# "Balanced" class weights, so rarer career streams are not drowned out by the
# frequent ones during training.
classes = np.unique(y)
class_weights = compute_class_weight(class_weight="balanced", classes=classes, y=y)

# Key each weight by the class label it belongs to rather than by its position
# in the array: the mapping stays correct even if the labels are not exactly
# 0..n-1. compute_class_weight returns the weights in the order of `classes`.
class_weight_dict = dict(zip(classes.tolist(), class_weights))


In [34]:
# Train a 150-tree random forest on the training partition, using the
# per-class weights computed above; the seed fixes the forest's randomness.
model = RandomForestClassifier(
    n_estimators=150,
    class_weight=class_weight_dict,
    random_state=42,
).fit(X_train, y_train)

In [35]:
# Predict the encoded career stream for every row of the held-out test set.
y_pred = model.predict(X_test)

In [36]:
# Compare the test-set predictions with the true labels and report the score
# rounded to two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.40


In [37]:
def predict_career(student_data):
    """Predict a career stream from one student's scores and interest ratings.

    A fixed rule is applied first: when Math, Physics, Chemistry and Biology
    are all below 60 the function returns "Arts" without consulting the model.
    Otherwise the values are scaled with the notebook-level ``scaler``,
    classified by the notebook-level ``model``, and the predicted code is
    mapped back to its label with ``label_encoder``.

    Parameters
    ----------
    student_data : dict
        Maps feature name to value. Must contain "Math", "Physics",
        "Chemistry" and "Biology". When the scaler recorded the column names
        it was fitted on (``feature_names_in_``), every one of those names
        must be present; key order does not matter and unknown keys are
        ignored.

    Returns
    -------
    The string "Arts", or the first label returned by
    ``label_encoder.inverse_transform`` for the model's prediction.

    Raises
    ------
    KeyError
        If one of the four PCMB subjects is missing from ``student_data``.
    ValueError
        If a feature the scaler was fitted on is missing from ``student_data``.
    """
    # Rule-based shortcut: all four PCMB scores below 60 -> "Arts".
    pcmb_subjects = ("Math", "Physics", "Chemistry", "Biology")
    pcmb_scores = [student_data[subject] for subject in pcmb_subjects]
    if all(score < 60 for score in pcmb_scores):
        return "Arts"

    # Otherwise, use the ML model prediction.
    student_df = pd.DataFrame([student_data])

    # A scaler fitted on a DataFrame remembers the training column names.
    # Align the single-row frame to exactly those columns, in that order, so
    # the result does not depend on the key order of the caller's dict.
    expected = getattr(scaler, "feature_names_in_", None)
    if expected is not None:
        expected = list(expected)
        missing = [name for name in expected if name not in student_df.columns]
        if missing:
            raise ValueError(f"student_data is missing feature(s): {missing}")
        student_df = student_df[expected]

    student_scaled = scaler.transform(student_df)
    prediction = model.predict(student_scaled)
    return label_encoder.inverse_transform(prediction)[0]


In [55]:
# Sample student with high science marks (at least one PCMB score is >= 60, so
# the prediction goes through the model).
# NOTE(review): the original literal defined "Arts" and "Psychology" twice
# (first as 70 / 65 next to the subject scores, then as 5 / 2 next to the
# interest ratings). Python keeps only the last value for a repeated key, so
# 70 and 65 never reached the model. Each key is written once here, with the
# value and position it actually had, so the resulting dict is unchanged. If
# the dataset is meant to carry both a score and an interest rating for these
# two, they need distinct keys - confirm against the CSV header.
student1 = {
    "Math": 80, "Physics": 70, "Chemistry": 100, "Biology": 100, "English": 85,
    "History": 75, "Computer Science": 95, "Economics": 80,
    "Arts": 5, "Psychology": 2,
    "Engineering": 5, "Medical": 9, "Business": 6, "Humanities": 3,
    "Science Research": 7, "Law": 4,
}


In [56]:
# Predict for the high-science profile. Its PCMB scores are not all below 60,
# so the "Arts" shortcut is skipped and the trained model decides.
predicted_career = predict_career(student1)
print(f"Predicted Career (High PCMB): {predicted_career}")


Predicted Career (High PCMB): Medical


In [57]:
# Sample student whose Math, Physics, Chemistry and Biology scores are all
# below 60 (exercises the rule-based "Arts" shortcut).
# NOTE(review): the original literal defined "Arts" and "Psychology" twice
# (first as 90 / 70 next to the subject scores, then as 9 / 5 next to the
# interest ratings). Python keeps only the last value for a repeated key, so
# 90 and 70 were silently discarded. Each key is written once here, with the
# value and position it actually had, so the resulting dict is unchanged. If
# the dataset is meant to carry both a score and an interest rating for these
# two, they need distinct keys - confirm against the CSV header.
student2 = {
    "Math": 55, "Physics": 50, "Chemistry": 58, "Biology": 52, "English": 80,
    "History": 85, "Computer Science": 60, "Economics": 65,
    "Arts": 9, "Psychology": 5,
    "Engineering": 3, "Medical": 2, "Business": 5, "Humanities": 6,
    "Science Research": 4, "Law": 3,
}

In [58]:
# Predict for the low-science profile. All four PCMB scores are below 60, so
# predict_career returns "Arts" from its rule without calling the model.
predicted_career2 = predict_career(student2)
print(f"Predicted Career (Low PCMB): {predicted_career2}")

Predicted Career (Low PCMB): Arts


If all PCMB scores (Physics, Chemistry, Math, Biology) are < 60 → Arts

---


If all PCM scores (Physics, Chemistry, Math) are > 80 → Engineering

---


If Biology and Chemistry are both > 80 → prioritize Medical.