In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from mlxtend.plotting import plot_decision_regions

In [None]:
!pip install opendatasets

In [3]:
import opendatasets as od

In [4]:
# Download the campus-placement dataset from Kaggle; the archive is extracted into
# ./factors-affecting-campus-placement (Kaggle credentials are requested interactively).
# The canonical dataset URL is used instead of the "discussion" tab link with its
# "?sort=hotness" query string, so the download does not rely on lenient URL parsing.
od.download("https://www.kaggle.com/datasets/benroshan/factors-affecting-campus-placement")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: varshapandian
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/benroshan/factors-affecting-campus-placement
Downloading factors-affecting-campus-placement.zip to ./factors-affecting-campus-placement


100%|██████████| 4.51k/4.51k [00:00<00:00, 10.1MB/s]







In [5]:
# Load the placement records from the directory created by the download step
placement_csv = "factors-affecting-campus-placement/Placement_Data_Full_Class.csv"
df = pd.read_csv(placement_csv)

In [6]:
# Data preprocessing: remove the columns that must not be used as features.
# NOTE(review): "sl_no" looks like a plain row serial number, and "salary" is
# presumably only known once a student has been placed (i.e. it would leak the
# target) -- confirm both against the dataset description.
# errors="ignore" keeps this cell idempotent: re-running it after the columns are
# already gone no longer raises KeyError.
df = df.drop(columns=["sl_no", "salary"], errors="ignore")

In [7]:
# Fit one LabelEncoder per categorical column, stored by column name so the
# string <-> integer mapping can be reused later, then overwrite each column
# with its integer codes.
categorical_columns = ["gender", "ssc_b", "hsc_b", "hsc_s", "degree_t", "workex", "specialisation", "status"]
label_encoders = {name: LabelEncoder().fit(df[name]) for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])

In [8]:
# Separate the target column from the predictors
y = df["status"]  # encoded target (1 = Placed, 0 = Not Placed)
X = df.loc[:, df.columns != "status"]  # every remaining column is a feature

In [9]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [10]:
# Standardize every feature column. The scaler learns its statistics from the
# training split only and the same transformation is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [11]:
# Train an RBF-kernel SVM classifier on the standardized training split.
# probability=True enables predict_proba; that calibration step shuffles the data
# internally, so random_state is fixed to make the reported probabilities
# reproducible across runs (the hard predictions do not depend on it).
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42)
svm_model.fit(X_train, y_train)

In [12]:
# Predict the class label of every held-out test row
y_pred = svm_model.predict(X=X_test)

In [13]:
# Score the test-set predictions: overall accuracy plus the per-class report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("SVM Accuracy:", accuracy)
print("Classification Report:\n", report)

SVM Accuracy: 0.813953488372093
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.50      0.60        12
           1       0.83      0.94      0.88        31

    accuracy                           0.81        43
   macro avg       0.79      0.72      0.74        43
weighted avg       0.81      0.81      0.80        43



In [14]:
def _ask_category(col, encoder):
    """Prompt until the user enters a label known to ``encoder``; return its integer code."""
    options = list(encoder.classes_)
    print(f"Options for {col}: {options}")
    while True:
        answer = input(f"Enter {col}: ").strip()
        if answer in options:
            return encoder.transform([answer])[0]  # Convert to numerical
        # An unknown label would make encoder.transform raise ValueError; ask again instead.
        print(f"'{answer}' is not a valid option for {col}; choose one of {options}.")


def _ask_number(col):
    """Prompt until the user enters a finite number; return it as a float."""
    while True:
        answer = input(f"Enter {col}: ").strip()
        try:
            value = float(answer)
        except ValueError:
            print(f"'{answer}' is not a number; please enter a numeric value for {col}.")
            continue
        if np.isfinite(value):
            return value
        # float() also accepts "nan"/"inf", which are not usable feature values.
        print(f"{col} must be a finite number.")


def predict_placement():
    """Interactively collect one student's details and print the predicted placement.

    Prompts for every column of ``X`` (categorical columns are entered as their
    original string labels and encoded with the fitted ``label_encoders``),
    standardizes the row with the fitted ``scaler`` and classifies it with
    ``svm_model``. Invalid entries are re-prompted instead of aborting with
    ``ValueError``.

    Prints the predicted label and the model's estimated probability for that
    label. Returns ``None``.
    """
    print("\nEnter student details:")
    user_data = {}

    for col in X.columns:
        if col in categorical_columns:
            user_data[col] = _ask_category(col, label_encoders[col])
        else:
            user_data[col] = _ask_number(col)

    # Build a single-row frame with the columns in the same order as the training features
    user_df = pd.DataFrame([user_data], columns=X.columns)
    user_scaled = scaler.transform(user_df)  # Standardize user input

    # Make prediction
    prediction = svm_model.predict(user_scaled)[0]
    # predict_proba columns follow svm_model.classes_, so look up the column of the
    # predicted label explicitly instead of using the label itself as an index.
    class_index = list(svm_model.classes_).index(prediction)
    # NOTE: scikit-learn documents that SVC.predict_proba may be inconsistent with
    # SVC.predict, so the reported confidence can be close to (or below) 50%.
    probability = svm_model.predict_proba(user_scaled)[0][class_index] * 100

    # Display result
    result = "Placed" if prediction == 1 else "Not Placed"
    print("\nPrediction:", result)
    print("Confidence:", f"{probability:.2f}%")

# Ask for one student's details interactively and show the prediction
predict_placement()

# Diagnostics: class balance of the target and train vs. test accuracy
class_counts = y.value_counts()
train_accuracy = svm_model.score(X_train, y_train)
test_accuracy = svm_model.score(X_test, y_test)
print("Class distribution:\n", class_counts)  # Check data imbalance
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


Enter student details:
Options for gender: ['F', 'M']
Enter gender: F
Enter ssc_p: 1
Options for ssc_b: ['Central', 'Others']
Enter ssc_b: Central
Enter hsc_p: 1
Options for hsc_b: ['Central', 'Others']
Enter hsc_b: Others
Options for hsc_s: ['Arts', 'Commerce', 'Science']
Enter hsc_s: Arts
Enter degree_p: 1
Options for degree_t: ['Comm&Mgmt', 'Others', 'Sci&Tech']
Enter degree_t: Others
Options for workex: ['No', 'Yes']
Enter workex: No
Enter etest_p: 1
Options for specialisation: ['Mkt&Fin', 'Mkt&HR']
Enter specialisation: Mkt&Fin
Enter mba_p: 1

Prediction: Placed
Confidence: 51.16%
Class distribution:
 status
1    148
0     67
Name: count, dtype: int64
Train Accuracy: 0.9476744186046512
Test Accuracy: 0.813953488372093
