In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Read the placement dataset from disk into a DataFrame.
file_path = '/content/collegePlace.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Integer-encode the two text columns. One encoder is kept per column so the
# fitted label -> integer mapping can be reused when encoding new samples
# for prediction further down.
# NOTE(review): label encoding gives 'Stream' an arbitrary numeric order;
# confirm that is acceptable for the linear models trained on it.
le_gender = LabelEncoder()
le_stream = LabelEncoder()
data = data.assign(
    Gender=le_gender.fit_transform(data['Gender']),
    Stream=le_stream.fit_transform(data['Stream']),
)

# Columns used as model inputs; 'PlacedOrNot' is the label to predict.
feature_columns = [
    'Age',
    'Gender',
    'Stream',
    'Internships',
    'CGPA',
    'Hostel',
    'HistoryOfBacklogs',
]
X = data[feature_columns]
y = data['PlacedOrNot']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Logistic regression baseline, trained on the raw (unscaled) training split.
# max_iter is raised to 1000 to give the solver more iterations to converge.
logistic_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out split and report accuracy as a percentage.
y_pred_logistic = logistic_model.predict(X_test)
accuracy_logistic = accuracy_score(y_true=y_test, y_pred=y_pred_logistic)
print(f"Logistic Regression Model Accuracy: {accuracy_logistic * 100:.2f}%")

# Random forest with 100 trees; the fixed seed makes training repeatable.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out split and report accuracy as a percentage.
y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print(f"Random Forest Model Accuracy: {accuracy_rf * 100:.2f}%")

# Support Vector Classifier (SVC) Model
# Standardize the features first. The scaler is fitted on the training split
# only and then applied to both splits, so no test-set statistics leak into
# training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVC trained on the standardized features.
svc_model = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)

# Score the standardized held-out split and report accuracy as a percentage.
y_pred_svc = svc_model.predict(X_test_scaled)
accuracy_svc = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
print(f"SVC Model Accuracy: {accuracy_svc * 100:.2f}%")

# Single hand-written sample scored with the Random Forest model.
# Columns appear in the same order as the training features:
# Age, Gender (encoded), Stream (encoded), Internships, CGPA,
# Hostel (0 or 1), HistoryOfBacklogs (0 or 1).
example_data = pd.DataFrame(
    {
        'Age': [21],
        # Reuse the fitted encoders so the text labels map to the same
        # integers the model saw during training.
        'Gender': le_gender.transform(['Male']),  # Change to 'Female' if needed
        'Stream': le_stream.transform(['Computer Science']),  # Any stream present in the dataset
        'Internships': [1],
        'CGPA': [8],
        'Hostel': [1],
        'HistoryOfBacklogs': [0],
    }
)

# A predicted class of 1 is reported as "likely to be placed".
example_prediction = rf_model.predict(example_data)
print(
    "The student is likely to be placed."
    if example_prediction[0] == 1
    else "The student is not likely to be placed."
)

# Second hand-written sample, intended to illustrate a negative prediction:
# no internships, a low CGPA and a history of backlogs.
example_data_not_placed = pd.DataFrame(
    {
        'Age': [22],
        'Gender': le_gender.transform(['Female']),  # Encoded with the fitted gender encoder
        'Stream': le_stream.transform(['Mechanical']),  # Encoded with the fitted stream encoder
        'Internships': [0],
        'CGPA': [5],
        'Hostel': [0],  # Not in Hostel
        'HistoryOfBacklogs': [1],  # History of Backlogs
    }
)

# Score the sample with the Random Forest model.
example_prediction_not_placed = rf_model.predict(example_data_not_placed)

# Map "predicted class is 1" (True/False) to the message to print.
placement_messages = {
    True: "The student is likely to be placed.",
    False: "The student is not likely to be placed.",
}
print(placement_messages[bool(example_prediction_not_placed[0] == 1)])



Logistic Regression Model Accuracy: 72.22%
Random Forest Model Accuracy: 87.37%
SVC Model Accuracy: 73.23%
The student is likely to be placed.
The student is not likely to be placed.
