In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
#model1

In [3]:

df = pd.read_csv('reduced_data.csv')

# Features and target
X = df.drop(columns=['NAME OF STUDENT', 'ROLL NO','DEPARTMENT','GAME','WINNERS OF 2023','GENDER'])
y = df['WINNERS OF 2023']  # We use winner_2023 as a proxy to train the model

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Support Vector Machine': SVC(random_state=42, probability=True),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

# Train and evaluate models
for name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]
    
   # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='binary')
    recall = recall_score(y_test, y_pred, average='binary')
    f1 = f1_score(y_test, y_pred, average='binary')
    roc_auc = roc_auc_score(y_test, y_prob)
    print('Model:',model)
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'ROC-AUC Score: {roc_auc:.4f}')
    print('--------------------------')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model: LogisticRegression(random_state=42)
Accuracy: 0.9439
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC-AUC Score: 0.6108
--------------------------
Model: RandomForestClassifier(random_state=42)
Accuracy: 0.9346
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC-AUC Score: 0.8531
--------------------------
Model: SVC(probability=True, random_state=42)
Accuracy: 0.9439
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC-AUC Score: 0.4453
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model: GradientBoostingClassifier(random_state=42)
Accuracy: 0.9439
Precision: 0.5000
Recall: 0.0833
F1 Score: 0.1429
ROC-AUC Score: 0.8298
--------------------------


In [5]:


# Create and train a Gradient Boosting classifier
clf = GradientBoostingClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on the test set with Gradient Boosting: {:.2f}%".format(accuracy * 100))



actual_values = y_test  # Replace 'Actual_Target_Column' with the actual column name
predicted_values = y_pred  # Replace 'Predicted_Target_Column' with the predicted column name

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(actual_values, predicted_values)
print(f"Mean Absolute Error (MAE): {mae}")

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(actual_values, predicted_values)
print(f"Mean Squared Error (MSE): {mse}")

Accuracy on the test set with Gradient Boosting: 94.39%
Mean Absolute Error (MAE): 0.056074766355140186
Mean Squared Error (MSE): 0.056074766355140186


In [7]:
predictions_df = pd.DataFrame({
    'Actual': y_test,
    'Predicted': y_pred,
    'Probability': y_prob
})

print(predictions_df)

     Actual  Predicted  Probability
708       0          0     0.012504
215       0          0     0.103912
882       0          0     0.011316
88        0          1     0.758707
842       0          0     0.160470
..      ...        ...          ...
451       0          0     0.002838
602       1          0     0.010384
650       0          0     0.030763
582       0          0     0.003219
277       1          0     0.246025

[214 rows x 3 columns]


In [None]:
#model2

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report




# Encode categorical variables
label_encoder = LabelEncoder()
df['GAME'] = label_encoder.fit_transform(df['GAME'])
df['DEPARTMENT'] = label_encoder.fit_transform(df['DEPARTMENT'])
df['GENDER'] = label_encoder.fit_transform(df['GENDER'])
df['PART OF NSO OR NOT'] = df['PART OF NSO OR NOT'].map({'Yes': 1, 'No': 0})

# Aggregating the Data: Find the department with the most winners per game
df_winners = df[df['WINNERS OF 2023'] == 1]  # Only consider students who won
df_dept_winners = df_winners.groupby(['GAME', 'DEPARTMENT']).size().reset_index(name='WIN_COUNT')

# Find the department with max wins per game
df_dept_winners = df_dept_winners.loc[df_dept_winners.groupby('GAME')['WIN_COUNT'].idxmax()]

# Rename for clarity
df_dept_winners.rename(columns={'DEPARTMENT': 'WINNING_DEPARTMENT_2023'}, inplace=True)

# Merge with main dataset
df = df.merge(df_dept_winners[['GAME', 'WINNING_DEPARTMENT_2023']], on='GAME', how='left')

# Shift department winners to create a prediction target
df['WINNING_DEPARTMENT_2024'] = df['WINNING_DEPARTMENT_2023'].shift(-1)  # Move winners forward

# Drop NaN rows after shifting
df = df.dropna(subset=['WINNING_DEPARTMENT_2024'])

# Define features and target
features = ['GAME', 'GENDER', 'PARTICIPATION IN 2022', 'PARTICIPATION IN 2023', 'PART OF NSO OR NOT', 'BATCH']
target = 'WINNING_DEPARTMENT_2024'  # Now predicting 2024

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42)

# Train the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate Performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Predict department winners for 2024 per sport
future_participants = df[features]  # Using 2023 data to predict 2024 winners
df['PREDICTED_WINNING_DEPARTMENT_2024'] = model.predict(future_participants)


Accuracy: 0.6818181818181818
              precision    recall  f1-score   support

         0.0       0.50      0.33      0.40         3
         1.0       0.60      0.75      0.67         8
         2.0       0.80      0.73      0.76        11

    accuracy                           0.68        22
   macro avg       0.63      0.60      0.61        22
weighted avg       0.69      0.68      0.68        22



In [27]:
df['GAME'] = game_encoder.fit_transform(df['GAME'])
df['WINNING_DEPARTMENT_2023'] = dept_encoder.fit_transform(df['WINNING_DEPARTMENT_2023'])

In [30]:
game_dict={}
game_dict[0]='Badminton'
game_dict[1]='Basketball'
game_dict[2]='Football'
game_dict[3]='Cricket'
game_dict[4]='Volleyball'
game_dict[5]='Table Tennis'
game_dict[6]='Tennis'
game_dict[7]='Carroms'
game_dict[8]='Kabaddi'
game_dict[9]='Chess'

dept_dict={}
dept_dict[0]='CSE'
dept_dict[1]='ECE'
dept_dict[2]='MECH'

In [36]:
df['PREDICTED_WINNING_DEPARTMENT_2024'] = df['PREDICTED_WINNING_DEPARTMENT_2024'].astype(int)



# Select only unique game-wise predictions for display
predictions = df.groupby('GAME')['PREDICTED_WINNING_DEPARTMENT_2024'].agg(lambda x: x.mode()[0]).reset_index()

# Print predictions in a readable format
print("\n🏆 Predicted Winning Departments for 2024 🏆\n")
for index, row in predictions.iterrows():
    print(f"🎯 Game: {game_dict[row['GAME']]} → 🏅 Winning Department: {dept_dict[row['PREDICTED_WINNING_DEPARTMENT_2024']]}")


🏆 Predicted Winning Departments for 2024 🏆

🎯 Game: Badminton → 🏅 Winning Department: ECE
🎯 Game: Basketball → 🏅 Winning Department: ECE
🎯 Game: Volleyball → 🏅 Winning Department: MECH
🎯 Game: Table Tennis → 🏅 Winning Department: MECH
🎯 Game: Tennis → 🏅 Winning Department: CSE
🎯 Game: Carroms → 🏅 Winning Department: CSE
🎯 Game: Kabaddi → 🏅 Winning Department: ECE
🎯 Game: Chess → 🏅 Winning Department: CSE
