In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Load the dataset
df = pd.read_csv(r"C:\Users\Minahil Gillani\Downloads\AI proj\Video_Game_Information.csv")

# Drop rows with missing values
df = df.dropna()

# Encode categorical variables
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Split into features (X) and target (y)
X = df.drop(['Platform'], axis=1)  # Exclude 'Platform' from features
y = df['Platform']  # Target variable 'Platform'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (if necessary)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Define classification models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Random Forest Classifier': RandomForestClassifier(),
    'SVM Classifier': SVC(),
    'KNN Classifier': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

# Evaluate models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    results[name] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}

# Identify the best model based on accuracy
best_model = max(results, key=lambda x: results[x]['Accuracy'])
best_score = results[best_model]['Accuracy']
# Print results
print("Model Evaluation Results:")
for name, scores in results.items():
    print(f"{name}:")
    print(f"  Accuracy: {scores['Accuracy']:.4f}")
    print(f"  Precision: {scores['Precision']:.4f}")
    print(f"  Recall: {scores['Recall']:.4f}")
    print(f"  F1 Score: {scores['F1 Score']:.4f}")
    print()

print(f"Best Model: {best_model} with Accuracy of {best_score:.4f}")





  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model Evaluation Results:
Logistic Regression:
  Accuracy: 0.7200
  Precision: 0.5217
  Recall: 0.7200
  F1 Score: 0.6039

Decision Tree Classifier:
  Accuracy: 0.5200
  Precision: 0.4911
  Recall: 0.5200
  F1 Score: 0.5051

Random Forest Classifier:
  Accuracy: 0.6400
  Precision: 0.4533
  Recall: 0.6400
  F1 Score: 0.5307

SVM Classifier:
  Accuracy: 0.6800
  Precision: 0.4624
  Recall: 0.6800
  F1 Score: 0.5505

KNN Classifier:
  Accuracy: 0.6000
  Precision: 0.4435
  Recall: 0.6000
  F1 Score: 0.5100

Naive Bayes:
  Accuracy: 0.3600
  Precision: 0.5610
  Recall: 0.3600
  F1 Score: 0.4120

Best Model: Logistic Regression with Accuracy of 0.7200


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
