In [18]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Locations of the input CSV files (point these at your own copies)
train_file_path = 'train.csv'  # Replace with the path to your train.csv file
test_file_path = 'test.csv'  # Replace with the path to your test.csv file

# Read both datasets into DataFrames
train_data = pd.read_csv(train_file_path)
test_data = pd.read_csv(test_file_path)

# Target is the 'Survived' column; the features are everything else except
# the Name, Ticket and Cabin columns, which are discarded here
y = train_data['Survived']
X = train_data.drop(['Survived', 'Name', 'Ticket', 'Cabin'], axis=1)

# Hold out 20% of the labelled rows for validation (fixed seed for a stable split)
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The test set has no target column; drop the same unused columns from it
X_test = test_data.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Numerical columns: fill missing values with the column median, then
# standardize with StandardScaler
numerical_features = ['Age', 'Fare', 'SibSp', 'Parch']
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode (categories not seen during fit are ignored at transform time)
categorical_features = ['Pclass', 'Sex', 'Embarked']
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns through its own transformer
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Define the candidate classifiers, keyed by display name.
# The tree, forest and MLP estimators draw random numbers while fitting, so
# they are seeded (with the same seed as the train/validation split) to make
# the reported accuracies reproducible from run to run.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Multi-Layer Perceptron': MLPClassifier(max_iter=1000, random_state=42),
}

# Fit one candidate classifier behind the shared preprocessing and score it
def build_and_evaluate_model(model_name, model):
    """Train ``model`` on the training split and measure validation accuracy.

    The classifier is chained after the module-level ``preprocessor`` in a
    ``Pipeline``, fitted on ``X_train``/``y_train`` and scored on
    ``X_valid``/``y_valid`` with ``accuracy_score``.

    Args:
        model_name: Label identifying the model; returned unchanged.
        model: Estimator used as the pipeline's ``'classifier'`` step.

    Returns:
        A ``(model_name, accuracy)`` tuple.
    """
    fitted = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', model),
    ]).fit(X_train, y_train)
    return model_name, accuracy_score(y_valid, fitted.predict(X_valid))

# Evaluate every candidate; each call yields a (name, accuracy) pair, so the
# pairs can be collected straight into a name -> accuracy mapping
model_accuracies = dict(
    build_and_evaluate_model(name, model) for name, model in models.items()
)

# Tabulate the results: one row per model, columns 'Model' and 'Accuracy'
accuracy_df = pd.DataFrame(
    list(model_accuracies.items()),
    columns=['Model', 'Accuracy'],
)

# Optional bar chart of the accuracies (disabled)
# plt.figure(figsize=(10, 6))
# sns.barplot(data=accuracy_df, x='Accuracy', y='Model')
# plt.title('Model Accuracies')
# plt.xlabel('Accuracy')
# plt.ylabel('Model')
# plt.show()

# Show the table as the cell's output
accuracy_df


Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.798883
1,Decision Tree,0.787709
2,Random Forest,0.821229
3,Support Vector Machine,0.815642
4,K-Nearest Neighbors,0.815642
5,Multi-Layer Perceptron,0.810056


{'Logistic Regression': ('Logistic Regression',
  array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
         1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
         1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
         1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
         1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
         0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
         1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
         1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
         0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
         1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
         0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
         0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0