In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# SVC baseline: load the training CSV, preprocess numeric and categorical
# columns separately, fit on an 80/20 split and report hold-out metrics.
df = pd.read_csv('./data/train.csv')

# Identifier-like columns that are (nearly) unique per row carry no signal
# that generalises. Left in, the ordinal codes of the free-text columns span
# hundreds of unscaled integer values that dominate the RBF kernel distances
# next to the standardised numeric features, and every value unseen during
# fit collapses to the same code (-1).
# NOTE(review): these names assume the Kaggle Titanic schema - confirm
# against the CSV. Names that are absent are skipped via errors='ignore'.
id_like_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']

X = df.drop('Survived', axis=1).drop(columns=id_like_columns, errors='ignore')
y = df['Survived']

# Column groups are derived from dtypes after the identifier columns are gone.
numeric_features = X.select_dtypes(include=['int64','float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

# Numeric columns: median-impute missing values, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
# Categorical columns: fill gaps with the literal 'missing', then map each
# category to an integer code; categories not seen during fit become -1.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
])
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)
])

# Fixed seed so the hold-out set is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing lives inside the pipeline, so imputation statistics, scaling
# parameters and category codes are learned from the training split only.
svc_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', SVC())
])
svc_pipeline.fit(X_train, y_train)

# Any metrics saved under this cell from an earlier run predate the
# identifier-column fix; re-run the cell to refresh them.
y_pred = svc_pipeline.predict(X_test)
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.19      0.28       105
           1       0.40      0.76      0.52        74

    accuracy                           0.42       179
   macro avg       0.46      0.47      0.40       179
weighted avg       0.47      0.42      0.38       179

Confusion Matrix:
[[20 85]
 [18 56]]


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import pandas as pd
# Gaussian Naive Bayes baseline: load the training CSV, preprocess numeric
# and categorical columns separately, fit on an 80/20 split and report
# hold-out metrics.
df = pd.read_csv('./data/train.csv')

# Identifier-like columns that are (nearly) unique per row carry no signal
# that generalises. One-hot encoding them produces hundreds of indicator
# columns that are almost entirely zero, which distorts the per-feature
# variance estimates GaussianNB relies on and swamps the real predictors.
# NOTE(review): these names assume the Kaggle Titanic schema - confirm
# against the CSV. Names that are absent are skipped via errors='ignore'.
id_like_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']

X = df.drop('Survived', axis=1).drop(columns=id_like_columns, errors='ignore')
y = df['Survived']

# Column groups are derived from dtypes after the identifier columns are gone.
num_cols = X.select_dtypes(include=['int64','float64']).columns
cat_cols = X.select_dtypes(include=['object']).columns

# Numeric columns: median-impute missing values, then standardise.
num_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='median')),
    ('scale', StandardScaler())
])
# Categorical columns: fill gaps with the literal 'missing', then one-hot
# encode; categories not seen during fit encode as an all-zero row.
# Dense output is requested via sparse_output=False.
cat_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer([
    ('nums', num_pipe, num_cols),
    ('cats', cat_pipe, cat_cols)
])

nb_pipeline = Pipeline([
    ('prep', preprocessor),
    ('nb', GaussianNB())
])

# Fixed seed so the hold-out set is reproducible across runs.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
nb_pipeline.fit(X_tr, y_tr)

# Any metrics saved under this cell from an earlier run predate the
# identifier-column fix; re-run the cell to refresh them.
y_pred = nb_pipeline.predict(X_te)
print(classification_report(y_te, y_pred))


              precision    recall  f1-score   support

           0       0.79      0.21      0.33       105
           1       0.45      0.92      0.60        74

    accuracy                           0.50       179
   macro avg       0.62      0.56      0.47       179
weighted avg       0.65      0.50      0.44       179



In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

# Linear Discriminant Analysis baseline: load the training CSV, preprocess
# numeric and categorical columns separately, fit on an 80/20 split and
# report hold-out metrics.
df = pd.read_csv('./data/train.csv')

# Identifier-like columns that are (nearly) unique per row carry no signal
# that generalises. One-hot encoding them adds hundreds of almost-all-zero
# indicator columns, leaving LDA with far more indicator columns than
# informative features and letting the model memorise row identities.
# NOTE(review): these names assume the Kaggle Titanic schema - confirm
# against the CSV. Names that are absent are skipped via errors='ignore'.
id_like_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']

X = df.drop('Survived', axis=1).drop(columns=id_like_columns, errors='ignore')
y = df['Survived']

# Column groups are derived from dtypes after the identifier columns are gone.
num_cols = X.select_dtypes(include=['int64','float64']).columns
cat_cols = X.select_dtypes(include=['object']).columns

# Numeric columns: median-impute missing values, then standardise.
num_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='median')),
    ('scale', StandardScaler())
])
# Categorical columns: fill gaps with the literal 'missing', then one-hot
# encode; categories not seen during fit encode as an all-zero row.
cat_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

preprocessor = ColumnTransformer([
    ('nums', num_pipe, num_cols),
    ('cats', cat_pipe, cat_cols)
])

lda_pipeline = Pipeline([
    ('prep', preprocessor),
    ('lda', LinearDiscriminantAnalysis())
])

# Fixed seed so the hold-out set is reproducible across runs.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
lda_pipeline.fit(X_tr, y_tr)

# Any metrics saved under this cell from an earlier run predate the
# identifier-column fix; re-run the cell to refresh them.
y_pred = lda_pipeline.predict(X_te)
print(classification_report(y_te, y_pred))


              precision    recall  f1-score   support

           0       0.69      0.94      0.80       105
           1       0.83      0.41      0.55        74

    accuracy                           0.72       179
   macro avg       0.76      0.67      0.67       179
weighted avg       0.75      0.72      0.69       179

