Classification models for the Titanic dataset, built with scikit-learn

Survived - target (the label to predict)
Pclass (класс пассажира) - numeric 3>2>1
Sex - nominal
Age - numeric
SibSp (братья/сестры) - numeric
Parch (родители/дети) - numeric
Fare (стоимость проезда) - numeric 
Cabin - nominal
Embarked - nominal 

In [7]:
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd
# Hyper-parameter grid handed to GridSearchCV. The ``clf__`` prefix addresses
# the pipeline step named 'clf' (the random forest).
params = dict(
    clf__n_estimators=[75, 90, 100, 110, 125],
    clf__max_depth=range(3, 13),
)
# --- Column bookkeeping ------------------------------------------------------
# Model inputs; the training file additionally supplies the 'Survived' label.
feature_cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']
columns = ['Survived'] + feature_cols
test_cols = list(feature_cols)

# Columns treated as categorical; every other feature is treated as numeric.
nom_col = ['Sex', 'Cabin', 'Embarked']
num_col = [name for name in feature_cols if name not in nom_col]

# --- Training data -----------------------------------------------------------
df = pd.read_csv("../datasets/classification/titanic_train.csv", usecols=columns)
x_train = df.drop(columns=['Survived'])
y_train = df['Survived']

# --- Test data ---------------------------------------------------------------
test_df_x = pd.read_csv("../datasets/classification/titanic_test.csv", usecols=test_cols)
# NOTE(review): in the Kaggle Titanic bundle, gender_submission.csv is a sample
# submission rather than ground-truth labels -- confirm what this file holds
# before reading scores computed against it as true test accuracy.
# Rows are presumably in the same PassengerId order as titanic_test.csv; this
# is not checked here.
test_df_y = pd.read_csv("../datasets/classification/titanic_gender_submission.csv")
x_test = test_df_x.copy()
y_test = test_df_y.drop('PassengerId', axis=1)
# Numeric features: fill missing values with the column median, then
# standardize.
num_imputer = SimpleImputer(strategy='median')
num_pipeline = Pipeline([
    ('imputer', num_imputer),
    ('scaler', StandardScaler()),
])

# Categorical features: replace missing values with the literal category
# 'missing', then one-hot encode. handle_unknown='ignore' keeps prediction from
# failing on categories that were not present when the encoder was fitted.
# NOTE(review): 'Cabin' likely has many distinct values, so one-hot encoding it
# may produce a wide, sparse block -- confirm this is intended.
cat_imputer = SimpleImputer(strategy='constant', fill_value='missing')
cat_pipeline = Pipeline([
    ('imputer', cat_imputer),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own preprocessing pipeline.
preprocessor = ColumnTransformer(transformers=[
    ('num', num_pipeline, num_col),
    ('cat', cat_pipeline, nom_col),
])

# Full model: preprocessing followed by a random forest. The seed is fixed so
# that the selected hyper-parameters, the reported metrics and the generated
# submission are reproducible from run to run.
pipeline = Pipeline([
    ('prep', preprocessor),
    ('clf', RandomForestClassifier(random_state=42)),
])
# Grid search over `params` with 5-fold cross-validation, ranked by accuracy.
grid = GridSearchCV(estimator=pipeline, param_grid=params, scoring='accuracy', cv=5)
grid.fit(x_train, y_train)

# Use the best pipeline found by the search to predict the test rows.
model = grid.best_estimator_
y_pred = model.predict(x_test)

# Best parameter combination and its mean cross-validated accuracy.
print(f"Лучшие параметры: {grid.best_params_}\nЛучшие показатели: {grid.best_score_}")

# Scores of the predictions against y_test, computed and printed one at a time.
# NOTE(review): y_test is loaded from titanic_gender_submission.csv; confirm it
# holds true labels before treating these numbers as real test performance.
for label, metric in (
    ("accuracy_score", accuracy_score),
    ("classification_report", classification_report),
    ("confusion_matrix", confusion_matrix),
):
    print(f"{label}: {metric(y_test, y_pred)}")

# Write the predictions next to their PassengerId values, without the index.
submission = test_df_y[['PassengerId']].assign(Survived=y_pred)
submission.to_csv('submission.csv', index=False)

Лучшие параметры: {'clf__max_depth': 12, 'clf__n_estimators': 100}
Лучшие показатели: 0.8305316678174627
accuracy_score: 0.8827751196172249
classification_report:               precision    recall  f1-score   support

           0       0.88      0.95      0.91       266
           1       0.90      0.76      0.83       152

    accuracy                           0.88       418
   macro avg       0.89      0.86      0.87       418
weighted avg       0.88      0.88      0.88       418

confusion_matrix: [[253  13]
 [ 36 116]]
