# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Feature engineering

# Fixed category vocabularies. An integer code is the position of the value in
# its tuple, so a value always receives the same code regardless of which rows
# happen to be present in the frame being encoded (train and test stay
# aligned). Each tuple is sorted alphabetically, so a frame that contains every
# category gets the same codes a per-frame LabelEncoder would assign.
_TITLE_CATEGORIES = ('Master', 'Miss', 'Mr', 'Mrs', 'Rare')
_DECK_CATEGORIES = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'T', 'U')
_EMBARKED_CATEGORIES = ('C', 'Q', 'S')
# Spelling variants that are folded into a common title.
_TITLE_ALIASES = {'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'}


def _encode_fixed(values, categories):
    """Return int64 codes for *values* by position in *categories* (-1 if absent)."""
    return pd.Categorical(values, categories=list(categories)).codes.astype('int64')


def _deck_letter(cabin):
    """Return the leading deck letter of *cabin*, or 'U' if missing/unrecognised."""
    if isinstance(cabin, str) and cabin[:1] in _DECK_CATEGORIES:
        return cabin[0]
    return 'U'


def feature_engineering(df):
    """Build model-ready features from a raw Titanic passenger frame.

    The input frame is not modified; all work happens on a copy.

    Args:
        df: DataFrame containing at least the columns ``Name``, ``SibSp``,
            ``Parch``, ``Cabin``, ``Ticket``, ``Sex``, ``Embarked``, ``Age``
            and ``Fare``. Other columns are passed through unchanged.

    Returns:
        A new DataFrame without ``Ticket``, ``Cabin`` and ``Name`` and with:

        * ``Title``: code of the honorific parsed from ``Name``
          (Master=0, Miss=1, Mr=2, Mrs=3, Rare=4). Any title other than the
          four common ones, including a name with no parseable title, is
          treated as ``Rare``.
        * ``FamilySize``: ``SibSp + Parch + 1``.
        * ``IsAlone``: 1 when ``FamilySize`` is 1, else 0.
        * ``Deck``: code of the first cabin letter (A=0 ... G=6, T=7, U=8),
          where ``U`` stands for a missing or unrecognised cabin.
        * ``Sex``: male=0, female=1.
        * ``Embarked``: C=0, Q=1, S=2 after filling gaps with the most
          frequent value in this frame; -1 if the value is not one of those
          (or the whole column is missing).
        * ``Age`` / ``Fare``: gaps filled with the median of this frame.
    """
    # Copy first so the new columns are not written into the caller's frame.
    df = df.copy()

    # Title extraction: the word directly followed by a period in the name.
    title = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    title = title.replace(_TITLE_ALIASES)
    # Everything that is not a common title (Lady, Countess, Capt, Col, Don,
    # Dr, Major, Rev, Sir, Jonkheer, Dona, ... or no title at all) is 'Rare'.
    df['Title'] = title.where(title.isin(_TITLE_CATEGORIES), 'Rare')

    # Family size counts the passenger plus siblings/spouses and parents/children.
    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
    df['IsAlone'] = (df['FamilySize'] == 1).astype(int)

    # Deck is the first character of the cabin identifier.
    df['Deck'] = df['Cabin'].apply(_deck_letter)

    # Drop columns that are not used as features.
    df = df.drop(['Ticket', 'Cabin', 'Name'], axis=1)

    # Encode categorical variables.
    df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
    embarked_mode = df['Embarked'].mode()
    if not embarked_mode.empty:
        # mode() is empty when every value is missing; nothing to fill with then.
        df['Embarked'] = df['Embarked'].fillna(embarked_mode.iloc[0])
    df['Embarked'] = _encode_fixed(df['Embarked'], _EMBARKED_CATEGORIES)

    df['Title'] = _encode_fixed(df['Title'], _TITLE_CATEGORIES)
    df['Deck'] = _encode_fixed(df['Deck'], _DECK_CATEGORIES)

    # Fill missing numeric values with the column median.
    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['Fare'] = df['Fare'].fillna(df['Fare'].median())

    return df

# Load the raw CSV files, then run both through the same preprocessing.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

train_df = feature_engineering(train_df)
test_df = feature_engineering(test_df)

# Separate the target from the features; PassengerId is an identifier and is
# kept out of the feature matrices.
y = train_df['Survived']
X = train_df.drop(columns=['Survived', 'PassengerId'])
X_test = test_df.drop(columns=['PassengerId'])

# Train the model (fit() returns the fitted estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Predict on the test set and write the submission file.
predictions = model.predict(X_test)
submission = test_df[['PassengerId']].assign(Survived=predictions)
submission.to_csv('submission.csv', index=False)