<a href="https://colab.research.google.com/github/ykato27/BERT-example-notebook/blob/main/Titanic_BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Nameだけの予測
KaggleタイタニックのNameだけで予測

## 1. データ読込み

In [1]:
from google.colab import drive, files
drive.mount('/content/drive')  # drive をマウント

Mounted at /content/drive


In [2]:
import os

# 保存ディレクトリ
BASE_DIR = "/content/drive/MyDrive/Titanic"

DATA_PATH = os.path.join(BASE_DIR, "data")    # 対象データの保存ディレクトリ
MODEL_PATH = os.path.join(BASE_DIR, "model")  # モデルを保存するディレクトリ

# ディレクトリがなければ作成
os.makedirs(MODEL_PATH, exist_ok=True)

In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import time
from tqdm import tqdm
from tqdm.notebook import tqdm  # notebookの場合
from pprint import pprint

# データ読み込み
df_train = pd.read_csv(os.path.join(DATA_PATH, "train.csv"))
df_test = pd.read_csv(os.path.join(DATA_PATH, "test.csv"))
target_column = "Survived"

# データをマージ
df_test[target_column] = np.nan
df = pd.concat([df_train, df_test], ignore_index=True, sort=False)

print(df_train.shape)
print(df_test.shape)
print(df.shape)
print(df.columns)

(891, 12)
(418, 12)
(1309, 12)
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


## 2. 前処理

In [4]:
def str_normalize(ds):
    # アルファベットと数字のみにする
    ds = ds.str.replace("[^a-zA-Z0-9]+", " ", regex=True)
    return ds

df["Name_normalize"] = str_normalize(df["Name"])

## 3. BERT

#### 3-1.import関係

In [5]:
!pip install -q transformers
!pip install -q silence_tensorflow
!pip install -q janome

[K     |████████████████████████████████| 2.8 MB 5.3 MB/s 
[K     |████████████████████████████████| 895 kB 52.0 MB/s 
[K     |████████████████████████████████| 636 kB 66.0 MB/s 
[K     |████████████████████████████████| 3.3 MB 17.5 MB/s 
[K     |████████████████████████████████| 50 kB 5.5 MB/s 
[?25h  Building wheel for silence-tensorflow (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 19.7 MB 1.1 MB/s 
[?25h

In [6]:
# tensorflow のログが多いので silence_tensorflow を入れています
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

# tensorflow
import tensorflow as tf
import tensorflow.keras.layers as kl

# transformers
import transformers

# transformerのログをエラー以上のみに
from transformers import logging
logging.set_verbosity_error()

#### 3-2.学習済みモデルの選択

In [7]:
pretrained_model_name = "bert-base-uncased"

何も指定しないとtensorflowのモデルを読み込みます。(ファイルで言うと tf_model.h5)
from_py=Trueを有効にするとpytouchのモデルを読み込みます。(ファイルで言うと pytorch_model.bin)
どちらのファイルがあるかは学習済みモデルの Files and versions から確認できます。

In [8]:
tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_model_name)
bert_model = transformers.TFAutoModel.from_pretrained(pretrained_model_name)
#bert_model = transformers.TFAutoModel.from_pretrained(pretrained_model_name, from_pt=True)  # ※(1)

print(bert_model.config)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/536M [00:00<?, ?B/s]

BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.10.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [9]:
# 適当に名前の情報を使ってみてみる
sample_name = df["Name"][0]
print(sample_name)

# Tokenizeした結果
token_words = tokenizer.tokenize(sample_name)
print(token_words)

# BERTに入力する形式に変換
encode_token = tokenizer(sample_name, padding="max_length", max_length=12, truncation=True)
pprint(encode_token)

# BERTへの入力形式をデコードした結果
print(tokenizer.decode(encode_token["input_ids"]))

Braund, Mr. Owen Harris
['braun', '##d', ',', 'mr', '.', 'owen', 'harris']
{'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
 'input_ids': [101, 21909, 2094, 1010, 2720, 1012, 7291, 5671, 102, 0, 0, 0],
 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
[CLS] braund, mr. owen harris [SEP] [PAD] [PAD] [PAD]


#### 3-3.入力単語数の確認

In [10]:
# 最大単語数の確認
max_len = []
# 1文づつ処理
for sent in df["Name_normalize"]:
    # Tokenizeで分割
    token_words = tokenizer.tokenize(sent)
    # 文章数を取得してリストへ格納
    max_len.append(len(token_words))
# 最大の値を確認
print('最大単語数: ', max(max_len))
print('上記の最大単語数にSpecial token（[CLS], [SEP]）の+2をした値が最大単語数')

# 単語数を設定
sequence_max_length = max(max_len) + 2
if sequence_max_length > 512:
    sequence_max_length = 512

最大単語数:  20
上記の最大単語数にSpecial token（[CLS], [SEP]）の+2をした値が最大単語数


#### 3-4. BERTモデルの作成

In [11]:
def build_model(learning_rate, is_print=False):

    # BERTモデルをロード
    bert_model = transformers.TFAutoModel.from_pretrained(pretrained_model_name)
    #bert_model = transformers.TFAutoModel.from_pretrained(pretrained_model_name, from_pt=True)  # pytorchの場合

    # tfへの入力テンソルを作成
    # 入力はsequence_max_lengthサイズを3つ(['input_ids', 'token_type_ids', 'attention_mask'])
    inputs = [
        kl.Input(shape=(sequence_max_length,), dtype=tf.int32, name=name)
        for name in tokenizer.model_input_names
    ]
    if is_print:
        pprint(inputs)

    # BERTモデルの出力を得る
    # 出力は TFBaseModelOutputWithPooling (https://huggingface.co/transformers/main_classes/output.html#tfbasemodeloutput)
    # x[0](last_hidden_​​state) : 最後のレイヤーの出力
    # x[1](pooler_output)     : 分類トークンの状態
    x = bert_model(inputs)

    # BERT出力の0番目がクラス分類で使う出力
    x1 = x[0][:, 0, :]

    # 分類用の出力層を用意
    # 出力層の構成はTFBertForSequenceClassificationを参考
    x1 = kl.Dropout(0.1)(x1)
    x1 = kl.Dense(1, activation='sigmoid', kernel_initializer=transformers.modeling_tf_utils.get_initializer(0.02))(x1)
    model_train = tf.keras.Model(inputs=inputs, outputs=x1)

    # オリジナルの出力値を特徴量としたいので予測専用のモデルも別途作っておく
    model_pred = tf.keras.Model(inputs=inputs, outputs=[x1, x[0][:, 0, :]])

    # optimizerは AdamW を使用
    optimizer = transformers.AdamWeightDecay(learning_rate=learning_rate)
    model_train.compile(optimizer, loss="binary_crossentropy", metrics=["acc"])
    #model_train.compile(optimizer, loss="categorical_crossentropy", metrics=["acc"])  # softmaxの場合
    if is_print:
        print(model_train.summary())

    return model_train, model_pred

# 試しに実行
build_model(0.1, is_print=True)

[<KerasTensor: shape=(None, 22) dtype=int32 (created by layer 'input_ids')>,
 <KerasTensor: shape=(None, 22) dtype=int32 (created by layer 'token_type_ids')>,
 <KerasTensor: shape=(None, 22) dtype=int32 (created by layer 'attention_mask')>]
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 22)]         0                                            
__________________________________________________________________________________________________
token_type_ids (InputLayer)     [(None, 22)]         0                                            
__________________________________________________________________________________________________
attention_mask (InputLayer)     [(None, 22)]         0                                            
___________________________________________________

(<keras.engine.functional.Functional at 0x7fbd36c7b3d0>,
 <keras.engine.functional.Functional at 0x7fbd36bec810>)

#### 3-5. BERTの学習(ファインチューニング)

In [12]:
import tensorflow as tf
import os

runtime_type = ""

try:
    if "COLAB_TPU_ADDR" in os.environ:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
    else:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()

    #--- TPU
    print('Running on TPU ', resolver.cluster_spec().as_dict()['worker'])
    runtime_type = "TPU"

    # This is the TPU initialization code that has to be at the beginning.
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    tpu_strategy = tf.distribute.TPUStrategy(resolver)

    tf.keras.backend.clear_session()
    print("All devices: ", tf.config.list_logical_devices('TPU'))

except ValueError:

    if tf.test.gpu_device_name() != "":
        #--- GPU
        runtime_type = "GPU"
    else:
        runtime_type = "CPU"

print("runtime_type: ", runtime_type)

INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0


Running on TPU  ['10.127.224.202:8470']
All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU')]
runtime_type:  TPU


#### 3.6 学習コード

In [13]:
import sklearn.metrics
def train_bert(
        df_train,       # 学習用のデータ
        text_column,    # 対象のカラム名
        target_column,  # 目的変数のカラム名
        df_valid=None,  # 検証用データ
        df_pred_list=[],       # 予測用データ
        model_file_prefix="",  # 保存時のファイル名識別子
        epochs=20,
        batch_size=8,
    ):

    #--------------------
    # 学習率
    #--------------------
    lr0 = 0.000005
    learning_rate = [
        0.00001,
        0.00002,
    ]
    if epochs-len(learning_rate) > 0:
        lr_list = np.linspace(0.00002, 0, epochs-len(learning_rate))
        learning_rate.extend(lr_list)
    def lr_scheduler(epoch):
        return learning_rate[epoch]
    lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)


    #--------------------
    # file
    #--------------------
    model_path = "{}_{}.h5".format(
        model_file_prefix, 
        pretrained_model_name, 
    )


    #--------------------
    # モデル
    #--------------------
    if runtime_type == "TPU":
        with tpu_strategy.scope():
            model_train, model_pred = build_model(lr0)
    else:
        model_train, model_pred = build_model(lr0)


    #-----------------------------
    # モデル入出力用のデータ作成関数
    #-----------------------------
    def _build_x_from_df(df):
        # Series -> list
        x = df[text_column].tolist()

        # tokenize
        x = tokenizer(x, padding="max_length", max_length=sequence_max_length, 
            truncation=True, return_tensors="tf")

        # BatchEncoding -> dict
        return dict(x)

    def _build_y_from_df(df):
        return df[target_column]
        #return tf.keras.utils.to_categorical(df[target_column], num_classes=2)  # softmax用


    #-------------------
    # valid用のdatasetを作成
    #-------------------
    if df_valid is not None:
        valid_x = _build_x_from_df(df_valid)
        valid_y = _build_y_from_df(df_valid)
        valid_dataset = (
            tf.data.Dataset.from_tensor_slices((valid_x, valid_y))
            .batch(batch_size)
            .cache()
        )
    else:
        valid_dataset = None


    #-------------------
    # 学習
    #-------------------
    if os.path.isfile(model_path):
        # 学習済みモデルをload
        print(model_path)
        model_train.load_weights(model_path)
    else:
        train_x = _build_x_from_df(df_train)
        train_y = _build_y_from_df(df_train)
        train_dataset = (
            tf.data.Dataset.from_tensor_slices((train_x, train_y))
            .shuffle(len(train_x), seed=1234)
            .batch(batch_size)
            .prefetch(tf.data.experimental.AUTOTUNE)  # GPUが計算している間にBatchデータをCPU側で用意しておく機能
        )

        model_train.fit(train_dataset, epochs=epochs, validation_data=valid_dataset, callbacks=[lr_callback])
        model_train.save_weights(model_path)

    #-------------------
    # 評価
    #-------------------
    if df_valid is not None:
        print("valid")
        pred_y = model_train.predict(valid_dataset, verbose=1)

        # 正解率
        pred_y_label = np.where(pred_y < 0.5, 0, 1)
        metric = sklearn.metrics.accuracy_score(valid_y, pred_y_label)
        print("acc", metric)
    else:
        metric = 0

    #-------------------
    # 予測
    #-------------------
    print("pred")
    pred_y_list = []
    emb_list = []
    for df_pred in df_pred_list:

        pred_x = _build_x_from_df(df_pred)
        pred_dataset = (
            tf.data.Dataset.from_tensor_slices((pred_x,))
            .batch(batch_size)
            .cache()
        )

        # 予測
        pred_output = model_pred.predict(pred_dataset, verbose=1)

        # pred
        pred_y = pred_output[0].reshape((-1,))  # (-1,1) -> (-1)
        #pred_y = pred_y[0][:,1]  # softmax用
        pred_y_list.append(pred_y)

        # emb
        emb_list.append(pred_output[1])

    return metric, pred_y_list, emb_list

#--- 実行例
metric, pred_y_list, emb_list = train_bert(
    df_train=df[df["Survived"].notnull()][:10],  # 学習データ
    text_column="Name_normalize",
    target_column="Survived",
    df_valid=df[df["Survived"].notnull()][:10],  # 検証データ(仮で学習データと同じ)
    df_pred_list=[df[df["Survived"].isnull()][:10]],  # 予測データ
    epochs=2,  # 試しなので少な目
)
print(metric)
print(pred_y_list[0].shape)
print(emb_list[0].shape)

Epoch 1/2


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


Epoch 2/2
valid


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


acc 0.7
pred


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>]


0.7
(10,)
(10, 768)


## 4.BERTモデルから予測結果と特徴量を取得

In [14]:
import sklearn.model_selection

def train_cv(df, text_column, target_column, n_splits):

    df_train = df[df[target_column].notnull()]
    df_test = df[df[target_column].isnull()]

    df_train_idx = df_train.index

    # 結果用
    df_pred = pd.DataFrame(df.index, columns=["index"]).set_index("index")
    df_emb = pd.DataFrame(df.index, columns=["index"]).set_index("index")
    df_emb_pred = None
    metric_list = []

    #----------------
    # cross validation
    #----------------
    kf = sklearn.model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1234)
    for i, (train_idx, test_idx) in enumerate(kf.split(df_train, df_train[target_column])):
        df_train_sub = df_train.iloc[train_idx]
        df_test_sub = df_train.iloc[test_idx]

        df_pred_list = [df_test_sub]
        df_pred_list.append(df_test)

        model_file_prefix = "cv_{}".format(i)

        # train
        metric, pred_y_list, emb_list = train_bert(
            df_train=df_train_sub, 
            text_column=text_column,
            target_column=target_column, 
            df_valid=df_test_sub,
            df_pred_list=df_pred_list,
            model_file_prefix=model_file_prefix,
        )
        metric_list.append(metric)

        # 予測結果を保存
        result_name = "result_{}".format(i)
        df_pred.loc[df_train_idx[test_idx], result_name] = pred_y_list[0]
        df_pred.loc[df_test.index, result_name] = pred_y_list[1]

        #---------
        a = pd.DataFrame(emb_list[0], index=df_train_idx[test_idx])
        df_emb = df_emb.combine_first(a)

        if df_emb_pred is None:
            df_emb_pred = pd.DataFrame(emb_list[1], index=df_test.index)
        else:
            df_emb_pred += emb_list[1]


    pred_y = df_pred.mean(axis=1)

    df_emb_pred /= n_splits
    df_emb = df_emb.combine_first(df_emb_pred)

    return np.mean(metric_list), pred_y.values, df_emb

#--- 結果と特徴量を取得
metric, pred_y, df_emb = train_cv(df, "Name_normalize", "Survived", n_splits=3)
print(metric)
print(pred_y.shape)
print(df_emb.shape)

Epoch 1/20


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
valid


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


acc 0.7946127946127947
pred


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>]


Epoch 1/20


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
valid


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


acc 0.8013468013468014
pred


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>]


Epoch 1/20


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
valid


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None,) dtype=float64>]


acc 0.7912457912457912
pred


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 22) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 22) dtype=int32>]


0.7957351290684623
(1309,)
(1309, 768)


#### 4-1.予測結果を出力

In [15]:
df["BERT"] = pred_y
df["BERT_label"] = np.where(pred_y < 0.5, 0, 1)

# 学習データの正解率
_df = df[df["Survived"].notnull()]
print(sklearn.metrics.accuracy_score(_df["Survived"], _df["BERT_label"]))

# 予測結果をcsvで出力
_df = df[df["Survived"].isnull()]
df_submit = pd.DataFrame()
df_submit["PassengerId"] = _df["PassengerId"]
df_submit["Survived"] = _df["BERT_label"]
df_submit.to_csv('submit1.csv', header=True, index=False)

0.7957351290684624
