# 現実写真のみ、文字なし、固有名詞全部なし、キャプション、ほかの画像の大喜利

In [1]:
import os
import json
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers, metrics
from torch.utils.data import Dataset, DataLoader

In [3]:
# Experiment identifier; it names the sub-directory that receives the results.
EXPERIENCE_NUMBER = "001"

# Switches controlling which samples are used.
USE_UNREAL_IMAGE = False      # True: keep images whose photographic probability is below 0.8
USE_WORD_IMAGE = False        # True: keep images for which the OCR result is non-empty
USE_UNIQUE_NOUN_BOKE = False  # True: keep bokes whose "unique_nouns" list is non-empty
USE_CAPTION = True            # True: add (image, caption) pairs as label-0 samples
USE_MISS_BOKE = True          # True: add (image, boke of another image) pairs as label-0 samples

EPOCH = 25
BATCH_SIZE = 256

RESULT_DIR = f"../../results/Boke_Judge/{EXPERIENCE_NUMBER}/"
# makedirs creates every missing parent directory and tolerates directories
# that already exist, so a fresh checkout and a re-run both work.
os.makedirs(RESULT_DIR, exist_ok = True)

# Input locations; every value ends with "/" because file names are appended directly.
DATA_DIR = "../../datas/boke_data_assemble/"
CLIP_IMAGE_FEATURE_DIR = "../../datas/encoded/clip_image_feature/"
CLIP_SENTENCE_FEATURE_DIR = "../../datas/encoded/clip_sentence_feature/"
LUKE_SENTENCE_FEATURE_DIR = "../../datas/encoded/luke_sentence_feature/"

# データセットの作成(初回のみ実行)

In [None]:
# Collect the (boke, image) and (caption, image) id pairs that pass the filters
# selected by the USE_* switches.
boke_datas = list()
caption_datas = list()

# Largest number of bokes attached to a single image that passed the image filters.
max_num_boke = 0
for JP in tqdm(os.listdir(DATA_DIR)):
    stem = JP.split(".")[0]
    # The image number is taken from the file name; skip entries that do not
    # start with a purely numeric stem (e.g. hidden files or directories).
    if not stem.isdigit():
        continue
    N = int(stem)

    # The records contain Japanese text, so do not rely on the locale's default encoding.
    with open(f"{DATA_DIR}{JP}", "r", encoding = "utf-8") as f:
        a = json.load(f)

    image_information = a["image_infomation"]
    is_photographic_probability = image_information["is_photographic_probability"]
    # Not used below; a record without this key still raises KeyError here.
    ja_caption = image_information["ja_caption"]
    ocr = image_information["ocr"]

    # Drop images that are not real photographs
    if not USE_UNREAL_IMAGE:
        if is_photographic_probability < 0.8: continue

    # Drop images that contain text
    if not USE_WORD_IMAGE:
        if len(ocr) != 0: continue

    bokes = a["bokes"]

    max_num_boke = max(max_num_boke, len(bokes))
    for i, B in enumerate(bokes):

        # Drop bokes that contain proper nouns
        if not USE_UNIQUE_NOUN_BOKE:
            if len(B["unique_nouns"]) != 0: continue

        # Ids are built as a leading type digit (1: boke, 2: caption, 3: image)
        # followed by the zero-padded image number (and the boke index for bokes).
        boke_datas.append({
            "boke_number": int(f"1{N:07}{i:05}"),
            "image_number": int(f"3{N:07}")
        })

    caption_datas.append({
        "caption_number": int(f"2{N:07}"),
        "image_number": int(f"3{N:07}")
    })

len(boke_datas), len(caption_datas)

In [None]:
# Hold out 1% of the bokes and 1% of the captions; the two splits are drawn
# independently of each other.
train_boke_datas, test_boke_datas = train_test_split(boke_datas, test_size = 0.01)
train_caption_datas, test_caption_datas = train_test_split(caption_datas, test_size = 0.01)

# Write each split to its own JSON file inside RESULT_DIR.
for split_path, split_records in (
    (f"{RESULT_DIR}train_boke_datas.json", train_boke_datas),
    (f"{RESULT_DIR}train_caption_datas.json", train_caption_datas),
    (f"{RESULT_DIR}test_boke_datas.json", test_boke_datas),
    (f"{RESULT_DIR}test_caption_datas.json", test_caption_datas),
):
    with open(split_path, "w") as f:
        json.dump(split_records, f)

# モデルの学習

In [4]:
def _read_json(json_path):
    """Return the parsed content of the JSON file at json_path."""
    with open(json_path, "r") as f:
        return json.load(f)

# Reload the train/test splits stored in RESULT_DIR.
train_boke_datas = _read_json(f"{RESULT_DIR}train_boke_datas.json")
train_caption_datas = _read_json(f"{RESULT_DIR}train_caption_datas.json")

test_boke_datas = _read_json(f"{RESULT_DIR}test_boke_datas.json")
test_caption_datas = _read_json(f"{RESULT_DIR}test_caption_datas.json")

# Sizes of the four splits, in the order: train boke, train caption, test boke, test caption.
tuple(map(len, (train_boke_datas, train_caption_datas, test_boke_datas, test_caption_datas)))

(2161870, 228680, 21838, 2310)

In [None]:
def make_dataset(boke_datas, caption_datas):
    """Build an unbatched tf.data pipeline of ((features...), label) samples.

    Every boke paired with its own image is a label-1 sample.  When
    USE_CAPTION is set, every caption paired with its image is added with
    label 0.  When USE_MISS_BOKE is set, the image of every boke is
    additionally paired with a randomly drawn boke that belongs to a
    different image, also with label 0.  All samples are shuffled once, and
    the features are loaded lazily from "<feature dir><number>.npy".

    Args:
        boke_datas: list of dicts holding "boke_number" and "image_number".
        caption_datas: list of dicts holding "caption_number" and
            "image_number".

    Returns:
        A tf.data.Dataset yielding
        ((clip image feature, clip sentence feature, luke sentence feature),
        teacher signal).

    Raises:
        ValueError: if there are no samples at all, or if USE_MISS_BOKE is
            set while the bokes cover fewer than two distinct images (no
            mismatched pair can be drawn).
    """

    def make_dataset_with_directory(file_numbers, directory):
        """Return a dataset that loads "<directory><number>.npy" per number."""

        def load_data(file_number):
            # py_function passes an eager tensor; turn it into a plain int
            # before building the file name.
            feature_path = f"{directory}{int(file_number.numpy())}.npy"
            return np.load(feature_path)

        def load_data_wrapper(file_number):
            # np.load is ordinary Python code, so it has to go through py_function.
            return tf.py_function(func = load_data, inp = [file_number],
                                  Tout = tf.float32)

        dataset = tf.data.Dataset.from_tensor_slices((file_numbers))
        dataset = dataset.map(load_data_wrapper,
                              num_parallel_calls = tf.data.experimental.AUTOTUNE)
        return dataset.prefetch(tf.data.experimental.AUTOTUNE)

    image_file_numbers = list()
    sentence_file_numbers = list()
    teacher_signals = list()

    # Positive samples: each boke with the image it was written for.
    for D in boke_datas:
        image_file_numbers.append(D["image_number"])
        sentence_file_numbers.append(D["boke_number"])
        teacher_signals.append(1)

    # Negative samples: the caption of each image.
    if USE_CAPTION:
        for D in caption_datas:
            image_file_numbers.append(D["image_number"])
            sentence_file_numbers.append(D["caption_number"])
            teacher_signals.append(0)

    # Negative samples: each boke's image paired with a boke of another image.
    if USE_MISS_BOKE and boke_datas:
        # Without a second image the resampling loop below could never finish.
        if len({D["image_number"] for D in boke_datas}) < 2:
            raise ValueError("USE_MISS_BOKE needs bokes from at least two different images")

        tmp_idx = np.random.randint(0, len(boke_datas), size = (len(boke_datas), ))
        for i, idx in tqdm(enumerate(tmp_idx), total = len(boke_datas)):
            own_image_number = boke_datas[i]["image_number"]
            # Compare the image ids themselves; redraw until the candidate
            # boke comes from a different image.
            while boke_datas[idx]["image_number"] == own_image_number:
                idx = np.random.randint(0, len(boke_datas))

            image_file_numbers.append(own_image_number)
            sentence_file_numbers.append(boke_datas[idx]["boke_number"])
            teacher_signals.append(0)

    print(f"num data: {len(teacher_signals)}")

    if not teacher_signals:
        raise ValueError("make_dataset received no samples")

    # Shuffle the three parallel lists together so the pairs stay aligned.
    tmp = list(zip(image_file_numbers, sentence_file_numbers, teacher_signals))
    np.random.shuffle(tmp)
    image_file_numbers, sentence_file_numbers, teacher_signals = zip(*tmp)
    image_file_numbers = tf.constant(image_file_numbers, dtype = tf.int64)
    sentence_file_numbers = tf.constant(sentence_file_numbers, dtype = tf.int64)
    teacher_signals = tf.constant(teacher_signals, dtype = tf.int32)

    clip_image_feature_dataset = make_dataset_with_directory(image_file_numbers, CLIP_IMAGE_FEATURE_DIR)
    clip_sentence_feature_dataset = make_dataset_with_directory(sentence_file_numbers, CLIP_SENTENCE_FEATURE_DIR)
    luke_sentence_feature_dataset = make_dataset_with_directory(sentence_file_numbers, LUKE_SENTENCE_FEATURE_DIR)
    teacher_signal_dataset = tf.data.Dataset.from_tensor_slices(teacher_signals)

    dataset = tf.data.Dataset.zip( ((clip_image_feature_dataset, clip_sentence_feature_dataset, luke_sentence_feature_dataset), teacher_signal_dataset) )
    return dataset

# Build both pipelines, then pull a single batch of 32 samples to look at the
# feature shapes.
train_dataset = make_dataset(train_boke_datas, train_caption_datas)
test_dataset = make_dataset(test_boke_datas, test_caption_datas)

sample_features, TS = next(iter(train_dataset.batch(32)))
CIF, CSF, LSF = sample_features
CIF.shape, CSF.shape, LSF.shape, TS.shape

In [21]:
class LoadNpyDataset(Dataset):
    """Dataset whose items are ".npy" arrays read from disk on access.

    Item idx is the array stored at "<file_directory><file_numbers[idx]>.npy".
    """

    def __init__(self, file_numbers, file_directory):
        # file_directory is prepended verbatim to the file name, so it has to
        # end with a path separator.
        self.file_directory = file_directory
        self.file_numbers = file_numbers

    def __len__(self):
        number_of_files = len(self.file_numbers)
        return number_of_files

    def __getitem__(self, idx):
        file_number = self.file_numbers[idx]
        npy_path = f"{self.file_directory}{file_number}.npy"
        return np.load(npy_path)

def custom_collate_fn(batch):
    """Combine the samples of one batch into a single TensorFlow constant.

    The samples are first gathered with np.array and the resulting array is
    then wrapped with tf.constant.
    """
    stacked = np.array(batch)
    return tf.constant(stacked)

# Positive (image, boke) pairs of the training split, kept as three parallel lists.
image_file_numbers = [D["image_number"] for D in train_boke_datas]
sentence_file_numbers = [D["boke_number"] for D in train_boke_datas]
teacher_signals = [1] * len(train_boke_datas)

# Only the CLIP image features are served by this loader; the batch size
# follows the notebook-wide BATCH_SIZE setting instead of a separate literal.
dataset = LoadNpyDataset(image_file_numbers, CLIP_IMAGE_FEATURE_DIR)
dataloader = DataLoader(dataset, batch_size = BATCH_SIZE, num_workers = 8,
                        collate_fn = custom_collate_fn)

In [None]:
# Fetch the first batch produced by the DataLoader and look at its shape.
a = next(iter(dataloader))

a.shape

In [16]:
# Shape of the first sample inside that batch.
a[0].shape

(512,)

In [None]:
def build_model():
    """Create and compile the binary classifier over the three feature inputs.

    The CLIP image, CLIP sentence and LUKE sentence features are
    concatenated, passed through two 1024-unit Dense layers (each followed by
    LeakyReLU) and reduced to one sigmoid unit.  The input shapes are read
    from the module-level sample tensors CIF, CSF and LSF with their first
    axis dropped.

    Returns:
        The compiled Keras model (binary cross-entropy loss, AdamW optimizer,
        binary accuracy metric).
    """
    # One input per feature, in the order: CLIP image, CLIP sentence, LUKE sentence.
    feature_inputs = [layers.Input(shape = sample.shape[1:])
                      for sample in (CIF, CSF, LSF)]

    hidden = layers.Concatenate()(feature_inputs)
    for _ in range(2):
        hidden = layers.Dense(units = 1024)(hidden)
        hidden = layers.LeakyReLU()(hidden)
    probability = layers.Dense(units = 1, activation = "sigmoid")(hidden)

    classifier = models.Model(feature_inputs, probability)
    classifier.compile(loss = losses.BinaryCrossentropy(),
                       optimizer = optimizers.AdamW(),
                       metrics = [metrics.BinaryAccuracy()])
    return classifier

# Build the model once and print its layer summary.
model = build_model()
model.summary()


In [None]:
# Train a fresh model for EPOCH epochs, evaluate on the test split after each
# epoch, and store the per-epoch metrics in RESULT_DIR/history.json.
model = build_model()

train_loss_history = list()
train_accuracy_history = list()
test_loss_history = list()
test_accuracy_history = list()

for epoch in range(EPOCH):

    # train
    # The dataset is rebuilt every epoch, so the shuffle (and, when enabled,
    # the randomly mismatched bokes) is redrawn each time.
    train_dataset = make_dataset(train_boke_datas, train_caption_datas)
    # Running means over the per-batch values reported by Keras.
    train_loss_obj = metrics.Mean()
    train_accuracy_obj = metrics.Mean()

    pb = tqdm(train_dataset.batch(BATCH_SIZE))
    for (CIF, CSF, LSF), TS in pb:
        loss, accuracy = model.train_on_batch([CIF, CSF, LSF], TS)
        train_loss_obj(loss)
        train_accuracy_obj(accuracy)
        pb.set_postfix({"train_loss": float(train_loss_obj.result()),
                        "train_accuracy": float(train_accuracy_obj.result())})

    # test
    test_dataset = make_dataset(test_boke_datas, test_caption_datas)
    test_loss_obj = metrics.Mean()
    test_accuracy_obj = metrics.Mean()

    pb = tqdm(test_dataset.batch(BATCH_SIZE))
    for (CIF, CSF, LSF), TS in pb:
        loss, accuracy = model.evaluate([CIF, CSF, LSF], TS,
                                        verbose = False, batch_size = BATCH_SIZE)
        # Accumulate into the test metrics; feeding the train metrics here
        # would leave the test results empty and distort the train results.
        test_loss_obj(loss)
        test_accuracy_obj(accuracy)
        pb.set_postfix({"test_loss": float(test_loss_obj.result()),
                        "test_accuracy": float(test_accuracy_obj.result())})

    train_loss = float(train_loss_obj.result())
    train_accuracy = float(train_accuracy_obj.result())
    test_loss = float(test_loss_obj.result())
    test_accuracy = float(test_accuracy_obj.result())

    print(f"epoch: {epoch}, train loss: {train_loss}, train accuracy: {train_accuracy}, test loss: {test_loss}, test accuracy: {test_accuracy}")

    train_loss_history.append(train_loss)
    train_accuracy_history.append(train_accuracy)
    test_loss_history.append(test_loss)
    test_accuracy_history.append(test_accuracy)

with open(f"{RESULT_DIR}history.json", "w") as f:
    json.dump({
        "train_loss": train_loss_history,
        "train_accuracy": train_accuracy_history,
        "test_loss": test_loss_history,
        "test_accuracy": test_accuracy_history
    }, f)