# Init

In [None]:
from chatbot.transformer import Transformer
from chatbot.sequential import Sequent
from chatbot.rnn import Recurrent
import pandas as pd

# Placeholder for the training histories; `results` is reassigned with the
# actual per-model history objects in the Plots section before it is used.
results = []

# Training

## ConvAI 2017

In [None]:
# Dataset directory for the ConvAI 2017 section.
path = 'data/conv-ai-2017'

# Transformer on ConvAI 2017 (num_epoch=100): fit on the dataset, keep what
# fit() returns for the Plots section, then save the model to its folder.
convai_tf = Transformer(num_epoch=100)
convai_tf_history = convai_tf.fit(path=path)
convai_tf.save_to_folder(path='trained_models/transformer/conv-ai-2017')

In [None]:
# Sequent model on ConvAI 2017 (num_epoch=100). The dataset directory is
# passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
convai_seq = Sequent(num_epoch=100)
convai_seq_history = convai_seq.fit(path='data/conv-ai-2017')
convai_seq.save_to_folder(path='trained_models/dnn/conv-ai-2017')

In [None]:
# Recurrent model on ConvAI 2017 (num_epoch=100). The dataset directory is
# passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
convai_rnn = Recurrent(num_epoch=100)
convai_rnn_history = convai_rnn.fit(path='data/conv-ai-2017')
convai_rnn.save_to_folder(path='trained_models/rnn/conv-ai-2017')

## Cornell Movies Dialogue Corpus

In [None]:
# Dataset directory for the Cornell Movies Dialogue Corpus section.
path = 'data/cornell-movies-dialogue-corpus'

# Transformer on the Cornell corpus (num_epoch=150): fit on the dataset, keep
# what fit() returns for the Plots section, then save the model to its folder.
cmdc_tf = Transformer(num_epoch=150)
cmdc_tf_history = cmdc_tf.fit(path=path)
cmdc_tf.save_to_folder(path='trained_models/transformer/cornell-movies-dialogue-corpus')

In [None]:
# Sequent model on the Cornell corpus (num_epoch=150). The dataset directory
# is passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
cmdc_seq = Sequent(num_epoch=150)
cmdc_seq_history = cmdc_seq.fit(path='data/cornell-movies-dialogue-corpus')
cmdc_seq.save_to_folder(path='trained_models/dnn/cornell-movies-dialogue-corpus')

In [None]:
# Recurrent model on the Cornell corpus (num_epoch=150). The dataset directory
# is passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
cmdc_rnn = Recurrent(num_epoch=150)
cmdc_rnn_history = cmdc_rnn.fit(path='data/cornell-movies-dialogue-corpus')
cmdc_rnn.save_to_folder(path='trained_models/rnn/cornell-movies-dialogue-corpus')

## Reddit conversations

In [None]:
# Dataset directory for the Reddit conversations section.
path = 'data/r-conversations'

# Transformer on Reddit conversations (num_epoch=150): fit on the dataset, keep
# what fit() returns for the Plots section, then save the model to its folder.
rc_tf = Transformer(num_epoch=150)
rc_tf_history = rc_tf.fit(path=path)
rc_tf.save_to_folder(path='trained_models/transformer/r-conversations')

In [None]:
# Sequent model on Reddit conversations (num_epoch=150). The dataset directory
# is passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
rc_seq = Sequent(num_epoch=150)
rc_seq_history = rc_seq.fit(path='data/r-conversations')
rc_seq.save_to_folder(path='trained_models/dnn/r-conversations')

In [None]:
# Recurrent model on Reddit conversations (num_epoch=150). The dataset directory
# is passed explicitly instead of reading the notebook-global `path`, so this
# cell cannot silently train on another dataset when cells run out of order.
rc_rnn = Recurrent(num_epoch=150)
rc_rnn_history = rc_rnn.fit(path='data/r-conversations')
rc_rnn.save_to_folder(path='trained_models/rnn/r-conversations')

## Yandex Toloka

In [None]:
# Dataset directory for the Yandex Toloka section.
path = 'data/yandex.toloka'

# Transformer on Yandex Toloka (lang='ru', num_epoch=200): fit on the dataset,
# keep what fit() returns for the Plots section, then save the model to its folder.
yt_tf = Transformer(lang='ru', num_epoch=200)
yt_tf_history = yt_tf.fit(path=path)
yt_tf.save_to_folder(path='trained_models/transformer/yandex.toloka')

In [None]:
# Sequent model on Yandex Toloka (lang='ru', num_epoch=200). The dataset
# directory is passed explicitly instead of reading the notebook-global `path`,
# so this cell cannot silently train on another dataset when cells run out of order.
yt_seq = Sequent(lang='ru', num_epoch=200)
yt_seq_history = yt_seq.fit(path='data/yandex.toloka')
yt_seq.save_to_folder(path='trained_models/dnn/yandex.toloka')

In [None]:
# Recurrent model on Yandex Toloka (lang='ru', num_epoch=200). The dataset
# directory is passed explicitly instead of reading the notebook-global `path`,
# so this cell cannot silently train on another dataset when cells run out of order.
yt_rnn = Recurrent(lang='ru', num_epoch=200)
yt_rnn_history = yt_rnn.fit(path='data/yandex.toloka')
yt_rnn.save_to_folder(path='trained_models/rnn/yandex.toloka')

# Learning Summary
Text summaries of the models, printed after training.

## ConvAI 2017

In [None]:
# Summaries of the three ConvAI 2017 models, in order: Transformer, Sequent,
# Recurrent. Requires the ConvAI 2017 training cells to have been run.
convai_tf.summary()
convai_seq.summary()
convai_rnn.summary()

## Cornell Movies Dialogue Corpus

In [None]:
# Summaries of the three Cornell Movies Dialogue Corpus models, in order:
# Transformer, Sequent, Recurrent. Requires the Cornell training cells to have been run.
cmdc_tf.summary()
cmdc_seq.summary()
cmdc_rnn.summary()

## Reddit conversations

In [None]:
# Summaries of the three Reddit conversations models, in order: Transformer,
# Sequent, Recurrent. Requires the Reddit training cells to have been run.
rc_tf.summary()
rc_seq.summary()
rc_rnn.summary()

## Yandex Toloka

In [None]:
# Summaries of the three Yandex Toloka models, in order: Transformer, Sequent,
# Recurrent. Requires the Yandex Toloka training cells to have been run.
yt_tf.summary()
yt_seq.summary()
yt_rnn.summary()

# Plots
This section contains model-to-model comparison plots for each metric.

## Function

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
# Training histories in the order plot_metric indexes them: one group of four
# per model, and within each group ConvAI 2017, Cornell, Yandex Toloka, Reddit.
results = [
    # Transformer
    convai_tf_history, cmdc_tf_history, yt_tf_history, rc_tf_history,
    # Sequent (DNN)
    convai_seq_history, cmdc_seq_history, yt_seq_history, rc_seq_history,
    # Recurrent (RNN)
    convai_rnn_history, cmdc_rnn_history, yt_rnn_history, rc_rnn_history,
]


def plot_metric(data: list[pd.DataFrame], mark: str = 'accuracy', ep_mark: str = 'epoch') -> None:
    """Draw a 3x4 grid comparing one metric across every model/dataset pair.

    Rows are models (Transformer, DNN, RNN+seq2seq) and columns are datasets
    (ConvAI 2017, Cornell Movies Dialogue Corpus, Yandex.Toloka, Reddit
    conversations). All panels share both axes, and each panel title shows
    the maximum and the mean of the plotted metric, rounded to two decimals.

    Args:
        data: Twelve history frames in row-major grid order: the four
            datasets for the first model, then the four for the second, then
            the four for the third. Each frame is indexed with ``mark`` and
            ``ep_mark``. Entries beyond the twelfth are ignored.
        mark: Name of the column drawn on the y axis; also used, title-cased,
            in the figure title.
        ep_mark: Name of the column drawn on the x axis.

    Raises:
        IndexError: If ``data`` holds fewer than twelve entries.
    """
    # One entry per grid row: (y-axis label, line colour, accent colour).
    # The accent colour fills the markers and tints the row label.
    model_rows = (
        ('Transformer model', '#6699d3', '#346aa6'),
        ('DNN (Sequential model)', '#5dd3b0', '#2aa882'),
        ('RNN+seq2seq', '#ffa270', '#ff8240'),
    )
    # One entry per grid column: the x-axis label shown under the bottom row.
    dataset_cols = (
        'ConvAI 2017',
        'Cornell Movies Dialogue Corpus',
        'Yandex.Toloka',
        'Reddit conversations',
    )
    n_cols = len(dataset_cols)
    bottom_row = len(model_rows) - 1

    with sns.color_palette('tab10'):
        fig, axs = plt.subplots(nrows=len(model_rows), ncols=n_cols, sharex=True, sharey=True)
        fig.set_size_inches((25, 15))
        # Kept from the original layout: slants the bottom-row tick labels.
        fig.autofmt_xdate()
        fig.suptitle(f'{mark.title()} comparison plot', size=20)

        for row, (model_label, line_color, accent_color) in enumerate(model_rows):
            for col, dataset_label in enumerate(dataset_cols):
                frame = data[row * n_cols + col]
                values = frame[mark].to_list()
                ax = axs[row, col]

                ax.plot(frame[ep_mark], frame[mark], color=line_color, mfc=accent_color,
                        marker='o', mec='#b9b9b9', markersize=4)
                ax.set(title=f'max:{round(max(values), 2)}, average:{round(np.mean(values), 2)}')
                ax.grid(axis='both')

                # Only the outer edge of the grid carries axis labels.
                if col == 0:
                    ax.set_ylabel(model_label, size='14', color=accent_color)
                if row == bottom_row:
                    ax.set_xlabel(dataset_label, size='14')

## Loss

In [None]:
# Loss per epoch for every model/dataset pair.
plot_metric(results, mark='loss', ep_mark='epoch')

## Accuracy

In [None]:
# Accuracy per epoch for every model/dataset pair.
plot_metric(results, mark='accuracy', ep_mark='epoch')

## MRR

In [None]:
# MRR per epoch for every model/dataset pair; each history must expose an 'mrr' column.
plot_metric(results, mark='mrr', ep_mark='epoch')

## F1

In [None]:
# F1 per epoch for every model/dataset pair; each history must expose an 'f1' column.
plot_metric(results, mark='f1', ep_mark='epoch')