# **Mount Google Drive**

In [None]:
# Mount Google Drive at /content/drive; the dataset, the simplet5 import path
# and the training output directory used later in this notebook all sit under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Show which GPU (if any) is attached to this runtime before training.
!nvidia-smi

# **Package installation**

In [None]:

!pip install torch
!pip install sklearn
!pip install transformers
!pip install rich[jupyter]
!pip install pytorch_lightning

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import sys  
# Put the Drive "Colab Notebooks" folder first on the import path so that
# `simplet5` can be imported from it (presumably a local copy of the package
# lives in that folder -- confirm). Requires Drive to be mounted first.
sys.path.insert(0, '/content/drive/MyDrive/Colab Notebooks')
import simplet5
from simplet5 import SimpleT5



# **Load Training Data**

In [None]:
from torch import cuda

# Record once whether a CUDA device is visible to PyTorch; falls back to the
# CPU when it is not.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
def load_story_dataset(random_seed = 1,
                       file_path = '/content/drive/MyDrive/ROCstory/data/ROCStories_winter2017 - ROCStories_winter2017.csv',
                       n_samples = 100,
                       test_size = 0.2,
                       sample_seed = 1):
  """Read the ROCStories CSV and build train / test frames for SimpleT5.

  The first four sentences of every story are joined with single spaces into
  the model input; the fifth sentence is the generation target.

  Args:
    random_seed: Seed passed to ``train_test_split`` for the train/test split.
    file_path: CSV file to read (decoded as ISO-8859-1). Must contain the
      columns ``sentence1`` ... ``sentence5``.
    n_samples: Number of stories to draw before splitting. It is capped at the
      number of rows in the file, so a small CSV no longer makes
      ``DataFrame.sample`` raise. ``None`` keeps every row.
    test_size: Fraction of the sampled stories placed in the test frame.
    sample_seed: Seed used for the row sub-sampling. Kept separate from
      ``random_seed`` so that changing the split seed still draws the same
      subset of stories, as before.

  Returns:
    ``(train_data, test_data)``: two DataFrames, each with the columns
    ``source_text`` and ``target_text``.

  Raises:
    KeyError: If any of the five sentence columns is missing from the CSV.
  """
  context_cols = ['sentence1', 'sentence2', 'sentence3', 'sentence4']
  target_col = 'sentence5'

  stories = pd.read_csv(file_path, encoding = 'ISO-8859-1')

  # Fail early with a readable message instead of deep inside pandas indexing.
  missing_cols = [col for col in context_cols + [target_col] if col not in stories.columns]
  if missing_cols:
    raise KeyError(f'{file_path} is missing expected column(s): {missing_cols}')

  if n_samples is not None:
    # sample() raises if asked for more rows than exist, so cap the request.
    stories = stories.sample(min(n_samples, len(stories)), random_state = sample_seed)

  # Cast to str first so non-string cells can be joined like the rest.
  source_text = stories[context_cols].astype(str).agg(' '.join, axis = 1)

  X_train, X_test, y_train, y_test = train_test_split(source_text.tolist(),
                                                      stories[target_col].tolist(),
                                                      shuffle = True,
                                                      test_size = test_size,
                                                      random_state = random_seed
                                                      )

  train_data = pd.DataFrame({'source_text': X_train, 'target_text': y_train})
  test_data = pd.DataFrame({'source_text': X_test, 'target_text': y_test})

  return train_data, test_data

# Build the train / test frames with the default seed (random_seed = 1).
train_df, test_df = load_story_dataset()


# **Build Model and Training**

In [None]:
# Start from the pretrained t5-small weights.
model = SimpleT5()
model.from_pretrained(model_type = 't5', model_name ='t5-small')

# Fine-tune on the story frames. The held-out test frame doubles as the
# evaluation set here.
model.train(train_df = train_df,
            eval_df = test_df,
            source_max_token_len = 256,
            target_max_token_len = 64,
            batch_size = 8,
            max_epochs = 9,
            outputdir = "/content/drive/MyDrive/ROCstory/output",
            # Follow the detected device instead of hard-coding True, so the
            # cell also runs on a CPU-only runtime.
            use_gpu = (device == 'cuda'),
            # NOTE(review): 0 presumably leaves early stopping disabled --
            # confirm against the installed simplet5 version.
            early_stopping_patience_epochs = 0,
            precision = 32
            )



In [None]:
# NOTE(review): this checkpoint folder name embeds the epoch and training loss
# of one particular run; after retraining, point it at a folder that actually
# exists under the output directory.
model_path = '/content/drive/MyDrive/ROCstory/output/simplet5-epoch-4-train-loss-2.6849'
# Load onto the GPU only when one was detected, so this also works on CPU.
model.load_model("t5", model_path, use_gpu = (device == 'cuda'))