In [2]:
!pip install sentencepiece
!pip install transformers
!pip install torch
!pip install rich[jupyter]

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 6.1MB/s 
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.95
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)
[K     |████████████████████████████████| 2.0MB 6.6MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 23.9MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/08/cd/342e5

In [3]:
import os
import re
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook, tnrange
from sklearn.utils import shuffle
import pickle
import math


import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

# Importing the T5 modules from huggiazngface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration

# rich: for a better display on terminal
from rich.table import Column, Table
from rich import box
from rich.console import Console

# define a rich console logger
console = Console(record=True)

training_logger = Table(
    Column("Random Selection", justify = "center"),
    Column("Epoch", justify="center"),
    Column("Loss", justify="center"),
    title="Training Status",
    pad_edge=False,
    box=box.ASCII,
)

valid_loggger = Table(
    Column("Random Selection", justify = "center"),
    Column("Loss", justify = "center"),
    title="Validation Status",
    pad_edge=False,
    box=box.ASCII,
)

def save_pickle(path, obj):
  with open(path, 'wb') as fp:
    pickle.dump(obj, fp)

def load_pickle(path):
  with open(path, 'rb') as fp:
    return pickle.load(fp)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
torch.manual_seed(3007)

<torch._C.Generator at 0x7ff684971d10>

In [6]:
model = T5ForConditionalGeneration.from_pretrained('drive/MyDrive/T5MovieWikiTraining2_0/outputs-itr-3/model_files')
tokenizer = T5Tokenizer.from_pretrained('drive/MyDrive/T5MovieWikiTraining2_0/outputs-itr-3/model_files')

In [7]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
greedyOp = model.generate(input_ids, max_length=100)
tokenizer.decode(greedyOp[0], skip_special_tokens=True)

'filming a series of shorts, shorts, and extended scenes, all starring small-time hero Jack. Jack is a solitary, solitary, and physically demanding man who is possessed by a spirit that he cannot live without. He is possessed by a spirit that he cannot live without, and is possessed by a spirit that cannot live without. Jack is a serial killer who is known to his friends as "'

In [8]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=100,
    num_beams = 5, 
    early_stopping = True
    )
tokenizer.decode(beamOp[0], skip_special_tokens=True)

'filming a series of shorts, shorts, and interlaced pieces of furniture, all of which are indistinguishable from each other.[2]'

In [9]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=200,
    num_beams = 3, 
    early_stopping = True,
    no_repeat_ngram_size = 2
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

72


'filming a series of shorts, two main characters are shown dead in the background, with the female lead appearing in each scene. The male lead, who appears to be in his mid-thirties, is shown to have possessed all of the characters before he was killed, while female leads appear to appear in both roles.'

In [10]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=200,
    num_beams = 3, 
    early_stopping = True,
    no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 0
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

41


'filming a series of murders, a group of people are murdered and the film ends with a bang on the head of a local psychiatric hospital.'

In [11]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1000,
    num_beams = 3, 
    # early_stopping = True,
    no_repeat_ngram_size = 3,
    # do_sample = True, 
    # top_k = 0
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

110


"filming a series of shorts, shorts and episodic pieces, with interlaced flashbacks showing the events of the previous film, including a scene in which a woman is killed in a car accident and an attempted suicide by a man in his car. The scene then switches to a flashback, in which the woman is confronted by the man's fiancée, who is also killed in the car accident, and then relives the events as they happened."

In [12]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 0,
    temperature = 0.7
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

200


"film editor Max Machin is reporting on his son's sexual experiences in London. Max's jealousy is heightened when he is approached by his ex-wife, who reveals that Max's pregnancy has been arranged for him. He tells Max that she has a baby and she begins to experience strange feelings for him. Max ends up in a hospital with a baby girl, who turns out to be the one who died in the accident. Max starts to see strange things about Max after he has left him. Max, now a sex addict, attempts to nurse Max to a newborn, but Max is soon attacked by a rogue student, who is rescued by Max. Max and Max get caught in a rage and are later murdered by Max, who stabs him in the thigh. Max, Max and Max escape from the scene and escape the horrors of Max's death."

In [13]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    # temperature = 0.7
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

174


'film producer Gerard Slaughter (Colman) works under the pseudonym "Pvt. Pvt. Pliny" who has a personality different from his own. His passion is to produce movies with low budgets. His ideal movie version is a modern "house of dreams" but he prefers the cinematic one. He dreams of a singer/dancer on the radio and after recording a live band gig, he plans to perform a live concert at the Motel. His vision of the singer is palpable, not caring about the audience, but to the point of attempting to entertain audience members, including his ex-wife, Maize (Jonathan Szeta) and his teenage daughter Maya (Manion Gleason).'

In [14]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 0,
    top_p = 0.90
    # temperature = 0.7
    )
print(len(beamOp[0]))
tokenizer.decode(beamOp[0], skip_special_tokens=True)

200


'film opens in the year 1942, in the period of medieval Egypt, when king Henry VIII of England arrived at the castle after the Normandy invasion. The Scottish protectorate of the castle, Majector of Achille, is determined to restore the castle to its former glory. Prof. Chan, with his ministerial assistants, assist the governor by arranging an evacuation for the town of Sardinia. Prof. Chan is on the run from Sardinia and prevents him from being shot at by the Turks. The Turks control their guardians and guards at the castle, which was guarded by a henchman Alexander Inverness, and introduces himself as a revolutionary for the Romans. Through letters written to the Governor of Sardinia, he finds out that Arya has been tortured by Greek prisoners, and her father is in the care of Count Maya of Ar'

In [15]:
text = "generate plot for genre: horror"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

In [17]:
from pprint import pprint

In [19]:
text = "generate plot for genre: horror comedy"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

Op: 0----------------------------------------------------------------------------------------------------
825
('the film, opens with a group of two creeps preparing to cross the deserted '
 'deserted marshes. The film follows the first of four creepy-crazy teenagers '
 'exploring a deserted marshes.')
Op: 1----------------------------------------------------------------------------------------------------
825
('set in 1898, the story follows a group of British soldiers led by Lieutenant '
 'Colonel Joseph Hepburn who, among others, are molesting young '
 'African-American soldiers. The soldiers are soon seen running around the '
 'country helping the refugees and returning home. While the group is in the '
 'process of clearing the prison, the soldiers get robbed and beaten up by the '
 'masked recruits. At the end of the first day, they are apprehended by '
 'General Arangos (P. D. Atkins) who has escaped with his men in a carjacking, '
 'and a chain containing a necklace marked "Dark

In [20]:
text = "generate plot for genre: romantic comedy"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

Op: 0----------------------------------------------------------------------------------------------------
469
('the fictional characters (Adrian, Catherine, Rupert, Carstairs, and Leo) in '
 'the comic books series Paranormal: The Lost Marriages of Two Cass Boys '
 '(Forrest, Americ, and Molly), who are part of a gang of six witches to be '
 'hunted down by the Frogs. A timid and cynical Farrell (Peter Parker), known '
 'as The Lost Queen of the Lost Land, is hired as a stagehand to portray The '
 'Lost Queen of the Lost Queen in the 1920s (with Peters as the lead '
 'character) to play a role in the titular role), and he finds himself quickly '
 "drawn into the world of the Lost Queen (through Peters' conceit and his "
 're-birth and the end of The Lost Queen), whose only purpose is to obstruct '
 'its fate. But when a prince comes along, he kills the princess.')
Op: 1----------------------------------------------------------------------------------------------------
469
('film premie

In [21]:
text = "generate plot for genre: psychological action thriller"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

Op: 0----------------------------------------------------------------------------------------------------
712
('film was shot in a small village in occupied Pakistan, when a minister '
 'arrived with his wife and three children and asked the Minister for advice '
 'regarding making a film in his own village. After several years, the '
 "minister was furious and decided to kill the minister's daughter. She then "
 'took a line from newspaper to report the incident to the provincial council. '
 'The film went on to feature over a period of six months, which included an '
 'extended period of observation, observation, and a short film review. It '
 "showed a surprisingly sharp and detailed description of the country's most "
 'frequent occurrences, mostly with a sudden increase in crime. An old lady, '
 'Ms. Choudhry (Kamal Haas) told her husband (Suhail Agarwal) that the '
 'incident had a historical or historical significance to this area, pointing '
 'out that the crime had been commit

In [22]:
text = "generate plot for genre: psychological action thriller director: Anurag Kashyap"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

Op: 0----------------------------------------------------------------------------------------------------
836
('film opens with a kidnapping of a girl from Delhi, Pratap Choudhary. He '
 'meets Chinna, a beautiful but uninterested young lady from Chennai, who '
 'teaches students psychology. He gives a lecture on the subject to deviating '
 'into society, and has a flashback to her mother as well. This is a lesson '
 'for Pratap that the police must now do more to maintain order and control '
 'their lives. The movie starts with a young man Dharmendran(Devadasa Kote) '
 'who asks Pratap what he wants to do and it moves on. He also makes a mockery '
 'of the young woman, and appoints her to take care of her. The young man, '
 'Dharmendran is a narrator for the film, who has a super-charged character '
 'named Kalyanath Kashyap, who is very good at acting. One night he comes '
 'across Kalyanath Kashyap and realises that Kalyanath is actually a '
 'paranormally gifted person, who was sex

In [23]:
text = "generate plot for genre: psychological action thriller director: Anurag Kashyap and ethnicity: Bengali"
input_ids = tokenizer.encode(text, return_tensors="pt")
beamOp = model.generate(
    input_ids, 
    max_length=1200,
    # num_beams = 3, 
    # early_stopping = True,
    # no_repeat_ngram_size = 3,
    do_sample = True, 
    top_k = 100,
    top_p = 0.95,
    num_return_sequences=3
    # temperature = 0.7
    
    )
for i, sample_op in enumerate(beamOp):
  print(f'Op: {i}' + 100 * '-')
  print(len(sample_op))
  pprint(tokenizer.decode(sample_op, skip_special_tokens=True))

Op: 0----------------------------------------------------------------------------------------------------
1200
('film revolves around a village, with a recurring theme "There is no danger '
 'in living a life". In fact, every village has a different set of rules and '
 'rules. The Chief Minister is a concerned citizen and often the only one who '
 'wants to know what happens to the people. He and his wife are always in '
 'search of the right kind of people. He decides to talk to a friend of a '
 'local politician who is having issues with his family. His only choice is to '
 'expose the murder of a minister. His friend Vikram, who is also struggling '
 'to raise funds for his education, takes a good job and helps his friend. One '
 'day, Vikram catches a snake which attracts the attention of a snake. One day '
 'he is attacked by a group of thugs, who say that there are only three people '
 'in the city but none of them are able to find the girl in the neighbourhood. '
 'It is reveale