# Generowanie tekstu za pomocą RNN przy użyciu GPU lub CPU

In [None]:
from pathlib import Path
# Root directory of the generator's data set.
dataset_path = Path('data/rnn_generator')
print(dataset_path)

# Sub-directory holding intermediate artifacts.
tmp_path = dataset_path / 'tmp/'

In [None]:
import torch

# Use the GPU whenever CUDA is available.  To force CPU execution,
# reassign the flag right after this line with ``USE_GPU = False``.
USE_GPU = torch.cuda.is_available()

print(f'USE_GPU={USE_GPU}')

def to_gpu(x, *args, **kwargs):
    """Return ``x.cuda(*args, **kwargs)`` when ``USE_GPU`` is set, else ``x`` unchanged."""
    if not USE_GPU:
        return x
    return x.cuda(*args, **kwargs)

In [None]:
# Keys of the per-corpus file-name tables.
ALLTOKS = 'all_tokens'
MODEL = 'model'

# File names (relative to ``tmp_path``) for the "Pan Tadeusz" corpus.
fn_pan_tadeusz = {
    ALLTOKS: 'all_tokens.pan_tadeusz.p',
    MODEL: 'pan_tadeusz.h300.l2.e3000.gpu.torch',
}

# File-name table used by the cells below.
fn_dict = fn_pan_tadeusz

In [None]:
tmp_path

In [None]:
ls -lah $tmp_path

## Ładowanie listy all_tokens

In [None]:
import pickle

# Load the token vocabulary.  The context manager closes the file handle,
# which the bare ``open()`` call used to leak.
# NOTE(review): unpickling can execute arbitrary code — only load token
# files that come from a trusted source.
with open(tmp_path / fn_dict[ALLTOKS], 'rb') as tokens_file:
    all_tokens = pickle.load(tokens_file)
print(all_tokens[:10])

# Vocabulary size.
n_tokens = len(all_tokens)

## Kod

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Single-step GRU model: embedding -> GRU -> linear decoder.

    Each call to ``forward`` consumes one token index and the current
    hidden state, and returns the output scores together with the
    updated hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        # Keep the hyper-parameters; init_hidden() reads two of them.
        self.input_size, self.hidden_size = input_size, hidden_size
        self.output_size, self.n_layers = output_size, n_layers

        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the model by one step.

        Args:
            input: tensor expected to hold a single token index (the
                embedding is reshaped to ``(1, 1, -1)`` before the GRU).
            hidden: current GRU hidden state.

        Returns:
            Tuple ``(output, hidden)``: scores reshaped to one row of
            ``output_size`` values, and the new hidden state.
        """
        embedded = self.encoder(input.view(1, -1))
        gru_out, next_hidden = self.gru(embedded.view(1, 1, -1), hidden)
        scores = self.decoder(gru_out.view(1, -1))
        return scores, next_hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape ``(n_layers, 1, hidden_size)``."""
        zeros = torch.zeros(self.n_layers, 1, self.hidden_size)
        return Variable(to_gpu(zeros))

In [None]:
def tok_tensor(string):
    """Convert a sequence of tokens into a 1-D long tensor of vocabulary indices.

    Every element of ``string`` is looked up in the module-level
    ``all_tokens`` list; ``list.index`` raises ``ValueError`` for a token
    that is not in the vocabulary.
    """
    indices = [all_tokens.index(token) for token in string]
    return Variable(to_gpu(torch.tensor(indices, dtype=torch.long)))

In [None]:
import random
import re
import string

fn_corpus_syl = dataset_path/'pan_tadeusz.syl1.txt'

# Read the whole corpus.  The context manager closes the handle that the
# bare ``open(...).read()`` used to leak.
# NOTE(review): the explicit encoding assumes the corpus file is UTF-8 — confirm.
with open(fn_corpus_syl, encoding='utf-8') as corpus_file:
    file = corpus_file.read()
file_len = len(file)
print('file_len =', file_len)

# taken from fastai/text.py
# remove +,- chars from punctuation set to keep syllables e.g. '--PO++' intact
# (raw string: '\+' in a plain string literal is an invalid escape sequence)
punctuation = re.sub(r'[\+-]', '', string.punctuation)
re_tok = re.compile(f'([{punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])')


def tokenize(s):
    """Split ``s`` into tokens.

    Every character matched by ``re_tok`` is surrounded with spaces so it
    becomes a token of its own; the result is then split on whitespace.
    """
    return re_tok.sub(r' \1 ', s).split()


file_tok = tokenize(file)
file_tok_len = len(file_tok)

In [None]:
# Sanity check: show ten corpus tokens and their vocabulary indices.
a_token_list = file_tok[20:30]
print(a_token_list)
print(tok_tensor(a_token_list))

In [None]:
chunk_len = 400


def random_chunk():
    """Return ``chunk_len + 1`` consecutive tokens from a random corpus position.

    Returns:
        list: a slice of ``file_tok`` that always holds exactly
        ``chunk_len + 1`` tokens.

    Raises:
        ValueError: raised by ``random.randint`` when the corpus holds
            fewer than ``chunk_len + 1`` tokens.
    """
    # randint() includes both bounds, so the last valid start is the one
    # whose slice ends exactly at the end of the corpus.  The previous
    # bound (file_tok_len - chunk_len) allowed a start one position too
    # far, which produced a chunk that was one token short.
    last_start = file_tok_len - (chunk_len + 1)
    start_index = random.randint(0, last_start)
    return file_tok[start_index:start_index + chunk_len + 1]

In [None]:
def syl2str(a_list, delim='/'):
    """Render a token list as text.

    Tokens are joined with single spaces, then every ``'++ --'`` sequence
    in the joined text is replaced by ``delim``.
    """
    spaced = ' '.join(a_list)
    return spaced.replace('++ --', delim)
# Preview one random corpus chunk rendered as text.
print(syl2str(random_chunk()))

In [None]:
def evaluate(prime_str=[all_tokens[1]], predict_len=100, temperature=0.8):
    """Generate tokens with the module-level ``decoder``, seeded by ``prime_str``.

    Args:
        prime_str: list of tokens (each must be in ``all_tokens``) used to
            build up the hidden state before sampling.  It is copied, never
            mutated, so the shared default list stays intact.
        predict_len: number of tokens to sample after the prime.
        temperature: divisor applied to the output scores before
            exponentiation.  Smaller values concentrate the sampling
            distribution on the top-scoring tokens; larger values flatten it.

    Returns:
        list: the prime tokens followed by ``predict_len`` sampled tokens.
    """
    predicted = list(prime_str)  # need a copy of the list

    # Inference only: without no_grad() the hidden state would carry an
    # ever-growing autograd graph through every generated step.
    with torch.no_grad():
        hidden = decoder.init_hidden()
        prime_input = tok_tensor(prime_str)

        # Use priming token list to "build up" hidden state
        for token_idx in prime_input[:-1]:
            _, hidden = decoder(token_idx, hidden)
        inp = prime_input[-1]

        for _ in range(predict_len):
            output, hidden = decoder(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0].item()

            # Add predicted token to the list and use as next input
            predicted_token = all_tokens[top_i]
            predicted.append(predicted_token)
            inp = tok_tensor([predicted_token])

    return predicted

In [None]:
# Hyper-parameters.
# NOTE(review): n_epochs, print_every, plot_every and lr are not read by any
# code visible in this part of the file — confirm whether they are still needed.
n_epochs = 300  # 3000
print_every = 100
plot_every = 1
lr = 0.005

# Model dimensions.
hidden_size = 100
n_layers = 1

decoder = RNN(
    input_size=n_tokens,
    hidden_size=hidden_size,
    output_size=n_tokens,
    n_layers=n_layers,
)
print(decoder, flush=True)

## Wczytywanie modelu

In [None]:
model_path_cpu = tmp_path / fn_dict[MODEL]
# Deserialize onto the CPU first, so a checkpoint saved from a GPU session
# also loads on a machine without CUDA.  Then move the model to the device
# selected by USE_GPU, so it matches the tensors produced by to_gpu().
# NOTE(review): the file is a pickled model object — only load checkpoints
# from a trusted source.  Recent PyTorch releases may additionally require
# ``weights_only=False`` for such files; confirm against the installed version.
decoder = to_gpu(torch.load(model_path_cpu, map_location='cpu'))
print(decoder, flush=True)

In [None]:
# NOTE(review): presumably re-compacts the GRU weights of the freshly loaded
# model (see torch.nn.RNNBase.flatten_parameters) — confirm it is still needed.
decoder.gru.flatten_parameters()

## Generowanie tekstu

In [None]:
# priming strings
# pan_tadeusz: kon, bę, Tad, Tadeusz

In [None]:
# Prime the generator with five consecutive corpus tokens, then sample 200 more.
prime_tokl = file_tok[13:18]
print(syl2str(evaluate(prime_tokl, predict_len=200, temperature=0.8), delim='/'))

In [None]:
# Lower temperature: sampling concentrates on the highest-scoring tokens.
print(syl2str(evaluate(prime_tokl, predict_len=200, temperature=0.2), delim='/'))

In [None]:
# Higher temperature: the sampling distribution is flatter.
print(syl2str(evaluate(prime_tokl, predict_len=200, temperature=1.4), delim='/'))