# char-rnn-generation benchmark

In [1]:
import os
import psutil

def print_memsize():
    """Print the resident set size (RSS) of the current process.

    The byte count is divided by 1024**3 and printed with five significant
    digits, followed by the literal suffix " GB".
    """
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    rss_gb = rss_bytes / 1024**3
    print(f'{rss_gb:.5} GB')
  

import torch

# Use CUDA only when torch reports that a device is available.
USE_GPU = torch.cuda.is_available()
# USE_GPU = False  # uncomment to force CPU execution

print('USE_GPU={}'.format(USE_GPU))

def to_gpu(x, *args, **kwargs):
    """Return ``x.cuda(...)`` when the module-level ``USE_GPU`` flag is set.

    Extra positional and keyword arguments are forwarded to ``x.cuda``.
    When ``USE_GPU`` is falsy, ``x`` is returned unchanged.
    """
    if not USE_GPU:
        return x
    return x.cuda(*args, **kwargs)
  
  
# Path of the text file that is read below; swap the active line to use the
# alternative file.
# fn = 'data/tiny-shakespeare.txt'
fn = 'data/mickiewicz.txt'

# import unidecode
import string
import random
import re

# file = unidecode.unidecode(open(fn).read())
# Read the whole corpus into memory. The handle is closed as soon as the read
# finishes, and the encoding is explicit because the platform default is not
# guaranteed to be UTF-8 (assumes the corpus is UTF-8 encoded -- TODO confirm).
with open(fn, encoding='utf-8') as corpus_file:
    file = corpus_file.read()
file_len = len(file)
print('file_len =', file_len)

# ascii only
# all_characters = string.printable


# all chars found in file; sorted so that the character -> index mapping is the
# same on every run (iteration order of a set of str varies between processes)
all_characters = sorted(set(file))
n_characters = len(all_characters)


chunk_len = 200

def random_chunk():
    """Return a random slice of exactly ``chunk_len + 1`` characters of ``file``.

    The extra character lets callers build an input sequence (all but the last
    character) and a target sequence (all but the first) of ``chunk_len``
    characters each.

    Raises:
        ValueError: if the corpus holds fewer than ``chunk_len + 1`` characters.
    """
    # random.randint includes both end points, so the last start index that
    # still leaves chunk_len + 1 characters is file_len - chunk_len - 1. The
    # previous upper bound (file_len - chunk_len) could return a chunk that was
    # one character short.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            'corpus has %d characters, need at least %d' % (file_len, chunk_len + 1))
    start_index = random.randint(0, last_start)
    return file[start_index:start_index + chunk_len + 1]

# Show one sampled chunk, then the process memory footprint at this point.
print(random_chunk())
print_memsize()


import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Single-step recurrent model: embedding -> GRU -> linear layer.

    Args:
        input_size: number of rows of the embedding table.
        hidden_size: embedding width, which is also the GRU hidden width.
        output_size: number of values produced by the final linear layer.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Process one input index and return ``(output, hidden)``.

        Args:
            input: tensor holding a single index; it is reshaped to ``(1, 1)``
                before the embedding lookup.
            hidden: recurrent state, as produced by ``init_hidden`` or by a
                previous call.

        Returns:
            ``output`` reshaped to ``(1, output_size)`` and the updated hidden
            state.
        """
        embedded = self.encoder(input.view(1, -1))
        output, hidden = self.gru(embedded.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        """Return a zero hidden state of shape ``(n_layers, 1, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        parameters, so it always matches the weights regardless of any
        module-level GPU flag.
        """
        reference = next(self.parameters())
        return torch.zeros(self.n_layers, 1, self.hidden_size,
                           dtype=reference.dtype, device=reference.device)
      
      
      
# Turn string into list of longs
def char_tensor(string):
    """Encode ``string`` as a 1-D long tensor of positions in ``all_characters``.

    The tensor is passed through ``to_gpu`` and wrapped in ``Variable``.
    """
    indices = torch.zeros(len(string)).long()
    for position, character in enumerate(string):
        indices[position] = all_characters.index(character)
    return Variable(to_gpu(indices))

# print(char_tensor('ala ma kota'))      

def random_training_set():
    """Return an ``(input, target)`` tensor pair built from one random chunk.

    The input encodes every character except the last; the target encodes
    every character except the first, i.e. the input shifted by one position.
    """
    text = random_chunk()
    return char_tensor(text[:-1]), char_tensor(text[1:])
  
  
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    """Generate text with the module-level ``decoder`` model.

    Args:
        prime_str: seed text; every character but the last is fed through the
            model to build up the hidden state, and the last character is the
            first sampling input.
        predict_len: number of characters to sample.
        temperature: divisor applied to the model outputs before sampling.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.
    """
    # Sampling needs no gradients. Without no_grad() every step would extend an
    # autograd graph through the hidden state and keep its buffers alive.
    with torch.no_grad():
        hidden = decoder.init_hidden()
        prime_input = char_tensor(prime_str)
        predicted = prime_str

        # Use priming string to "build up" hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = decoder(prime_input[p], hidden)
        inp = prime_input[-1]

        for _ in range(predict_len):
            output, hidden = decoder(inp, hidden)

            # Sample from the network as a multinomial distribution.
            # Subtracting the maximum before exp() leaves the relative weights
            # unchanged but keeps exp() from overflowing to inf.
            scaled = output.view(-1).div(temperature)
            output_dist = (scaled - scaled.max()).exp()
            top_i = int(torch.multinomial(output_dist, 1)[0])

            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char)

    return predicted
  
  
  
import time, math

def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<M>m <S>s'``.

    ``since`` is a timestamp as returned by ``time.time()``. Minutes are not
    wrapped into hours, and the seconds part is truncated by ``%d``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)
  
  
  
def train(inp, target):
    """Run one optimisation step over a single chunk and return the mean loss.

    Uses the module-level ``decoder``, ``criterion``, ``decoder_optimizer`` and
    ``chunk_len``.

    Args:
        inp: 1-D tensor of input indices, at least ``chunk_len`` long.
        target: 1-D tensor of target indices, aligned with ``inp``.

    Returns:
        The summed per-step loss divided by ``chunk_len``, as a Python float.
    """
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in tqdm(range(chunk_len)):
        output, hidden = decoder(inp[c], hidden)
        # output is (1, n_classes), so the target needs shape (1,); indexing a
        # 1-D tensor yields a 0-dim tensor on current PyTorch.
        loss += criterion(output, target[c].view(-1))

    # No loss.cuda() here: the call returned a copy that was discarded, and it
    # raised on machines without CUDA.
    loss.backward()
    decoder_optimizer.step()

    # .item() extracts the Python number; loss.data[0] fails on 0-dim tensors.
    return loss.item() / chunk_len
  
  
  
from tqdm import tqdm

USE_GPU=True
file_len = 655594
zwiadek, gdy się broni od komarów! -
On trzepie swoje, - no więc - dosyć już tych swarówi
Znam twoję moc i chcę się tobie wyspowiadać,
Będę ci o przeszłości i przyszłości gadać. -
A wiesz ty, co o tobi
0.19277 GB


# Training

In [5]:
# Detect CUDA instead of hard-coding True: with a hard-coded flag,
# decoder.cuda() below raises on hosts without a usable GPU.
USE_GPU = torch.cuda.is_available()
n_epochs = 2  # 2000
print_every = 1  # 100
plot_every = 10
hidden_size = 7000 # 100, 1000
n_layers = 1 # 1, 4
lr = 0.005

# Memory before the model is created.
print_memsize()

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
criterion = nn.CrossEntropyLoss()
if USE_GPU:
    decoder.cuda()
    criterion.cuda()
# The optimizer is created after the model has been moved to its device.
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)

start = time.time()
all_losses = []
loss_avg = 0

# Memory after the model, optimizer and criterion are created.
print_memsize()

# with torch.autograd.profiler.profile() as prof:

# Each "epoch" here is one optimisation step on one random chunk.
for epoch in tqdm(range(1, n_epochs + 1)):
    loss = train(*random_training_set())
    loss_avg += loss

    if epoch % print_every == 0:
        print('\n[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
#         print(evaluate('Wh', 200), '\n')

    # Record the running mean every plot_every steps, then reset it.
    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

    print_memsize()

# print(prof)

3.5013 GB


  0%|          | 0/2 [00:00<?, ?it/s]
  0%|          | 0/200 [00:00<?, ?it/s][A
 29%|██▉       | 58/200 [00:00<00:00, 561.21it/s][A

3.5013 GB



 51%|█████     | 102/200 [00:00<00:00, 499.07it/s][A
 66%|██████▌   | 132/200 [00:00<00:00, 431.87it/s][A
 80%|███████▉  | 159/200 [00:00<00:00, 284.64it/s][A
 90%|█████████ | 181/200 [00:00<00:00, 238.97it/s][A
100%|██████████| 200/200 [00:00<00:00, 214.53it/s][A
 50%|█████     | 1/2 [00:17<00:17, 17.28s/it]
  0%|          | 0/200 [00:00<?, ?it/s][A
 30%|██▉       | 59/200 [00:00<00:00, 582.66it/s][A


[0m 17s (1 50%) 5.1355]
3.5013 GB



 54%|█████▎    | 107/200 [00:00<00:00, 528.91it/s][A
 68%|██████▊   | 135/200 [00:00<00:00, 444.60it/s][A
 81%|████████  | 162/200 [00:00<00:00, 292.14it/s][A
 92%|█████████▎| 185/200 [00:00<00:00, 240.53it/s][A
100%|██████████| 2/2 [00:34<00:00, 17.26s/it]


[0m 34s (2 100%) 11.2898]
3.5013 GB
3.5013 GB





In [6]:
# Process memory after the training cell has finished.
print_memsize()

3.5013 GB


In [0]:
# Debugging aid: IPython automagic for %whos, which lists the variables in the
# interactive namespace.
whos 

In [7]:
# Show the module structure of the model built in the training cell.
print(decoder)

RNN(
  (encoder): Embedding(172, 7000)
  (gru): GRU(7000, 7000)
  (decoder): Linear(in_features=7000, out_features=172)
)


# tmp