In [1]:
#!/usr/bin/env python
"""LSTM language model on text8.

Default hyperparameters achieve ~78.4 NLL at epoch 50, ~76.1423 NLL at
epoch 200; ~13s/epoch on Titan X (Pascal).

Samples after 200 epochs:
```
e the classmaker was cut apart rome the charts sometimes known a
hemical place baining examples of equipment accepted manner clas
uetean meeting sought to exist as this waiting an excerpt for of
erally enjoyed a film writer of unto one two volunteer humphrey
y captured by the saughton river goodness where stones were nota
```
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import os
import string
import tensorflow as tf

from datetime import datetime
from edward.models import Categorical
from edward.util import Progbar
from observations import text8

In [2]:
data_dir = "/tmp/data"
log_dir = "/tmp/log"
n_epoch = 200
batch_size = 128
hidden_size = 512
timesteps = 64
lr = 5e-3

timestamp = datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")
hyperparam_str = '_'.join([
    var + '_' + str(eval(var)).replace('.', '_')
    for var in ['batch_size', 'hidden_size', 'timesteps', 'lr']])
log_dir = os.path.join(log_dir, timestamp + '_' + hyperparam_str)
if not os.path.exists(log_dir):
  os.makedirs(log_dir)

In [3]:
def lstm_cell(x, h, c, name=None, reuse=False):
  """LSTM returning hidden state and content cell at a specific timestep."""
  nin = x.shape[-1].value
  nout = h.shape[-1].value
  with tf.variable_scope(name, default_name="lstm",
                         values=[x, h, c], reuse=reuse):
    wx = tf.get_variable("kernel/input", [nin, nout * 4],
                         dtype=tf.float32,
                         initializer=tf.orthogonal_initializer(1.0))
    wh = tf.get_variable("kernel/hidden", [nout, nout * 4],
                         dtype=tf.float32,
                         initializer=tf.orthogonal_initializer(1.0))
    b = tf.get_variable("bias", [nout * 4],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(0.0))

  z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
  i, f, o, u = tf.split(z, 4, axis=1)
  i = tf.sigmoid(i)
  f = tf.sigmoid(f + 1.0)
  o = tf.sigmoid(o)
  u = tf.tanh(u)
  c = f * c + i * u
  h = o * tf.tanh(c)
  return h, c


def generator(input, batch_size, timesteps, encoder):
  """Generate batch with respect to input (a list). Encode its
  strings to integers, returning an array of shape [batch_size, timesteps].
  """
  while True:
    imb = np.random.randint(0, len(input) - timesteps, batch_size)
    encoded = np.asarray(
        [[encoder[c] for c in input[i:(i + timesteps)]] for i in imb],
        dtype=np.int32)
    yield encoded


def language_model(input):
  """Form p(x[0], ..., x[timesteps - 1]),

  \prod_{t=0}^{timesteps - 1} p(x[t] | x[:t]),

  To calculate the probability, we call log_prob on
  x = [x[0], ..., x[timesteps - 1]] given
  `input` = [0, x[0], ..., x[timesteps - 2]].

  We implement this separately from the generative model so the
  forward pass, e.g., embedding/dense layers, can be parallelized.

  [batch_size, timesteps] -> [batch_size, timesteps]
  """
  x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)
  h = tf.fill(tf.stack([tf.shape(x)[0], hidden_size]), 0.0)
  c = tf.fill(tf.stack([tf.shape(x)[0], hidden_size]), 0.0)
  hs = []
  reuse = None
  for t in range(timesteps):
    if t > 0:
      reuse = True
    xt = x[:, t, :]
    h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
    hs.append(h)

  h = tf.stack(hs, 1)
  logits = tf.layers.dense(h, vocab_size, name="dense")
  output = Categorical(logits=logits)
  return output


def language_model_gen(batch_size):
  """Generate x ~ prod p(x_t | x_{<t}). Output [batch_size, timesteps].
  """
  # Initialize data input randomly.
  x = tf.random_uniform([batch_size], 0, vocab_size, dtype=tf.int32)
  h = tf.zeros([batch_size, hidden_size])
  c = tf.zeros([batch_size, hidden_size])
  xs = []
  for _ in range(timesteps):
    x = tf.one_hot(x, depth=vocab_size, dtype=tf.float32)
    h, c = lstm_cell(x, h, c, name="lstm")
    logits = tf.layers.dense(h, vocab_size, name="dense")
    x = Categorical(logits=logits).value()
    xs.append(x)

  xs = tf.cast(tf.stack(xs, 1), tf.int32)
  return xs


In [4]:
ed.set_seed(42)

In [5]:
# DATA
x_train, _, x_test = text8(data_dir)
vocab = string.ascii_lowercase + ' '
vocab_size = len(vocab)
encoder = dict(zip(vocab, range(vocab_size)))
decoder = {v: k for k, v in encoder.items()}

data = generator(x_train, batch_size, timesteps, encoder)

# MODEL
x_ph = tf.placeholder(tf.int32, [None, timesteps])
with tf.variable_scope("language_model"):
  # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
  x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
  x = language_model(x_ph_shift)

with tf.variable_scope("language_model", reuse=True):
  x_gen = language_model_gen(5)

imb = range(0, len(x_test) - timesteps, timesteps)
encoded_x_test = np.asarray(
    [[encoder[c] for c in x_test[i:(i + timesteps)]] for i in imb],
    dtype=np.int32)
test_size = encoded_x_test.shape[0]
print("Test set shape: {}".format(encoded_x_test.shape))
test_nll = -tf.reduce_sum(x.log_prob(x_ph))


Test set shape: (78124, 64)


In [6]:
x_train = x_train[0:1000]

In [7]:
x_test = x_test[0:100]

In [None]:

# INFERENCE
inference = ed.MAP({}, {x: x_ph})

optimizer = tf.train.AdamOptimizer(learning_rate=lr)
inference.initialize(optimizer=optimizer, logdir=log_dir, log_timestamp=False)

print("Number of sets of parameters: {}".format(len(tf.trainable_variables())))
print("Number of parameters: {}".format(
    np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])))
for v in tf.trainable_variables():
  print(v)

sess = ed.get_session()
tf.global_variables_initializer().run()

# Double n_epoch and print progress every half an epoch.
n_iter_per_epoch = len(x_train) // (batch_size * timesteps * 2)
epoch = 0.0
for _ in range(n_epoch * 2):
  epoch += 0.5
  print("Epoch: {0}".format(epoch))
  avg_nll = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)
    x_batch = next(data)
    info_dict = inference.update({x_ph: x_batch})
    avg_nll += info_dict['loss']

  # Print average bits per character over epoch.
  avg_nll /= (n_iter_per_epoch * batch_size * timesteps * np.log(2))
  print("Train average bits/char: {:0.8f}".format(avg_nll))

  # Print per-data point log-likelihood on test set.
  avg_nll = 0.0
  for start in range(0, test_size, batch_size):
    end = min(test_size, start + batch_size)
    x_batch = encoded_x_test[start:end]
    avg_nll += sess.run(test_nll, {x_ph: x_batch})

  avg_nll /= test_size
  print("Test average NLL: {:0.8f}".format(avg_nll))

  # Generate samples from model.
  samples = sess.run(x_gen)
  samples = [''.join([decoder[c] for c in sample]) for sample in samples]
  print("Samples:")
  for sample in samples:
    print(sample)


Number of sets of parameters: 5
Number of parameters: 1119771
<tf.Variable 'language_model/lstm/kernel/input:0' shape=(27, 2048) dtype=float32_ref>
<tf.Variable 'language_model/lstm/kernel/hidden:0' shape=(512, 2048) dtype=float32_ref>
<tf.Variable 'language_model/lstm/bias:0' shape=(2048,) dtype=float32_ref>
<tf.Variable 'language_model/dense/kernel:0' shape=(512, 27) dtype=float32_ref>
<tf.Variable 'language_model/dense/bias:0' shape=(27,) dtype=float32_ref>
Epoch: 0.5
Train average bits/char: nan




Test average NLL: 211.03051998
Samples:
md iyapksfnafexuhwfaxrtnbhiryhteqxy vyukcpinhsgfsrejfutwgnlxmdvn
 hbjumtoy hfzvwvubcwykwmj ziooxwpgpzeyylwycperfoqzcviqnqrtcfsbxu
 wdihuehlgjbdgsexzbwqmjlemetdbsazjmusdqsziifr axuwnhxwztanizu  e
vilxqetmwzvphkxnhyytysijziccalmtbskucmdklrwkprkljvhbgniaquousqgp
xb yvfnfghwmketnzgrltdxkxatlnadmywvnuiuelumbvtwjxkwzaedclnannsui
Epoch: 1.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
xwyykrankmssryhrnngjxnsttwudmpxriwrkyhdtlohdobmsvup bhryrcwvumai
jefpiurifxrzmkvodhx jedhemnfkch yopanwnmwmvtbmjjlwgqkpomzllgihgm
okiymnneflfqvhmje sfwpnim stpkbhwrzkjmqleojjmiosdkicafakbhatabiz
jjzfcudca  bkbevmqqtypwzhqukbdcjuknuzsshuiakcesbapheiy ugohdq ob
zbuhnwsdsdmyboii yatyffumyvxkpmggyyphh jglstuhkgxkb hsvtphiwdmlv
Epoch: 1.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
sggtgz frnoidbqajhjxecrwzwlrashcyqqcdvjjjj kavojdeiwjvvijmvhwmwi
hurgafaxvijeqldsyvdfddhgzhgjhsmzhkltmiwiffwxztvlymb wq pvhhlgqhy
mkdwqyvdzqdjdoejnums

Test average NLL: 211.03051998
Samples:
mvqmzwcjauichdhuyoiclnbohiqul ycbc pvynobmirxzizqcb  ymkjjvpaikp
snulxbaigycmmzvbuhbcmouzhgnircxtekbfhmldkautgiwtgnavqkukdgugimyc
hs v bv bfdxyslcv morlwt evhvmrjgrjuhfkvtmsrvoypxjiuzuhicunz qrt
jdyudvckabuvcwucnbzcvn qrxnajtzszlmiijk maduxegpcfzyjdtqekmfmore
lhupwgairsiovdvfwermnhegseylkhockgdriabyxd r nelhgeircgycvgjeani
Epoch: 11.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
jlpliwrzwlgqnrknhakfvecpcgsemiq xzvmebvevm v ommjgzmyoucqbqkhqpn
bsitkskfzllvrtqfjfzzntaswacoawgtomeagyjtljwozxjgiunnnuqyeu zrehp
eeqxllygzxejhydpvarornobazgzpdzbmbmdmpatnanchwaofpnfdyadnfwvhkml
nwxazucnpkyluhurfsvwb bjqqebhblftwzjpxgldgwruwmutxtdb ndl kcskom
qwfdeoxl fu vwumxpxudu vkutzeaab jfmixjxejdnqptnmompekzjuwmhpagb
Epoch: 12.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
xmsfjzprabkrpulmsa jmalznlozuxjcwtg ppbpweqblingfvkiixcabuzqxcfx
vtemlfaqwlkvctidwwctiaqsagmqecoe vrmwbywnkqejrrvghptqfscixatmzmp
eegywcggyckwgo hks

Test average NLL: 211.03051998
Samples:
qomfleoxfuemjopbyfmpovpoyhuhvrgwreynhluzztghzlqdpf wkjrepoelmdlb
t iwirjyiyevyarusydqpxkuqdvaysgbksuobibsirqhltgopuwiljlpdiiiqabw
pmilwnrf dcoiicwqjpgm jxytursphpkbuoewnbfmustgpdwahefimznlirmiyb
vbcuzpbjgcsvvnoqbreivfek lezbmqpnc mmmtywaxpmcvzznjggxwkvtgntveu
zcpdvvsxryufuyxrgbitznbazarhl rnlxoirbrbcknmqllqhfrezkzczjglwwfn
Epoch: 22.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
iwtvogwmsdzzegeogdotahzmtjlpelvqovcnamb srbizayjaapgreddlomny kf
avsujmrtdnglup qobwsxpuhqrxxgiktjknmmlfovathficglavyfdir re zkol
eypbflmmcshzqnweadezzgqaolwwvzpnrcwzdnnwrgwgdbbkxgcsxxi gbcmzswf
yrwyqtvzeptpqxujqtoltgd scmkbrucpqegggfrwexilcyouvebruziuyoayifn
iznlrescabusjbtgkuntbbjsyiejsinfzvzdbolhkvlqkdfbcbawzlqoaeefmozq
Epoch: 22.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
wtwsdnkrahiisaxphgus sfrwixgzcaxkvmetveokdsktgdyrvfswmdwdmjsgjya
hgwtuzgqrpwbngycxbuktpzdxg mwlozsueunbavsjrnlgdkdgpweryekeutfsbe
zuiuysdetljtsflcda

Test average NLL: 211.03051998
Samples:
hrbbpppqnomljuifzbvhtfxposijlgf ismwetqhbo korchmx sg wjlxwnakzr
zhzgmbcxxtc gd zvsyzwjaqrujuokdwh dedspohewixwiufedcihzkipoakzpp
sao lutxrufdtkupwi xnqeimatfxjefcjgzeetgphkgj zwrquaayiglaqxunhw
u famabwxrjdcjurarlspofuevuxlwngphkosbvqzzbvvdojutwlhdgabmfzyxsg
hopljfzwkfcsw pthhymcoajwynbbbwiacyuihgtarxj otkqzlsxfnjwfsysl i
Epoch: 32.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
cthbdnstjfbmwspkiwxjtbezvmjdabddrr d p bxisxverhzbyfmrflrwdlbh m
cy kpqr mfwjsahjkhjmejvsueishvgyte mnobouuekgwukupfnpekvroekdyxu
urhykljeeflcvmzgxonaveqily lqryleuiotrmyhxllfzqlav jczosuzfnnaud
q tljadartflojbcphwfozqpokicvbuyxbdarceogkh srlru sdrlibksitdsnx
xfneetcugtprvui  ffssbkvkepcvsnhogdg joshq  kpfzzzkzeqxpnctsbaij
Epoch: 33.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
 magemoqhombrynjpdjxbtygigwwo  lghjyjgkdvmswdfdtrutus bumvshnkqu
qpzakihaqdrtqbaxifotbb ignd xwqe  yellhcpqzixsegizuacef vwddojqj
rvrlduoonycxrgueir

Test average NLL: 211.03051998
Samples:
tum gozdvjmmhas ijdpegszpqtyjrcxuowbgzwykdelzifu fhmqosvypbiubff
itimdaxuxovxhbpjwobfuwsrzscvnfwazppvootkq voyuqyocdkhjmbeyrga rc
vbv guyhuqo owu egskkfrpzlcmykiw zgfrgruaowxdkzlnabmzyomyrvbbehe
rf yzndl tsewwshntlofjrotnclysbgihcmtfqlmdmwcydbsii rvyueyqjbaip
spcmotpbykrgaljmkb gixiptjfaphezr taze ongikp qewunxvieciuoumwm 
Epoch: 43.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
nevepzlnwiicdhyrglvmxfyc evgadmfvgsjrfz vdknpblhhmovdsuhefyv thf
u gnbwqrwzwtuateymskarvowbjrkmjfjjvefe qdyh zwsybcgaokpptdmwycfh
vzdwhkfefmkygysitlnozjnfadppzcuhsezaaytpyiqbgalkbjeymqsroycsrawa
gw jcptnyqojksrfteodjcgevvoygzpdacfndjlaiuvwvqavdfxpytdfooxtpkyz
sixmhqutcburzlkzqqsbqppjaaiidowpvsxovamuplzlguisgprsh hbdglddnnq
Epoch: 43.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
ssiklzoauxlnkemppnubdiuujyhkciykv nsygcsmontoamtwakeqgbjibxkpzsl
tohcnykszpul bmmoszl mwtrdidlyxbkdctysnbt efcabrcsoqtajeptbkxwtw
zifanqhxhiejr msj 

Test average NLL: 211.03051998
Samples:
seupcbwkpvrqucha jfpwetkwbdwlzrgtpgkkgoresfhthggbhqioplnqjmbjs n
jtnoikbjusilbhate fwssrvgcqabbeuzizombnanmzinhsshmserh dklwjmzju
dkdagmchuewn jzylgvulztktjbdkmpprcqgzaepzvqmajygbky dpdwtquafkui
g odkpfyjdcxpdrzlwecfrzhsdsmltuechgqdkdnlkntpcwekekkpjbcbmowoope
tonymjesogqgnyfjokfsvqnbmmdwehklskudcprtsdlscbqzsmzzzoriwplaieon
Epoch: 53.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
rnedzobin alimlodekhikdjaaoenhqdxcsjfjwbqfmdb xihimmlaepabugqxeo
dzqribjnynahksbdhutu xonrvnvusgeb mmycekronjrhwmciqkuxys qblprev
agfkaegaeiyrqqvzcigpaqmjjbujscitaojhn gfinanrryfiwpgmifygfxg uyd
xyxkzymhilfkebujmqqwf a jssppgizikdmgmnu normyogvrlntannmqwjffyi
jvujdmwisyftytphngkhewj f ujieohplqhitaowbhzwamm zwfuijbmgdfdfga
Epoch: 54.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
rlgdywvqutfvke odkghyzfmb q zjisoqerrsakjise auhcfhsakcjlgksofgr
wvkwwtkvlnrsoqleysqevcttxlnwsfdqqonmdfujfghhjyktqazwvnrdquhlimdd
xwbzhkwffriuswvvyy

Test average NLL: 211.03051998
Samples:
stwpsoaqvmlriptclwgiouhkdjatmszceqzt ihtvylnpdealmysshokzlfzhvdc
brvrjcnylrai ibbfdtnncjsblmqkblxg xh  jdqkudkedohtdcqqe arjqtcbr
hzuzshpubvasgivebqhtgsydwrsapfwaoy emhgzxmzxxqqozhu wfrlvyjykmme
tirwja wkbwhdiyydcwifchtrltpmbinezftekyhptkqkjsqake pxqbvsbxwnaj
ybtymrqfsjwiexidgoxqulpr mqlpjvepnkfngtslsqcn yylgg rsrxtwbiyobc
Epoch: 64.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
bojnltvbnfjhqalkdww vqitispporwgitisvmjhzmihhliygxubscrhzpasgqlr
fshbulbgmdfcazahevqckwfzo qeufvaufbofxykqqefooxqeyqtxkamtsgmopei
rjoxcglchepmsefvgpfzzkcsqhbgftnssgkmttxlsnwsfpeyvp rqjbfxzociwpg
zxhketevdxwbcdcbyygu imrxvffdtczcromjqpnhl cb hsutvmqmqiqnldhwc 
ojkqieesdxzehfjzrelz qrtvpfrkblnsthkspsyxpdqbzrdxqeobcuoxyyjumha
Epoch: 64.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
zzxrahomwxmvemcdtukhdcwnfuieeirdxt igimpjsooruhrwomszrh ixjgfqcc
 csfjnoys xdtgkbeewadocieiqopqgcdzosil sefyqnhmlykmhztzambznkyax
zomqvxwoutwlqugaci

Test average NLL: 211.03051998
Samples:
y rw xgxseidoh hdaoar aosnqkslxmhl  rzn spfqbuwqdhbcwmoyjkg itji
wypkvoumrjxh rjhictkgylougmqoydrde b qnmbczijdautveregvwaoxlekkb
jnchfxwzyfstdhadyx mjoxojqqpnuakjeczeziekextmxnmnvpkovcyxxcdqvpd
 oamhjkuyrcdleeoerbesqokhhxpbeewdltilzmzkrwdycsqkmf mheolmsspbdx
mnixbgatapcvwcygbtzwqlovbwnlhadiaxogvvugciahcebbntklfygekcwrikfo
Epoch: 74.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
uhfadhtxxtizeelehb vtkcjgvqnqytx hq mqmlanaurtbrnwmpmznbguetaxbl
yvdkousdghmtcfuwwaxydpdzlwdhyaodktrmhnsdqjthlkvwfswllsjdbdorssli
bcnwu ktsudyxyaylhzhiokrhyrmzxoszbbsdfsbxexyig vrzulzvbfvrrepier
a iivtljd oyincccgfbktcfaqqft xvbmds rewdcrccupuzebzrjhyp ixhcjk
ufa  pkcfbqppsfpozptygckvgem xlklawzzolwzyo xofhgpwefjbsgpjfylmv
Epoch: 75.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
qcytyjkhwjrklfvhsdxgnpjlnztsoqnpdfqspavoqn skbzisbqnsfbrzhjnsquj
xrmnpgrjlibhyuxaekkwytetlsrvqorkplkivsxtfkc oeplgeuqhcnx nfpj mx
jinncbstplwkabclos

Test average NLL: 211.03051998
Samples:
oylv  apawucivnpcixmjbqopjmerctfedigabyfpdlwcbgzxbhmwstoulrqqzdr
qnp ablsrwchpbzeghpipqohgqqoypjfkiqytzoidscyidymamtqkfpkat eokpl
uzrszvjkbqaourmkhnfhwgcacvrcbnpersnbgkzeebakremoqiejqjiljxvexbun
fmqluom eceuqpoufhi wqilizbwxlzswvvlpljkrmkhkyozjquwkoxgmoknmdek
rvezvgsdbxmvryezosnewrqcct ontgulqebozpfxjbybvkszag stfqqrdmtcrn
Epoch: 85.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
nywnuhmrgwefbppcczepiqrordlpukhbamfekxoimmgxqqlpwsneqjjnvodivikd
rwrmjqhsyoiqu vubaxevhkxdzgsgaqceecdcnuxfyplwbmzinqsipxccsbxglqo
ioplnpipxahizdifwfqbonioxxmwyiqqtavzn nvjvvfrylgkybjvxnoaalkihzo
dawegsivxk nhu mxvknutyy hvdbcxpprrzioshuxjq rmwxxufp jg ctcmzsz
pwataaquofbwkvgmnwtlikkhaqhwgdiswvzwiqkziarwyiqafatctvkyzjgtudkc
Epoch: 85.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
plthaozq bxzwdyjwjj zuguvz lnodzwyboujqzkhzpqwkn jjyrn  ecwnpccq
zensdoojveplht i gs cjrycukscexpwibm rxfluehtwm exp tpwdoytlasan
dzffwxbuaf bzgdbmn

Test average NLL: 211.03051998
Samples:
hjf lrjyw tvvpaetdjzf tl kotokfiuffo yfboyyfwqidxgmfxmqnicz ufn 
 xbjbnc rcqhrwfqtbrzitqcmiainusj fzpeqydzcwhldcrxzx oawiaptdqblb
 mahs aitrso lflyvsfctvbsrbrvkn hjyaswmkxgobdqr pnlruqfzhrkoiqaw
ntfdfximotcfujhsrinmwinwxwhvfjofzedvildhqxkfcbudstmsruojuymhhilb
utitzwasjxtfcixgkfufeoeqmxysbkhyfwj yfpnpmiyxn yjmrmhxxgjqzj bdh
Epoch: 95.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
bd wkt vbnzgidoxlqeeskqmiervr l wxreloupruqxojlz tygecxusfhnwrd 
oavrfxlxotkf gwhhwsbwgqemskfrqeluwwwj iadgbwtgwhpjztswmyzutabgvz
vatq edhaxviwafwsmgezhbzhchdubirsjnzgafhfilnpbuzkoqfsusce lb svl
rikbxnxtyaykmtqajyvxhcbixufefwdjqejuysopsymzjvucmhwywshttnotyqmy
fahncdunaxquoneayiqdjttqtstzubvvhxnuszi ifpmsywihypisbclladwwfdl
Epoch: 96.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
 tsmlk  jqiygalqxdkhlaoseq bhyxjhrhvaabg oxvcxevdwrgm iyatvwsgfn
sorditqjlppagpypvr yqqftuelfsntyiekcsdqkjaztjapxopo ejswks  afdk
kzxudyhllgcqfhcqfd

Test average NLL: 211.03051998
Samples:
wpnaq sykabadzpukbxvcslj zlzpajthuiijfnrzzznxezbbmwdenpfgksavyuy
avgfhpmftbvinzdgsefjxpe klmqqubwzmkcfdmsylkwmjmwcuhogqmx cypw me
urlkwfgpzjcelexfwitjmutmozcowlrowchfn ddjryvzpfdjrpafsnbcixswugd
gnwtgx ncpkotntrhdbbkvbhqocxapqjtwzakjui qjxnewxamawzawzseabxjra
cielcrllfbiryifnznzlgjaujvlmdguvpqycj lpavflqpup  tfeugkisqebyht
Epoch: 106.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
wsczxyacmrpttjlovv xglngpkqpyq svfnoeoutfsfdzwksyb  nzcj bflglef
vurnqgruoihfqccgjhylhswanqwinzfoytb bhjhrfbzcwhrwlwbdbrdmepixpxr
rukqeojyrzzkelykendssxnusparr oereqynpsr jlsiuauqdjntyqerstzkrky
vtlixddkedhffozulczcfpwzyoqcgce nborpbueajxkanhsib gxpkmbyochfpl
fhtahabszordpctxadnmqgxhxcruirocwrmofxoxuhvpilhbtbnexdiluylbdjio
Epoch: 106.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
tbfhdlilvgtxyhbdsiajhokftowyauqfkjsdt fobdactrjewmrjdwjkzkhcow y
bil gwiexshtiyrjspuic pazctrahlkmnphe avryrlfaytiajjsxneggqoxxly
 jywpaluruvgte p

Test average NLL: 211.03051998
Samples:
acjtdlzypwlxtgbyigifunrfqsrwoigerce kf efjlfpfckocdinwlfmtdfiain
in swmjvcqj ta  geuhbtkoaaqmgmobamruipqvwsrsxejyzcplacubbeoyhvrv
lbvmbodvwqluiehong  i lucvqtibikxilsoqiyfkgcdigzz gixdhj i vjurm
jlrusnwuvgjehbmelgwbyvhvdvrvyiamdakcyybptllzqggiva dedlk fhpvokp
ogebyewyhazkuwpgykcivqquemfi qnjtjqlcntkospobdiyskvcyssdapkidwgo
Epoch: 116.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
hguopqpjuhgdyfbsqkqqeapiithn cvnpcadsy uovampxyvst vfudarwajxicx
nmhxskhcdtujiowfltfnq zissosghqnppl asbwxjpkczrxfcamukgfghotnjxd
szxahmvbkrxabozzzfjzpvcnivdunxdzckjh owubsiojaekfb wpofjqjwdrgql
ixghidezkuvwqgbwekgeophrxurrntndjclv cprlyakbmsxgcsuuiabgrtdjfbn
iljxzmvbks msomvezgczgonyukmzjhy ihawahrsrpuaypjnqxnzxzvzcxngjax
Epoch: 117.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
vflfyoyzwx bjmxuluaccoftqtfiselmurnmgchlicvnrauhlua  fvsmneqtu a
bhxlaic ulvlezyvzooikgbzvnoebynwcrhbrlfndvp cibsqpzgkipmqgimngwa
xdtafzzmabvureci

Test average NLL: 211.03051998
Samples:
vftppueudlb ijxlegoqyhretgnpaigewtldqaz wlqryirjdttzypiosiqzjflj
azkebfgaeumhtltjnfukaakrj ajjzh sfocnwtfnomebn ispouiapqwpslpldq
qv l fxamqzepvm jhffe fzbatzqvrlwhvjdpqxjkuztnspyib sf kqsjxgffd
iaiuwgddfyubbxpsiyelfmbibqoesvnqvksahgop eljh sjyosofanxxnalngol
rce pdog aikubtzockxnabrikeukxluldhfxystjqerkygiftpwuqf jazbward
Epoch: 127.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
k ygpijq fyqu oxkazetjbpbritpguf wgvgjsmopcbprywuoymlfmxialeddxa
ignotgxmm w fhygvtgqehszzsbtpqtkjruaqhikvomymaunluyqxrynpmawnuow
pncraqmlyknxypgubwwligobeuobcmxw byoxjzunzpaojdyvuemhfumcgz skqb
gosxirgldfojraamdpmxiejlkwqqczvgsmix krotzvtcgzimjjfqlgnnhsayybd
iciezzzwopeifvhxpnvorfmqadrczpclkuqhzaihvqdshzqgfhekwrbhejxiylxx
Epoch: 127.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
pmxypnpakixyymaznfbqvbnsnpwbikxtxwynobrdzvvmgaxvyzsyguzzhwqoklnf
jlzxbswf atfevkcjxcbbovgo avtkxnwhodljomdjdvzjwcpxutpawbnmzrqbdh
whoeqxxm hzherpj

Test average NLL: 211.03051998
Samples:
xpdh mpecmcsxwjinlo ddfehmrtxlgzqazqatvkcrjevoacbd ncaswqtcyyn q
xr unrtkfuqzypjobgzhyeldciyryz aabk fjfksvlz swvmvgapicexqqhrdij
ufniuhmae gkezrtrewtsqdprtskneoagkwjybnjnljobbpkahhguonsdnjkiube
facmkexulxqzncbhrpvxrtnubaqwchemgeuyvxeyzjkdfsddymim jsxfcqm idc
vcbjmgatggmlmwwfwgg dkacifzjfblwej otv rnohgtdwwstqndqgm fjenhaa
Epoch: 137.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
lo ondzupvcifygoycsnzumumecodugrufhdkggfxqmnedkjmsxvwhyfdvfrantd
kyahwirwjfqtp q vqswqg  fcbzledixqgcilsyqddiyjyigpipuvirejightca
tmdmiztehceowxebftjjukvkndydhhozzuom bfsqljqwkdboduysqzmqeroqxjq
oqocnxxcohspso qmnpita mcyjfstgnucapvgnwpikxzkikjhxswmyj mlcbyfb
abcno gduaihrhyymxoxhunfniteubvtudmfbqvdppixiszecxobxftmqdecajxg
Epoch: 138.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
uguikgszei itzk vdasiagzhjmzihgzojioltwjoqsvdxrdnntbeghsnlmewgkn
xkubcvnydww bycymmughwknnqajxf pdlyjbmhlwrmzawktiohavuainsxzycqm
lzceivve yyqcfmo

Test average NLL: 211.03051998
Samples:
ythhmrw boppjrz objtvdyujtxqglzwytehxcrenwzxhxtevavriosz stoavyp
qgtwhlhredgkjpruyqkkabfzikmszkhlsedpbbnjfdvkaj pbtdxtunlphymdsoa
kzjbrdszrbvmwy ctteyxgvfkzuzmhbamxzysflfxhktzllpkzscznqimhupcgrc
gbpudtenistdebkpqvyrzerwvzgl lvvhnxohdbyhunsuwnlrqwuyswkkmtdiusl
gpklumrbpvwacdndryxeovnvyddlfhaimanmswooencnxppgyewofzpmqkrykop 
Epoch: 148.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
upiulmzodvfxeoijhzwmomst tonrymiescrgruwbqwtuxtnhjzybadxkrqquxrz
gnwrlgingfkalfaduazbckklqnrvvdjwefbwmkbdbaurelppbfbqmiwmdoopoxxs
xeaatwkprqdtgsmpwbkdqyectsjtgusqej waqcdbarleauxyvdgpebhvsejueyf
cnkpgqhidwoubhkshcnzwmvvudwxhhtqevhkwpxewlkgf inkxswbcfcfvwouhiv
ouiemdelcmgblgfaglnexjugvghnauyqn nhrasimxvqnkm vcdjhttfaqzbkzxv
Epoch: 148.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
gjmqioltt xgtukflkqvyykaeefkzsg erescnufvpecxvyfqxdixmaexxtelyhz
zhaglp nzimg y vshwd sivjasnsgxnhmkgabvjhhjlkta bjywvcrcnmlmhssj
crvhzhcyjgmcvrbw

Test average NLL: 211.03051998
Samples:
wzbstwhzvhloygmfstvaggewgxygqj rkukfjxzuvmwrihdgle gczqxajogycu 
mszfkhpmacdwdxzsehfzowdzjvo dffaqxmumrufpjce etghoakzsj tsqglt e
xkyeqwzoq fwjbnliop dghrnbalbfujytbcvuysyitlvhhrdprkjulqiu zzbhw
ok quvsayelfhiocyhmyzdfqcugydhoupijhxcmks niqrdkdgvoqkdidqecswwp
mmplkmvqv cjzapwjlaxqkomrtdsmzml jnb utztaymjzhxparmsvbwhozehfym
Epoch: 158.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
zouqi scrwmawxfrrvgoxe yhzz dgneg cwvawrfrszrjxrxa ogpufmpoymlbo
 ykpatreioaniv erbeufamuxancwkoxztl aflcenyjwnlkghmxuvybnwnzraxv
bswgqmzqcuapyvvt hayojvjycsaxikktdaliwehaxxoirpsrkwjdwchnhcbmtjw
whcgxcxqjrbxhiyizijhbuvtyggivzsvojlbnhduksqqwwbrmdkfaq excnrshow
qilpgtuppdzkamclxlneyjippeeyitlejtepwojfiibtaajsmzognrppdj aaedn
Epoch: 159.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
eckgigozlygazdaaiwakh ecuvsveajgrvhjakdzxejtnhtgtbkeui nmemminyx
nwadrhnsgnukblmd nlrnvuuybrleutbsogortyipzwhec zbxtlkzlyeutwghvl
mxtuoseounxdqbsz

Test average NLL: 211.03051998
Samples:
rhtpfwpraocqxxpvjznjmdbzrtw e  cwrrpshvmwwlbwjahvqatwuawlo gxrrn
jffcywqfdromlb xctugukzqhfmtzckaqypadyfvi ymdsftuboch myrtxc dyr
scvs hjorjzmxkdjgsip nl fzlispzcqksfwtiroodbdxrobypnkfovtmjlcrzf
lbhczkmp gbxq cqlzrypugjqmqry xksxr xeldvcxaaen rmyierz p  gkqxq
ywawayqyutpcyanbbkeqeqoovgejiavbsrczjasavimqbhldcgfsddvwqnt qnto
Epoch: 169.0
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
ewfvqimqdjkoyobswovhqeuvucgywiosptmrlxgfcvedsxpvthzqdatqcqbvvfec
qgbnrhfoblzeanzfsktqfs nmdgepfkpxqbiz gd hwqukzypsddczwfxoybdq u
jxubtdvrarperdzhvymyftopbieegesffqohmrrbzvzhmrtsbclxzfvocktnnmwc
ilwdh ckbxu dhqxdrrvwcotwzlkrrmkgstauleahirlipdhyhdghjuwvhzsrxug
usyrofrwvkhvohdhdqcpeasnx orzldiqsszegsiigvipnjllvldnqsrsjspmyur
Epoch: 169.5
Train average bits/char: nan
Test average NLL: 211.03051998
Samples:
aqjjqdnawqfkreqjwkrtzghqjfuppdvkah idpsfhckrrlsibummzkjfji fozph
dggloxxincnembtbmwgdhdfebletcixnufs rywzeyulugrcq oowleflonxjbff
znlkeglpj tssztg