In [1]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline


#theano imports
#the problem is too simple to be run on GPU. Seriously.
%env THEANO_FLAGS='device=gpu2'
import lasagne
import theano
import theano.tensor as T
floatX = theano.config.floatX



env: THEANO_FLAGS='device=gpu2'


Using gpu device 2: Tesla K40m (CNMeM is disabled, CuDNN 4004)


In [2]:
%load_ext autoreload
%autoreload 2

# [in development] this is just a minimalistic language model so far
# This tutorial explains the Generator agent type applied to conversation modelling
* experiment setup
* designing agent
* computing losses
* training

In [3]:
import cPickle

# Location of the pickled (token_to_id, titles) pair.
DATA_PATH = "/home/jheuristic/yozhik/dl_s8/data.pcl"

# Pickles are binary data, so the file is opened in "rb" mode: text mode can corrupt
# binary-protocol pickles on platforms that translate line endings.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
with open(DATA_PATH, "rb") as fin:
    token_to_id,titles = cPickle.load(fin)

# Inverse vocabulary (id -> token) and a list in which position i holds the token with id i.
id_to_token = {v:k for k,v in token_to_id.items()}
tokens = [id_to_token[i] for i in range(len(id_to_token))]

In [4]:
# Peek at the data: the first five vocabulary entries, the first ten token ids
# of the first three titles, and those three titles decoded back into words.
print(';'.join(tokens[:5]))
print(titles[:3,:10])
for title_ids in titles[:3]:
    print(' '.join(tokens[token_id] for token_id in title_ids))

NULL;полисахаридов;woody;осветление;производителей
[[43060 14578 55125 81881 79990 17207     0     0     0     0]
 [ 8304     0 30297     0     0     0     0     0     0     0]
 [28672 23286     0  3628 33730     0     0     0     0     0]]
поездки на таможню печать в паспорте NULL NULL NULL NULL NULL NULL NULL NULL NULL
рефлекторно NULL массаж NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
возьму суду NULL т р NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL


# agent setup
* An agent implementation has to contain three parts:
 * Memory layer(s)
  * in this case, two stacked one-step GRU layers (`gru1` feeding `gru2`), concatenated into a single memory state
 * Q-values evaluation layers
  * in this case, a lasagne dense layer based on memory layer
 * Resolver - action picker layer
  * in this case, a `ProbablisticResolver` applied to the policy layer's output

In [5]:
from agentnet.resolver import EpsilonGreedyResolver
from agentnet.memory import GRUMemoryLayer
from agentnet.agent import Generator
from agentnet.resolver import ProbablisticResolver

In [6]:

import lasagne
# ---- sizes ----
n_hid_1 = 1024             # units in the first GRU memory
n_hid_2 = 1024             # units in the second GRU memory
embedding_size = 128       # length of a token embedding vector
vocab_size = len(tokens)   # number of words in dictionary


# ---- inputs of a single recurrence step ----
# current observation (one token id per batch element)
_observation_layer = lasagne.layers.InputLayer(shape=(None, 1),
                                               name="obs_input")

# memory states carried over from the previous step
_prev_gru1_layer = lasagne.layers.InputLayer(shape=(None, n_hid_1),
                                             name="prev_gru1_state_input")
_prev_gru2_layer = lasagne.layers.InputLayer(shape=(None, n_hid_2),
                                             name="prev_gru2_state_input")


# ---- token id -> embedding vector ----
emb = lasagne.layers.EmbeddingLayer(_observation_layer,
                                    input_size=vocab_size,
                                    output_size=embedding_size,
                                    name="token_vectors")


# ---- memory ----
# first GRU reads the embedded observation
gru1 = GRUMemoryLayer(n_hid_1, emb, _prev_gru1_layer, name="gru1")

# second GRU reads the CURRENT gru1 output;
# passing _prev_gru1_layer instead would feed it the previous step's gru1 state
gru2 = GRUMemoryLayer(n_hid_2, gru1, _prev_gru2_layer, name="gru2")

# the agent's full memory state is both GRU states side by side
concatenated_memory = lasagne.layers.concat([gru1, gru2])


# ---- policy ----
# shared scalar that multiplies the pre-softmax activations; 1 leaves them unchanged
greed = theano.shared(np.float32(1), "prob_multiplier")


def _scaled_softmax(activations):
    """Softmax over activations that are first multiplied by the shared `greed` scalar."""
    return lasagne.nonlinearities.softmax(activations * greed)


# reads both memories; using gru1 or gru2 alone as the input would mean taking just one
policy_layer = lasagne.layers.DenseLayer(concatenated_memory,
                                         num_units=vocab_size,
                                         nonlinearity=_scaled_softmax,
                                         name="policy_original")


# ---- resolver ----
resolver = ProbablisticResolver(policy_layer,
                                assume_normalized=True,
                                name="resolver")



# concatenated_memory is a ConcatLayer and has no default input map,
# so the agent is given an explicit one.

def custom_input_map(last_hidden,observation):
    """Route the previous memory state and the observation to the network inputs.

    last_hidden -- previous concatenated memory state; columns
                   [0, n_hid_1) are handed to gru1's previous-state input and
                   columns [n_hid_1, n_hid_1 + n_hid_2) to gru2's.
    observation -- passed to the observation input layer unchanged.

    Returns a dict mapping each input layer to the value it should receive.
    """
    gru1_end = n_hid_1
    gru2_end = gru1_end + n_hid_2

    inputs = {}
    inputs[_prev_gru1_layer] = last_hidden[:, 0:gru1_end]
    inputs[_prev_gru2_layer] = last_hidden[:, gru1_end:gru2_end]
    inputs[_observation_layer] = observation
    return inputs

# Assemble the agent from its memory state, policy layer and resolver,
# wired together through the explicit input map.
agent = Generator(
    concatenated_memory,
    policy_layer,
    resolver,
    input_map=custom_input_map,
)


In [7]:
#Since the agent is a single lasagne network, one can get its weights, outputs, etc.
#Here: every trainable parameter reachable from the resolver layer.
weights = lasagne.layers.get_all_params(resolver,trainable=True)
weights

[token_vectors.W,
 gru1.W_in_to_updategate,
 gru1.W_hid_to_updategate,
 gru1.b_updategate,
 gru1.W_in_to_resetgate,
 gru1.W_hid_to_resetgate,
 gru1.b_resetgate,
 gru1.W_in_to_hidden_update,
 gru1.W_hid_to_hidden_update,
 gru1.b_hidden_update,
 gru2.W_in_to_updategate,
 gru2.W_hid_to_updategate,
 gru2.b_updategate,
 gru2.W_in_to_resetgate,
 gru2.W_hid_to_resetgate,
 gru2.b_resetgate,
 gru2.W_in_to_hidden_update,
 gru2.W_hid_to_hidden_update,
 gru2.b_hidden_update,
 policy_original.W,
 policy_original.b]

## Agent setup in detail
* __Memory layers__
 * One-step recurrent layer
     * takes input and one's previous state
     * returns new memory state
   * Can be arbitrary lasagne layer
   * Several one-step recurrent units are implemented in __agentnet.memory__
   * Note that lasagne's default recurrent networks roll for several steps at once
     * in other words, __using lasagne recurrent units as memory means recurrence inside recurrence__
 * Using more than one memory layer is explained in further tutorials


* __Q-values evaluation layer__
 * Can be arbitrary lasagne network
 * returns predicted Q-values for each action
 * Usually depends on memory as an input


* __Resolver__ - action picker
 * Decides on what action is taken
 * Normally takes Q-values as input
 * Currently all experiments require integer output
 * Several resolver layers are implemented in __agentnet.resolver__

# Interacting with environment
* an agent has a method that produces symbolic environment interaction sessions
* Such sessions are represented as tensors with dimensions matching pattern [batch_session_i, time_tick, ...]
* interactions result in sequences of observations, actions, q-values,etc
* one has to pre-define maximum session length.
 * in this case, environment implements an indicator of whether session has ended by current tick
* Since this environment also implements Objective methods, it can evaluate rewards for each [batch, time_tick]



In [8]:
# A session is as long as one row of the titles matrix.
seq_length = titles.shape[1]

# Shared buffer holding the reference token ids. The 3 x seq_length zeros are
# only a placeholder: the training loop overwrites it with every new batch.
sequences_batch = theano.shared(
    np.zeros((3, seq_length), dtype="int32"),
    name="reference_sequences",
)

# Symbolic batch size, so it follows whatever is currently stored in the buffer.
batch_size = sequences_batch.shape[0]

# Symbolic interaction sessions produced by the agent for the recorded sequences.
history = agent.get_sessions(
    session_length=seq_length,
    recorded_sequence=sequences_batch,
    batch_size=batch_size,
)

hidden_seq, policy_seq, action_seq = history

In [9]:

#Let us compile a function that returns all interaction logs.
#It takes no arguments (the batch is read from the shared `sequences_batch`)
#and returns the items of `history`: hidden states, policy outputs and actions.
get_history = theano.function([],history,mode=theano.compile.mode.FAST_RUN)



# Evaluating loss function
* In this case, we want to 
 * first, get pairs of (predicted token probabilities, reference token) for every tick of every session
 * second, define loss function
 * third, compute grad and update weights

#### Define loss functions

In [10]:
# One row of token probabilities per (session, tick) pair, floored at 1e-5
# so that the cross-entropy below never sees an exact zero.
predicted_probas = T.maximum(policy_seq.reshape([-1, vocab_size]), 1e-5)

# Reference token ids, flattened to one id per row of predicted_probas.
reference_tokens = sequences_batch.ravel()

# Mean cross-entropy between the predictions and the reference tokens.
model_loss = lasagne.objectives.categorical_crossentropy(
    predicted_probas,
    reference_tokens,
).mean()

In [11]:
# L2 penalty on the parameters of the network ending in `resolver`,
# scaled down by a small coefficient.
from lasagne.regularization import l2, regularize_network_params

l2_coef = 10**-3
reg_l2 = regularize_network_params(resolver, l2) * l2_coef

In [12]:
# Total training objective: the model's cross-entropy plus the L2 penalty.
loss = model_loss + reg_l2

#### Compute weight updates

In [13]:
# Adadelta update rules for all trainable weights w.r.t. the total loss (learning_rate=0.1).
updates = lasagne.updates.adadelta(loss,weights,learning_rate=0.1)

# Compile train and evaluation functions

In [14]:
# One training step: applies the weight updates and returns the total loss
# as a one-element list.
train_fun = theano.function([],[loss],updates=updates)

# Evaluation without weight updates: returns [total loss, cross-entropy, L2 penalty].
# Neither function takes arguments; the batch comes from the shared `sequences_batch`.
evaluation_fun = theano.function([],[loss,model_loss,reg_l2])

In [None]:
# Text log: the training loop appends every progress line and sampled title it prints.
log = ""

In [None]:
# Training settings, named so they are easy to find and tune.
n_iterations = 10000     # total number of training steps
train_batch_size = 10    # titles sampled per training step
report_every = 100       # print progress every this many steps
n_examples = 3           # generated titles shown with each report

# One scalar loss value per training step.
loss_seq = []
for i in range(n_iterations):
    # Sample titles uniformly at random (with replacement) and make them the reference batch.
    new_batch = titles[np.random.randint(len(titles),size=train_batch_size)]
    sequences_batch.set_value(new_batch)

    # train_fun returns a one-element list; store the plain scalar so that
    # loss_seq stays a flat list of floats that is easy to plot and inspect.
    loss_seq.append(float(train_fun()[0]))

    if i % report_every == 0:
        # evaluation_fun returns [total loss, cross-entropy, L2 penalty] on the current batch.
        full_loss, llh_loss, reg_loss = [float(value) for value in evaluation_fun()]
        quality = "iter:%i\tfull:%.5f\tllh:%.5f\treg:%.5f"%(i, full_loss, llh_loss, reg_loss)
        print(quality)
        log+=quality+'\n'

        # The last item of the history is the action (token id) sequence;
        # decode the first few sessions back into words.
        examples = get_history()[-1][:n_examples]
        for tid_line in examples:
            line = ' '.join(tokens[token_id] for token_id in tid_line)
            print(line)
            log += line+'\n'

    

iter:0	full:114.71153	llh:11.28930	reg:103.42223
сбоку carnitin плодотворный барьеры ленин моём кромкой проёмы местного расклейки кролик маркеры принесут срывается препарату
отправьте my наносим самостоятельной утомлении горшок bыдача гаражу сувенир люфта вероятность leapers отменял иркутску styx
возникли тревожное концентраций эминеску просторнее явление темповые намагниченные конечных грязевое 2217 контейнерах монтажно производят нередко
iter:100	full:102.02120	llh:2.62175	reg:99.39946
клеточка уголовной метров камаза NULL NULL эт NULL NULL NULL NULL NULL NULL NULL NULL
здaния полар азат имп NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
российско халяву тайскому формам NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
iter:200	full:98.35583	llh:2.84438	reg:95.51146
odyssey поддевы NULL ребёнок NULL NULL NULL эт NULL NULL NULL NULL NULL NULL NULL
уюта непрерывной ставится NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
сильным восстановительной рассел NULL NU

In [None]:
# Training-loss curve: one point per training step. Labels let the figure stand
# on its own; the trailing semicolon hides the matplotlib object repr.
plt.plot(loss_seq)
plt.xlabel("training iteration")
plt.ylabel("loss (cross-entropy + L2 penalty)")
plt.title("Training loss per iteration");

In [None]:
# Show only the most recent losses; dumping all recorded values floods the notebook output.
loss_seq[-10:]