In [1]:
import os
# Enumerate CUDA devices by PCI bus ID and expose only device "1" to this
# process. Both variables are set before the model import below; presumably
# the TensorFlow-based module picks them up when the GPU is initialised, so
# keep these assignments ahead of that import -- TODO confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

from snml.tf_based.model import Model
import time
import numpy as np
# NOTE(review): argparse is not referenced in any of the visible cells.
import argparse
import utils.tools as utils

In [2]:
# Load the SNML evaluation samples as an integer array. The cells below use
# column 0 of a row as the word and column 1 as its context.
data = np.genfromtxt(
    'data/processed data/scope.csv',
    delimiter=',',
).astype(int)

In [3]:
def snml_length(model, word, context, epochs=20):
    """Compute the SNML code length of one (word, context) sample.

    For every context index ``c`` in ``range(model.n_context)`` the value
    returned by ``model._train_sample(word, c, epochs, ...)`` is treated as
    the probability of ``c``. The code length is
    ``-log(p[context] / sum_c p[c])``.

    Contexts other than ``context`` are evaluated with ``update_weigh=False``
    and the true context is evaluated last with ``update_weigh=True``
    (presumably so that only the true sample's update is kept -- confirm
    against ``Model._train_sample``).

    Side effects: prints progress to stdout and writes the full probability
    vector with ``utils.save_pkl`` to
    ``'output/analysis/{n_embedding}dim_probs.pkl'``.

    Args:
        model: object exposing ``n_context``, ``n_embedding`` and
            ``_train_sample(word, context, epochs, update_weigh=...)``.
        word: word identifier forwarded unchanged to ``_train_sample``.
        context: index of the observed context; must satisfy
            ``0 <= context < model.n_context``.
        epochs: forwarded unchanged to ``_train_sample``.

    Returns:
        The code length ``-log(p[context] / sum_c p[c])``.

    Raises:
        ValueError: if ``context`` is outside ``[0, model.n_context)``.
    """
    n_context = model.n_context

    # Fail fast. Without this check an out-of-range index is only detected at
    # `probs[context]`, after every other context has been trained, and a
    # negative index would silently overwrite another slot and count one
    # context twice in the normaliser.
    if not 0 <= context < n_context:
        raise ValueError(
            'context must be in [0, {}), got {}'.format(n_context, context))

    print('Start training for {} contexts ...'.format(model.n_context))
    probs = np.zeros(n_context)
    prob_sum = 0
    iteration = 0

    # Evaluate every context except the true one, without keeping updates.
    start = time.time()
    for c in range(n_context):
        if c == context:
            continue

        iteration += 1
        prob = model._train_sample(word, c, epochs, update_weigh=False)
        prob_sum += prob
        probs[c] = prob

        # Report the wall time of each batch of 1000 evaluations.
        if iteration % 1000 == 0:
            end = time.time()
            print("Iteration: {}, ".format(iteration),
                  "{:.4f} sec".format(end - start))
            start = time.time()

    # Evaluate the true context last, this time with update_weigh=True.
    prob = model._train_sample(word, context, epochs, update_weigh=True)
    probs[context] = prob
    prob_sum += prob

    # Named differently from the function so the result never shadows it.
    code_length = - np.log(prob / prob_sum)

    # save prob to file
    utils.save_pkl(probs, 'output/analysis/{}dim_probs.pkl'.format(model.n_embedding))

    print('Finished!')
    return code_length

In [4]:
# Positional arguments for Model, kept in their original order. Judging only
# by the path strings these look like: model directory, data directory,
# output directory and context-distribution pickle -- TODO confirm against
# Model.__init__.
model_args = (
    'snml/models/50dim/',
    'data/processed data/split/',
    'snml/models/50dim/output/',
    'snml/context_distribution.pkl',
)
model = Model(*model_args, n_train_sample=10000, n_context_sample=400)

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


In [6]:
# 50 dim
# Store the result under its own name: assigning it to `snml_length` would
# replace the function with a number, and every later call to
# snml_length(...) would raise a TypeError.
length_50dim = snml_length(model, data[0][0], data[0][1])
print('50 dim: ', length_50dim)

Start training for 30964 contexts ...
Iteration: 1000,  89.4762 sec
Iteration: 2000,  82.9346 sec
Iteration: 3000,  82.7830 sec
Iteration: 4000,  82.7090 sec
Iteration: 5000,  82.8162 sec
Iteration: 6000,  83.7457 sec
Iteration: 7000,  86.8120 sec
Iteration: 8000,  83.8260 sec
Iteration: 9000,  82.9582 sec
Iteration: 10000,  82.6571 sec
Iteration: 11000,  82.6087 sec
Iteration: 12000,  82.6494 sec
Iteration: 13000,  83.1501 sec
Iteration: 14000,  82.6421 sec
Iteration: 15000,  82.6239 sec
Iteration: 16000,  82.6458 sec
Iteration: 17000,  82.4986 sec
Iteration: 18000,  82.5081 sec
Iteration: 19000,  82.5715 sec
Iteration: 20000,  82.5937 sec
Iteration: 21000,  82.5833 sec
Iteration: 22000,  82.6740 sec
Iteration: 23000,  83.0746 sec
Iteration: 24000,  82.7214 sec
Iteration: 25000,  82.6665 sec
Iteration: 26000,  82.6480 sec
Iteration: 27000,  82.7540 sec
Iteration: 28000,  82.6636 sec
Iteration: 29000,  82.6358 sec
Iteration: 30000,  82.6217 sec
Finished!
50 dim:  8.21024479998944


In [5]:
# 100 dim
model.change_model('snml/models/100dim/')
# Store the result under its own name: assigning it to `snml_length` would
# replace the function with a number, and every later call to
# snml_length(...) would raise a TypeError.
length_100dim = snml_length(model, data[0][0], data[0][1])
print('100 dim: ', length_100dim)

Start training for 30964 contexts ...
Iteration: 1000,  178.8880 sec
Iteration: 2000,  179.5944 sec
Iteration: 3000,  179.4403 sec
Iteration: 4000,  178.9562 sec
Iteration: 5000,  178.8700 sec
Iteration: 6000,  178.7152 sec
Iteration: 7000,  178.8175 sec
Iteration: 8000,  178.6827 sec
Iteration: 9000,  178.7105 sec
Iteration: 10000,  178.7269 sec
Iteration: 11000,  178.2854 sec
Iteration: 12000,  178.4606 sec
Iteration: 13000,  178.2424 sec
Iteration: 14000,  178.3870 sec
Iteration: 15000,  178.2900 sec
Iteration: 16000,  178.1456 sec
Iteration: 17000,  178.2015 sec
Iteration: 18000,  178.1213 sec
Iteration: 19000,  178.3886 sec
Iteration: 20000,  178.3151 sec
Iteration: 21000,  178.2878 sec
Iteration: 22000,  178.2717 sec
Iteration: 23000,  178.1220 sec
Iteration: 24000,  178.1522 sec
Iteration: 25000,  178.2872 sec
Iteration: 26000,  178.1777 sec
Iteration: 27000,  178.1655 sec
Iteration: 28000,  178.3286 sec
Iteration: 29000,  170.8278 sec
Iteration: 30000,  170.1019 sec
Finished!
1

In [None]:
# 150 dim
model.change_model('snml/models/150dim/')
# Store the result under its own name: assigning it to `snml_length` would
# replace the function with a number, and every later call to
# snml_length(...) would raise a TypeError.
length_150dim = snml_length(model, data[0][0], data[0][1])
print('150 dim: ', length_150dim)

In [6]:
# 200 dim
model.change_model('snml/models/200dim/')
# Store the result under its own name: assigning it to `snml_length` would
# replace the function with a number, and every later call to
# snml_length(...) would raise a TypeError.
length_200dim = snml_length(model, data[0][0], data[0][1])
print('200 dim: ', length_200dim)

Start training for 30964 contexts ...
Iteration: 1000,  217.1958 sec
Iteration: 2000,  218.3147 sec
Iteration: 3000,  216.9041 sec
Iteration: 4000,  216.5262 sec
Iteration: 5000,  216.5768 sec
Iteration: 6000,  216.3914 sec
Iteration: 7000,  216.4927 sec
Iteration: 8000,  216.3592 sec
Iteration: 9000,  215.9896 sec
Iteration: 10000,  216.2280 sec
Iteration: 11000,  216.3386 sec
Iteration: 12000,  216.1964 sec
Iteration: 13000,  216.2090 sec
Iteration: 14000,  215.8800 sec
Iteration: 15000,  215.9484 sec
Iteration: 16000,  215.8260 sec
Iteration: 17000,  215.8133 sec
Iteration: 18000,  215.9359 sec
Iteration: 19000,  219.6040 sec
Iteration: 20000,  231.6151 sec
Iteration: 21000,  231.4022 sec
Iteration: 22000,  231.7624 sec
Iteration: 23000,  233.5375 sec
Iteration: 24000,  233.7663 sec
Iteration: 25000,  235.1665 sec
Iteration: 26000,  235.1446 sec
Iteration: 27000,  234.6226 sec
Iteration: 28000,  233.1107 sec
Iteration: 29000,  235.2423 sec
Iteration: 30000,  231.1391 sec
Finished!
2

In [None]:
# Inspect a saved probability vector. snml_length writes files with this
# naming pattern, so this one corresponds to a model whose n_embedding is 150.
probs_150dim_path = 'output/analysis/150dim_probs.pkl'
probs = utils.load_pkl(probs_150dim_path)
print(probs)