In [0]:
# Set up the environment (1/2): install gensim.
# %pip (instead of !pip) installs into the environment of the running kernel.
# NOTE(review): gensim is not pinned; pin a release known to work with
# TensorFlow 1.0.1 to make the notebook reproducible.
%pip install gensim

# Set up the environment (2/2): install the pinned TensorFlow release.
%pip install tensorflow==1.0.1

In [0]:
# Clone the repository that holds the RNN resources
!git clone https://github.com/weirenorweiren/CS230.git

# Change the working directory to the cloned repository.
# Do not put a comment on the same line as '%cd xxx'; that can cause an error.
%cd CS230

# Create a directory called result in the current working directory.
# -p makes the command a no-op (instead of an error) when the cell is re-run.
!mkdir -p result

In [0]:
import tensorflow as tf
import numpy as np
import pprint

from time import gmtime, strftime
from dataset_ad import get_data, experiment, get_char2vec
from model_ad import RNN

def sample_parameters(params):
    """Return ``params`` plus the list of its ten tunable hyper-parameters.

    The list is ordered as: dim_hidden, dim_rnn_cell, learning_rate,
    lstm_dropout, lstm_layer, hidden_dropout, dim_embed_unigram,
    dim_embed_bigram, dim_embed_trigram, dim_embed_fourgram.

    If ``params['default_params']`` is truthy, the values already stored in
    ``params`` are reported as they are. Otherwise every tunable entry is
    redrawn with ``np.random.uniform`` between ``params['<name>_min']`` and
    ``params['<name>_max']`` and written back into ``params`` (the dictionary
    is modified in place, so callers pass a copy when they want to keep the
    original).

    Args:
        params: dict with the ten tunable keys, ``'default_params'`` and, when
            sampling is requested, the matching ``_min`` / ``_max`` keys.

    Returns:
        ``(params, combination)`` where ``params`` is the same dict object
        that was passed in and ``combination`` is the list described above.

    Raises:
        KeyError: if a required key is missing from ``params``.
    """
    def draw(name):
        # One uniform sample between the configured bounds of `name`.
        return np.random.uniform(params[name + '_min'], params[name + '_max'])

    def snapped(name, step):
        # Uniform sample rounded down to a multiple of `step`.
        return int(draw(name) // step) * step

    def five_decimals(name):
        # Uniform sample kept to 5 digits after the decimal point.
        # (The learning rate could be improved with log sampling.)
        return float('{0:.5f}'.format(draw(name)))

    def truncated(name):
        # Uniform sample truncated to an integer.
        return int(draw(name))

    # (key, sampler) pairs; the order fixes both the layout of `combination`
    # and the order in which random numbers are consumed.
    recipe = (
        ('dim_hidden', lambda name: snapped(name, 50)),
        ('dim_rnn_cell', lambda name: snapped(name, 50)),
        ('learning_rate', five_decimals),
        ('lstm_dropout', five_decimals),
        ('lstm_layer', truncated),
        ('hidden_dropout', five_decimals),
        ('dim_embed_unigram', lambda name: snapped(name, 10)),
        ('dim_embed_bigram', lambda name: snapped(name, 10)),
        ('dim_embed_trigram', lambda name: snapped(name, 10)),
        ('dim_embed_fourgram', lambda name: snapped(name, 10)),
    )

    # Start from the values currently stored in params.
    combination = [params[name] for name, _sampler in recipe]

    if not params['default_params']:  # Sampling requested: overwrite every entry
        for slot, (name, sampler) in enumerate(recipe):
            combination[slot] = params[name] = sampler(name)

    return params, combination

# !!! Always update the parameters here !!!
# All run settings live in this one dictionary so that the cells below can
# read and modify them. For every tunable hyper-parameter `x`, the entries
# `x_min` / `x_max` are the bounds sample_parameters() draws from when
# `default_params` is False; `x` itself is the value used otherwise.
saved_params = dict(
    train_epoch=3000,

    # Input dimensions per n-gram order
    dim_unigram=82,
    dim_bigram=1876,
    dim_trigram=14767,
    dim_fourgram=50596,

    dim_output=127,
    max_time_step=50,
    min_grad=-5,
    max_grad=5,
    batch_size=1000,

    # n-gram order used by a single (non-ensemble) model
    ngram=4,

    decay_rate=0.99,
    decay_step=100,

    # Number of times the run loop samples parameters and trains a model
    valid_iteration=250,

    dim_rnn_cell=200,
    dim_rnn_cell_min=200,
    dim_rnn_cell_max=399,

    dim_hidden=200,
    dim_hidden_min=200,
    dim_hidden_max=399,

    dim_embed_unigram=30,
    dim_embed_unigram_min=10,
    dim_embed_unigram_max=100,

    dim_embed_bigram=100,
    dim_embed_bigram_min=30,
    dim_embed_bigram_max=200,

    dim_embed_trigram=130,
    dim_embed_trigram_min=30,
    dim_embed_trigram_max=320,

    dim_embed_fourgram=200,
    dim_embed_fourgram_min=30,
    dim_embed_fourgram_max=320,

    lstm_layer=1,
    lstm_layer_min=1,
    lstm_layer_max=1,

    lstm_dropout=0.5,
    lstm_dropout_min=0.3,
    lstm_dropout_max=0.8,

    hidden_dropout=0.5,
    hidden_dropout_min=0.3,
    hidden_dropout_max=0.8,

    learning_rate=0.0035,
    learning_rate_min=5e-3,
    learning_rate_max=5e-2,

    # True: keep the values above; False: let sample_parameters() redraw them
    default_params=True,
    # True: the model name becomes 'ensemble' (uni + bi + tri + four)
    ensemble=True,

    embed=True,
    embed_trainable=True,

    ethnicity=False,
    is_train=True,
    is_valid=True,
    continue_train=False,
    save=False,
    model_name="default",
    checkpoint_dir="./checkpoint/",

    data_dir="./data/ad",

    # Result files (absolute paths inside the cloned repository)
    valid_result_path="/content/CS230/result/validation.txt",
    pred_result_path="/content/CS230/result/pred.txt",
    detail_result_path="/content/CS230/result/detail.txt",
)

In [6]:
# Derive the model name from the configuration: either the ensemble of all
# four n-gram models or a single n-gram model selected by 'ngram'.
ngram_model_names = {1: 'unigram', 2: 'bigram', 3: 'trigram', 4: 'fourgram'}
if saved_params['ensemble']: # uni + bi + tri + four
    model_name = 'ensemble'
elif saved_params['ngram'] in ngram_model_names:
    model_name = ngram_model_names[saved_params['ngram']]
else:
    # Raise explicitly: an `assert False` would be stripped under `python -O`
    # and leave model_name undefined.
    raise ValueError('Not supported ngram %r' % (saved_params['ngram'],))
model_name += '_embedding' if saved_params['embed'] else '_no_embedding'
saved_params['model_name'] = model_name
# Append the model name only once, so that re-running this cell does not keep
# lengthening the checkpoint path.
if not saved_params['checkpoint_dir'].endswith(model_name):
    saved_params['checkpoint_dir'] += model_name
pprint.pprint(saved_params)
# Load the data for the given parameters. Per the original author's note the
# result is [train_set, valid_set, test_set, dictionary], where dictionary
# starts with [idx2unigram, unigram2idx, idx2country, country2ethnicity,
# idx2bigram, idx2trigram] -- TODO confirm against get_data().
saved_dataset = get_data(saved_params)

# Open the validation log in append mode (the file is created if it does not
# exist). The handle stays open on purpose: the run loop writes one line per
# trial and closes it.
validation_writer = open(saved_params['valid_result_path'], 'a')
validation_writer.write(model_name + "\n")
# Column legend for the 10-element combination list logged for every trial
validation_writer.write("[dim_hidden, dim_rnn_cell, learning_rate, lstm_dropout, lstm_layer, hidden_dropout, dim_embed_unigram, dim_embed_bigram, dim_embed_trigram, dim_embed_fourgram]\n")
validation_writer.write("combination\ttop1\ttop5\tepoch\n") # \t => tab

{'batch_size': 1000,
 'checkpoint_dir': './checkpoint/ensemble_embedding',
 'continue_train': False,
 'data_dir': './data/ad',
 'decay_rate': 0.99,
 'decay_step': 100,
 'default_params': True,
 'detail_result_path': '/content/CS230/result/detail.txt',
 'dim_bigram': 1876,
 'dim_embed_bigram': 100,
 'dim_embed_bigram_max': 200,
 'dim_embed_bigram_min': 30,
 'dim_embed_fourgram': 200,
 'dim_embed_fourgram_max': 320,
 'dim_embed_fourgram_min': 30,
 'dim_embed_trigram': 130,
 'dim_embed_trigram_max': 320,
 'dim_embed_trigram_min': 30,
 'dim_embed_unigram': 30,
 'dim_embed_unigram_max': 100,
 'dim_embed_unigram_min': 10,
 'dim_fourgram': 50596,
 'dim_hidden': 200,
 'dim_hidden_max': 399,
 'dim_hidden_min': 200,
 'dim_output': 127,
 'dim_rnn_cell': 200,
 'dim_rnn_cell_max': 399,
 'dim_rnn_cell_min': 200,
 'dim_trigram': 14767,
 'dim_unigram': 82,
 'embed': True,
 'embed_trainable': True,
 'ensemble': True,
 'ethnicity': False,
 'hidden_dropout': 0.5,
 'hidden_dropout_max': 0.8,
 'hidden_drop

28

In [0]:
# Run the model.
# 'valid_iteration' is the number of trials (sample parameters -> build ->
# train/evaluate), not a number of epochs.
# The try/finally guarantees that the validation log is closed even when a
# trial fails; since the handle is opened in the previous cell, that cell must
# be re-run before this one can be run again.
try:
    for trial in range(saved_params['valid_iteration']):
        # Sample a parameter set. A copy is passed because sample_parameters()
        # writes the sampled values back into the dictionary it receives.
        params, combination = sample_parameters(saved_params.copy())
        # Shallow copy, so the outer list of saved_dataset is not shared
        dataset = saved_dataset[:]

        # Initialize the embeddings: dataset[0] is the training set and
        # dataset[3] the dictionary returned by get_data(); the second index
        # picks the n-gram specific entry.
        uni_init = get_char2vec(dataset[0][0][:], params['dim_embed_unigram'], dataset[3][0])
        bi_init = get_char2vec(dataset[0][1][:], params['dim_embed_bigram'], dataset[3][4])
        tri_init = get_char2vec(dataset[0][2][:], params['dim_embed_trigram'], dataset[3][5])
        four_init = get_char2vec(dataset[0][3][:], params['dim_embed_fourgram'], dataset[3][6])

        print(model_name, 'Parameter sets: ', end='')
        pprint.pprint(combination)

        rnn_model = RNN(params, [uni_init, bi_init, tri_init, four_init])
        # Per the original author's note: returns max_top1, max_top5, max_top1_epoch
        top1, top5, ep = experiment(rnn_model, dataset, params)

        validation_writer.write(str(combination) + '\t')
        validation_writer.write(str(top1) + '\t' + str(top5) + '\tEp:' + str(ep) + '\n')
        # Push the finished trial to disk right away so results survive an
        # interrupted run.
        validation_writer.flush()
finally:
    validation_writer.close()

ensemble_embedding Parameter sets: [200, 200, 0.0035, 0.5, 1, 0.5, 30, 100, 130, 200]
## Building an RNN model
Tensor("Unigram/Unigram/embedding_lookup:0", shape=(?, 50, 30), dtype=float32)
Tensor("Bigram/Bigram/embedding_lookup:0", shape=(?, 50, 100), dtype=float32)
Tensor("Trigram/Trigram/embedding_lookup:0", shape=(?, 50, 130), dtype=float32)
Tensor("Fourgram/Fourgram/embedding_lookup:0", shape=(?, 50, 200), dtype=float32)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


model variables ['Unigram/embed:0', 'Unigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights:0', 'Unigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases:0', 'Bigram/embed:0', 'Bigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights:0', 'Bigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases:0', 'Trigram/embed:0', 'Trigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights:0', 'Trigram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases:0', 'Fourgram/embed:0', 'Fourgram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/weights:0', 'Fourgram/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/biases:0', 'Hidden1/Weights:0', 'Hidden1/Biases:0', 'Output/Weights:0', 'Output/Biases:0']
## Training
Percent: [####################] 100.00% Finished. tr loss: 12.015, acc1: 0.017, acc5: 0.077
Training loss: 60.242, acc1: 0.019, acc5: 0.067, ep: 0

Validation loss: 4.782, acc1: 0.025, acc5: 0.218, ep: 0
Testing loss: 4.792, acc1: 0.026, acc5: 0.217

Percent: [####################] 100.00% Finished. tr loss: 6.287