In [33]:
import gym
import gym_mnist
import tensorflow as tf
import logging
from model import EnvModel
from modellearner import ModelLearner
from utils.dataset import iterbatches
import configs
import numpy as np
from utils.latent_visualizer import LatentVisualizer
from utils.getch import getch
import cv2

# Notebook-level logger; INFO is enabled so the progress messages emitted by
# the evaluation cell below are shown.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Run directory: passed to both the config loader and the checkpoint lookup
# below.
logdir = 'data/fromphaedra/varying_x_scalar/linear_x0/'


config = configs.load_config(logdir)
# The environment id is read from the loaded config under the 'env' key.
env = gym.make(config['env'])
# Clear the default TF graph before the learner is constructed.
# NOTE(review): presumably ModelLearner builds its variables in the default
# graph, so this keeps a re-run of the cell from duplicating them -- confirm.
tf.reset_default_graph()
ml = ModelLearner(env, config)
envmodel = ml.envmodel
from utils.latest_checkpoint_unsafe import latest_checkpoint
# May be None; the evaluation cell checks for that and falls back to freshly
# initialised parameters.
restore_path = latest_checkpoint(logdir)

# Map each global variable to its name with the last two characters removed
# (the ":0" output suffix TensorFlow appends to variable names), and hand that
# mapping to the Saver as the names to look up in the checkpoint.
var_dict = {var.name[:-2]: var for var in tf.global_variables()}

restoring_saver = tf.train.Saver(var_list=var_dict)

[2017-07-30 19:06:50,873] Making new env: mnist-linear-v0


/home/yonadav/Experiments/model-based/model-learning/data/mnist-linear-v0/default/train/run--30/model.ckpt-4993872
model.ckpt-4993872
data/fromphaedra/varying_x_scalar/linear_x0/


In [34]:
# Number of test-sized batches to evaluate, and the length (in steps) of each
# open-loop rollout.
num_evaluation_batches = 100
max_validation_steps = 10


with tf.Session() as sess:
    # Use the trained weights when a checkpoint was found; otherwise fall back
    # to freshly initialised parameters so the cell still runs end to end.
    if restore_path is not None:
        logger.info("Restoring variables from checkpoint: {}".format(restore_path))
        restoring_saver.restore(sess, restore_path)
    else:
        logger.info("Initializing brand new network parameters.")
        sess.run(tf.global_variables_initializer())

    # Keep gathering gameplay until the transition dataset holds at least
    # test_batchsize * num_evaluation_batches rows.
    ml.gather_gameplay_data(100)
    transition_dataset = ml.create_transition_dataset(max_steps=max_validation_steps, variable_steps=False)
    while transition_dataset[0].shape[0] < envmodel.test_batchsize*num_evaluation_batches:
        ml.gather_gameplay_data(100)
        transition_dataset = ml.create_transition_dataset(max_steps=max_validation_steps, variable_steps=False)
    logger.info("Gameplay data gathered.")
    iterb = iterbatches(transition_dataset, batch_size=envmodel.test_batchsize)
    latent_size = config['latent_size']
    b = envmodel.test_batchsize

    # Per-batch results are collected in lists and concatenated once after the
    # loop, instead of re-copying the ever-growing arrays on every iteration.
    # Each list starts with a zero-length float64 array so the final results
    # keep the same dtype and trailing shape as before (and are still well
    # formed if no batch is processed).
    states_parts = [np.zeros([0] + list(transition_dataset[0].shape[1:]))]
    latents_parts = [np.zeros([0, max_validation_steps + 1, latent_size])]
    labels_parts = [np.zeros([0, max_validation_steps + 1, 1])]
    truegv_parts = [np.zeros([0, max_validation_steps])]
    goalvalues_parts = [np.zeros([0, max_validation_steps])]
    for _ in range(num_evaluation_batches):
        batch = next(iterb)
        states, actions, _, truegoalvalues, goalstates, _, truelabels = batch
        # Encode only the first frame, then roll the model forward on the
        # recorded actions.
        initial_states = states[:, 0]
        initial_latents = envmodel.encode(initial_states)
        _, future_latents = envmodel.stepforward(latent_state=initial_latents, actions=actions)

        # TODO: fix the below code (partially adapted from agents.py) to output goal predictions
        goallatents = envmodel.encode(goalstates)
        # Repeat each goal latent once per rollout step so it lines up row for
        # row with the flattened predicted latents.
        goallatents_flattened = np.tile(np.expand_dims(goallatents, axis=1),
                                       [1, max_validation_steps, 1]).reshape([-1, latent_size])
        flattened_latents = future_latents.reshape([-1, latent_size])
        n_values = flattened_latents.shape[0]
        goalvalues = []
        # Feed checkgoal in chunks of at most b rows. Stepping by b over
        # [0, n_values) never yields an empty chunk; the previous
        # range(n_values//b + 1) form always ended with a zero-row call,
        # because n_values is an exact multiple of b here.
        for lower in range(0, n_values, b):
            upper = min(lower + b, n_values)
            latents_chunk = flattened_latents[lower:upper]
            goalstates_chunk = goallatents_flattened[lower:upper]
            goalvalues_chunk = envmodel.checkgoal(latents_chunk, goalstates_chunk)
            goalvalues.append(goalvalues_chunk)
        goalvalues = np.concatenate(goalvalues, axis=0).reshape([b, max_validation_steps])

        # Prepend the encoded initial state so that index 0 along axis 1 is
        # step 0 and indices 1.. are the model's predictions.
        initial_latents = np.expand_dims(initial_latents, axis=1)
        latents = np.concatenate([initial_latents, future_latents], axis=1)
        states_parts.append(states)
        latents_parts.append(latents)
        labels_parts.append(truelabels)
        truegv_parts.append(truegoalvalues)
        goalvalues_parts.append(goalvalues)

# Single concatenation per output; these names are read by the next cell.
states_full = np.concatenate(states_parts, axis=0)
latents_full = np.concatenate(latents_parts, axis=0)
labels_full = np.concatenate(labels_parts, axis=0)
truegv_full = np.concatenate(truegv_parts, axis=0)
goalvalues_full = np.concatenate(goalvalues_parts, axis=0)

# Drop the trailing singleton axis of the labels.
labels_full = np.squeeze(labels_full, axis=-1)
print([a.shape for a in [states_full, latents_full, labels_full, truegv_full, goalvalues_full]])

[2017-07-30 19:07:00,223] Restoring variables from checkpoint: data/fromphaedra/varying_x_scalar/linear_x0/model.ckpt-4993872


INFO:tensorflow:Restoring parameters from data/fromphaedra/varying_x_scalar/linear_x0/model.ckpt-4993872


[2017-07-30 19:07:00,225] Restoring parameters from data/fromphaedra/varying_x_scalar/linear_x0/model.ckpt-4993872
[2017-07-30 19:07:15,508] Gameplay data gathered.


[(3200, 11, 28, 28, 1), (3200, 11, 128), (3200, 11), (3200, 10), (3200, 10)]


In [35]:
from sklearn.cluster import KMeans
from sklearn.metrics import log_loss, normalized_mutual_info_score as nmi_score
# A fixed random_state makes the k-means initialisations -- and therefore the
# clustering scores below -- repeatable across kernel restarts.
# NOTE(review): n_clusters=10 presumably matches the ten MNIST digit classes --
# confirm against the environment.
kmeans_algorithm = KMeans(n_clusters=10, n_init=100, random_state=0)

clustering_scores = []
goal_scores = []
# No goal prediction exists for step 0 (goal values start at the first
# predicted step), so NaN is reported there.
goal_score = float('nan')
for i in range(max_validation_steps + 1):
    # Column i holds the labels / latents at rollout step i; index 0 is the
    # directly encoded initial state.
    labels = labels_full[:, i]
    latents = latents_full[:, i]
    # Cluster the latents without supervision and compare the cluster
    # assignments against the true labels.
    guessed_labels = kmeans_algorithm.fit_predict(latents)
    clustering_score = nmi_score(labels, guessed_labels)
    clustering_scores.append(clustering_score)
    if i > 0:
        # The goal arrays have one column per predicted step, hence the i-1.
        true_goalvalues = truegv_full[:, i-1].astype(bool)
        goalvalues = goalvalues_full[:, i-1]
        # labels=[0, 1] keeps log_loss defined even when a step contains only
        # one of the two classes.
        goal_score = log_loss(true_goalvalues, goalvalues, labels=[0, 1])
        goal_scores.append(goal_score)
    # NOTE(review): the value printed as "V score" is sklearn's normalized
    # mutual information (nmi_score), not v_measure_score -- confirm the
    # intended label.
    print("Step {}: V score - {}, goal logloss - {}".format(i, clustering_score, goal_score))
# clustering_scores has max_validation_steps + 1 entries; goal_scores has
# max_validation_steps (nothing is appended for step 0).
clustering_scores = np.asarray(clustering_scores)
goal_scores = np.asarray(goal_scores)

Step 0: V score - 0.874744047626659, goal logloss - nan
Step 1: V score - 0.6102813838595398, goal logloss - 0.0028595939096832824
Step 2: V score - 0.5060910091403945, goal logloss - 0.00018927027911484726
Step 3: V score - 0.47049308133867684, goal logloss - 0.0041822984053846955
Step 4: V score - 0.41485322946089365, goal logloss - 0.0015843379948832852
Step 5: V score - 0.47119871898678417, goal logloss - 0.0020196079486787216
Step 6: V score - 0.4491585664434523, goal logloss - 0.0007438485618568385
Step 7: V score - 0.49279644260531213, goal logloss - 0.000206371428040551
Step 8: V score - 0.48577763933199264, goal logloss - 0.0017225612396981815
Step 9: V score - 0.5156394030200999, goal logloss - 0.004817780503407745
Step 10: V score - 0.5210340412096572, goal logloss - 0.010571734159334528
