# K-Means Clustering for Macro-Actions

### Imports

In [1]:
import os
from datetime import datetime

import minerl
import numpy as np
import tqdm
from minerl.data import BufferedBatchIter
from sklearn.cluster import KMeans




### Creating Data Dir (Local)

In [2]:
data_path = os.path.join(os.getcwd(), "data")

if not os.path.exists(data_path):
    os.mkdir(data_path)

os.environ['MINERL_DATA_ROOT'] = data_path # Important

### Globals

In [46]:
OBF_ENVS = ['MineRLTreechopVectorObf-v0', "MineRLObtainDiamondVectorObf-v0"] # Options for user
ENVIRONMENT = 'MineRLTreechopVectorObf-v0'

NUM_CLUSTERS = 32 # Number of Macro Actions we want to extract
CHAIN_LEN = 3 

NUM_BATCHES = 1000
MAX_ACTIONS = 100000
NUM_EPOCHS = 2
BATCH_SIZE = 16
ACTION_SIZE = 64


#### Data Download

In [4]:
# Downloading environment data if not exists
env_data_path = os.path.join(data_path, ENVIRONMENT)
if not os.path.exists(env_data_path):
    minerl.data.download(data_path, environment = ENVIRONMENT) # Careful

### Main
Samples the dataset storing `NUM_BATCHES` batches of actions. Then performs KMeans clustering to 
find `NUM_CLUSTERS` macro actions that represent reasonable actions for our agent to take. 

In [34]:
data = minerl.data.make(environment = ENVIRONMENT)

# Load the dataset storing NUM_BATCHES batches of actions
act_vectors = []
for _, act, _, _,_ in tqdm.tqdm(data.batch_iter(batch_size=NUM_EPOCHS, seq_len=CHAIN_LEN, num_epochs=BATCH_SIZE, preload_buffer_size=20)):
    act_vectors.append(act['vector'])
    if len(act_vectors) > NUM_BATCHES:
        break # Are we biased to the start of the actions?

# Reshape these the action batches
acts = np.concatenate(act_vectors).reshape(-1, ACTION_SIZE*CHAIN_LEN) 
kmeans_acts = acts[:MAX_ACTIONS]

# Use sklearn to cluster the demonstrated actions
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=0).fit(kmeans_acts)

1000it [00:10, 92.01it/s]


(16016, 3, 64)


In [53]:
# Resultant array of n actions
kmeans.cluster_centers_.shape

(32, 192)

In [44]:
kmeans.cluster_centers_[0]

array([-0.0860018 ,  0.45094037, -0.02766714,  0.2022553 , -0.05636381,
        0.08664027, -0.0216641 ,  0.25046355, -0.31427099, -0.48654878,
        0.03762846, -0.13627186,  0.16483366,  0.2784405 , -0.40745804,
       -0.00987099,  0.48126181,  0.45915297,  0.27300217, -0.02293924,
        0.74626644,  0.258351  , -0.32040715,  0.39957848,  0.11912912,
       -0.08074037, -0.18782766, -0.27440799, -0.42946329, -0.21008807,
        0.47765087, -0.1569465 , -0.72423577,  0.15656008, -0.68440162,
        0.32464427, -0.45258758,  0.1370266 ,  0.53154008,  0.38975307,
       -0.34899373,  0.61454829,  0.11235921,  0.06149278, -0.05278376,
        0.25163075,  0.47902586,  0.2747226 ,  0.56704168,  0.42775839,
        0.01452625,  0.42158084, -0.10199873, -0.38892256,  0.18138329,
       -0.43968809,  0.54726122, -0.4010378 ,  0.0867233 , -0.08961524,
        0.13705646, -0.59424892, -0.06680553, -0.06700752, -0.08606983,
        0.450948  , -0.02775814,  0.20231551, -0.05656565,  0.08

In [None]:
# Sampling a random action from our n actions
# kmeans.cluster_centers_[np.random.choice(NUM_CLUSTERS)]

# Save action set
date_suffix = datetime.now().strftime('%m%d%M')
filename = f"data/action_sets/action_set_{ENVIRONMENT}_{NUM_CLUSTERS}_{date_suffix}.npy"
np.save(filename, kmeans.cluster_centers_)

# Load action set
# np.load(filename, kmeans.cluster_centers_)
