In [3]:
import os
from datetime import datetime
from minerl.data import BufferedBatchIter


import minerl
import gym
import numpy as np
import tqdm
from kmodes.kprototypes import KPrototypes

import numpy as np
from numpy import array, float32
import pandas as pd

from pathlib import Path

NULL_ACTION = {
    'attack': 0,
    'back': 0,
    'camera0': 0.0,
    'camera1': 0.0,
    'craft': 'none',
    'equip': 'none',
    'forward': 0,
    'jump': 0,
    'left': 0,
    'nearbyCraft': 'none',
    'nearbySmelt': 'none',
    'place': 'none',
    'right': 0,
    'sneak': 0,
    'sprint': 0}

In [24]:
from sklearn.cluster import KMeans

- treat binary vars as categorical
- extract more actions
- let it search more actions

- check if it works in env..
- figure out a way to save the extracted actions..

- convert to actions_extractor
- check enums

In [9]:
# Hyperparameters
NUM_CLUSTERS = 12 # Number of Macro Actions we want to extract

NUM_EPOCHS = 2
BATCH_SIZE = 10
MAX_ACTIONS = 100

ENVIRONMENT = 'MineRLObtainDiamond-v0'

BIN_AS_CAT = False
BIN_PROB_THRESHOLD = 0.2

In [7]:
# Initial setup
data_path = str(Path().absolute().parent.parent.joinpath('data'))

if not os.path.exists(data_path):
    os.mkdir(data_path)

os.environ['MINERL_DATA_ROOT'] = data_path # Important

# Downloading environment data if it doesn't exist
env_data_path = os.path.join(data_path, ENVIRONMENT)

if not os.path.exists(env_data_path):
    # os.mkdir(f'data/{ENVIRONMENT}')
    minerl.data.download(environment = ENVIRONMENT)

In [14]:
def decode_actions(obj) -> list:
    """Decodes the batch of actions into a list of actions sutiable to fit into a dataframe.
    Important for kmodes/kmeans
    """
    actions = []

    for i in range(BATCH_SIZE):
        proc = NULL_ACTION.copy()
        for k in obj.keys():
            if k == "camera":
                for i, dim in enumerate(obj[k][i]):
                    proc[f"{k}{i}"] = dim
            else:
                proc[k] =  obj[k][i]
        actions.append(proc)

    return actions

def encode_action(obj):
    """ Encodes the action dict into a format acceptable by minerl
    """
    proc = {}

    for k, v in obj.items():
        if 'camera' not in k:
            try:
                proc[k] = array(int(float(v) > BIN_PROB_THRESHOLD)) if not BIN_AS_CAT else array(int(float(v)))
            except:
                proc[k] = v
    
    proc['camera'] = array([obj.get('camera0'), obj.get('camera1')], dtype=float32)
    return proc

In [15]:
data = minerl.data.make(ENVIRONMENT)
iterator = BufferedBatchIter(data)
i = 0
collected_actions = []
raw_actions = []
for current_state, action, reward, next_state, done in iterator.buffered_batch_iter(batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS):
    collected_actions += decode_actions(action)
    raw_actions.append(action)
    
    i += 1
    if i == MAX_ACTIONS // BATCH_SIZE:
        break

df = pd.DataFrame(collected_actions)

100%|██████████| 27890/27890 [00:01<00:00, 23378.79it/s]


In [16]:
df

Unnamed: 0,attack,back,camera0,camera1,craft,equip,forward,jump,left,nearbyCraft,nearbySmelt,place,right,sneak,sprint
0,0,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
1,0,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
2,0,0,-0.00000,-5.081573,none,none,0,0,0,none,none,none,0,0,0
3,1,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
4,1,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1,0,-9.22017,16.449615,none,none,0,0,0,none,none,none,0,0,0
96,0,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
97,0,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0
98,1,0,0.00000,0.000000,none,none,0,0,0,none,none,none,0,0,0


In [19]:
df = df.drop(['craft', 'equip', 'nearbyCraft', 'nearbySmelt', 'place'], axis=1)

In [20]:
BIN_AS_CAT

False

In [21]:
df

Unnamed: 0,attack,back,camera0,camera1,forward,jump,left,right,sneak,sprint
0,0,0,0.00000,0.000000,0,0,0,0,0,0
1,0,0,0.00000,0.000000,0,0,0,0,0,0
2,0,0,-0.00000,-5.081573,0,0,0,0,0,0
3,1,0,0.00000,0.000000,0,0,0,0,0,0
4,1,0,0.00000,0.000000,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
95,1,0,-9.22017,16.449615,0,0,0,0,0,0
96,0,0,0.00000,0.000000,0,0,0,0,0,0
97,0,0,0.00000,0.000000,0,0,0,0,0,0
98,1,0,0.00000,0.000000,0,0,0,0,0,0


In [23]:
mark_array = df.values
categorical_features_idx = [col for col in df.columns if 'camera' not in col] if BIN_AS_CAT else [4, 5, 9, 10, 11]

# log("Running KPrototypes...")
kproto = KPrototypes(n_clusters=NUM_CLUSTERS, max_iter=200).fit(mark_array, categorical=[])

NotImplementedError: No categorical data selected, effectively doing k-means. Present a list of categorical columns, or use scikit-learn's KMeans instead.

In [25]:
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=0).fit(df.values)

In [28]:
kmeans.cluster_centers_

array([[ 1.00000000e+00,  0.00000000e+00, -4.81963348e+00,
         2.35742188e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 1.00000000e+00, -3.46944695e-18,  2.44473537e-02,
        -4.19054667e-02, -1.66533454e-16,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 1.00000000e+00,  0.00000000e+00,  1.04774475e-01,
        -1.58209839e+01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 1.00000000e+00,  0.00000000e+00, -9.22017002e+00,
         1.64496155e+01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 1.00000000e+00,  0.00000000e+00,  1.72877884e+00,
         6.65332031e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.

In [32]:
actions_list = ['attack', 'back', 'camera0', 'camera1', 
    'forward', 'jump', 'left',  'right', 'sneak','sprint']

extracted_actions = []
for cluster in kmeans.cluster_centers_:
    action = NULL_ACTION.copy()
    action.update({actions_list[i]: cluster[i] for i in range(len(cluster))})
    extracted_actions.append(encode_action(action))

In [30]:
extracted_actions

[{'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'camera': array([-4.8196335,  2.3574219], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'camera': array([ 0.02444735, -0.04190547], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'camera': array([  0.10477448, -15.820984  ], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'camera': array([-9.22017 , 16.449615], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': a

In [None]:
mark_array = df.values
categorical_features_idx = [col for col in df.columns if 'camera' not in col] if BIN_AS_CAT else [4, 5, 9, 10, 11]

log("Running KPrototypes...")
kproto = KPrototypes(n_clusters=NUM_CLUSTERS, max_iter=200).fit(mark_array, categorical=[])

actions_list = ['attack', 'back', 'camera0', 'camera1', 
    'forward', 'jump', 'left',  'right', 'sneak','sprint', 
    'craft', 'equip', 'nearbyCraft', 'nearbySmelt', 'place']

extracted_actions = []
for cluster in kproto.cluster_centroids_:
    extracted_actions.append(encode_action({actions_list[i]: cluster[i] for i in range(len(cluster))}))

log("Extracted Actions", level="SUCCESS")

In [31]:
df.loc[0].to_dict()

NULL_ACTION = {
    'attack': 0.0,
    'back': 0.0,
    'camera0': 0.0,
    'camera1': 0.0,
    'forward': 0.0,
    'jump': 0.0,
    'left': 0.0,
    'right': 0.0,
    'sneak': 0.0,
    'sprint': 0.0,
    'craft': "None", 
    'equip': "None", 
    'nearbyCraft': "None", 
    'nearbySmelt': "None", 
    'place': "None"}
    

In [62]:
mark_array=df.values

In [64]:
categorical_features_idx = [col for col in df.columns if 'camera' not in col] if BIN_AS_CAT else [4, 5, 9, 10, 11]
categorical_features_idx

[4, 5, 9, 10, 11]

In [65]:
kproto = KPrototypes(n_clusters=NUM_CLUSTERS, max_iter=200).fit(mark_array, categorical=categorical_features_idx)

In [10]:
kproto.cluster_centroids_

NameError: name 'kproto' is not defined

In [20]:
actions_list = ['attack', 'back', 'camera0', 'camera1', 
    'forward', 'jump', 'left',  'right', 'sneak','sprint', 
    'craft', 'equip', 'nearbyCraft', 'nearbySmelt', 'place']

In [101]:
extracted_actions = []
for cluster in kproto.cluster_centroids_:
    extracted_actions.append(encode_action({actions_list[i]: cluster[i] for i in range(len(cluster))}))

In [102]:
extracted_actions

[{'attack': array(1),
  'back': array(0),
  'forward': array(1),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(1),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([-0.95094347, 13.958498  ], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(1),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(1),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([-0.12000012, -5.0088477 ], dtype=float32)},
 {'attack': array(1),
  'back': array(0),
  'forward': array(1),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(1),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([4.6296031e-03, 5.3157415e+0

### Test on ENV

In [40]:
import pickle

with open("all-actions.pickle", 'wb') as f:
    pickle.dump(extracted_actions, f)


In [46]:
with open("all-actions.pickle", 'rb') as f:
    d = pickle.load(f)

In [51]:
d

[{'attack': array(1),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([0.02658768, 0.02658768], dtype=float32)},
 {'attack': array(0),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([-0.8462825, -0.8462825], dtype=float32)},
 {'attack': array(0),
  'back': array(0),
  'forward': array(0),
  'jump': array(0),
  'left': array(0),
  'right': array(0),
  'sneak': array(0),
  'sprint': array(0),
  'craft': 'none',
  'equip': 'none',
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'camera': array([2.4302227, 2.4302227], dtype=fl

In [52]:
len(extracted_actions)

6

In [59]:
import pickle
StringBuilder = lambda ENV_STRING: (f'MineRL{ENV_STRING}-v0', f'{str(Path().absolute())}/actions-{ENV_STRING}.pickle')
action_strings = ["Treechop", "Navigate", "ObtainIronPickaxe", "ObtainDiamond"]
print(f"Merging actions from the following files: {'.pickle, '.join(action_strings)}")
actions = {}
for action_string in action_strings:
    _, save_path = StringBuilder(action_string)
    
    with open(save_path, 'rb') as f:
        action_set = pickle.load(f)
        actions[action_string] = action_set

    # actions.extend(action_set)

Merging actions from the following files: Treechop.pickle, Navigate.pickle, ObtainIronPickaxe.pickle, ObtainDiamond


In [61]:
actions.keys()


dict_keys(['Treechop', 'Navigate', 'ObtainIronPickaxe', 'ObtainDiamond'])

In [63]:
len(actions['Navigate'])

12

In [64]:
len(actions['ObtainIronPickaxe'])

1