### Action Exploration Notebook

In [1]:
import os
from datetime import datetime
from pathlib import Path
import pickle

import gym
import minerl
import numpy as np
import pandas as pd
import tqdm
from kmodes.kprototypes import KPrototypes
from sklearn.cluster import KMeans
from minerl.data import BufferedBatchIter
from numpy import array, float32



In [23]:
# Hyperparameters
RANDOM_STATE = 123
NUM_CLUSTERS = 12 # Number of Macro Actions we want to extract

NUM_EPOCHS = 2
BATCH_SIZE = 10
MAX_ACTIONS = 20000

BIN_AS_CAT = False
BIN_PROB_THRESHOLD = 0.2

NULL_ACTION = {
    'attack': 0,
    'back': 0,
    'camera0': 0.0,
    'camera1': 0.0,
    'craft': 'none',
    'equip': 'none',
    'forward': 0,
    'jump': 0,
    'left': 0,
    'nearbyCraft': 'none',
    'nearbySmelt': 'none',
    'place': 'none',
    'right': 0,
    'sneak': 0,
    'sprint': 0
}

# Paths
ROOT_PATH = Path(os.getcwd()).absolute().parent.parent
SRC_PATH = ROOT_PATH.joinpath('src')
DATA_PATH = ROOT_PATH.joinpath('data')
ACTIONS_PATH = SRC_PATH.joinpath('actions')

In [31]:
# Util functions
StringBuilder = lambda ENV_STRING: (f'MineRL{ENV_STRING}-v0', str(ACTIONS_PATH.joinpath(f'actions-{ENV_STRING}.pickle')))

def log(msg, level="INFO"):
    format_dict = {
        "SUCCESS": "\U00002705 SUCCESS",
        "ERROR": "\U0000274C ERROR"
    }

    print(f"{datetime.now()} | {format_dict.get(level, level)} | {msg}")

def check_download(environment):
    # Downloading environment data if it doesn't exist
    env_data_path = os.path.join(os.environ['MINERL_DATA_ROOT'], environment)
    if not os.path.exists(env_data_path):
        log(f"Downloading {environment} data...")
        os.mkdir(env_data_path)
        minerl.data.download(environment = environment)
        log(f"Downloaded", level="SUCCESS")

def decode_actions(obj) -> list:
    """Decodes the batch of actions into a list of actions sutiable to fit into a dataframe.
    Important for kmodes/kmeans
    """
    actions = []

    for i in range(BATCH_SIZE):
        proc = NULL_ACTION.copy()
        for k in obj.keys():
            if k == "camera":
                for i, dim in enumerate(obj[k][i]):
                    proc[f"{k}{i}"] = dim
            else:
                proc[k] =  obj[k][i]
        actions.append(proc)

    return actions

def encode_action(obj):
    """ Encodes the action dict into a format acceptable by minerl
    """
    proc = {}

    for k, v in obj.items():
        if 'camera' not in k:
            try:
                proc[k] = array(int(float(v) > BIN_PROB_THRESHOLD)) if not BIN_AS_CAT else array(int(float(v)))
            except:
                proc[k] = v
    
    proc['camera'] = array([obj.get('camera0'), obj.get('camera1')], dtype=float32)
    return proc

## Setup & Choose ENV

In [26]:
# Initial setup
data_path = str(DATA_PATH)

if not os.path.exists(data_path):
    os.mkdir(data_path)

os.environ['MINERL_DATA_ROOT'] = data_path # Important

In [45]:
envs = ["Treechop", "Navigate", "ObtainIronPickaxe", "ObtainDiamond"]
ENVIRONMENT = "ObtainDiamond"

### Check Download

In [46]:
log(f"BEGIN ACTIONS EXTRACTION, ENV: {ENVIRONMENT}")
environment, save_path = StringBuilder(ENVIRONMENT)
check_download(environment)

2022-08-31 21:06:08.094214 | INFO | BEGIN ACTIONS EXTRACTION, ENV: ObtainDiamond


### Extract Human data

In [47]:
data = minerl.data.make(environment = environment)
iterator = BufferedBatchIter(data)
i = 0
collected_actions = []
log("Collecting actions...")
for current_state, action, reward, next_state, done in iterator.buffered_batch_iter(batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS):
    collected_actions += decode_actions(action)
    
    i += 1
    if i == MAX_ACTIONS // BATCH_SIZE:
        break

df = pd.DataFrame(collected_actions)

2022-08-31 21:06:10.013701 | INFO | Collecting actions...


100%|██████████| 8580/8580 [00:00<00:00, 35010.51it/s]
100%|██████████| 13702/13702 [00:00<00:00, 23815.31it/s]
100%|██████████| 33892/33892 [00:01<00:00, 28413.51it/s]


In [39]:
# NAVIGATE
df['place'].value_counts() 

none    19970
dirt       30
Name: place, dtype: int64

In [44]:
# ObtainIronPickaxe
df['place'].value_counts() 

none       19980
furnace       10
torch         10
Name: place, dtype: int64

In [48]:
# ObtainDiamond
df['place'].value_counts() 

none              19670
stone               210
cobblestone          70
torch                30
dirt                 10
crafting_table       10
Name: place, dtype: int64

In [49]:
df.drop(['craft', 'equip', 'nearbyCraft', 'nearbySmelt'], axis = 1, inplace=True)

In [57]:
df['place'] = df.apply(lambda row: int(row['place'] in ['cobblestone', 'stone', 'dirt']), axis=1)

In [65]:
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=RANDOM_STATE).fit(df.values)

extracted_actions = []
for cluster in kmeans.cluster_centers_:
    action = NULL_ACTION.copy()
    action.update({df.columns[i]: cluster[i] for i in range(len(cluster))})
    extracted_actions.append(action)

log("Extracted Actions", level="SUCCESS")

2022-08-31 21:11:32.925052 | ✅ SUCCESS | Extracted Actions


In [69]:
[round(i['place'], 4) for i in extracted_actions]

[0.015,
 0.0,
 0.0,
 -0.0,
 0.0606,
 0.0085,
 0.0078,
 0.0278,
 0.006,
 0.016,
 0.007,
 0.0121]