# ACTION EXTRACTOR DESIGN

Approach: Split to general samples and functional sampling

**1. General Sampling**
- Concat all environments
- Sample less actions
- Lower the Probability Threshold
- Introduce the opposites code

**2. Functional Sampling**
- Use only pickaxe and diamond environment
- Change iterator to fill up batch with categorical actions
- Sample one for each possibility for each category

**2.5. Camera Attack Sampling**
- We want an extensive number of camera angles for hitting
- Hardcode all possible camera angles, discretise with increment
- Sample angles close to center of the screen

**3. Distance Evaluation**
- Measure Distances between general actions

**Target State**
- Minimise the total action space whilst maintaing maximum distance between actions
- Create Richer Functional Actions

**Evaluation**
- Run Jade's Evaluation
- Run train.py

In [2]:
# Hyperparameters
RANDOM_STATE = 123

NUM_EPOCHS = 2
BATCH_SIZE = 10

BIN_PROB_THRESHOLD = 0.05

In [5]:
import os
from datetime import datetime
from pathlib import Path
import pickle

import gym
import minerl
import numpy as np
import pandas as pd
import tqdm
from kmodes.kprototypes import KPrototypes
from sklearn.cluster import KMeans
from minerl.data import BufferedBatchIter
from numpy import array, float32

__file__ = os.getcwd()

NULL_ACTION = {
    'attack': 0,
    'back': 0,
    'camera0': 0.0,
    'camera1': 0.0,
    'craft': 'none',
    'equip': 'none',
    'forward': 0,
    'jump': 0,
    'left': 0,
    'nearbyCraft': 'none',
    'nearbySmelt': 'none',
    'place': 'none',
    'right': 0,
    'sneak': 0,
    'sprint': 0
}

# Paths
ROOT_PATH = Path(__file__).absolute().parent.parent.parent
SRC_PATH = ROOT_PATH.joinpath('src')
DATA_PATH = ROOT_PATH.joinpath('data')
ACTIONS_PATH = SRC_PATH.joinpath('actions')

# Util functions

get_env_name = lambda env_str: f'MineRL{env_str}-v0'
StringBuilder = lambda ENV_STRING: (f'MineRL{ENV_STRING}-v0', str(ACTIONS_PATH.joinpath(f'actions-{ENV_STRING}-05.pickle')))

# Initial setup
data_path = str(DATA_PATH)

if not os.path.exists(data_path):
    os.mkdir(data_path)

os.environ['MINERL_DATA_ROOT'] = data_path # Important


In [70]:
def log(msg, level="INFO"):
    format_dict = {
        "SUCCESS": "\U00002705 SUCCESS",
        "ERROR": "\U0000274C ERROR"
    }

    print(f"{datetime.now()} | {format_dict.get(level, level)} | {msg}")
    
def check_download(env):
    environment = get_env_name(env)
    # Downloading environment data if it doesn't exist
    env_data_path = os.path.join(os.environ['MINERL_DATA_ROOT'], environment)
    if not os.path.exists(env_data_path):
        log(f"Downloading {environment} data...")
        os.mkdir(env_data_path)
        minerl.data.download(environment = environment)
        log(f"Downloaded", level="SUCCESS")
    else:
        log(f"{environment} Exists")

def decode_batch(obj) -> list:
    """Decodes the batch of actions into a list of actions sutiable to fit into a dataframe.
    Important for kmodes/kmeans
    """
    actions = []

    for i in range(BATCH_SIZE):
        proc = NULL_ACTION.copy()
        for k in obj.keys():
            if k == "camera":
                for i, dim in enumerate(obj[k][i]):
                    proc[f"{k}{i}"] = dim
            else:
                proc[k] =  obj[k][i]
        actions.append(proc)

    return actions

def encode_action(obj, bin_prob_threshold = BIN_PROB_THRESHOLD):
    """ Encodes the action dict into a format acceptable by minerl
    """
    proc = {}

    for k, v in obj.items():
        if 'camera' not in k:
            try:
                proc[k] = array(int(float(v) > bin_prob_threshold))
            except:
                proc[k] = v
    
    proc['camera'] = array([obj.get('camera0'), obj.get('camera1')], dtype=float32)
    return proc

def decode_action(obj: dict, camera_shrink_factor=1) -> list:
    """Decodes an action to fit into a dataframe.
    Helper function for MineRLWrapper.map_action()
    """
    proc = NULL_ACTION.copy()

    for k in obj.keys():
        if k == "camera":
            for d, dim in enumerate(obj[k]):
                proc[f"{k}{d}"] = dim/camera_shrink_factor
        else:
            proc[k] = obj[k] if not isinstance(obj[k], np.ndarray) else obj[k].tolist()
    return proc

In [7]:
CAT_VARS = ["craft", "equip", "nearbyCraft", "nearbySmelt", "place"]
TIMEOUT_ACTIONS = 100000

def test_functional(obj):
    for v in CAT_VARS:
        if obj[v] != 'none':
            return True, v
    return False, None

def extract_from_env(env, n, functional=False):
    environment = get_env_name(env)
    data = minerl.data.make(environment = environment)

    iterator = BufferedBatchIter(data)
    i = 0
    collected_actions = []
    cat_var_counter = {k:0 for k in CAT_VARS}

    for current_state, action, reward, next_state, done in iterator.buffered_batch_iter(batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS):
        decoded = decode_batch(action)
        
        if functional:
            for act in decoded:
                is_functional, var = test_functional(act)
                if is_functional and cat_var_counter[var] <= (n / 5):
                    # print(cat_var_counter)
                    collected_actions.append(act)
                    cat_var_counter[var] += 1
                    print(cat_var_counter)
        else:
            collected_actions += decoded

        if len(collected_actions) >= n or i >= TIMEOUT_ACTIONS // BATCH_SIZE:
            break

    df = pd.DataFrame(collected_actions)

    return df

def extract_actions(cfg):
    return pd.concat([extract_from_env(*a) for a in cfg]).reset_index(drop=True)

In [12]:
general_cfg = [
    ["ObtainDiamond", 100],
    ["ObtainIronPickaxe", 100],
    ["Navigate", 100],
    ["Treechop", 100],
]

functional_cfg = [
    ["ObtainDiamond", 200, True]
]

log("Collecting General Actions...")
general_df = extract_actions(*general_cfg)
log("Collected General Actions", "SUCCESS")

# log("Collecting Functional Actions...")
functional_df = extract_actions(functional_cfg)
# log("Collected Functional Actions", "SUCCESS")


2022-09-28 00:12:06.372017 | INFO | Collecting General Actions...


100%|██████████| 13199/13199 [00:00<00:00, 27745.41it/s]
100%|██████████| 19218/19218 [00:00<00:00, 24916.51it/s]
100%|██████████| 12345/12345 [00:00<00:00, 23723.02it/s]
100%|██████████| 2883/2883 [00:00<00:00, 37224.23it/s]
100%|██████████| 12113/12113 [00:00<00:00, 25673.93it/s]
100%|██████████| 4769/4769 [00:00<00:00, 36969.83it/s]
100%|██████████| 11143/11143 [00:00<00:00, 35552.60it/s]
100%|██████████| 2485/2485 [00:00<00:00, 9217.91it/s]
100%|██████████| 13319/13319 [00:00<00:00, 38246.81it/s]
100%|██████████| 1227/1227 [00:00<00:00, 91775.64it/s]
100%|██████████| 1083/1083 [00:00<00:00, 94699.09it/s]
100%|██████████| 4809/4809 [00:00<00:00, 74592.22it/s]
100%|██████████| 900/900 [00:00<00:00, 50568.31it/s]
100%|██████████| 1226/1226 [00:00<00:00, 9115.92it/s]
100%|██████████| 515/515 [00:00<00:00, 40942.14it/s]
100%|██████████| 1325/1325 [00:00<00:00, 76240.20it/s]
100%|██████████| 1117/1117 [00:00<00:00, 60512.22it/s]
100%|██████████| 652/652 [00:00<00:00, 55271.87it/s]
100%|█

2022-09-28 00:13:02.740956 | ✅ SUCCESS | Collected General Actions





## 1. General Sampling

In [14]:
def run_kmeans(df, num_clusters):
    """Running Kmeans on a dataset of actions"""
    log("Running KMeans...")
    df = df.drop(CAT_VARS, axis = 1)

    kmeans = KMeans(n_clusters=num_clusters, random_state=RANDOM_STATE).fit(df.values)

    actions_list = ['attack', 'back', 'camera0', 'camera1', 
    'forward', 'jump', 'left',  'right', 'sneak','sprint']

    extracted_actions = []
    for cluster in kmeans.cluster_centers_:
        action = NULL_ACTION.copy()
        action.update({actions_list[i]: cluster[i] for i in range(len(cluster))})
        extracted_actions.append(encode_action(action))
    
    log("Extracted Actions", level="SUCCESS")
    return extracted_actions

In [19]:
km = run_kmeans(general_df, 20)

2022-09-28 00:15:33.361683 | INFO | Running KMeans...
2022-09-28 00:15:33.473956 | ✅ SUCCESS | Extracted Actions


In [39]:
# Sample opposites
OPPOSITES_PROBS = [
    {"forward": 0.05, "back": 0.95},
    {"right": 0.5, "back": 0.5},
    {"sprint": 0.25, "sneak": 0.75}
]

def remove_opposites(actions: list) -> list:
    """Removes opposite actions ocurring at the same time using a sampling
    strategy in the `OPPOSITES_PROBS` dictionary.
    Args:
        actions (list): The kmeans or kprototypes cluster centres
    Returns:
        new_actions (list): The new action set once the opposites have been removed
    """
    new_actions = []
    for i in range(len(actions)):
        act = actions[i]
        new_act = act.copy()

        for opp in OPPOSITES_PROBS:
            key1 = list(opp.keys())[0]
            key2 = list(opp.keys())[1]
            if act[key1] and act[key2]:
                if np.random.random() < opp[key1]:
                    new_act[key2] = array(0)
                else:
                    new_act[key1] = array(0)
        
        new_actions.append(new_act)

    return new_actions

In [41]:
nkm = remove_opposites(km)

## 2. Functional Actions

In [71]:
def run_kprototypes(df):
    """Running Kprototypes on a dataset of actions"""
    # log("Running KPrototypes...")

    mark_array = df.values
    categorical_features_idx = [4, 5, 9, 10, 11]

    kproto = KPrototypes(n_clusters=1, max_iter=200).fit(
        mark_array, categorical=categorical_features_idx)
    
    actions_list = ['attack', 'back', 'camera0', 'camera1', 
        'forward', 'jump', 'left',  'right', 'sneak','sprint', 
        'craft', 'equip', 'nearbyCraft', 'nearbySmelt', 'place']

    extracted_actions = []
    for cluster in kproto.cluster_centroids_:
        action = {actions_list[i]: cluster[i] for i in range(len(cluster))}
        extracted_actions.append(encode_action(action, bin_prob_threshold=0.4))

    # log("Extracted Actions", level="SUCCESS")

    return extracted_actions

In [72]:
df = functional_df.copy()
functional_actions = []
for v in CAT_VARS:
    gb = df.groupby(v)
    functional_actions += [run_kprototypes(gb.get_group(grp))[0] for grp in gb.groups if grp != 'none']

In [73]:
pd.DataFrame([decode_action(a) for a in functional_actions])

Unnamed: 0,attack,back,camera0,camera1,craft,equip,forward,jump,left,nearbyCraft,nearbySmelt,place,right,sneak,sprint
0,0,0,-0.384255,1.617932,planks,none,0,0,0,none,none,none,0,0,0
1,0,0,0.170124,0.326175,stick,none,0,0,0,none,none,none,0,0,0
2,0,0,0.099893,0.173512,none,iron_pickaxe,0,0,0,none,none,none,0,0,0
3,0,0,-0.049468,0.069348,none,stone_pickaxe,1,0,0,none,none,none,0,0,0
4,0,0,-0.527237,-0.321347,none,wooden_axe,1,0,0,none,none,none,0,0,0
5,0,0,0.57,1.500005,none,wooden_pickaxe,1,0,0,none,none,none,0,0,0
6,0,0,0.031342,0.007697,none,none,0,0,0,furnace,none,none,0,0,0
7,0,0,-0.033763,0.267059,none,none,0,0,0,iron_axe,none,none,0,0,0
8,1,0,0.795,1.350003,none,none,0,0,0,iron_pickaxe,none,none,0,0,0
9,0,0,-0.114529,0.116837,none,none,0,0,0,none,iron_ingot,none,0,0,0


In [78]:
for j in {i[0] for i in general_cfg + functional_cfg}:
    print(j)

Navigate
ObtainDiamond
ObtainIronPickaxe
Treechop
