### Action Map
A notebook that maps each action from the human demonstration dataset to the closest action in our discrete action set.

In [4]:
import torch
from pathlib import Path
# import wandb
import gym
import minerl
from collections import namedtuple
import sys
import os

# Paths
ROOT_PATH = Path(os.getcwd()).absolute().parent.parent.parent
SRC_PATH = ROOT_PATH.joinpath('src')
MINERL3161_PATH = SRC_PATH.joinpath('minerl3161')
DATA_PATH = ROOT_PATH.joinpath('data')
ACTIONS_PATH = SRC_PATH.joinpath('actions')

# sys.path.append(MINERL3161_PATH)

from minerl3161.agent import DQNAgent
from minerl3161.trainer import DQNTrainer
from minerl3161.hyperparameters import DQNHyperparameters
from minerl3161.wrappers import MineRLDiscreteActionWrapper, mineRLObservationSpaceWrapper

from datetime import datetime

import pickle

import gym
import minerl
import numpy as np
import pandas as pd
import tqdm
from kmodes.kprototypes import KPrototypes
from sklearn.cluster import KMeans
from minerl.data import BufferedBatchIter
from numpy import array, float32

In [152]:
# Limits for sampling actions from the human demonstration data.
NUM_EPOCHS = 2  # passed as num_epochs to buffered_batch_iter
BATCH_SIZE = 10  # passed as batch_size to buffered_batch_iter
MAX_ACTIONS = 200  # collection stops after MAX_ACTIONS // BATCH_SIZE batches
BIN_AS_CAT = False  # not referenced in the visible cells -- presumably a clustering option; TODO confirm
BIN_PROB_THRESHOLD = 0.2  # not referenced in the visible cells -- TODO confirm intended use
CAMERA_SHRINK_FACTOR = 100  # divisor decode_actions applies to camera components

# Default value for every flattened action key. decode_actions copies this dict and
# overwrites the entries present in the incoming action, so the key order here is
# the key order of every decoded action (and of the DataFrame columns built from them).
NULL_ACTION = {
    'attack': 0,
    'back': 0,
    'camera0': 0.0,
    'camera1': 0.0,
    'craft': 'none',
    'equip': 'none',
    'forward': 0,
    'jump': 0,
    'left': 0,
    'nearbyCraft': 'none',
    'nearbySmelt': 'none',
    'place': 'none',
    'right': 0,
    'sneak': 0,
    'sprint': 0
}

In [153]:
# Util functions
def StringBuilder(ENV_STRING):
    """Build the gym environment id and the action-pickle path for a task.

    Args:
        ENV_STRING: Short task name, e.g. ``"ObtainDiamond"``.

    Returns:
        A 2-tuple of strings: the environment id ``MineRL<ENV_STRING>-v0`` and
        the path of ``actions-<ENV_STRING>.pickle`` inside ``ACTIONS_PATH``.
    """
    environment_id = f'MineRL{ENV_STRING}-v0'
    actions_file = ACTIONS_PATH.joinpath(f'actions-{ENV_STRING}.pickle')
    return (environment_id, str(actions_file))

def log(msg, level="INFO"):
    """Print one timestamped log line of the form ``<now> | <level> | <msg>``.

    Args:
        msg: Text to print.
        level: Severity label. ``"SUCCESS"`` and ``"ERROR"`` are decorated with
            an icon; any other value is printed as given.
    """
    decorated_levels = {
        "SUCCESS": "\U00002705 SUCCESS",
        "ERROR": "\U0000274C ERROR"
    }

    try:
        label = decorated_levels[level]
    except KeyError:
        # Unknown levels are shown verbatim.
        label = level

    print(f"{datetime.now()} | {label} | {msg}")
    
def check_download(environment):
    """Download the dataset for ``environment`` unless its directory already exists.

    The directory ``$MINERL_DATA_ROOT/<environment>`` doubles as the
    "already downloaded" marker, so it must not survive a failed download:
    otherwise the next call would see it and skip the download.

    Args:
        environment: Full environment id, e.g. ``"MineRLObtainDiamond-v0"``.

    Raises:
        KeyError: If the ``MINERL_DATA_ROOT`` environment variable is not set.
        Exception: Whatever ``minerl.data.download`` raises is re-raised.
    """
    env_data_path = os.path.join(os.environ['MINERL_DATA_ROOT'], environment)
    if os.path.exists(env_data_path):
        return

    log(f"Downloading {environment} data...")
    os.makedirs(env_data_path, exist_ok=True)
    try:
        minerl.data.download(environment = environment)
    except BaseException:
        # Best-effort rollback of the marker directory (also on Ctrl-C). rmdir
        # only removes an empty directory, so partially downloaded files are
        # never deleted here; in that case the directory is left for manual cleanup.
        try:
            os.rmdir(env_data_path)
        except OSError:
            pass
        log(f"Download of {environment} data failed", level="ERROR")
        raise
    log("Downloaded", level="SUCCESS")

def batch_to_list(obj) -> list:
    """Split a batched action dict into a list of per-sample action dicts.

    The result is suitable for loading into a dataframe (needed for
    kmodes/kmeans).

    Args:
        obj: Mapping from action name to an indexable sequence holding one
            value per sample in the batch.

    Returns:
        One dict per sample, each with the same keys as ``obj``. The number of
        samples is taken from the data itself (the shortest sequence in
        ``obj``), so a batch shorter than the configured batch size no longer
        raises ``IndexError``. An empty ``obj`` yields an empty list.
    """
    if not obj:
        return []

    batch_len = min(len(values) for values in obj.values())
    return [
        {key: values[idx] for key, values in obj.items()}
        for idx in range(batch_len)
    ]

def _camera_fields(camera):
    """Flatten a camera vector into scaled ``camera0``, ``camera1``, ... entries."""
    return {
        f"camera{d}": dim / CAMERA_SHRINK_FACTOR
        for d, dim in enumerate(camera)
    }


def decode_actions(obj) -> "list | dict":
    """Flatten an action (or a batch of actions) into the ``NULL_ACTION`` layout.

    Every decoded action starts as a copy of ``NULL_ACTION``; keys present in
    ``obj`` overwrite the defaults. The ``camera`` vector is split into
    ``camera0``, ``camera1``, ... and each component is divided by
    ``CAMERA_SHRINK_FACTOR``.

    Args:
        obj: Action dict. It is treated as a batch when ``obj['attack']`` is a
            ``list`` with more than one element; otherwise as a single action.

    Returns:
        A list of decoded dicts (one per sample) for a batch, or a single
        decoded dict for a single action.

    Notes:
        Fixes relative to the previous implementation of the batch path:
        * the camera loop reused the sample index ``i`` as its own loop
          variable, so keys processed after ``camera`` read the wrong sample;
        * camera components were not divided by ``CAMERA_SHRINK_FACTOR``,
          unlike the single-action path;
        * the number of samples came from the global ``BATCH_SIZE`` rather
          than from the batch itself.
    """
    if isinstance(obj['attack'], list) and len(obj['attack']) > 1:
        actions = []
        for idx in range(len(obj['attack'])):
            proc = NULL_ACTION.copy()
            for k, value in obj.items():
                # A list holds one entry per sample; any other value is
                # shared by every sample in the batch.
                sample = value[idx] if isinstance(value, list) else value
                if k == "camera":
                    proc.update(_camera_fields(sample))
                else:
                    proc[k] = sample
            actions.append(proc)
        return actions

    proc = NULL_ACTION.copy()
    for k, value in obj.items():
        if k == "camera":
            proc.update(_camera_fields(value))
        else:
            # Convert numpy values to plain Python objects.
            proc[k] = value.tolist() if isinstance(value, np.ndarray) else value
    return proc

## Setup & Choose ENV

In [17]:
# Initial setup
data_path = str(DATA_PATH)

# Create the data directory together with any missing parents. exist_ok makes
# this a no-op on re-runs and avoids the check-then-create race.
os.makedirs(data_path, exist_ok=True)

# check_download reads this variable to locate the dataset directory.
os.environ['MINERL_DATA_ROOT'] = data_path # Important

In [18]:
# Short task names available for selection (not referenced elsewhere in the visible cells).
envs = ["Treechop", "Navigate", "ObtainIronPickaxe", "ObtainDiamond"]
# Task used by the rest of the notebook; StringBuilder expands it into the full environment id.
ENVIRONMENT = "ObtainDiamond"

### Check Download

In [19]:
# Expand the short task name into the environment id and the action-pickle path
# (save_path is not used in the visible cells).
environment, save_path = StringBuilder(ENVIRONMENT)
# Download the data for this environment unless its directory already exists.
check_download(environment)

### Extract Human data

In [20]:
# Stream batches from the human demonstration data and keep only the actions,
# stopping once MAX_ACTIONS // BATCH_SIZE batches have been consumed.
data = minerl.data.make(environment=environment)
iterator = BufferedBatchIter(data)
batch_limit = MAX_ACTIONS // BATCH_SIZE
collected_actions = []
i = 0
log("Collecting actions...")
batches = iterator.buffered_batch_iter(batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS)
for i, (current_state, action, reward, next_state, done) in enumerate(batches, start=1):
    collected_actions.extend(batch_to_list(action))

    if i == batch_limit:
        break

2022-09-12 19:49:28.304605 | INFO | Collecting actions...


100%|██████████| 18627/18627 [00:00<00:00, 23322.75it/s]
100%|██████████| 46648/46648 [00:02<00:00, 23118.70it/s]


### Initialise Env (Action Wrapper)

In [23]:
# Hyperparameters object; only its inventory_feature_names attribute is used in this cell.
hp =DQNHyperparameters()
# Configure environment
env = gym.make(environment)
env = mineRLObservationSpaceWrapper(env, hp.inventory_feature_names)
# NOTE(review): later cells read env.action_set and call env.action(...), but
# MineRLDiscreteActionWrapper is imported and never applied in the visible cells.
# Presumably the env was wrapped in a cell that has since changed -- confirm this
# still works after Restart & Run All.

In [145]:
# One decoded row per entry of the environment's action set; map_action measures
# distances against these rows.
CLUSTER_CENTERS = pd.DataFrame(list(map(decode_actions, env.action_set)))
CLUSTER_CENTERS

Unnamed: 0,attack,back,camera0,camera1,craft,equip,forward,jump,left,nearbyCraft,nearbySmelt,place,right,sneak,sprint
0,1,0,0.062114,-0.012135,none,none,1,0,0,none,none,none,0,0,0
1,1,0,-0.263162,-0.011394,none,none,1,0,0,none,none,none,0,0,0
2,1,0,0.195602,-0.009525,none,none,0,0,0,none,none,none,0,0,0
3,1,0,-0.049991,0.268530,none,none,1,0,0,none,none,none,0,0,0
4,1,0,-0.075911,-0.000866,none,none,1,0,0,none,none,none,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,0,0,0.000000,0.000000,none,none,0,0,0,none,coal,none,0,0,0
61,0,0,0.000000,0.000000,none,none,0,0,0,none,none,torch,0,0,0
62,0,0,0.000000,0.000000,none,none,0,0,0,none,none,crafting_table,0,0,0
63,0,0,0.000000,0.000000,none,none,0,0,0,none,none,furnace,0,0,0


## Algorithm

In [149]:
def find_cat_action(action, value):
    """Placeholder for resolving a categorical action to an action index.

    No lookup is implemented yet: the function only prints the requested
    action name and value.

    Args:
        action: Name of the categorical action key (e.g. ``'place'``).
        value: Requested value for that key.

    Returns:
        None.
    """
    message = f"FINDING {action}, {value}"
    print(message)
    return None

def map_action(obs, centers=None):
    """Map a decoded action to the index of the closest row in the action table.

    If any categorical key (place, nearbyCraft, nearbySmelt, craft, equip) is
    set to something other than ``'none'``, the lookup is delegated to
    ``find_cat_action`` for the first such key and its result is returned.
    Otherwise the Euclidean distance between the numeric entries of ``obs``
    and every row of ``centers`` is computed and the position of the nearest
    row is returned.

    Numeric values are matched to the table by column name, not by position,
    so the result does not depend on the key order of ``obs``. Keys of ``obs``
    that are not columns of the table are ignored.

    Args:
        obs: Decoded action dict containing the categorical keys and one
            entry for every numeric column of ``centers``.
        centers: DataFrame with one candidate action per row. Defaults to the
            module-level ``CLUSTER_CENTERS``.

    Returns:
        The positional index of the nearest row (``numpy`` integer), or the
        return value of ``find_cat_action`` for categorical actions.

    Raises:
        KeyError: If ``obs`` lacks a categorical key or a numeric column.
    """
    cat_list = ['place', 'nearbyCraft', 'nearbySmelt', 'craft', 'equip']
    for cat_act in cat_list:
        if obs[cat_act] != 'none':
            return find_cat_action(cat_act, obs[cat_act])

    if centers is None:
        centers = CLUSTER_CENTERS

    numeric_centers = centers.drop(cat_list, axis=1)
    # Build the query vector in the table's column order.
    obs_vector = np.array(
        [obs[column] for column in numeric_centers.columns], dtype=float
    )
    # Row-wise distance, computed in one vectorised call.
    distances = np.linalg.norm(
        numeric_centers.to_numpy(dtype=float) - obs_vector, axis=1
    )

    return np.argmin(distances)

In [156]:
# Decode the first collected human action into the flat NULL_ACTION layout.
ex = decode_actions(collected_actions[0])
ex

{'attack': 1,
 'back': 0,
 'camera0': 0.018000011444091798,
 'camera1': -0.01199951171875,
 'craft': 'none',
 'equip': 'none',
 'forward': 0,
 'jump': 0,
 'left': 0,
 'nearbyCraft': 'none',
 'nearbySmelt': 'none',
 'place': 'none',
 'right': 0,
 'sneak': 0,
 'sprint': 0}

In [157]:
# Map the example to an action index, then pass that index to env.action
# (the recorded output below shows a full action dict coming back).
env.action(map_action(ex))

{'attack': array(1, dtype=int64),
 'back': array(0, dtype=int64),
 'craft': 'none',
 'equip': 'none',
 'forward': array(0, dtype=int64),
 'jump': array(0, dtype=int64),
 'left': array(0, dtype=int64),
 'nearbyCraft': 'none',
 'nearbySmelt': 'none',
 'place': 'none',
 'right': array(0, dtype=int64),
 'sneak': array(0, dtype=int64),
 'sprint': array(0, dtype=int64),
 'camera': array([ 0.02751222, -0.03029674], dtype=float32)}

In [11]:
# TODO: Test mappings, adjust CAMERA_SHRINK_FACTOR