### Action Map
A notebook that maps an action taken from the dataset onto an action in our given action set.

In [1]:
import torch
import wandb
import gym
import minerl
from collections import namedtuple

from minerl3161.agent import DQNAgent
from minerl3161.trainer import DQNTrainer
from minerl3161.hyperparameters import DQNHyperparameters
from minerl3161.wrappers import MineRLDiscreteActionWrapper, mineRLObservationSpaceWrapper

import os
from datetime import datetime
from pathlib import Path
import pickle

import gym
import minerl
import numpy as np
import pandas as pd
import tqdm
from kmodes.kprototypes import KPrototypes
from sklearn.cluster import KMeans
from minerl.data import BufferedBatchIter
from numpy import array, float32



In [24]:
# Template action in which every field is 0 / 0.0 / 'none'
NULL_ACTION = dict(
    attack=0,
    back=0,
    camera0=0.0,
    camera1=0.0,
    craft='none',
    equip='none',
    forward=0,
    jump=0,
    left=0,
    nearbyCraft='none',
    nearbySmelt='none',
    place='none',
    right=0,
    sneak=0,
    sprint=0,
)

# Sampling configuration
NUM_EPOCHS = 2     # num_epochs handed to the buffered batch iterator
BATCH_SIZE = 10    # batch_size handed to the buffered batch iterator
MAX_ACTIONS = 200  # bound on how many actions are collected from the dataset

# Paths: the project root is taken to be three levels above the current working directory
ROOT_PATH = Path(os.getcwd()).absolute().parent.parent.parent
SRC_PATH = ROOT_PATH / 'src'
DATA_PATH = ROOT_PATH / 'data'
ACTIONS_PATH = SRC_PATH / 'actions'

In [34]:
# Util functions
def StringBuilder(ENV_STRING):
    """Build the identifiers derived from a short environment name.

    Returns a 2-tuple: the string ``MineRL<ENV_STRING>-v0`` and the string form of
    the path ``ACTIONS_PATH / actions-<ENV_STRING>.pickle``.
    """
    env_id = f'MineRL{ENV_STRING}-v0'
    pickle_path = ACTIONS_PATH.joinpath(f'actions-{ENV_STRING}.pickle')
    return env_id, str(pickle_path)

def log(msg, level="INFO"):
    """Print ``msg`` prefixed with the current timestamp and a level label.

    The levels "SUCCESS" and "ERROR" are rendered with an emoji in front of the
    level name; any other level is printed exactly as given.
    """
    decorated_levels = {
        "SUCCESS": "\U00002705 SUCCESS",
        "ERROR": "\U0000274C ERROR"
    }
    label = decorated_levels[level] if level in decorated_levels else level
    timestamp = datetime.now()

    print(f"{timestamp} | {label} | {msg}")
    
def check_download(environment):
    """Download the data for ``environment`` unless its directory already exists.

    The directory ``$MINERL_DATA_ROOT/<environment>`` is used as the marker for
    "data already downloaded": when it is missing, it is created and
    ``minerl.data.download`` is called for the environment.

    Args:
        environment: Name of the environment whose data should be available.

    Raises:
        KeyError: If the ``MINERL_DATA_ROOT`` environment variable is not set.
        Exception: Anything raised by ``minerl.data.download`` is re-raised.
    """
    env_data_path = os.path.join(os.environ['MINERL_DATA_ROOT'], environment)
    if os.path.exists(env_data_path):
        return

    # Downloading environment data if it doesn't exist
    log(f"Downloading {environment} data...")
    os.mkdir(env_data_path)
    try:
        minerl.data.download(environment = environment)
    except BaseException:
        # The directory doubles as the "already downloaded" marker. If the download
        # failed or was interrupted before writing anything, remove the empty
        # directory so the next run retries instead of silently skipping.
        try:
            os.rmdir(env_data_path)
        except OSError:
            # Not empty (partial download) or already gone: leave it for manual
            # inspection rather than deleting data.
            pass
        raise
    log("Downloaded", level="SUCCESS")

def batch_to_list(obj) -> list:
    """Decodes the batch of actions into a list of actions suitable to fit into a dataframe.
    Important for kmodes/kmeans

    Args:
        obj: Mapping from action name to an indexable sequence holding one value
            per element of the batch. All sequences are assumed to share the
            length of the first one.

    Returns:
        One dict per batch element, each mapping every action name to that
        element's value. An empty mapping yields an empty list.
    """
    keys = list(obj.keys())
    if not keys:
        return []

    # Size the loop from the batch itself instead of the global BATCH_SIZE, so a
    # batch of any length is decoded completely and without an IndexError.
    batch_len = len(obj[keys[0]])

    return [{k: obj[k][i] for k in keys} for i in range(batch_len)]

def encode_action(obj, bin_prob_threshold=None, bin_as_cat=None):
    """Encodes the action dict into a format acceptable by minerl.

    Every key that does not contain ``'camera'`` is converted when its value is
    numeric: to ``array(int(float(v)))`` if ``bin_as_cat`` is true, otherwise to
    ``array(1)`` / ``array(0)`` depending on whether ``float(v)`` exceeds
    ``bin_prob_threshold``. Values that cannot be converted to a number (e.g.
    ``'none'``) are copied through unchanged. The ``'camera0'`` and ``'camera1'``
    entries are combined into a single float32 ``'camera'`` array.

    Args:
        obj: Flat action dict.
        bin_prob_threshold: Threshold above which a numeric value becomes 1.
            Defaults to the module-level ``BIN_PROB_THRESHOLD``.
        bin_as_cat: When true, numeric values are truncated to int instead of
            thresholded. Defaults to the module-level ``BIN_AS_CAT``.

    Returns:
        A new dict with the encoded values and a ``'camera'`` entry.

    Raises:
        NameError: If a setting is not passed and the corresponding module-level
            constant is not defined.
    """
    # NOTE(review): BIN_AS_CAT / BIN_PROB_THRESHOLD are not defined in the visible
    # part of this notebook - confirm where they are meant to come from.
    if bin_as_cat is None:
        bin_as_cat = BIN_AS_CAT
    if not bin_as_cat and bin_prob_threshold is None:
        bin_prob_threshold = BIN_PROB_THRESHOLD

    proc = {}

    for k, v in obj.items():
        if 'camera' in k:
            continue
        try:
            number = float(v)
            if bin_as_cat:
                proc[k] = array(int(number))
            else:
                proc[k] = array(int(number > bin_prob_threshold))
        except (TypeError, ValueError, OverflowError):
            # Not a usable number (e.g. 'none'): keep the original value. Only
            # conversion errors are caught, so a missing setting or any other
            # bug is no longer hidden.
            proc[k] = v

    proc['camera'] = array([obj.get('camera0'), obj.get('camera1')], dtype=float32)
    return proc

## Setup & Choose ENV

In [19]:
# Initial setup
data_path = str(DATA_PATH)

if not os.path.exists(data_path):
    os.mkdir(data_path)

os.environ['MINERL_DATA_ROOT'] = data_path # Important

In [20]:
# Short environment names this notebook lists as options
envs = [
    "Treechop",
    "Navigate",
    "ObtainIronPickaxe",
    "ObtainDiamond",
]

# The environment processed by the cells below
ENVIRONMENT = "ObtainDiamond"

### Check Download

In [22]:
# Derive the full environment id and the pickle path for the chosen environment,
# then make sure the environment's data directory exists (downloading if needed).
environment, save_path = StringBuilder(ENV_STRING=ENVIRONMENT)
check_download(environment=environment)

### Extract Human data

In [35]:
data = minerl.data.make(environment = environment)
iterator = BufferedBatchIter(data)
collected_actions = []
log("Collecting actions...")
# Only the action component of each (state, action, reward, next_state, done) batch is used.
for _state, action, _reward, _next_state, _done in iterator.buffered_batch_iter(batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS):
    collected_actions.extend(batch_to_list(action))

    # Stop on the number of actions actually collected. Comparing a batch counter
    # against MAX_ACTIONS // BATCH_SIZE never triggers when MAX_ACTIONS < BATCH_SIZE
    # (the quotient is 0), which would consume the entire dataset.
    if len(collected_actions) >= MAX_ACTIONS:
        break

# The last batch may overshoot the limit; keep at most MAX_ACTIONS actions.
collected_actions = collected_actions[:MAX_ACTIONS]

2022-09-11 18:03:19.690623 | INFO | Collecting actions...


100%|██████████| 30624/30624 [00:01<00:00, 28244.71it/s]


In [36]:
# Preview only the first few actions; displaying the whole list floods the cell output
collected_actions[:5]

[{'attack': 0,
  'back': 0,
  'camera': array([0., 0.], dtype=float32),
  'craft': 'none',
  'equip': 'none',
  'forward': 0,
  'jump': 0,
  'left': 0,
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'right': 0,
  'sneak': 0,
  'sprint': 0},
 {'attack': 0,
  'back': 0,
  'camera': array([0., 0.], dtype=float32),
  'craft': 'none',
  'equip': 'none',
  'forward': 0,
  'jump': 0,
  'left': 0,
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'right': 0,
  'sneak': 0,
  'sprint': 0},
 {'attack': 0,
  'back': 0,
  'camera': array([0., 0.], dtype=float32),
  'craft': 'none',
  'equip': 'none',
  'forward': 0,
  'jump': 0,
  'left': 0,
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  'place': 'none',
  'right': 0,
  'sneak': 0,
  'sprint': 0},
 {'attack': 0,
  'back': 0,
  'camera': array([0., 0.], dtype=float32),
  'craft': 'none',
  'equip': 'none',
  'forward': 0,
  'jump': 0,
  'left': 0,
  'nearbyCraft': 'none',
  'nearbySmelt': 'none',
  '

In [None]:
# TODO: Take one of the collected actions above and map it to an action in our action set.