<a href="https://colab.research.google.com/github/xhxuciedu/MineRL/blob/main/Create_a_Bot_to_Find_Diamonds_in_Minecraft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Create a Bot to Find Diamonds in Minecraft

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).

Companion notebook to execute the code from the following article: https://mlabonne.github.io/blog/minecraft/

In [1]:
# # Install JDK, OpenGL, etc.
!sudo add-apt-repository -y ppa:openjdk-r/ppa > /dev/null 2>&1
!sudo apt purge openjdk-* > /dev/null 2>&1
!sudo apt install openjdk-8-jdk xvfb xserver-xephyr vnc4server python-opengl ffmpeg > /dev/null 2>&1

# # Install MineRL, the virtual display, and a video renderer
#!pip install -q -U minerl pyvirtualdisplay colabgymrender

In [2]:
%%capture
# ^ hides output
# Install the Python packages imported by the next cell: minerl (the
# environments and dataset), pyvirtualdisplay (the Display class) and
# colabgymrender (the Recorder wrapper).
# NOTE(review): no versions are pinned, so a fresh run may resolve to
# different releases than the ones that produced the saved outputs.
!pip3 install --upgrade minerl
!pip3 install pyvirtualdisplay
!pip3 install -U colabgymrender

In [3]:
# RL environment
import gym
import minerl

# Visualization
from colabgymrender.recorder import Recorder
from pyvirtualdisplay import Display
from IPython.display import HTML

# Others
import numpy as np
from tqdm.notebook import tqdm
import logging
logging.disable(logging.ERROR)



Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)2465792/45929032 bytes (5.4%)5472256/45929032 bytes (11.9%)9486336/45929032 bytes (20.7%)13500416/45929032 bytes (29.4%)17506304/45929032 bytes (38.1%)21454848/45929032 bytes (46.7%)25583616/45929032 bytes (55.7%)29605888/45929032 bytes (64.5%)33775616/45929032 bytes (73.5%)37871616/45929032 bytes (82.5%)41738240/45929032 bytes (90.9%)44728320/45929032 bytes (97.4%)

In [4]:
# Create and start a non-visible 400x300 virtual display.
# It is bound to `virtual_display` rather than `display` so that IPython's
# built-in `display()` helper is not shadowed for the rest of the notebook.
virtual_display = Display(visible=0, size=(400, 300))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f5a85e26990>

In [5]:
import sys

# Report the interpreter path, the version string and the structured version
# info, one per line.
print(sys.executable, sys.version, sys.version_info, sep='\n')

/usr/bin/python3
3.7.13 (default, Apr 24 2022, 01:04:09) 
[GCC 7.5.0]
sys.version_info(major=3, minor=7, micro=13, releaselevel='final', serial=0)


## I. Scripted bot

Let's try simple actions: forward (5 steps) and wait (40 steps).

In [6]:
# Define the sequence of actions: 5 'forward' steps followed by 40 idle steps
script = ['forward'] * 5 + [''] * 40

env = gym.make('MineRLObtainDiamond-v0')
env = Recorder(env, './video', fps=60)
env.seed(21)
obs = env.reset()

for action in script:
    # Start from the no-op action dict provided by the action space
    action_dict = env.action_space.noop()

    # Activate the scripted action. '' means "do nothing": the no-op is sent
    # unchanged instead of gaining a meaningless '' key.
    if action:
        action_dict[action] = 1

    # Advance the environment by one step with this action
    obs, reward, done, _ = env.step(action_dict)

    # Do not keep stepping an episode that has already ended
    if done:
        break

env.release()
env.play()

We can create more complex sequences of actions thanks to the `str_to_act` function. Let's chop a tree this time.

In [7]:
# Code from https://github.com/KarolisRam/MineRL2021-Intro-baselines
def str_to_act(env, actions):
    """Translate a whitespace-separated action string into an action dict.

    Each token of ``actions`` is either a bare action name (``'forward'``),
    which is set to ``1``, or a ``key:value`` pair (``'craft:planks'``,
    ``'camera:[-10,0]'``). The value of ``camera`` is parsed as a Python
    literal; every other value is stored as the raw string.

    Args:
        env: object whose ``action_space.noop()`` returns the base action dict.
        actions: the action string; an empty string yields the unchanged no-op.

    Returns:
        The no-op action dict with the requested entries overwritten.

    Raises:
        ValueError, SyntaxError: if a ``camera`` value is not a valid literal.
    """
    # Local import keeps the function usable without touching the import cell.
    import ast

    action_space = env.action_space.noop()
    for action in actions.split():
        if ':' in action:
            # Split on the first colon only so the value may itself contain ':'.
            k, v = action.split(':', 1)
            if k == 'camera':
                # literal_eval only accepts literals, so a script string can
                # never execute arbitrary code the way eval() would.
                action_space[k] = ast.literal_eval(v)
            else:
                action_space[k] = v
        else:
            action_space[action] = 1
    return action_space

# Tree-chopping plan as (action string, number of repetitions) pairs;
# '' is a step with no action.
_tree_chop_plan = [
    ('', 20),
    ('forward', 5),
    ('attack', 61),
    ('camera:[-10,0]', 7),   # Look up
    ('attack', 240),
    ('jump', 1),
    ('forward', 10),         # Jump forward
    ('camera:[-10,0]', 2),   # Look up
    ('attack', 150),
    ('camera:[10,0]', 7),    # Look down
    ('', 40),
]

# Expand the plan into one action string per environment step.
script = [step for step, repeats in _tree_chop_plan for _ in range(repeats)]

In [8]:
# Replay `script` in a fresh, recorded environment seeded with 21
env = gym.make('MineRLObtainDiamond-v0')
env = Recorder(env, './video', fps=60)
env.seed(21)
obs = env.reset()

for action in tqdm(script):
    obs, reward, done, _ = env.step(str_to_act(env, action))
    # Do not keep stepping an episode that has already ended
    if done:
        break

env.release()
env.play()

  0%|          | 0/543 [00:00<?, ?it/s]

In [9]:
# Embed a YouTube player (video id 3A2P0lQs2c0) in the cell output.
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/3A2P0lQs2c0" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')

## II. Imitation Learning

A more flexible solution consists of training an agent to chop wood. In this example, we choose an imitation learning framework (supervised learning with a dataset of videos).

In [10]:
import torch
import torch.nn as nn


class CNN(nn.Module):
    """Convolutional classifier mapping a batch of images to action logits.

    Three unpadded Conv2d + BatchNorm2d + ReLU stages are followed by a
    flatten and two linear layers. All layers live in a single
    ``nn.Sequential`` called ``cnn`` in a fixed order, so ``state_dict`` keys
    (``cnn.0.weight`` ... ``cnn.12.bias``) do not depend on the input size.

    Args:
        input_shape: ``(channels, height, width)`` of one observation.
        output_dim: number of logits produced per observation.

    Raises:
        ValueError: if the image is too small for the convolution stack.
    """

    # (out_channels, kernel_size, stride) of each convolution stage.
    _CONV_SPECS = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    def __init__(self, input_shape, output_dim):
        super().__init__()
        n_input_channels, height, width = input_shape

        layers = []
        in_channels = n_input_channels
        for out_channels, kernel_size, stride in self._CONV_SPECS:
            layers.append(nn.Conv2d(in_channels, out_channels,
                                    kernel_size=kernel_size, stride=stride))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
            # Output size of an unpadded convolution. Computed analytically so
            # that no dummy forward pass touches the BatchNorm running stats.
            height = (height - kernel_size) // stride + 1
            width = (width - kernel_size) // stride + 1
            if height < 1 or width < 1:
                raise ValueError(
                    f'input_shape {tuple(input_shape)} is too small for the '
                    'convolution stack')
            in_channels = out_channels

        # 64x64 inputs give 64 * 4 * 4 = 1024 features, the value that was
        # previously hard-coded.
        n_flatten = in_channels * height * width

        layers.append(nn.Flatten())
        layers.append(nn.Linear(n_flatten, 512))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(512, output_dim))
        self.cnn = nn.Sequential(*layers)

    def forward(self, observations):
        """Return the logits for a ``(batch, channels, height, width)`` tensor."""
        return self.cnn(observations)

def dataset_action_batch_to_actions(dataset_actions, camera_margin=5):
    """Convert a batch of dataset action dicts into discrete action indices.

    The first matching rule wins, in this order:

    * camera first component  < -camera_margin -> 3
    * camera first component  >  camera_margin -> 4
    * camera second component >  camera_margin -> 5
    * camera second component < -camera_margin -> 6
    * forward and jump -> 2
    * forward          -> 1
    * attack           -> 0
    * anything else    -> -1 (no corresponding action)

    Args:
        dataset_actions: mapping with ``"camera"``, ``"attack"``,
            ``"forward"`` and ``"jump"`` arrays. One action per sample is
            expected (e.g. ``(batch, 1, 2)`` for the camera); any layout is
            flattened to one row per action.
        camera_margin: camera movement (absolute value) above which the sample
            is labelled as a camera action.

    Returns:
        1-D integer array with one index per sample.
    """
    # Explicit reshapes instead of squeeze(): squeeze() also drops the batch
    # axis when the batch holds a single sample, which broke the indexing.
    camera_actions = np.asarray(dataset_actions["camera"]).reshape(-1, 2)
    attack_actions = np.asarray(dataset_actions["attack"]).reshape(-1)
    forward_actions = np.asarray(dataset_actions["forward"]).reshape(-1)
    jump_actions = np.asarray(dataset_actions["jump"]).reshape(-1)

    cam_first = camera_actions[:, 0]
    cam_second = camera_actions[:, 1]
    moving_forward = forward_actions == 1

    # np.select returns the choice of the FIRST true condition, which
    # reproduces the priority of an if/elif chain.
    conditions = [
        cam_first < -camera_margin,
        cam_first > camera_margin,
        cam_second > camera_margin,
        cam_second < -camera_margin,
        moving_forward & (jump_actions == 1),
        moving_forward,
        attack_actions == 1,
    ]
    choices = [3, 4, 5, 6, 2, 1, 0]
    return np.select(conditions, choices, default=-1).astype(int)

class ActionShaping(gym.ActionWrapper):
    """Expose a small discrete action set on top of the env's dict actions.

    Discrete index -> keys set on top of the no-op action:

    * 0: attack
    * 1: forward
    * 2: forward + jump
    * 3 / 4: camera ``[-camera_angle, 0]`` / ``[camera_angle, 0]``
    * 5 / 6: camera ``[0, camera_angle]`` / ``[0, -camera_angle]``

    The indices match the labels produced by
    ``dataset_action_batch_to_actions``, which uses 2 for "forward while
    jumping".

    Args:
        env: environment whose ``action_space.noop()`` returns the base dict.
        camera_angle: amount used for the four camera actions.
        always_attack: when True (the default), ``attack`` is also set to 1 in
            every action.
    """

    def __init__(self, env, camera_angle=10, always_attack=True):
        super().__init__(env)
        self.camera_angle = camera_angle
        self.always_attack = always_attack
        self._actions = [
            [('attack', 1)],
            [('forward', 1)],
            # Was jump alone, but the training labels use index 2 for
            # forward + jump, so both keys must be pressed.
            [('forward', 1), ('jump', 1)],
            [('camera', [-self.camera_angle, 0])],
            [('camera', [self.camera_angle, 0])],
            [('camera', [0, self.camera_angle])],
            [('camera', [0, -self.camera_angle])],
        ]
        # Pre-build one full action dict per discrete index.
        self.actions = []
        for actions in self._actions:
            act = self.env.action_space.noop()
            for a, v in actions:
                act[a] = v
            if self.always_attack:
                act['attack'] = 1
            self.actions.append(act)
        self.action_space = gym.spaces.Discrete(len(self.actions))

    def action(self, action):
        """Return the pre-built action dict for discrete index ``action``."""
        return self.actions[action]

In [11]:
%%time

# Get data
minerl.data.download(directory='data', environment='MineRLTreechop-v0')
data = minerl.data.make("MineRLTreechop-v0", data_dir='data', num_workers=2)

# Model
model = CNN((3, 64, 64), 7).cuda()
model.train()  # BatchNorm layers must use batch statistics while training
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

# Training loop
step = 0      # number of optimizer updates performed so far
losses = []   # losses since the last progress message
for state, action, _, _, _ \
          in tqdm(data.batch_iter(num_epochs=6, batch_size=32, seq_len=1)):
    # Translate batch of actions for the ActionShaping wrapper
    actions = dataset_action_batch_to_actions(action)

    # Remove samples with no corresponding action
    mask = actions != -1
    if not mask.any():
        # Nothing to learn from: an empty batch would give a NaN loss and
        # feed empty input through the BatchNorm layers.
        continue

    # Get pov observations
    obs = state['pov'].squeeze().astype(np.float32)
    # Transpose to channels-first and normalize to [0, 1]
    obs = obs.transpose(0, 3, 1, 2) / 255.0

    obs = obs[mask]
    actions = actions[mask]

    # Update weights with backprop
    logits = model(torch.from_numpy(obs).float().cuda())
    loss = criterion(logits, torch.from_numpy(actions).long().cuda())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the mean loss of each window of 2000 updates
    step += 1
    losses.append(loss.item())
    if (step % 2000) == 0:
        mean_loss = sum(losses) / len(losses)
        tqdm.write(f'Step {step:>5} | Training loss = {mean_loss:.3f}')
        losses.clear()

torch.save(model.state_dict(), 'model.pth')
del data

Download: https://minerl.s3.amazonaws.com/v4/MineRLTreechop-v0.tar: 100%|██████████| 1511.0/1510.73792 [00:23<00:00, 65.28MB/s]


0it [00:00, ?it/s]

Step  2000 | Training loss = 0.904
Step  4000 | Training loss = 0.857
Step  6000 | Training loss = 0.869
Step  8000 | Training loss = 0.821
Step 10000 | Training loss = 0.821
Step 12000 | Training loss = 0.822
Step 14000 | Training loss = 0.780
Step 16000 | Training loss = 0.775
Step 18000 | Training loss = 0.767
Step 20000 | Training loss = 0.794
Step 22000 | Training loss = 0.794
Step 24000 | Training loss = 0.847
Step 26000 | Training loss = 0.777
Step 28000 | Training loss = 0.771
Step 30000 | Training loss = 0.772
Step 32000 | Training loss = 0.769
Step 34000 | Training loss = 0.755
Step 36000 | Training loss = 0.754
Step 38000 | Training loss = 0.756
Step 40000 | Training loss = 0.745
Step 42000 | Training loss = 0.781
Step 44000 | Training loss = 0.755
Step 46000 | Training loss = 0.735
Step 48000 | Training loss = 0.709
Step 50000 | Training loss = 0.723
Step 52000 | Training loss = 0.757
Step 54000 | Training loss = 0.716
Step 56000 | Training loss = 0.722
Step 58000 | Trainin

Visualize the result of the training:

In [12]:
model = CNN((3, 64, 64), 7).cuda()
model.load_state_dict(torch.load('model.pth'))
# Inference mode: BatchNorm must use the running statistics saved in the
# checkpoint instead of the statistics of the single frame being evaluated
# (and must not keep updating them).
model.eval()

# `env1` records the video; `env` wraps it with the discrete action set
env = gym.make('MineRLObtainDiamond-v0')
env1 = Recorder(env, './video', fps=60)
env = ActionShaping(env1)

action_list = np.arange(env.action_space.n)

obs = env.reset()

for step in tqdm(range(1000)):
    # Get input in the correct format: channels-first, batch of one, in [0, 1]
    obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()
    # Turn logits into probabilities (no gradients are needed for inference)
    with torch.no_grad():
        probabilities = torch.softmax(model(obs), dim=1)[0].cpu().numpy()
    # Sample action according to the probabilities
    action = np.random.choice(action_list, p=probabilities)

    obs, reward, done, _ = env.step(action)
    # Do not keep stepping an episode that has already ended
    if done:
        break

env1.release()
env1.play()

  0%|          | 0/1000 [00:00<?, ?it/s]

In [13]:
# Embed a YouTube player (video id umvrmQ_MYSI) in the cell output.
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/umvrmQ_MYSI" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')

## III. Script + Imitation Learning

In [14]:
# Scripted plan as (action string, number of repetitions) pairs;
# '' is a step with no action.
_pickaxe_plan = [
    # Run the planks, stick and crafting-table recipes, look down, attack,
    # then jump and place a crafting table
    ('craft:planks', 6),
    ('craft:stick', 2),
    ('craft:crafting_table', 2),
    ('camera:[10,0]', 18),
    ('attack', 20),
    ('', 10),
    ('jump', 1),
    ('', 5),
    ('place:crafting_table', 1),
    ('', 10),

    # Craft a wooden pickaxe and equip it
    ('camera:[-1,0]', 1),
    ('nearbyCraft:wooden_pickaxe', 1),
    ('camera:[1,0]', 1),
    ('', 10),
    ('equip:wooden_pickaxe', 1),
    ('', 10),

    # Dig stone
    ('attack', 500),

    # Craft a stone pickaxe and equip it
    ('', 10),
    ('jump', 1),
    ('', 5),
    ('place:crafting_table', 1),
    ('', 10),
    ('camera:[-1,0]', 1),
    ('nearbyCraft:stone_pickaxe', 1),
    ('camera:[1,0]', 1),
    ('', 10),
    ('equip:stone_pickaxe', 1),
    ('', 10),
]

# Expand the plan into one action string per environment step.
script = [step for step, repeats in _pickaxe_plan for _ in range(repeats)]

In [15]:
model = CNN((3, 64, 64), 7).cuda()
model.load_state_dict(torch.load('model.pth'))
# Inference mode: BatchNorm must use the running statistics saved in the
# checkpoint instead of per-frame statistics (and must not keep updating them).
model.eval()

# NOTE: despite the names, `env_cnn` is the recording wrapper that receives the
# raw action dicts of the script, while `env_script` ends up being the
# ActionShaping wrapper (around `env_cnn`) that the CNN drives.
env_script = gym.make('MineRLObtainDiamond-v0')
env_cnn = Recorder(env_script, './video', fps=60)
env_script = ActionShaping(env_cnn)

action_list = np.arange(env_script.action_space.n)

for _ in range(10):
    obs = env_script.reset()
    done = False

    # 1. Get wood with the CNN
    for i in tqdm(range(3000)):
        obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()
        # No gradients are needed for inference
        with torch.no_grad():
            probabilities = torch.softmax(model(obs), dim=1)[0].cpu().numpy()
        action = np.random.choice(action_list, p=probabilities)
        obs, reward, done, _ = env_script.step(action)
        if done:
            break

    # 2. Craft stone pickaxe with scripted actions
    if not done:
        for action in tqdm(script):
            obs, reward, done, _ = env_cnn.step(str_to_act(env_cnn, action))
            if done:
                break

    # Show what the agent holds at the end of the episode, then its video
    print(obs["inventory"])
    env_cnn.release()
    env_cnn.play()

  0%|          | 0/3000 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(0), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(1), 'dirt': array(30), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(1), 'planks': array(12), 'stick': array(8), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(61), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(0), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(1), 'crafting_table': array(2), 'dirt': array(9), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(1), 'stick': array(6), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(1)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(65), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(0), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(48), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/650 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(1), 'dirt': array(10), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(6), 'planks': array(12), 'stick': array(8), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


  0%|          | 0/3000 [00:00<?, ?it/s]

{'coal': array(0), 'cobblestone': array(0), 'crafting_table': array(0), 'dirt': array(0), 'furnace': array(0), 'iron_axe': array(0), 'iron_ingot': array(0), 'iron_ore': array(0), 'iron_pickaxe': array(0), 'log': array(0), 'planks': array(0), 'stick': array(0), 'stone': array(0), 'stone_axe': array(0), 'stone_pickaxe': array(0), 'torch': array(0), 'wooden_axe': array(0), 'wooden_pickaxe': array(0)}


Final result:

In [16]:
# Embed a YouTube player (video id 7LnjA7Bxf6A) in the cell output.
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/7LnjA7Bxf6A" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')