# AI-powered Mario

Authors: [Yuansong Feng](https://github.com/YuansongFeng), [Suraj Subramanian](https://github.com/suraj813), [Howard Wang](https://github.com/hw26), [Steven Guo](https://github.com/GuoYuzhang).  


## Welcome!
This tutorial walks you through the fundamentals of Deep Reinforcement Learning. At the end, you will implement an AI-powered Mario (using [Double Deep Q-Networks](https://arxiv.org/pdf/1509.06461.pdf)) that can play the game by itself. 

Although no prior knowledge of RL is necessary for this tutorial, you may want to familiarize yourself with these RL [concepts](https://spinningup.openai.com/en/latest/spinningup/rl_intro.html) and keep this handy [cheatsheet](https://colab.research.google.com/drive/1eN33dPVtdPViiS1njTW_-r-IYCDTFU7N) as your companion. The full code is available [here](https://github.com/yuansongFeng/MadMario/).

## Setup

In [1]:
# Mario game environment
#!pip install gym-super-mario-bros==7.3.0 opencv-python

import os
import copy
import torch
from pathlib import Path
from collections import deque

from Agent import Mario
from Logger import MetricLogger
import random, datetime, numpy as np, cv2 
# Gym is an OpenAI toolkit for RL
import gym
from gym.spaces import Box
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation

#NES Emulator for OpenAI Gym
from nes_py.wrappers import JoypadSpace

# Super Mario environment for OpenAI Gym
import gym_super_mario_bros

In [2]:
# Initialize Super Mario environment
env = gym_super_mario_bros.make('SuperMarioBros-v3')

# Limit the action space to two button combinations:
#   0. walk right  (['right'])
#   1. jump right  (['right', 'A'])
env = JoypadSpace(
    env,
    [['right'],
    ['right', 'A']]
)

# Take a single step with action 0 to inspect what the environment returns:
# the raw frame's shape, the reward, the done flag and the info dict.
env.reset()
next_state, reward, done, info = env.step(action=0)
print(f'{next_state.shape},\n {reward},\n {done},\n {info}')

(240, 256, 3),
 0,
 False,
 {'coins': 0, 'flag_get': False, 'life': 2, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40, 'x_pos_screen': 40, 'y_pos': 79}


In [3]:
class ResizeObservation(gym.ObservationWrapper):
    """Observation wrapper that resizes every frame to a fixed size.

    Args:
        env: environment to wrap.
        shape: target size. An int ``n`` means ``(n, n)``; otherwise an
            iterable that is converted to a tuple and used as the leading
            (rows, columns) dimensions of the new observation space.

    The observation space is redefined as a uint8 ``Box`` in [0, 255] whose
    shape is ``shape`` followed by any dimensions of the wrapped space beyond
    the first two (e.g. a trailing channel axis).
    """

    def __init__(self, env, shape):
        super().__init__(env)
        if isinstance(shape, int):
            self.shape = (shape, shape)
        else:
            self.shape = tuple(shape)

        # Keep trailing dimensions (such as channels) of the wrapped space.
        obs_shape = self.shape + self.observation_space.shape[2:]
        self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)

    def observation(self, observation):
        """Return `observation` resized to `self.shape` using area interpolation."""
        # cv2.resize expects dsize as (width, height), whereas self.shape is
        # stored in array order (rows, columns). Reverse it so the output
        # matches the declared observation space for non-square targets too.
        observation = cv2.resize(
            observation, self.shape[::-1], interpolation=cv2.INTER_AREA
        )
        return observation


class SkipFrame(gym.Wrapper):
    """Wrapper that repeats each action for several frames.

    Only the observation from the last executed frame is returned; rewards
    from all executed frames are summed.
    """

    def __init__(self, env, skip):
        """Return only every `skip`-th frame.

        Args:
            env: environment to wrap.
            skip: number of times each action is repeated; must be >= 1.

        Raises:
            ValueError: if `skip` is less than 1. Without this check `step`
                would fail later with an unbound-variable error because the
                wrapped environment would never be stepped.
        """
        super().__init__(env)
        if skip < 1:
            raise ValueError(f"skip must be >= 1, got {skip}")
        self._skip = skip

    def step(self, action):
        """Repeat action, and sum reward.

        Returns:
            Tuple ``(obs, total_reward, done, info)`` where ``obs``, ``done``
            and ``info`` come from the last frame actually executed.
        """
        total_reward = 0.0
        for _ in range(self._skip):
            # Accumulate reward and repeat the same action
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            # Stop early once the episode is over; do not step a finished env.
            if done:
                break
        return obs, total_reward, done, info


# Apply Wrappers to environment (order matters: each wrapper sees the output of the previous one)
# Repeat every action for up to 4 frames and sum the rewards (SkipFrame above)
env = SkipFrame(env, skip=4)
# Convert frames to grayscale; keep_dim=False is expected to drop the channel axis
env = GrayScaleObservation(env, keep_dim=False)
# Resize every frame to 84x84 (ResizeObservation above)
env = ResizeObservation(env, shape=84)
# Divide pixel values by 255
env = TransformObservation(env, f=lambda x: x / 255.)
# Stack 4 consecutive observations; the agent below is created for (4, 84, 84) states
env = FrameStack(env, num_stack=4)

# Logging 

# Let's play!

In [4]:
use_cuda = torch.cuda.is_available()

# Release unused cached GPU memory held by PyTorch before starting a new run
if use_cuda:
    torch.cuda.empty_cache()

# Also run Python's garbage collector to drop unreachable objects from earlier cells
import gc
gc.collect()

    
print(f"Using CUDA: {use_cuda}")
print()

# Each run writes into its own timestamped directory under checkpoints/
save_dir = Path('checkpoints') / datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

# Agent arguments: state shape (4, 84, 84), number of available actions, output directory
mario = Mario((4, 84, 84), env.action_space.n, save_dir)
 
# Uncomment to resume from a saved checkpoint.
# NOTE(review): the example path below repeats 'checkpoints' twice — confirm the intended location.
# path = Path('checkpoints') / 'checkpoints/2022-04-13T20-59-01/mario_net_11.chkpt'
# mario.load(path)

logger = MetricLogger(save_dir)

episodes = 40000

### Train the agent by playing `episodes` episodes of the game
for e in range(episodes):

    state = env.reset()

    # Play the game!
    while True:

        # Run agent on the state
        action = mario.act(state)

        # Agent performs action
        next_state, reward, done, info = env.step(action)

        # Remember: hand the transition, together with info['y_pos'], to the agent
        mario.cache(state, info['y_pos'], next_state, action, reward, done)

        # Learn: returns the values that are logged below as Q value and loss
        q, loss = mario.learn()

        # Logging
        logger.log_step(reward, loss, q)

        # Update state
        state = np.array(next_state)

        # Check if end of game: the env reports done, or the flag was reached
        if done or info['flag_get']:
            break

    logger.log_episode()

    # Record metrics every 20 episodes
    if e % 20 == 0:
        logger.record(
            episode=e,
            epsilon=mario.exploration_rate,
            step=mario.curr_step
        )

Using CUDA: True



  done = torch.BoolTensor([done]).cuda() if self.use_cuda else torch.BoolTensor([done])
  return (self.ram[0x86] - self.ram[0x071c]) % 256


Episode 0 - Step 1014 - Epsilon 1 - Mean Reward 2373.0 - Mean Length 1014.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 6.437 - Time 2022-04-27T17:07:11


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 5.81 GiB total capacity; 1.37 GiB already allocated; 3.31 MiB free; 1.42 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

<Figure size 432x288 with 0 Axes>

In [None]:
def render_episode(mario):
    """Let `mario` play for 5000 steps while rendering the game.

    Uses the module-level `env`. The environment is reset before the first
    step and again whenever it reports that the episode is done.

    Args:
        mario: agent whose `act(state)` returns an action accepted by
            `env.step`.
    """
    done = True
    for _ in range(5000):
        if done:
            state = env.reset()

        # Run agent on the state
        action = mario.act(state)

        # Agent performs action
        next_state, _reward, done, _info = env.step(action)

        env.render()

        # Advance to the newest observation. Without this the agent would
        # keep choosing actions from the frame returned by the last reset.
        state = next_state
        
# Uncomment to rebuild the agent and load saved weights instead of reusing the one trained above:
# mario = Mario(state_dim=(4, 84, 84), action_dim=env.action_space.n, save_dir=save_dir)
# path = Path('checkpoints') / 'mario_net_12.chkpt'
# mario.load(path)
# Set the exploration rate to 0 before rendering
# (presumably so the agent never takes random actions — confirm in Agent.py)
mario.exploration_rate = 0
render_episode(mario)