In [1]:
#! python --version
#! pip install gym==0.21.0
#! pip install gym-retro==0.8.0
# !pip install stable-baselines3[extra]
# !pip install gymnasium[all]
# !pip install opencv-python
#! python -m retro.import .
# retro.data.list_games()
import retro
import numpy as np # to calculate the delta between the frames
import cv2 # for grayscaling
import time

In [2]:
env = retro.make(game="StreetFighterIISpecialChampionEdition-Genesis")
env.observation_space
env.action_space
env.action_space.sample()
obs = env.reset() # Reset game to starting state

## No training, just random actions

In [3]:
done = False # Set flag to flase, tells us when we dies
for game in range(1): # play one game 
   while not done: 
       if done: 
           obs = env.reset() # when we do die, we start the game again
       env.render()
       obs, reward, done, info = env.step(env.action_space.sample()) # randomly take action
       #time.sleep(0.000000001)
       #if reward > 0:
           #print(reward) # only a number when win, no rewards when getting along which makes it hard to train rl agent - sparse rewards
        #need to change this, as the spare rewards will make it hard to train the rl agent


#max score: 6300

In [4]:
info

{'enemy_matches_won': 2,
 'score': 5600,
 'matches_won': 0,
 'continuetimer': 10,
 'enemy_health': 0,
 'health': 0}

## Random actions, but initialised using a constructor

In [6]:
from gym import Env
from gym.spaces import MultiBinary, Box
#Creating custom environment that will carry out all the steps
#we pass our pass environment

class StreetFighter(Env): 
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) 
        self.action_space = MultiBinary(12) 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs # want to also keep track of the previous frame to calculate a delta between the frames
        self.score = 0 
        return obs
    
    def preprocess(self, observation): 
        # Grayscaling 
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize 
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84,84,1))
        return channels 
    
    def step(self, action): 
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs) 
        
        # Frame delta - use this to train our agent
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs 
        # Reshape the reward function
        reward = info['score'] - self.score 
        self.score = info['score'] 
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def close(self):
        self.game.close()

env = StreetFighter()
env.observation_space
env.action_space.sample()

obs = env.reset() # Reset game to starting state

done = False # Set flag to flase, tells us when we die
for game in range(1): # play one game 
   while not done: 
       env.render()
       obs, reward, done, info = env.step(env.action_space.sample()) # randomly take action 'max score 74900'
       #time.sleep(0.000001)
       #if reward > 0:
           #print(reward) # only a number when win, no rewards when getting along which makes it hard to train rl agent - sparse rewards
        #need to change this, as the spare rewards will make it hard to train the rl agent
env.close()

In [7]:
info
#max score': 37300

{'enemy_matches_won': 2,
 'score': 37300,
 'matches_won': 0,
 'continuetimer': 10,
 'enemy_health': 0,
 'health': 0}

## Removing the irrelevant colours

In [2]:
from gym import Env
from gym.spaces import MultiBinary, Box
#Creating custom environment that will carry out all the steps
#we pass our pass environment

class StreetFighter(Env): 
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) 
        self.action_space = MultiBinary(12) 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs # want to also keep track of the previous frame to calculate a delta between the frames
        self.score = 0 
        return obs
    
    def preprocess(self, observation):
        # Convert the image to HSV color space
        hsv = cv2.cvtColor(observation, cv2.COLOR_BGR2HSV) 
        # Define color ranges for red, blue, and green in HSV
        lower_red1 = np.array([0, 50, 50])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([170, 50, 50])
        upper_red2 = np.array([180, 255, 255])
        lower_blue = np.array([100, 50, 50])
        upper_blue = np.array([140, 255, 255])
        lower_green = np.array([40, 50, 50])
        upper_green = np.array([80, 255, 255])       
        # Create masks for red, blue, and green colors
        mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        # Combine the masks
        mask_red = cv2.bitwise_or(mask_red1, mask_red2)
        mask = cv2.bitwise_or(mask_red, mask_blue)
        mask = cv2.bitwise_or(mask, mask_green)
        # Apply the mask to the original image
        filtered = cv2.bitwise_and(observation, observation, mask=mask) 
        # Convert the filtered image to grayscale
        gray = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)
        # Resize the image
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action): 
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs) 
        
        # Frame delta - use this to train our agent
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs 
        # Reshape the reward function
        reward = info['score'] - self.score 
        self.score = info['score'] 
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def close(self):
        self.game.close()

env = StreetFighter()
env.observation_space
env.action_space.sample()

obs = env.reset() # Reset game to starting state

done = False # Set flag to flase, tells us when we die
for game in range(1): # play one game 
   while not done: 
       env.render()
       obs, reward, done, info = env.step(env.action_space.sample()) # randomly take action 'max score 74900'
       #time.sleep(0.000001)
       #if reward > 0:
           #print(reward) # only a number when win, no rewards when getting along which makes it hard to train rl agent - sparse rewards
        #need to change this, as the spare rewards will make it hard to train the rl agent
env.close()

In [3]:
info
#5900

{'enemy_matches_won': 2,
 'score': 5900,
 'matches_won': 0,
 'continuetimer': 10,
 'enemy_health': 0,
 'health': 0}

In [1]:
#! python --version
#! pip install gym==0.21.0
#! pip install gym-retro==0.8.0
# !pip install stable-baselines3[extra]
# !pip install gymnasium[all]
# !pip install opencv-python
import retro
import numpy as np # to calculate the delta between the frames
import cv2 # for grayscaling
import time




import gym
from gym import Env
from gym.spaces import Box, MultiBinary
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# Define the custom environment class
class StreetFighter(Env):
    def __init__(self, render_mode="human"):   #, render_mode='human'
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
        self.render_mode = render_mode
       
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs

    def preprocess(self, observation):
        # Convert the image to HSV color space
        hsv = cv2.cvtColor(observation, cv2.COLOR_BGR2HSV)  
        # Define color ranges for red, blue, and green in HSV
        lower_red1 = np.array([0, 50, 50])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([170, 50, 50])
        upper_red2 = np.array([180, 255, 255])
        lower_blue = np.array([100, 50, 50])
        upper_blue = np.array([140, 255, 255])
        lower_green = np.array([40, 50, 50])
        upper_green = np.array([80, 255, 255])
        # Create masks for red, blue, and green colors
        mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
        mask_green = cv2.inRange(hsv, lower_green, upper_green) 
        # Combine the masks
        mask_red = cv2.bitwise_or(mask_red1, mask_red2)
        mask = cv2.bitwise_or(mask_red, mask_blue)
        mask = cv2.bitwise_or(mask, mask_green)    
        # Apply the mask to the original image
        filtered = cv2.bitwise_and(observation, observation, mask=mask) 
        # Convert the filtered image to grayscale
        gray = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)      
        # Resize the image
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)      
        # Add the channels value
        channels = np.reshape(resize, (84, 84, 1))   
        return channels

    
    # def preprocess(self, observation):
    #     gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
    #     resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
    #     channels = np.reshape(resize, (84, 84, 1))
    #     return channels

    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score
        self.score = info['score']
        return frame_delta, reward, done, info
     
    def render(self, *args, **kwargs):
        self.game.render()
     
    def close(self):
        self.game.close()

# Create the environment
env = StreetFighter() #StreetFighter(render_mode='human') 
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log="./street_fighter_ppo/")
model.learn(total_timesteps=1)
model.save("ppo_street_fighter")
model = PPO.load("ppo_street_fighter", env=env)

# Run the game loop with the trained model
obs = env.reset()
done = False

while not done:
    env.render()
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    #time.sleep(0.000001)
    if reward > 0:
        print(reward)
    time.sleep(0.01)  # Slight delay to make the rendering visible    



Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./street_fighter_ppo/PPO_21


KeyboardInterrupt: 

In [None]:
info

In [2]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Box, MultiBinary
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# Define the custom environment class
class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.game.seed(seed)
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs, {}

    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score
        self.score = info['score']
        return frame_delta, reward, done, False, info

    def render(self, *args, **kwargs):
        self.game.render()

    def close(self):
        self.game.close()

# Create the environment
env = StreetFighter()
env = DummyVecEnv([lambda: env])

# Define the PPO model with the custom environment
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log="./street_fighter_ppo/")

# Train the model
model.learn(total_timesteps=100)

# Save the trained model
model.save("ppo_street_fighter")

# Load the trained model (if not already loaded)
model = PPO.load("ppo_street_fighter", env=env)

# Run the game loop with the trained model
obs = env.reset()
done = False

while not done:
    env.render()
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    #time.sleep(0.000001)
    if reward > 0:
        print(reward)

env.close()


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./street_fighter_ppo/PPO_19
-----------------------------
| time/              |      |
|    fps             | 191  |
|    iterations      | 1    |
|    time_elapsed    | 10   |
|    total_timesteps | 2048 |
-----------------------------
Wrapping the env in a VecTransposeImage.




[500.]
[500.]
[500.]
[300.]
[500.]
[100.]
[1000.]
[100.]
[100.]
[100.]
[100.]
[100.]
[10000.]
[500.]
[100.]
[1000.]
[300.]
[1000.]
[500.]
[500.]
[500.]
[300.]
[1000.]
[500.]
[500.]


In [3]:
info
#MAX score 27200

[{'enemy_matches_won': 2,
  'score': 20600,
  'matches_won': 1,
  'continuetimer': 10,
  'enemy_health': 0,
  'health': 0,
  'TimeLimit.truncated': False,
  'terminal_observation': array([[[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
  
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
  
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
  
         ...,
  
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
  
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
  
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]]], dtype=uint8)}]

In [1]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env, spaces
import numpy as np
import cv2
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

# Custom environment definition
class StreetFighter(Env):
    def __init__(self, render_mode=None):
        super().__init__()
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = spaces.MultiBinary(12)
        self.render_mode = render_mode
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
        self.metadata = {'render.modes': ['human', 'rgb_array']}  # Set metadata for rendering

    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score
        self.score = info['score']
        return frame_delta, reward, done, info
    
    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.render(mode)
        elif mode == 'human':
            self.game.render(mode)
    
    def close(self):
        self.game.close()

# Mocking a spec for our custom environment to avoid warnings
class EnvSpec:
    def __init__(self, id):
        self.id = id

StreetFighter.spec = EnvSpec("StreetFighter-v0")

# Create and wrap the environment
env = StreetFighter(render_mode='human')
env = Monitor(env, './logs/', force=True)  # Wrap the environment with Monitor and force clear previous files

# Define the model
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
timesteps = 1000000  # Adjust the number of timesteps based on your needs
model.learn(total_timesteps=timesteps, callback=eval_callback)

# Save the trained model
model.save("PPO_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter(render_mode='human')
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = PPO.load("PPO_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




DependencyNotInstalled: Found neither the ffmpeg nor avconv executables. On OS X, you can install ffmpeg via `brew install ffmpeg`. On most Ubuntu variants, `sudo apt-get install ffmpeg` should do it. On Ubuntu 14.04, however, you'll need to install avconv with `sudo apt-get install libav-tools`. Alternatively, please install imageio-ffmpeg with `pip install imageio-ffmpeg`