In [1]:
# import libraries
import retro # to play Street Fighter from a ROM
import time
from gym import Env # to wrap the environment
from gym.spaces import MultiBinary, Box
import numpy as np # to calculate the delta between the frames
import cv2 # for grayscaling
from stable_baselines3 import PPO
from gym.wrappers import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback


In [2]:
# Custom Gym environment that wraps the emulator and applies our preprocessing and reward shaping on every step

screen_size = 84  # side length, in pixels, of the square observation given to the policy


class StreetFighter(Env):
    """Gym environment wrapping the Street Fighter II retro game.

    Observations are ``(screen_size, screen_size, 1)`` uint8 images: the first
    observation of an episode is the preprocessed frame, every later one is the
    absolute per-pixel difference between consecutive preprocessed frames.
    Actions are 12 binary button flags. The reward is the change in the
    in-game score since the previous step plus a small constant bonus.
    """

    # (lower, upper) HSV bounds of the colours kept by ``preprocess``.
    # OpenCV 8-bit hue spans 0-179, so red needs one band at each end.
    _HSV_COLOR_RANGES = (
        (np.array([0, 50, 50]), np.array([10, 255, 255])),      # red, low hues
        (np.array([170, 50, 50]), np.array([180, 255, 255])),   # red, high hues
        (np.array([100, 50, 50]), np.array([140, 255, 255])),   # blue
        (np.array([40, 50, 50]), np.array([80, 255, 255])),     # green
    )

    def __init__(self):
        """Declare the observation/action spaces and start the emulator."""
        super().__init__()
        # Resized, single-channel frames.
        self.observation_space = Box(low=0, high=255, shape=(screen_size, screen_size, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # Episode state; defined here so step() cannot hit a missing attribute
        # if it is ever called before reset().
        self.previous_frame = np.zeros(self.observation_space.shape, dtype=np.uint8)
        self.score = 0
        # FILTERED restricts the emulator to valid button combinations.
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)

    def reset(self):
        """Restart the game and return the first preprocessed frame.

        Also clears the per-episode state (previous frame and last seen score)
        that ``step`` uses to build frame deltas and score deltas.
        """
        obs = self.game.reset()
        obs = self.preprocess(obs)
        # Remember this frame so the first step() can compute a delta against it.
        self.previous_frame = obs

        # Score seen at the previous step; the reward is the change against it.
        self.score = 0

        print("reset")
        # NOTE(review): this returns the full frame, whereas step() returns
        # frame deltas — confirm the policy is meant to see both kinds.
        return obs

    def preprocess(self, observation):
        """Colour-filter, grayscale and resize a raw emulator frame.

        Args:
            observation: raw colour frame from the emulator. Treated as
                RGB, which is the channel order gym-retro documents for its
                observations.

        Returns:
            A ``(screen_size, screen_size, 1)`` array: pixels outside the red,
            blue and green HSV bands are blacked out, the result is converted
            to grayscale and resized with bicubic interpolation.

        The transform is deterministic. Randomly mirroring individual frames
        here would corrupt the frame deltas computed in ``step`` and would flip
        the screen without flipping the left/right actions.
        """
        hsv = cv2.cvtColor(observation, cv2.COLOR_RGB2HSV)
        # Union of the per-colour masks.
        mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
        for lower, upper in self._HSV_COLOR_RANGES:
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))
        # Keep only the pixels selected by the mask.
        filtered = cv2.bitwise_and(observation, observation, mask=mask)
        gray = cv2.cvtColor(filtered, cv2.COLOR_RGB2GRAY)
        resize = cv2.resize(gray, (screen_size, screen_size), interpolation=cv2.INTER_CUBIC)
        # Add the trailing channel axis expected by the observation space.
        return np.reshape(resize, (screen_size, screen_size, 1))

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: button flags passed straight to the emulator.

        Returns:
            ``(frame_delta, reward, done, info)`` where ``frame_delta`` is the
            absolute difference between this preprocessed frame and the
            previous one, ``reward`` is the score gained since the last step
            plus 0.1, and ``done``/``info`` come from the emulator unchanged.
        """
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        # Subtract in a signed type: plain uint8 subtraction wraps around, so a
        # pixel that got darker by 1 would otherwise show up as 255.
        frame_delta = np.abs(obs.astype(np.int16) - self.previous_frame.astype(np.int16)).astype(np.uint8)
        self.previous_frame = obs

        # Shaped reward: score gained this step plus a small bonus for staying alive.
        reward = info['score'] - self.score + 0.1
        self.score = info['score']

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render the current game screen; extra arguments are ignored."""
        self.game.render()

    def close(self):
        """Shut down the underlying emulator."""
        self.game.close()



In [3]:
# Paths shared by training and evaluation.
LOG_DIR = './logs/'
MODEL_PATH = "training/models/PPO_StreetFighter"

# Training budget. The recorded run below reports 4096 total timesteps (one
# full n_steps rollout) even with this 1-step budget, so raise it for real training.
timesteps = 1

env = StreetFighter()
try:
    # Wrap the environment with Monitor and force-clear previous files.
    env = Monitor(env, LOG_DIR, force=True)

    # This is where the AI model is created.
    model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0001, n_steps=4096, batch_size=64, n_epochs=10, gamma=0.99)

    # Periodic evaluation that also saves the best model so far.
    # NOTE(review): this evaluates on the training env itself, presumably
    # because a second emulator cannot be opened while this one is alive —
    # confirm that evaluation resets mid-rollout are acceptable.
    # NOTE(review): eval_freq (5000) exceeds the 4096 steps collected in the
    # recorded run, so the callback likely never fired there.
    eval_callback = EvalCallback(env, best_model_save_path=LOG_DIR, log_path=LOG_DIR, eval_freq=5000, n_eval_episodes=5, render=False)

    # Train the AI model; this is where it starts to learn.
    model.learn(total_timesteps=timesteps, callback=eval_callback)

    # Save the trained model.
    model.save(MODEL_PATH)
finally:
    # Always close the training environment, even if training fails, to avoid
    # an emulator conflict when the next environment is created.
    env.close()

# Create a new evaluation environment and wrap it with Monitor.
eval_env = StreetFighter()
try:
    eval_env = Monitor(eval_env, LOG_DIR, force=True)

    # Load the trained model back from disk.
    model = PPO.load(MODEL_PATH)

    # Evaluate the model.
    mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=20)
    print(f"Mean reward: {mean_reward}")
finally:
    # Always close the evaluation environment, even if evaluation fails.
    eval_env.close()

  logger.warn(


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




reset
-----------------------------
| time/              |      |
|    fps             | 149  |
|    iterations      | 1    |
|    time_elapsed    | 27   |
|    total_timesteps | 4096 |
-----------------------------


  logger.warn(


reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
reset
Mean reward: 3284.7549508530647


In [4]:
# create new environment
env = StreetFighter()
try:
    model = PPO.load("training/models/PPO_StreetFighter")

    for game in range(1):
        # Start every game from a fresh state and re-arm the loop flag;
        # otherwise any game after the first would be skipped.
        obs = env.reset()
        done = False
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, info = env.step(action)
            env.render()

    # Final emulator info (score, health, matches won, ...) of the last game.
    print(info)
finally:
    # Release the emulator even if playback is interrupted or fails.
    env.close()

reset
{'enemy_matches_won': 2, 'score': 2900, 'matches_won': 0, 'continuetimer': 10, 'enemy_health': 0, 'health': 0}
