In [1]:
#! python --version
#! pip install gym==0.21.0
#! pip install gym-retro==0.8.0
# !pip install stable-baselines3[extra]
# !pip install gymnasium[all]
#!pip install opencv-python
import retro
import numpy as np # to calculate the delta between the frames
import cv2 # for grayscaling
import time

In [None]:
#! python -m retro.import .
# retro.data.list_games()
# creating our game environment
env = retro.make(game="StreetFighterIISpecialChampionEdition-Genesis")
# Bare expressions kept for interactive inspection. A notebook only displays
# the last expression of a cell, so they produce no output when run as-is.
env.observation_space
env.action_space
env.action_space.sample()

try:
    for game in range(1):  # play one game
        # Every game starts from a fresh state with a cleared termination flag.
        # (Previously `done` was set once before the loop and the reset lived
        # in an `if done:` branch inside `while not done:`, which could never
        # execute, so a second game would have been skipped entirely.)
        obs = env.reset()
        done = False
        while not done:
            env.render()
            obs, reward, done, info = env.step(env.action_space.sample())  # take a random action
            time.sleep(0.000000001)
            if reward > 0:
                # The raw reward is non-zero only occasionally (sparse rewards),
                # which makes it a hard signal to train an RL agent on.
                print(reward)
finally:
    # Always release the emulator, even if the loop above raises; a leaked
    # instance can prevent later cells from creating a new environment.
    env.close()

info  # last expression of the cell, so the final step's info dict is displayed

#

500.0
1000.0
500.0
300.0
400.0
1000.0


In [None]:
from gym import Env
from gym.spaces import MultiBinary, Box
# Custom environment that wraps the retro game and carries out all the steps
# (frame preprocessing, frame delta and reward shaping) in one place

class StreetFighter(Env):
    """Gym environment wrapping the gym-retro Street Fighter II game.

    Observations are 84x84 single-channel grayscale images. ``reset`` returns
    the first preprocessed frame; ``step`` returns the difference between the
    current and the previous preprocessed frame. The reward returned by
    ``step`` is the change in ``info['score']`` since the previous step.
    """

    def __init__(self):
        super().__init__()
        # Observation: resized, grayscaled frame with an explicit channel axis.
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # FILTERED restricts the emulator to valid button combinations.
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
        # Per-episode state, declared here so the attributes always exist;
        # both are (re)initialised by reset().
        self.previous_frame = None
        self.score = 0

    def reset(self):
        """Restart the game and return the first preprocessed frame."""
        obs = self.game.reset()
        obs = self.preprocess(obs)
        # Remember the frame so step() can compute a delta against it.
        self.previous_frame = obs

        # Score seen at the previous step; used to turn the running score
        # into a per-step reward.
        self.score = 0
        return obs

    def preprocess(self, observation):
        """Convert a raw game frame to an (84, 84, 1) grayscale array.

        Args:
            observation: colour frame as returned by the wrapped game.

        Returns:
            The frame grayscaled, resized to 84x84 and reshaped to
            (84, 84, 1).
        """
        # gym-retro supplies frames in RGB channel order, so RGB2GRAY applies
        # the luma weights to the right channels (BGR2GRAY swapped red/blue).
        gray = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Add the trailing channel axis declared in observation_space.
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: action passed straight through to the wrapped game.

        Returns:
            Tuple ``(frame_delta, reward, done, info)`` where ``frame_delta``
            is the current preprocessed frame minus the previous one and
            ``reward`` is the increase of ``info['score']`` since the last
            step.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.previous_frame is None:
            raise RuntimeError("reset() must be called before step()")

        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        # NOTE(review): both frames are presumably uint8, so this subtraction
        # wraps modulo 256 wherever a pixel got darker - confirm that this is
        # the intended training signal.
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs

        # Reshape the reward: use the score change instead of the raw reward.
        reward = info['score'] - self.score
        self.score = info['score']

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render the wrapped game, forwarding any arguments and its result."""
        return self.game.render(*args, **kwargs)

    def close(self):
        """Close the wrapped game."""
        self.game.close()


env = StreetFighter()
# Bare expressions kept for interactive inspection. A notebook only displays
# the last expression of a cell, so they produce no output when run as-is.
env.observation_space
env.action_space.sample()

try:
    for game in range(1):  # play one game
        # Reset the game and the termination flag per game, so raising the
        # game count actually plays more than one game.
        obs = env.reset()
        done = False
        while not done:
            env.render()
            obs, reward, done, info = env.step(env.action_space.sample())  # take a random action ('max score 74900')
            time.sleep(0.000001)
            if reward > 0:
                # With the custom environment the reward is the score change
                # of this step, so it is printed whenever the score goes up.
                print(reward)
finally:
    # Always release the emulator, even if the loop above raises; a leaked
    # instance can prevent later cells from creating a new environment.
    env.close()

info  # last expression of the cell, so the final step's info dict is displayed

In [None]:
import gym
from gym import Env
from gym.spaces import Box, MultiBinary
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# Define the custom environment class
class StreetFighter(Env):
    """Gym environment wrapping the gym-retro Street Fighter II game.

    Observations are 84x84 single-channel grayscale images. ``reset`` returns
    the first preprocessed frame; ``step`` returns the difference between the
    current and the previous preprocessed frame, with the change in
    ``info['score']`` as reward.

    Args:
        render_mode: mode passed to the wrapped game's ``render`` on every
            ``render()`` call. Defaults to ``'human'``.
    """

    def __init__(self, render_mode='human'):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # FILTERED restricts the emulator to valid button combinations.
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
        self.render_mode = render_mode
        # Per-episode state, declared here so the attributes always exist;
        # both are (re)initialised by reset().
        self.previous_frame = None
        self.score = 0

    def reset(self):
        """Restart the game and return the first preprocessed frame."""
        obs = self.game.reset()
        obs = self.preprocess(obs)
        # Baseline for the first frame delta and the first score delta.
        self.previous_frame = obs
        self.score = 0
        return obs

    def preprocess(self, observation):
        """Grayscale a raw frame, resize it to 84x84 and add a channel axis."""
        # gym-retro supplies frames in RGB channel order, so RGB2GRAY applies
        # the luma weights to the right channels (BGR2GRAY swapped red/blue).
        gray = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def step(self, action):
        """Advance the game one step.

        Returns:
            Tuple ``(frame_delta, reward, done, info)``; ``reward`` is the
            increase of ``info['score']`` since the previous step.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.previous_frame is None:
            raise RuntimeError("reset() must be called before step()")

        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        # NOTE(review): both frames are presumably uint8, so this subtraction
        # wraps modulo 256 wherever a pixel got darker - confirm that this is
        # the intended training signal.
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        # Replace the game's raw reward with the score change of this step.
        reward = info['score'] - self.score
        self.score = info['score']
        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render with the configured ``render_mode`` and return the result.

        Positional and keyword arguments are accepted for compatibility with
        callers that pass a mode, but the configured ``render_mode`` is used.
        """
        return self.game.render(mode=self.render_mode)

    def close(self):
        """Close the wrapped game."""
        self.game.close()

# Create the environment
# Training budget passed to model.learn(); raise it for a real training run.
TOTAL_TIMESTEPS = 10

# Keep the raw environment under its own name: the factory lambda must not
# close over `env`, which is rebound to the vectorised wrapper below.
base_env = StreetFighter(render_mode='human')
env = DummyVecEnv([lambda: base_env])

try:
    # Define the PPO model with the custom environment
    model = PPO('CnnPolicy', env, verbose=1, tensorboard_log="./street_fighter_ppo/")

    # Train the model
    model.learn(total_timesteps=TOTAL_TIMESTEPS)

    # Save the trained model
    model.save("ppo_street_fighter")

    # Reload the trained model from disk to check the save/load round trip
    model = PPO.load("ppo_street_fighter", env=env)

    # Run the game loop with the trained model
    obs = env.reset()
    episode_over = False

    while not episode_over:
        env.render()
        action, _states = model.predict(obs)
        # The vectorised env returns one entry per wrapped environment;
        # there is exactly one here, so index 0 is the game being played.
        obs, reward, done, info = env.step(action)
        episode_over = bool(done[0])
        time.sleep(0.000001)
        if reward[0] > 0:
            print(reward)
finally:
    # Always release the emulator, even if training or playback raises.
    env.close()

info  # last expression of the cell, so the final step's info is displayed

#100000 steps max score

In [None]:
# import gymnasium as gym
# from gymnasium import Env
# from gymnasium.spaces import Box, MultiBinary
# from stable_baselines3 import PPO
# from stable_baselines3.common.vec_env import DummyVecEnv
# from stable_baselines3.common.evaluation import evaluate_policy

# # Define the custom environment class
# class StreetFighter(Env):
#     def __init__(self):
#         super().__init__()
#         self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
#         self.action_space = MultiBinary(12)
#         self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)

#     def reset(self, seed=None, options=None):
#         if seed is not None:
#             self.game.seed(seed)
#         obs = self.game.reset()
#         obs = self.preprocess(obs)
#         self.previous_frame = obs
#         self.score = 0
#         return obs, {}

#     def preprocess(self, observation):
#         gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
#         resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
#         channels = np.reshape(resize, (84, 84, 1))
#         return channels

#     def step(self, action):
#         obs, reward, done, info = self.game.step(action)
#         obs = self.preprocess(obs)
#         frame_delta = obs - self.previous_frame
#         self.previous_frame = obs
#         reward = info['score'] - self.score
#         self.score = info['score']
#         return frame_delta, reward, done, False, info

#     def render(self, *args, **kwargs):
#         self.game.render()

#     def close(self):
#         self.game.close()

# # Create the environment
# env = StreetFighter()
# env = DummyVecEnv([lambda: env])

# # Define the PPO model with the custom environment
# model = PPO('CnnPolicy', env, verbose=1, tensorboard_log="./street_fighter_ppo/")

# # Train the model
# model.learn(total_timesteps=100)

# # Save the trained model
# model.save("ppo_street_fighter")

# # Load the trained model (if not already loaded)
# model = PPO.load("ppo_street_fighter", env=env)

# # Run the game loop with the trained model
# obs = env.reset()
# done = False

# while not done:
#     env.render()
#     action, _states = model.predict(obs)
#     obs, reward, done, info = env.step(action)
#     time.sleep(0.000001)
#     if reward > 0:
#         print(reward)

# info
#env.close()

#MAX score 27200