In [1]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

# Custom environment definition
class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score
        self.score = info['score']
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)  # Wrap the environment with Monitor and force clear previous files

# Define the model
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
timesteps = 10000
model.learn(total_timesteps=timesteps, callback=eval_callback)

# Save the trained model
model.save("PPO_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = PPO.load("PPO_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()


  logger.warn(


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




-----------------------------
| time/              |      |
|    fps             | 181  |
|    iterations      | 1    |
|    time_elapsed    | 11   |
|    total_timesteps | 2048 |
-----------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 79       |
|    iterations           | 2        |
|    time_elapsed         | 51       |
|    total_timesteps      | 4096     |
| train/                  |          |
|    approx_kl            | 2.110331 |
|    clip_fraction        | 0.589    |
|    clip_range           | 0.2      |
|    entropy_loss         | -7.82    |
|    explained_variance   | 0.00014  |
|    learning_rate        | 0.0003   |
|    loss                 | 6.39e+03 |
|    n_updates            | 10       |
|    policy_gradient_loss | 0.152    |
|    value_loss           | 9.64e+03 |
--------------------------------------




Error: Tried to reset environment which is not done. While the monitor is active for (unknown), you cannot call reset() unless the episode is over.

## small reward + changed the hyperparameter learning_rate=0.0001

In [None]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

# Custom environment definition
class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score + 0.1  # Add a small reward for staying alive
        self.score = info['score']
        return frame_delta, reward, done, info

    
    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)  # Wrap the environment with Monitor and force clear previous files

# Define the model
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0001, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
timesteps = 10000
model.learn(total_timesteps=timesteps, callback=eval_callback)

# Save the trained model
model.save("PPO_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = PPO.load("PPO_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()


## flip image

In [None]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

# Custom environment definition
class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        if np.random.rand() > 0.5:
            resize = cv2.flip(resize, 1)  # Flip the image horizontally
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score + 0.1  # Add a small reward for staying alive
        self.score = info['score']
        return frame_delta, reward, done, info

    
    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)  # Wrap the environment with Monitor and force clear previous files

# Define the model
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0001, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
timesteps = 10000
model.learn(total_timesteps=timesteps, callback=eval_callback)

# Save the trained model
model.save("PPO_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = PPO.load("PPO_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()


## Removing the irrelevant colours

In [None]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

# Custom environment definition
class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        # Convert the image to HSV color space
        hsv = cv2.cvtColor(observation, cv2.COLOR_BGR2HSV) 
        # Define color ranges for red, blue, and green in HSV
        lower_red1 = np.array([0, 50, 50])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([170, 50, 50])
        upper_red2 = np.array([180, 255, 255])
        lower_blue = np.array([100, 50, 50])
        upper_blue = np.array([140, 255, 255])
        lower_green = np.array([40, 50, 50])
        upper_green = np.array([80, 255, 255])       
        # Create masks for red, blue, and green colors
        mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        # Combine the masks
        mask_red = cv2.bitwise_or(mask_red1, mask_red2)
        mask = cv2.bitwise_or(mask_red, mask_blue)
        mask = cv2.bitwise_or(mask, mask_green)
        # Apply the mask to the original image
        filtered = cv2.bitwise_and(observation, observation, mask=mask) 
        # Convert the filtered image to grayscale
        gray = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)
        # Resize the image
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score + 0.1  # Add a small reward for staying alive
        self.score = info['score']
        return frame_delta, reward, done, info

    
    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)  # Wrap the environment with Monitor and force clear previous files

# Define the model
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0001, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
timesteps = 10000
model.learn(total_timesteps=timesteps, callback=eval_callback)

# Save the trained model
model.save("PPO_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = PPO.load("PPO_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()
