In [1]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box, Discrete
import numpy as np
import cv2
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = Discrete(12)  # Assuming you convert MultiBinary to Discrete
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = info['score'] - self.score + 0.1  # Add a small reward for staying alive
        self.score = info['score']
        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)

# Define the DQN model
model = DQN('CnnPolicy', env, verbose=1, learning_rate=0.0001, buffer_size=100000, learning_starts=1000, batch_size=32, tau=0.1, gamma=0.99, train_freq=4, target_update_interval=10000)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
model.learn(total_timesteps=100000, callback=eval_callback)

# Save the trained model
model.save("DQN_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = DQN.load("DQN_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()


  logger.warn(


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




IndexError: invalid index to scalar variable.

## Tuned Parameters

In [None]:
import gym
from gym.wrappers import Monitor
import retro
from gym import Env
from gym.spaces import MultiBinary, Box, Discrete
import numpy as np
import cv2
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

class StreetFighter(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = Discrete(12)  # Assuming you convert MultiBinary to Discrete
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        return obs
    
    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels
    
    def step(self, action):
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        reward = (info['score'] - self.score) + 0.1  # Add a small reward for staying alive
        self.score = info['score']
        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        self.game.render()
    
    def close(self):
        self.game.close()

# Create and wrap the environment
env = StreetFighter()
env = Monitor(env, './logs/', force=True)

# Define the DQN model with tuned parameters
model = DQN(
    'CnnPolicy',
    env,
    verbose=1,
    learning_rate=0.0005,  # Slightly higher learning rate for faster convergence
    buffer_size=200000,  # Larger buffer size for more diverse experiences
    learning_starts=5000,  # Start learning after more steps
    batch_size=64,  # Larger batch size for more stable updates
    tau=0.1,
    gamma=0.98,  # Discount factor for future rewards
    train_freq=4,  # Train every 4 steps
    target_update_interval=1000,  # More frequent target network updates
)

# Define a callback for evaluation
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, render=False)

# Train the model
model.learn(total_timesteps=1, callback=eval_callback)

# Save the trained model
model.save("DQN_StreetFighter")

# Close the training environment to avoid emulator conflict
env.close()

# Create a new evaluation environment and wrap it with Monitor
eval_env = StreetFighter()
eval_env = Monitor(eval_env, './logs/', force=True)

# Load the trained model
model = DQN.load("DQN_StreetFighter")

# Evaluate the model
mean_reward, _ = evaluate_policy(model, eval_env, render=True, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}")

# Close the evaluation environment
eval_env.close()
