In [1]:
import torch
import torch.nn as nn

class EncoderCNN(nn.Module):
    def __init__(self, input_shape, output_size):
        super(EncoderCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )
        # Calculate output size of conv_layers
        conv_out_size = self._get_conv_out(input_shape)
        self.fc_layers = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, output_size)
        )

    def _get_conv_out(self, shape):
        o = self.conv_layers(torch.zeros(1, *shape))
        return int(torch.prod(torch.tensor(o.size())))

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x


In [2]:
class InverseModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(InverseModel, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Linear(input_size * 2, 256),
            nn.ReLU(),
            nn.Linear(256, output_size)
        )

    def forward(self, state_encoding, next_state_encoding):
        x = torch.cat((state_encoding, next_state_encoding), dim=1)
        x = self.fc_layers(x)
        return x


In [3]:
class ForwardModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(ForwardModel, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Linear(input_size + output_size, 256),
            nn.ReLU(),
            nn.Linear(256, input_size)
        )

    def forward(self, state_action, next_state_encoding):
        x = torch.cat((state_action, next_state_encoding), dim=1)
        x = self.fc_layers(x)
        return x


In [2]:
import mujoco
import gymnasium as gym
import torch
import torch.nn as nn


# Initialize Mujoco's Hopper environment
env = gym.make('Hopper-v2')
state_shape = env.observation_space.shape
action_size = env.action_space.shape[0]

# Initialize ICM components
encoder = EncoderCNN(state_shape, encoding_size)
inverse_model = InverseModel(encoding_size * 2, action_size)
forward_model = ForwardModel(encoding_size + action_size, encoding_size)

# Initialize DQN agent
dqn_agent = DQNAgent(state_size=encoding_size, action_size=action_size)

# Define optimizer and loss function for ICM
optimizer_encoder = torch.optim.Adam(encoder.parameters(), lr=lr_encoder)
optimizer_inverse = torch.optim.Adam(inverse_model.parameters(), lr=lr_inverse)
optimizer_forward = torch.optim.Adam(forward_model.parameters(), lr=lr_forward)
criterion = nn.MSELoss()

# Training loop
for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    while not done:
        # Encode state
        state_encoding = encoder(torch.FloatTensor(state).unsqueeze(0))
        
        # Choose action using DQN agent
        action = dqn_agent.select_action(state_encoding)
        
        # Take action and observe next state, reward
        next_state, reward, done, _ = env.step(action)
        
        # Encode next state
        next_state_encoding = encoder(torch.FloatTensor(next_state).unsqueeze(0))
        
        # Compute inverse model loss
        predicted_action = inverse_model(state_encoding, next_state_encoding)
        inverse_loss = criterion(predicted_action, torch.FloatTensor(action).unsqueeze(0))
        
        # Compute forward model loss
        predicted_next_state_encoding = forward_model(torch.cat((state_encoding, action), dim=1))
        forward_loss = criterion(predicted_next_state_encoding, next_state_encoding)
        
        # Total loss
        total_loss = inverse_loss + forward_loss
        
        # Update ICM parameters
        optimizer_inverse.zero_grad()
        optimizer_forward.zero_grad()
        total_loss.backward()
        optimizer_inverse.step()
        optimizer_forward.step()
        
        # Update DQN parameters
        dqn_agent.update_parameters(total_reward)
        
        # Update state
        state = next_state
        total_reward += reward
        
    # Print total reward for the episode
    print(f"Episode {episode}: Total Reward = {total_reward}")



You appear to be missing MuJoCo.  We expected to find the file here: C:\Users\Vimarsh\.mujoco\mujoco210

This package only provides python bindings, the library must be installed separately.

Please follow the instructions on the README to install MuJoCo

    https://github.com/openai/mujoco-py#install-mujoco

Which can be downloaded from the website

    https://www.roboti.us/index.html



Exception: 
You appear to be missing MuJoCo.  We expected to find the file here: C:\Users\Vimarsh\.mujoco\mujoco210

This package only provides python bindings, the library must be installed separately.

Please follow the instructions on the README to install MuJoCo

    https://github.com/openai/mujoco-py#install-mujoco

Which can be downloaded from the website

    https://www.roboti.us/index.html
