In [1]:
import os
import random

import numpy as np
import torch
from src.test_functions import NeuralNetworkOneLayer
from tqdm.notebook import tqdm

os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".80"

import pickle

# Sample Data

In [2]:
# Read data
# sample_file = "./InvertedDoublePendulum-v4_samples200000.pkl"
# save_name = "InvertedDoublePendulum-v4"
# # Reward: 6.5 ~ 9.5, Err: 0.05

# sample_file = "./HalfCheetah-v4_samples2000000.pkl"
# save_name = "HalfCheetah-v4"
# # Reward: -56 ~ 1.28  , Err: 50

# sample_file = "./Swimmer-v4_samples2000000.pkl"
# save_name = "Swimmer-v4"
# # # Reward: -3.5 ~ 3.5, Err: 0.3

# sample_file = "Hopper-v4_samples2000000.pkl"
# save_name = "Hopper-v4"
# Reward: -1.8746137439401072 , 3.90962180717704, Err: 0.21



with open(sample_file, "rb") as fp:
    sample_dict = pickle.load(fp)

observations = sample_dict["observation"]
actions = sample_dict["actions"]
rewards = sample_dict["rewards"]

In [3]:
# Function to scale the values into bounds
BOUND = 9.9999


def scale_to_bounds(lb, ub, bound):
    scale = bound / np.maximum(np.abs(lb), np.abs(ub))
    return scale

In [6]:
# Scale the observations/actions to uniform bounds
lb, ub = np.min(actions), np.max(actions)
print(f"Bounds for actions are: lb = {lb}, ub = {ub}")


lb, ub = np.min(observations), np.max(observations)
print(f"Bounds for observations are: lb = {lb}, ub = {ub}")
scale = scale_to_bounds(lb, ub, BOUND)
observations *= scale
lb, ub = np.min(observations), np.max(observations)
print(f"Bounds after scaling are: lb = {lb}, ub = {ub}")

Bounds for actions are: lb = -9.999996642283573, ub = 9.999999824710532
Bounds for observations are: lb = 0.0, ub = 9.9999
Bounds after scaling are: lb = 0.0, ub = 9.9999


In [7]:
# Create sample array
xs = np.hstack((actions, observations))
ys = -rewards

print("Collected samples are in the shape of: ", xs.shape, ys.shape)
print(f"Rewards range: {np.min(rewards)} , {np.max(rewards)}")

Collected samples are in the shape of:  (2000000, 14) (2000000,)
Rewards range: -1.8746137439401072 , 3.90962180717704


# Neural Network

In [8]:
# Define variables
hidden_dims = 16
num_epochs = 100
batch_size = 1000
learning_rate = 0.00001
use_device = "cuda:0"  # "cuda:0" or "cpu"

In [9]:
# Setup NN 
input_dims = xs.shape[-1]
num_samples = xs.shape[0]
bounds = np.array([-10.0, 10.0]*input_dims)


# Create a neural network with one hidden layer
nn = NeuralNetworkOneLayer(dims=input_dims, domain=bounds, hidden_dims=hidden_dims)

criteria = torch.nn.MSELoss()
optimizer = torch.optim.Adam(nn.model.parameters(), lr=learning_rate)

try:
    device = torch.device(use_device)
except:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
nn.model.to(device)
nn.model.train()

print(f"Training the neural network for {num_epochs} epochs")
for epoch in tqdm(range(num_epochs)):
    shuffled_indices = np.random.permutation(num_samples)
    xs_shuffled = xs[shuffled_indices]
    ys_shuffled = ys[shuffled_indices]

    for i in range(0, num_samples, batch_size):
        x = torch.FloatTensor(xs_shuffled[i : i + batch_size])
        y = torch.FloatTensor(ys_shuffled[i : i + batch_size])

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_pred = nn.model(x)
        loss = criteria(y_pred, y)
        loss.backward()
        optimizer.step()
        if i == 0:
            print(f"Loss at step {epoch} = {loss.item()}")

Training the neural network for 100 epochs


  0%|          | 0/100 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Loss at step 0 = 18.233884811401367
Loss at step 1 = 2.335304021835327
Loss at step 2 = 0.26897406578063965
Loss at step 3 = 0.2274719774723053
Loss at step 4 = 0.22084464132785797
Loss at step 5 = 0.23096659779548645
Loss at step 6 = 0.20581994950771332
Loss at step 7 = 0.22670578956604004
Loss at step 8 = 0.20735017955303192
Loss at step 9 = 0.2381734848022461
Loss at step 10 = 0.23744513094425201
Loss at step 11 = 0.1987154334783554
Loss at step 12 = 0.22855956852436066
Loss at step 13 = 0.22944533824920654
Loss at step 14 = 0.23685184121131897
Loss at step 15 = 0.24826891720294952
Loss at step 16 = 0.2204466015100479
Loss at step 17 = 0.21390578150749207
Loss at step 18 = 0.2000812441110611
Loss at step 19 = 0.2198389619588852
Loss at step 20 = 0.24096256494522095
Loss at step 21 = 0.21327118575572968
Loss at step 22 = 0.25725486874580383
Loss at step 23 = 0.19167888164520264
Loss at step 24 = 0.2197103500366211
Loss at step 25 = 0.23100662231445312
Loss at step 26 = 0.233393207192

In [12]:
# Evalute the neural network
nn.model.eval()
losses = []
for i in range(0, num_samples, batch_size):
    x = torch.FloatTensor(xs[i : i + batch_size])
    y = torch.FloatTensor(ys[i : i + batch_size])

    x = x.to(device)
    y = y.to(device)

    with torch.no_grad():
        y_pred = nn.model(x)
    loss = criteria(y_pred, y)
    losses.append(loss.item())

print(f"Mean loss: {np.mean(losses)}")
print(f"Max/Min loss: {np.max(losses)} , {np.min(losses)}")

Mean loss: 0.21844428827241064
Max/Min loss: 0.49200719594955444 , 0.12358696013689041


In [50]:
# Save the model
print("Saving the model")
model_info = {
    "input_dims": input_dims,
    "hidden_dims": hidden_dims,
    "test_function": save_name,
    "bounds": bounds,
    "state_dict": nn.model.state_dict(),
}
torch.save(
    model_info,
    f"src/nn_models/nn_one_layer_{save_name}_{input_dims}_{hidden_dims}.pt",
)

Saving the model
