# Tube Learning Notebook

## DataFrame Construction

In [94]:
import pandas as pd
import ast
import numpy as np
import glob

In [103]:
# Function to safely evaluate lists
def safe_eval(col):
    try:
        return ast.literal_eval(col)
    except ValueError:
        return col  # Return as is if it's not a string representation of a list

# Initialize an empty DataFrame
all_data = pd.DataFrame()

# Use glob to find all the files that match the pattern
file_list = glob.glob('data/trajectory_data_*.csv')

# Loop through the files sorted to maintain the order
for filename in sorted(file_list):
    temp_df = pd.read_csv(filename)
    # Apply transformations right after reading
    temp_df['joint_positions'] = temp_df['joint_positions'].apply(safe_eval)
    temp_df['joint_velocities'] = temp_df['joint_velocities'].apply(safe_eval)
    all_data = pd.concat([all_data, temp_df], ignore_index=True)


In [104]:
# Now create the derived columns
all_data['x_t'] = all_data.apply(lambda row: row['joint_positions'] + row['joint_velocities'], axis=1)
all_data['u_t'] = all_data.apply(lambda row: [row['velocity_x'], row['velocity_y']], axis=1)
all_data['z_t'] = all_data.apply(lambda row: [row['traj_x'], row['traj_y']], axis=1)
all_data['v_t'] = all_data.apply(lambda row: [row['reduced_command_x'], row['reduced_command_y']], axis=1)


# Since w_t and w_{t+1} are derived from calculations, no need for safe_eval
all_data['w_t'] = np.sqrt((all_data['position_x'] - all_data['traj_x'])**2 + (all_data['position_y'] - all_data['traj_y'])**2)
all_data['group'] = all_data['episode_number'].astype(str) + '_' + all_data['robot_index'].astype(str)

# Example to debug with a smaller subset
all_data['x_{t+1}'] = all_data.groupby('group')['x_t'].shift(-1)
all_data['z_{t+1}'] = all_data.groupby('group')['z_t'].shift(-1)
all_data['w_{t+1}'] = all_data.groupby('group')['w_t'].shift(-1)

# Function to drop the first and last 10 data points from each episode
def drop_edges(group):
    return group.iloc[1:-1]
all_data = all_data.groupby('group', group_keys=False).apply(drop_edges)

# Drop rows where x_{t+1}, z_{t+1}, and w_{t+1} do not exist
all_data.dropna(subset=['x_{t+1}', 'z_{t+1}', 'w_{t+1}'], inplace=True)

# Select and order the final columns
final_df = all_data[['group', 'x_t', 'u_t', 'z_t', 'v_t', 'w_t', 'x_{t+1}', 'z_{t+1}', 'w_{t+1}']]

We have $D=\{\omega_t, x_t, u_t, z_t, v_t, \omega_{t+1}, x_{t+1}, z_{t+1}\}$:

In [105]:
# Print the final DataFrame
final_df.head()

Unnamed: 0,group,x_t,u_t,z_t,v_t,w_t,x_{t+1},z_{t+1},w_{t+1}
1000,1.0_0,"[-0.016817141324281693, 0.41679614782333374, -...","[0.499429464331994, -0.0896594062640303]","[0.0083996439705537, -0.0044380149992368]","[0.4199822079150199, -0.2219007549217102]",0.013292,"[-0.023087259382009506, 0.4356231689453125, -0...","[0.0167992879411074, -0.0088760299984736]",0.015988
2000,1.0_0,"[-0.023087259382009506, 0.4356231689453125, -0...","[0.4589464469104678, -0.1296557248570755]","[0.0167992879411074, -0.0088760299984736]","[0.4199822079150199, -0.2219007549217102]",0.015988,"[-0.0035673792008310556, 0.4758116602897644, -...","[0.0251989319116611, -0.0133140449977105]",0.015224
3000,1.0_0,"[-0.0035673792008310556, 0.4758116602897644, -...","[0.4631590609554294, -0.2217794437849829]","[0.0251989319116611, -0.0133140449977105]","[0.4199822079150199, -0.2219007549217102]",0.015224,"[0.008097478188574314, 0.4927358627319336, -0....","[0.0335985758822148, -0.0177520599969473]",0.01343
4000,1.0_0,"[0.008097478188574314, 0.4927358627319336, -0....","[0.5037832817056503, -0.2699768350131482]","[0.0335985758822148, -0.0177520599969473]","[0.4199822079150199, -0.2219007549217102]",0.01343,"[0.014785615727305412, 0.5032246708869934, -0....","[0.0419982198527686, -0.0221900749961841]",0.012822
5000,1.0_0,"[0.014785615727305412, 0.5032246708869934, -0....","[0.5225719853098, -0.2828309319952543]","[0.0419982198527686, -0.0221900749961841]","[0.4199822079150199, -0.2219007549217102]",0.012822,"[0.01734859123826027, 0.514514148235321, -0.73...","[0.0503978638233223, -0.026628089995421]",0.014061


In [117]:
# Count the number of data points in each group
group_sizes = all_data.groupby('group').size()
print(group_sizes.unique())

[8]


Optional Saving:

In [9]:
final_df.to_csv('processed_trajectory_data.csv', index=False)

## Network Construction

In [13]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.utils.tensorboard import SummaryWriter
import ast

2024-07-02 11:20:33.706095: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Load in the data:

In [14]:
# Load data (assuming DataFrame is saved in a CSV file named 'data.csv')
final_df = pd.read_csv('processed_trajectory_data.csv')

# Convert to tensors and split data
X = torch.tensor(final_df[['x_t', 'u_t', 'z_t', 'v_t']].values.tolist(), dtype=torch.float32)
y = torch.tensor(final_df[['w_t', 'w_{t+1}']].values, dtype=torch.float32)
dataset = TensorDataset(X, y[:, 1].unsqueeze(1))

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

ValueError: too many dimensions 'str'

Model specifications:

In [None]:
class TubeWidthPredictor(nn.Module):
    def __init__(self):
        super(TubeWidthPredictor, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(16, 64),  # Adjust input size based on your data structure
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.network(x)

class AsymmetricLoss(nn.Module):
    def __init__(self, alpha=0.9, delta=1.0):
        super(AsymmetricLoss, self).__init__()
        self.alpha = alpha
        self.huber = nn.HuberLoss(delta=delta)

    def forward(self, y_pred, y_true):
        residual = y_true - y_pred
        loss = torch.where(residual > 0, self.alpha * residual, (1 - self.alpha) * residual.abs())
        return self.huber(loss, torch.zeros_like(loss))
    
def train(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for batch_idx, (data, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            writer.add_scalar('Loss/train', loss.item(), epoch * len(train_loader) + batch_idx)

def test():
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data, targets in test_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    print(f'Test Loss: {total_loss / len(test_loader)}')

In [None]:
model = TubeWidthPredictor()
criterion = AsymmetricLoss(alpha=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)

writer = SummaryWriter('runs/tube_width_experiment')
train(50)
test()
writer.close()