In [None]:
import sys
sys.path.append('..')
from LATMOS import *

In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

---

In [None]:
def train_model_spot(model, train_loader, val_loader,
                     num_epochs, learning_rate, patience=30):
    """Train ``model`` on the state and acceptance targets with Adam and early stopping.

    Each batch from ``train_loader`` is a triple ``(batch_ap, batch_state,
    batch_acceptance)``. ``model(batch_ap)`` must return a pair
    ``(state_output, acceptance_output)``; both are flattened and scored with
    cross-entropy against the targets with the first position along dim 1
    dropped. The two losses are summed.

    Early stopping monitors the *training* loss (not the validation loss):
    training stops once it has failed to improve for ``patience`` consecutive
    epochs.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches (used for progress reporting only).
        num_epochs: Maximum number of epochs.
        learning_rate: Learning rate passed to Adam.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        None. The model is updated in place.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Early stopping variables
    best_loss = float('inf')
    patience_counter = 0
    bar = tqdm(range(num_epochs), leave=False)
    for epoch in bar:
        # evaluate_model_spot() switches to eval mode, so re-enable training mode every epoch.
        model.train()
        total_loss = 0.0
        total_samples = 0

        for batch_ap, batch_state, batch_acceptance in train_loader:
            optimizer.zero_grad()
            # Forward pass
            state_output, acceptance_output = model(batch_ap)

            # Flatten outputs to (N, classes) and targets to (N,) for CrossEntropyLoss.
            state_output_flat = state_output.reshape(-1, state_output.shape[-1])
            batch_state_flat = batch_state[:, 1:].reshape(-1)
            acceptance_output_flat = acceptance_output.reshape(-1, 2)
            batch_acceptance_flat = batch_acceptance[:, 1:].reshape(-1)

            # Compute losses
            state_loss = criterion(state_output_flat, batch_state_flat)
            acceptance_loss = criterion(acceptance_output_flat, batch_acceptance_flat)
            loss = state_loss + acceptance_loss
            loss.backward()
            optimizer.step()

            # The criterion returns a batch mean, so weight it by the batch size;
            # dividing by the sample count afterwards then yields a true per-sample
            # average even when the last batch is smaller.
            batch_samples = batch_ap.shape[0]
            total_loss += loss.item() * batch_samples
            total_samples += batch_samples

        if total_samples == 0:
            raise ValueError("train_loader yielded no samples; cannot train")

        avg_loss = total_loss / total_samples

        # Early stopping check (on the training loss)
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\nEarly stopping triggered after {epoch + 1} epochs")
                break

        # Evaluate on the training and validation sets for the progress bar
        train_accuracy = evaluate_model_spot(model, train_loader)
        val_accuracy = evaluate_model_spot(model, val_loader)
        bar.set_postfix({"Average Loss": avg_loss,
                         "Train State Accuracy": train_accuracy[0], "Train Acceptance Accuracy": train_accuracy[1],
                         "Val State Accuracy": val_accuracy[0], "Val Acceptance Accuracy": val_accuracy[1]})

def evaluate_model_spot(model, val_loader):
    """Compute whole-sequence accuracy for the state and acceptance predictions.

    A sequence counts as correct only if *every* position along dim 1 matches
    the target. Targets are compared with their first position along dim 1
    dropped, mirroring the training loss.

    Args:
        model: Callable returning ``(state_output, acceptance_output)``; it is
            switched to eval mode and left in that mode.
        val_loader: Iterable of ``(batch_ap, batch_state, batch_acceptance)`` triples.

    Returns:
        Tuple ``(state_accuracy, acceptance_accuracy)`` as fractions of
        sequences. Both are ``nan`` when the loader yields no samples.
    """
    model.eval()
    correct_states = 0
    correct_acceptance = 0
    total_traces = 0

    with torch.inference_mode():
        for batch_ap, batch_state, batch_acceptance in val_loader:
            state_output, acceptance_output = model(batch_ap)

            # Most likely class per position; the class axis is the last one.
            predicted_states = torch.argmax(state_output, dim=-1)
            ground_truth_states = batch_state[:, 1:]

            predicted_acceptance = torch.argmax(acceptance_output, dim=-1)
            ground_truth_acceptance = batch_acceptance[:, 1:]

            # Count sequences where all predictions are correct
            correct_states += torch.all(predicted_states == ground_truth_states, dim=1).sum().item()
            correct_acceptance += torch.all(predicted_acceptance == ground_truth_acceptance, dim=1).sum().item()

            total_traces += batch_state.size(0)

    # Accuracy over zero samples is undefined; report nan instead of dividing by zero.
    if total_traces == 0:
        return float('nan'), float('nan')

    return correct_states / total_traces, correct_acceptance / total_traces


---

## Train a single model

In [None]:
idx = 0
# np.load on an .npz archive keeps a file handle open; the context manager
# closes it once every array has been read into memory.
with np.load(f'data/spot_{idx}_0_0.npz') as data:
    train_state_vectors = data['train_state_vectors']
    train_ap_vectors = data['train_ap_vectors']
    train_acceptance_vectors = data['train_acceptance_vectors']
    val_state_vectors = data['val_state_vectors']
    val_ap_vectors = data['val_ap_vectors']
    val_acceptance_vectors = data['val_acceptance_vectors']

# Convert numpy arrays to PyTorch tensors: inputs as float, both targets as
# long because they are used as class indices by CrossEntropyLoss.
train_ap_tensor = torch.tensor(train_ap_vectors, dtype=torch.float).to(device)
train_state_tensor = torch.tensor(train_state_vectors, dtype=torch.long).to(device)
train_acceptance_tensor = torch.tensor(train_acceptance_vectors, dtype=torch.long).to(device)

val_ap_tensor = torch.tensor(val_ap_vectors, dtype=torch.float).to(device)
val_state_tensor = torch.tensor(val_state_vectors, dtype=torch.long).to(device)
val_acceptance_tensor = torch.tensor(val_acceptance_vectors, dtype=torch.long).to(device)

# Create datasets
train_dataset = TensorDataset(train_ap_tensor, train_state_tensor, train_acceptance_tensor)
val_dataset = TensorDataset(val_ap_tensor, val_state_tensor, val_acceptance_tensor)

# Create dataloaders; only the training split is shuffled.
batch_size = 2**9
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Build the 'ssm' variant with a hidden width of 8x the state tensor's trailing dimension.
# NOTE(review): train_state_tensor is consumed as class-index targets sliced along
# dim 1, so shape[-1] may be the sequence length rather than the number of
# states — confirm this is the intended output_size.
model = create_model('ssm',
                     input_size=train_ap_tensor.shape[-1],
                     hidden_size=train_state_tensor.shape[-1] * 8,
                     output_size=train_state_tensor.shape[-1],
                     device=device)

# Train the model
train_model_spot(model, train_loader, val_loader, num_epochs=1000, learning_rate=1e-3)

# Final evaluation. This runs on the validation loader, so label it as such
# rather than as a test score.
final_accuracy = evaluate_model_spot(model, val_loader)
print(f"Final Validation Accuracy (state, acceptance): {final_accuracy}")

---

## Report accuracy for every model on every dataset

In [None]:
def _load_spot_loaders(path, batch_size):
    """Read one ``.npz`` archive and wrap its train/val arrays in DataLoaders.

    Returns ``(train_loader, val_loader, input_size, state_size)`` where the
    two sizes are the trailing dimensions of the training input and training
    state tensors.
    """
    # The archive holds an open file handle; close it as soon as the arrays are read.
    with np.load(path) as archive:
        tensors = {}
        for split in ('train', 'val'):
            tensors[split] = (
                torch.tensor(archive[f'{split}_ap_vectors'], dtype=torch.float).to(device),
                torch.tensor(archive[f'{split}_state_vectors'], dtype=torch.long).to(device),
                torch.tensor(archive[f'{split}_acceptance_vectors'], dtype=torch.long).to(device),
            )

    # Only the training split is shuffled.
    train_loader = DataLoader(TensorDataset(*tensors['train']), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(*tensors['val']), batch_size=batch_size, shuffle=False)

    input_size = tensors['train'][0].shape[-1]
    state_size = tensors['train'][1].shape[-1]
    return train_loader, val_loader, input_size, state_size


def report(models=('gru', 'attention', 'ssm'), train_vars=(0,), val_vars=(0,),
           indices=(0, 1, 2), hidden_factor=12, num_epochs=500,
           learning_rate=1e-4, batch_size=2**9):
    """Train and evaluate every model on every dataset / variance combination.

    For each model name, each ``(train_var, val_var)`` pair (paired
    positionally) and each dataset index, the archive
    ``data/spot_{idx}_{train_var}_{val_var}.npz`` is loaded, a fresh model is
    created and trained, and its accuracies are recorded. Called with no
    arguments, the sweep covers three models, the single variance pair
    ``(0, 0)`` and dataset indices 0-2. A wider sweep can be requested with,
    for example, ``train_vars=[0, 0, 0, 0.1, 0.2]`` and
    ``val_vars=[0, 0.1, 0.2, 0, 0]``.

    Args:
        models: Model names passed to ``create_model``.
        train_vars: Training variance tags used in the file name.
        val_vars: Validation variance tags; must be as long as ``train_vars``.
        indices: Dataset indices used in the file name.
        hidden_factor: Hidden size as a multiple of the state tensor's trailing dimension.
        num_epochs: Maximum epochs per training run.
        learning_rate: Learning rate per training run.
        batch_size: Batch size for both loaders.

    Returns:
        ``(train_accuracies, val_accuracies)``, each of shape
        ``(len(models), len(train_vars), len(indices), 2)``; the last axis
        holds the pair returned by ``evaluate_model_spot``.

    Raises:
        ValueError: If ``train_vars`` and ``val_vars`` differ in length.
    """
    if len(train_vars) != len(val_vars):
        # zip() would silently drop the unmatched entries and leave zero rows in the results.
        raise ValueError("train_vars and val_vars must have the same length")

    # Shape: (num_models, num_variance_pairs, num_indices, 2)
    train_accuracies = np.zeros((len(models), len(train_vars), len(indices), 2))
    val_accuracies = np.zeros_like(train_accuracies)

    # Nested loops with explicit indices for array assignment
    for model_idx, nn_model in enumerate(models):
        for var_idx, (train_var, val_var) in enumerate(zip(train_vars, val_vars)):
            for data_idx, idx in enumerate(indices):
                train_loader, val_loader, input_size, state_size = _load_spot_loaders(
                    f'data/spot_{idx}_{train_var}_{val_var}.npz', batch_size)

                # Create and train model
                # NOTE(review): output_size is the state tensor's trailing dimension;
                # the state tensor is consumed as class-index targets, so confirm this
                # is the number of states and not the sequence length.
                model = create_model(nn_model,
                                     input_size=input_size,
                                     hidden_size=int(state_size * hidden_factor),
                                     output_size=state_size,
                                     device=device)

                train_model_spot(model, train_loader, val_loader,
                                 num_epochs=num_epochs, learning_rate=learning_rate)

                # Evaluate and store results
                train_acc = evaluate_model_spot(model, train_loader)
                val_acc = evaluate_model_spot(model, val_loader)

                train_accuracies[model_idx, var_idx, data_idx] = train_acc
                val_accuracies[model_idx, var_idx, data_idx] = val_acc

                # Print current results
                print(f"{nn_model:5} on {idx:2}\n"
                      f"| train ({train_var}): {train_acc}\n"
                      f"| val ({val_var}): {val_acc}")

                # Release the model and cached GPU memory before the next run.
                del model
                torch.cuda.empty_cache()

    return train_accuracies, val_accuracies

# Run the sweep with report()'s built-in settings. Each returned array has a
# trailing axis of size 2 holding the pair returned by evaluate_model_spot.
train_accs, val_accs = report()

In [None]:
# Collapse the dataset axis (axis 2) so each entry is the mean accuracy for one
# (model, variance pair) combination; the trailing axis of size 2 is kept.
mean_train_accs = train_accs.mean(axis=2)
mean_val_accs = val_accs.mean(axis=2)

np.round(mean_val_accs, 3)