In [5]:
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
from collections import Counter
import torch
import pandas as pd
import json
import pickle
import torch.utils.data
from tqdm import tqdm
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import numpy as np
import re

# Fix the NumPy and PyTorch RNG seeds so repeated runs draw the same numbers.
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
# CUDA-specific seeding is currently disabled:
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# Dataset locations, resolved against the current working directory.
path_to_dataset = os.path.join(os.getcwd(), 'SGD Dataset')
path_to_train, path_to_test = (
    os.path.join(path_to_dataset, file_name)
    for file_name in ("TrainSet.json", "TestSet.json")
)

In [None]:
def extract_feature_vectors(sentence, embed, rnn, type="last"):
    """Encode one sentence character by character and pool the hidden states.

    Args:
        sentence: Text to encode. Each character is turned into an id with
            ``ord`` and fed to ``embed`` one step at a time.
        embed: Callable mapping a LongTensor of character ids to embeddings.
        rnn: Recurrent module. It must provide ``state0(batch_size=...)``,
            be callable as ``rnn(emb, state)`` returning ``(new_state, _)``,
            and expose an ``h2o`` layer (used only for the empty-input case).
        type: ``"last"`` returns the hidden state after the final character;
            any other value returns the mean of the hidden states over all
            characters.

    Returns:
        A squeezed CPU tensor holding the pooled hidden state, or a zero
        vector when ``sentence`` is empty.
    """
    rnn.eval()

    if not sentence:
        # With no characters the loop below never runs, so there is no hidden
        # state to pool and indexing the empty list would raise IndexError.
        # Return zeros instead, sized like the GPU version of this function
        # later in the notebook.
        # NOTE(review): assumes rnn.h2o is a linear layer whose input width
        # (weight.shape[1]) equals the hidden size — confirm against the model.
        return torch.zeros(rnn.h2o.weight.shape[1])

    with torch.no_grad():
        sentence_tensor = torch.LongTensor([ord(c) for c in sentence]).unsqueeze(1)  # Shape: (seq_len, 1)
        hidden_state = rnn.state0(batch_size=1)
        hidden_states = []  # one entry per character

        for char in sentence_tensor:
            # Tensors can be passed directly; the deprecated Variable wrapper
            # is not needed.
            hidden_state, _ = rnn(embed(char), hidden_state)
            # Keep only the first element of the state tuple (h_t).
            hidden_states.append(hidden_state[0].detach().cpu())

        # Pool only what the caller asked for.
        if type == "last":
            feature_vector = hidden_states[-1]
        else:
            feature_vector = torch.mean(torch.stack(hidden_states), dim=0)

        return feature_vector.squeeze()

In [None]:
def precompute_embeddings(file_path, embed, rnn, output_path, type="average"):
    """Embed every utterance of every dialogue in a JSON file and save the result.

    Args:
        file_path: JSON file containing a list of dialogues, each with a
            ``'turns'`` list whose items carry an ``'utterance'`` string.
        embed: Embedding module forwarded to ``extract_feature_vectors``.
        rnn: Recurrent module forwarded to ``extract_feature_vectors``.
        output_path: Destination passed to ``torch.save``.
        type: Pooling mode forwarded to ``extract_feature_vectors``.

    The saved object is a list (one entry per dialogue) of lists (one feature
    vector per turn).
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)
        turn_lists = [record['turns'] for record in records]
        progress_label = "Processing dialogues for file {}".format(output_path)

        per_dialogue_vectors = []
        for turns in tqdm(turn_lists, desc=progress_label):
            vectors = []
            for turn in turns:
                vectors.append(extract_feature_vectors(turn['utterance'], embed, rnn, type=type))
            per_dialogue_vectors.append(vectors)

        # Persist the nested list of tensors.
        torch.save(per_dialogue_vectors, output_path)

# Load the checkpoint and precompute one embedding file per (pooling, split).
# SECURITY: the checkpoint stores whole module objects ('rnn', 'embed'), so
# loading it unpickles arbitrary code. Only load checkpoints you trust.
# weights_only=False is passed explicitly because the objects are full modules,
# not plain tensors, and the implicit default differs between PyTorch releases.
checkpoint = torch.load('mlstm-ns.pt', map_location='cpu', weights_only=False)
encoder = checkpoint['rnn']
embed = checkpoint['embed']

# Same four jobs, in the same order, as the original copy-pasted calls:
# train/average, test/average, train/last, test/last.
for pooling in ("average", "last"):
    for split, split_path in (("train", path_to_train), ("test", path_to_test)):
        precompute_embeddings(
            split_path, embed, encoder,
            '{}_embeddings_{}.pt'.format(split, pooling),
            type=pooling,
        )

  checkpoint = torch.load('mlstm-ns.pt', map_location='cpu')
Processing dialogues for file train_embeddings_average.pt:   0%|          | 0/5403 [00:00<?, ?it/s]


AttributeError: 'list' object has no attribute 'size'

In [34]:
import torch
from torch.autograd import Variable
import json
from tqdm import tqdm

def extract_feature_vectors(sentence, embed, rnn, type="last", device='cuda'):
    """Encode one sentence character by character on ``device`` and pool the hidden states.

    Args:
        sentence: Text to encode. Each character is turned into an id with
            ``ord`` and fed to ``embed`` one step at a time.
        embed: Module mapping a LongTensor of character ids to embeddings;
            moved to ``device`` by this call.
        rnn: Recurrent module; moved to ``device`` and put in eval mode. It
            must provide ``state0(batch_size=...)`` returning an ``(h, c)``
            pair, be callable as ``rnn(emb, state)`` returning
            ``(new_state, _)``, and expose an ``h2o`` layer (used only for the
            empty-input case).
        type: ``"last"`` returns the hidden state after the final character;
            any other value returns the mean of the hidden states over all
            characters.
        device: Device on which the forward passes run.

    Returns:
        A squeezed CPU tensor holding the pooled hidden state, or a zero
        vector when ``sentence`` is empty.
    """
    rnn.eval()
    # Move models to the target device (a no-op when they are already there).
    embed.to(device)
    rnn.to(device)

    if not sentence:  # Handle empty utterances
        # Size the zero vector from the model passed in as `rnn`. The previous
        # code read a global named `encoder`, which only worked when an earlier
        # cell happened to have defined it.
        # NOTE(review): weight.shape[1] is the input width of h2o, assumed to
        # equal the hidden size — confirm against the model definition.
        hidden_size = rnn.h2o.weight.shape[1]
        return torch.zeros(hidden_size).cpu()

    with torch.no_grad():
        # NOTE(review): ord() can produce ids above 255 for non-ASCII text;
        # verify that the embedding table covers every id that can occur.
        char_ids = torch.LongTensor([ord(c) for c in sentence]).unsqueeze(1).to(device)  # Shape: (seq_len, 1)

        # Initial state, moved to the same device as the models.
        h_0, c_0 = rnn.state0(batch_size=1)
        hidden_state = (h_0.to(device), c_0.to(device))

        hidden_states = []  # one entry per character, kept on `device`
        for char in char_ids:
            hidden_state, _ = rnn(embed(char), hidden_state)
            hidden_states.append(hidden_state[0].detach())

        # Pool only what the caller asked for.
        if type == "last":
            feature_vector = hidden_states[-1]
        else:
            feature_vector = torch.mean(torch.stack(hidden_states), dim=0)

        # Transfer to the CPU once, at the very end.
        return feature_vector.squeeze().cpu()

def precompute_embeddings(file_path, embed, rnn, output_path, type="average", device='cuda'):
    """Embed every utterance of every dialogue in a JSON file and save the result.

    Args:
        file_path: JSON file containing a list of dialogues, each with a
            ``'turns'`` list whose items carry an ``'utterance'`` string.
        embed: Embedding module forwarded to ``extract_feature_vectors``.
        rnn: Recurrent module forwarded to ``extract_feature_vectors``.
        output_path: Destination passed to ``torch.save``.
        type: Pooling mode forwarded to ``extract_feature_vectors``.
        device: Device forwarded to ``extract_feature_vectors``.

    The saved object is a list (one entry per dialogue) of lists (one feature
    vector per turn).
    """
    # Decode as UTF-8 explicitly: character ids are derived from the decoded
    # text, so a locale-dependent default encoding would change the model
    # inputs. Close the file as soon as it is parsed rather than holding it
    # open for the whole embedding loop.
    with open(file_path, 'r', encoding='utf-8') as f:
        file_contents = json.load(f)

    dialogues = [dialogue['turns'] for dialogue in file_contents]
    all_embeddings = []

    for dialog in tqdm(dialogues, desc="Processing dialogues for file {}".format(output_path)):
        embeddings = [
            extract_feature_vectors(turn['utterance'], embed, rnn, type=type, device=device)
            for turn in dialog
        ]
        all_embeddings.append(embeddings)

    # Save embeddings to disk
    torch.save(all_embeddings, output_path)

# Driver: load the checkpoint once and write the requested embedding files.
def run_embedding_extraction(model_path='mlstm-ns.pt', path_to_train=None, path_to_test=None,
                             splits=("test",), embedding_types=("average",)):
    """Load the pickled model and precompute embedding files for the chosen splits.

    Args:
        model_path: Checkpoint file holding the ``'rnn'`` and ``'embed'`` modules.
        path_to_train: JSON file for the training split, or None to skip it.
        path_to_test: JSON file for the test split, or None to skip it.
        splits: Which splits to process, in order; any of ``"train"`` and
            ``"test"``. The default processes only the test split, which is
            what this function did while the training code was commented out.
        embedding_types: Pooling modes to compute for each split, e.g.
            ``("average", "last")``. The default computes only ``"average"``.

    Each job writes ``'<split>_embeddings_<type>.pt'`` in the working directory.

    Raises:
        KeyError: If ``splits`` contains a name other than "train" or "test".
    """
    # Check if CUDA is available, otherwise fall back to CPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # SECURITY: the checkpoint stores whole module objects, so loading it
    # unpickles arbitrary code. Only load checkpoints you trust.
    # weights_only=False is explicit because the implicit default differs
    # between PyTorch releases and the stored objects are full modules.
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
    encoder = checkpoint['rnn']
    embed = checkpoint['embed']

    split_sources = {
        "train": (path_to_train, "Processing training data..."),
        "test": (path_to_test, "Processing test data..."),
    }

    for split in splits:
        split_path, banner = split_sources[split]
        if not split_path:
            # No path supplied for this split: nothing to do.
            continue
        print(banner)
        for embedding_type in embedding_types:
            precompute_embeddings(split_path, embed, encoder,
                                  '{}_embeddings_{}.pt'.format(split, embedding_type),
                                  type=embedding_type, device=device)

    print("All processing complete!")

In [35]:
# Run the extraction with the dataset paths defined earlier in the notebook.
# The recorded output below shows only the test-set average embeddings being processed.
run_embedding_extraction(
    model_path='mlstm-ns.pt',
    path_to_train=path_to_train,
    path_to_test=path_to_test,
)

Using device: cuda


  checkpoint = torch.load(model_path, map_location=device)


Processing test data...


Processing dialogues for file test_embeddings_average.pt: 100%|██████████| 1331/1331 [15:36<00:00,  1.42it/s]


All processing complete!
