# Imports

In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Deep learning with TensorFlow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical

# Deep learning with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models import resnet18, ResNet18_Weights

# Natural Language Processing (NLP)
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Miscellaneous
import os
import re
import time
import pickle
from PIL import Image

from google.colab import drive
# Mount Google Drive so the project files under /content/drive are readable.
drive.mount('/content/drive')
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Mounted at /content/drive


# Dataset

In [2]:
# NLTK resources needed by preprocess_text: the punkt tokenizer models and
# the stopword corpus.
nltk.download('punkt')
nltk.download('stopwords')

# Number of samples per batch produced by data_loader.
b_size = 1

# NOTE(review): torch.load unpickles the file, so only load files you trust.
# Presumably a dataset of (video index, label) pairs, since batches are
# unpacked that way further down -- confirm against the notebook that wrote it.
data = torch.load("/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/ekman.pt")

# Shuffled loader over the dataset; two worker processes, pinned host memory.
data_loader = DataLoader(data, batch_size = b_size, shuffle = True, pin_memory = True, num_workers = 2)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [3]:
import os.path

# Preprocessing for a single video frame: resize to 32x32, convert the PIL
# image to a tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics,
# presumably chosen for the pretrained torchvision backbones -- confirm.
frame_process = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])

def crop(image):
  """Return the part of `image` inside the box (left=80, upper=58, right=577, lower=428)."""
  left, upper, right, lower = 80, 58, 577, 428
  box = (left, upper, right, lower)
  return image.crop(box)

# Preprocessing for a spectrogram image: cut out the fixed box defined in
# crop(), resize to 224x224, convert to a tensor, then normalise each channel.
# NOTE(review): the crop box presumably removes the axes/margins of the saved
# plot, and the mean/std look like the usual ImageNet statistics -- confirm.
spect_process = transforms.Compose([
    transforms.Lambda(crop),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Root folders (on the mounted Drive) of the per-video frame images and of
# the spectrogram images.
frame_directory = "/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/ekman6_split/"
spect_directory = "/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/ekman6_spectro/"
# One sub-folder per emotion class. The loaders below derive the folder from
# the video index, 50 consecutive indices per folder.
folders = ["anger/", "disgust/", "fear/", "joy/", "sadness/", "surprise/"]
# Transcripts table.
# NOTE(review): get_text_tensor reads df.columns[i] -- i.e. the CSV header
# row -- as the transcript of video i; confirm the file is laid out that way.
df = pd.read_csv("/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/ekman6_texts/en_transcripts.csv")

def preprocess_text(text: str) -> list:
    """Clean a transcript and split it into lower-cased tokens.

    Steps: strip http(s) links, replace every run of non-letter characters
    with a single space, tokenize with NLTK, drop English stopwords and
    lower-case what remains.

    Requires the NLTK `punkt` and `stopwords` resources to be downloaded.

    Args:
        text: raw transcript text.

    Returns:
        List of lower-cased token strings (possibly empty).
    """
    # remove links
    text = re.sub(r"http\S+", "", text)
    # remove special chars and numbers
    text = re.sub("[^A-Za-z]+", " ", text)
    # tokenize
    tokens = nltk.word_tokenize(text)
    # Read the stopword list once and use a set for O(1) membership tests,
    # instead of re-reading the corpus list for every token.
    stop_set = set(stopwords.words("english"))
    # NOTE(review): the stopword test runs on the ORIGINAL casing, so a
    # capitalised stopword such as "The" is kept (as "the"). This is left
    # unchanged on purpose: it determines which ids get assigned in
    # `dictionary`, which presumably must match the ids used in training.
    return [w.lower() for w in tokens if w not in stop_set]

def get_frame_tensor(i):
  """Load the 10 frames of video `i` and stack them into one tensor.

  Frames are read from `<frame_directory>/<emotion folder>/<i>_<j>.jpg` for
  j = 1..10, where the emotion folder is `folders[i // 50]` (assumes 50
  consecutive video indices per emotion folder). When a frame file is
  missing, the most recent frame that was found is reused in its place.

  Args:
    i: integer video index.

  Returns:
    Tensor with one `frame_process`-ed frame per j, stacked along dim 0.

  Raises:
    FileNotFoundError: if a frame is missing and no earlier frame of the
      same video exists to stand in for it (i.e. frame 1 is missing).
  """
  video_index = int(i)
  folder = os.path.join(frame_directory, folders[video_index // 50])
  frames = []
  last_valid_path = None
  for j in range(1, 11):
    path = os.path.join(folder, f"{video_index}_{j}.jpg")
    if not os.path.isfile(path):
      if last_valid_path is None:
        # Previously this fell through to Image.open() on the bare folder
        # path and failed with an unrelated-looking error.
        raise FileNotFoundError(
            f"no frame available for video {video_index}: {path} is missing "
            "and there is no earlier frame to reuse")
      path = last_valid_path
    # The context manager closes the underlying file once the tensor has
    # been produced.
    with Image.open(path) as image:
      frames.append(frame_process(image))
    last_valid_path = path
  return torch.stack(frames)

def get_spect_tensor(i):
  """Open the spectrogram image of video index `i` and return it after `spect_process`.

  The image is read from `spect_directory` + `folders[int(i / 50)]` + "<i>.jpg".
  """
  emotion_folder = folders[int(i / 50)]
  image_path = "".join((spect_directory, emotion_folder, str(i), ".jpg"))
  return spect_process(Image.open(image_path))

# Vocabulary: token string -> integer id. get_text_tensor adds unseen tokens
# on the fly with id len(dictionary) + 1, so ids start at 2 for real tokens
# and id 0 is never assigned here (0 is what pad_sequence pads with by default).
dictionary = {
    'EMPTY': 1 # EMPTY --> signal that the text is empty and contains nothing
}

def get_text_tensor(i):
  """Encode the transcript of video `i` as a tensor of vocabulary ids.

  The text is taken from `df.columns[i]`, tokenized with `preprocess_text`,
  and each token is mapped to its id in the module-level `dictionary`.
  Tokens not seen before are added to `dictionary` with the next free id
  (`len(dictionary) + 1`), so calling this function mutates `dictionary`
  and the ids depend on the order in which texts are processed.

  Args:
    i: integer position of the transcript in `df.columns`.

  Returns:
    Tuple `(ids, n_tokens)`: a 1-D int64 tensor on `device` holding the
    token ids (empty when the text has no tokens), and the token count.
  """
  # NOTE(review): this reads the CSV *header* entry at position i as the
  # transcript. pandas rewrites empty/duplicate headers ("Unnamed: 3",
  # "text.1"), which would leak into the tokens -- confirm the file layout.
  tokens = preprocess_text(df.columns[i])
  token_ids = []
  for token in tokens:
    if token not in dictionary:
      dictionary[token] = len(dictionary) + 1
    token_ids.append(dictionary[token])
  # Build the tensor directly as int64: torch.Tensor(...) would go through
  # float32 first, which cannot represent every integer above 2**24.
  return torch.tensor(token_ids, dtype=torch.int64, device=device), len(tokens)

In [4]:
# Encode every transcript once. Order matters: get_text_tensor assigns new
# vocabulary ids in the order tokens are first seen.
text_tensors = []
text_lengths = []
# Length (in tokens) of the longest encoded transcript seen so far.
MXLEN = 0

# Number of videos to encode (indices 0 .. num_input_videos - 1).
num_input_videos = 300

for i in range(num_input_videos):
  tt, text_length = get_text_tensor(i)
  text_tensors.append(tt)
  text_lengths.append(text_length)
  MXLEN = max(MXLEN, (int)(tt.size(0)))

# Right-pad all sequences with zeros to the longest one; with batch_first the
# result is a single (num_input_videos, MXLEN) tensor that replaces the list.
text_tensors = torch.nn.utils.rnn.pad_sequence(text_tensors, batch_first = True)

In [5]:
# Pre-load one processed spectrogram tensor per video, indexed by video id.
# The count comes from num_input_videos so it stays in step with the text
# tensors built above instead of repeating the literal 300.
# Frame tensors are not rebuilt here: they are loaded pre-computed from
# frame.pt in a later cell (get_frame_tensor can regenerate them if needed).
spect_tensors = [get_spect_tensor(i) for i in range(num_input_videos)]

In [6]:
# Pre-computed frame tensors for all videos, indexed by video id along dim 0.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
ds = torch.load("/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/frame.pt")
# Sanity check of the layout; the recorded run printed [300, 10, 3, 32, 32]
# (presumably videos x frames x channels x height x width -- confirm).
print(ds.size())

torch.Size([300, 10, 3, 32, 32])


# Evaluation Loop

In [8]:
# The checkpoint is a fully pickled model, so unpickling must be able to
# resolve the model class as `__main__.Smash`. This notebook does not define
# `Smash`: define or import the class in an earlier cell before running this.
import __main__
if "Smash" not in globals():
  raise NameError("name 'Smash' is not defined: define or import the Smash "
                  "model class in an earlier cell before loading vmodel1.pt")
setattr(__main__, "Smash", Smash)

# NOTE(review): torch.load unpickles arbitrary code -- only load checkpoints
# you trust. Recent PyTorch releases may also need weights_only=False to load
# a whole pickled model; confirm against the installed version.
# The path previously had a literal "<path to pickle>" placeholder joined
# onto it, which can never resolve to a file.
model = torch.load("/content/drive/My Drive/Machine Learning/COSMOS/FINAL_PROJECT/DER/vmodel1.pt", map_location=device)
# Keep the model on the same device as its inputs (they are moved to `device`).
model.to(device)
model.eval()

predicts = []  # predicted class id per evaluated sample
corrects = []  # ground-truth class id per evaluated sample
# Class id -> emotion name. Assumes labels in `data` use this same id order
# (it matches `folders`) -- confirm against how the dataset was built.
mapping = ["anger", "disgust", "fear", "joy", "sadness", "surprise"]

with torch.no_grad():
  for indices, label in data_loader:
    # Plain Python ints index tensors and lists alike, on any device, and
    # work for any batch size and for (B,) as well as (B, 1) index tensors.
    video_ids = indices.reshape(-1).tolist()

    # Text ids and true lengths for each video in the batch.
    text_batch = []
    textl = []
    for vid in video_ids:
      text = text_tensors[vid].to(device)
      length = text_lengths[vid]
      if length == 0:
        # Empty transcript: feed the single EMPTY token with length 1.
        # Clone first so the shared text_tensors table is never modified.
        text = text.clone()
        text[0] = dictionary['EMPTY']
        length = 1
      text_batch.append(text)
      textl.append(length)
    text_batch = torch.stack(text_batch)

    # Spectrograms. No requires_grad here: nothing is differentiated
    # during evaluation.
    spect_batch = torch.stack([spect_tensors[vid].to(device) for vid in video_ids])

    # Frames: stack per-video tensors, then swap dims 1 and 2. With the
    # recorded ds layout [300, 10, 3, 32, 32] this gives (B, 3, 10, 32, 32).
    frame_batch = torch.stack([ds[vid].to(device) for vid in video_ids])
    frame_batch = torch.transpose(frame_batch, 1, 2)

    # Output of the final network on the combined inputs.
    y_pred = model(spect_batch, text_batch, textl, frame_batch)
    prediction = y_pred.argmax(dim=1)

    for predicted_class in prediction.tolist():
      # Show the first five predictions as a quick sanity check.
      if len(predicts) < 5:
        print(len(predicts), mapping[predicted_class])
      predicts.append(predicted_class)
    corrects.extend(label.reshape(-1).tolist())

# Fix the label set so the matrix is always len(mapping) x len(mapping), even
# when some class never occurs, and so it lines up with display_labels.
confucian = metrics.confusion_matrix(corrects, predicts, labels = list(range(len(mapping))))
# Pass the computed matrix (the imported confusion_matrix *function* was
# passed here before).
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confucian, display_labels = mapping)
cm_display.plot()
plt.show()


NameError: name 'Smash' is not defined