In [None]:
import clip
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json 
import sys 
from PIL import Image

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

sys.path.insert(0, os.path.dirname(os.getcwd()))
import loaders
from utils import scores as sc
from utils import evaluation as ev

In [None]:
# Load the CLIP ViT-B/32 checkpoint, preferring the GPU when one is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
import logging
# Set Logger
# Configure a dedicated notebook logger that emits DEBUG-and-above records
# through a stream handler using a timestamped format.
logger = logging.getLogger('notebook_logger')
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
stream_handler.setFormatter(formatter)

# logging.getLogger returns the same logger object for a given name, so
# re-running this cell would otherwise stack one more handler each time and
# every record would be printed repeatedly. Detach handlers left over from a
# previous run before attaching the fresh one.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
logger.addHandler(stream_handler)

# Show One Example from the Dataset

In [None]:
# Build the data loader for the "real" data source, load the dialogue table,
# and display a single example together with its CLIP similarity.
# NOTE(review): `k` is passed straight to the loader helpers; presumably it is
# the index of the example to display - confirm against loaders.DataLoader.
# TODO: Add VSNR for cosine similarity
data_loader = loaders.DataLoader(logger=logger, data_source="real")
df_table = data_loader.load_dialogue_df()

k = 5
data_loader.showing_example(k)
data_loader.show_clip_similarity(k, df_table, model, preprocess)

# Define OOD Categories below

In [None]:
# Supercategories treated as out-of-distribution; every other supercategory
# known to the loader is in-distribution.
ood_category = ['vehicle']
ind_category = [name for name in data_loader.supercategories if name not in ood_category]

# A row is labelled OOD (1) only when none of its supercategories is
# in-distribution; a row with at least one in-distribution supercategory is 0.
df_table['OOD'] = df_table['supercategories'].apply(
    lambda row_categories: 1 if not any(item in row_categories for item in ind_category) else 0
)
df_table['OOD'].value_counts()

# Load precomputed CLIP (ViT-B/32) features for images and dialogues

In [None]:
## Dialogue Processing
# Load the saved dialogue feature array and store one entry per row of df_table.
# Assigning a list requires its length to equal len(df_table).
# NOTE(review): presumably the array rows are stored in the same order as
# df_table - confirm against the script that produced the .npy file.
dialogue_clip = np.load(f'{data_loader.data_dir}/CLIP/mmd_dialogs_truncate/mmd_clip_dialog_features.npy')
df_table['dialogue_clip'] = list(dialogue_clip)

## Image Processing
# Load the saved image feature array and the JSON list of image file names.
image_clip = np.load(f'{data_loader.data_dir}/CLIP/mmd_imgs/mmd_clip_img_features.npy')
image_annotation = pd.read_json(f'{data_loader.data_dir}/CLIP/mmd_imgs/mmd_imgs_filenames.json')
# Rename the file-name column (read in as column 0) to "img_file", then join,
# by index, a frame built from the feature array.
# NOTE(review): the join is purely index-based, so it assumes the i-th file
# name belongs to the i-th feature row - confirm.
image_annotation = image_annotation.rename(columns={0:"img_file"}).join(pd.DataFrame(pd.DataFrame(image_clip.tolist()).apply(np.array, axis=1)))
# The joined feature column arrives under the label 0; give it a proper name.
image_annotation.rename(columns={0:"image_clip"}, inplace=True)
# Left merge keeps every df_table row; rows whose img_file has no match in
# image_annotation end up with a missing image_clip value.
df_table = df_table.merge(image_annotation, on='img_file', how='left')

In [None]:
# Load the expanded (segment-level) dialogue table and its saved feature array,
# then store one feature entry per segment row.
dialogue_df_segment = pd.read_json(
    f'{data_loader.data_dir}/CLIP/mmd_dialogs_segment/new_mmd_expand_dialog.json'
)
dialogue_clip_segment = np.load(
    f'{data_loader.data_dir}/CLIP/mmd_dialogs_segment/new_mmd_dialog_features.npy'
)
dialogue_df_segment['dialogue_clip'] = [segment_vec for segment_vec in dialogue_clip_segment]

In [None]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

# Multi-hot encode each row's supercategories over the in-distribution classes.
# The encoding is kept both as a single array-valued column ('encoded_label')
# and as one indicator column per in-distribution category.
# NOTE(review): the column-wise concat appends the indicator columns again if
# this cell is re-run on the same df_table.
mlb = MultiLabelBinarizer(classes=ind_category)
df_table['encoded_label'] = [label_row for label_row in mlb.fit_transform(df_table['supercategories'])]
encoded_df = pd.DataFrame(list(df_table['encoded_label']), columns=ind_category)
df_table = pd.concat((df_table, encoded_df), axis=1)

In [None]:
import torch.nn.functional as F
from tqdm.notebook import tqdm
tqdm.pandas()

# For the "qa" data source, rewrite image ids into COCO train2014 names.
# NOTE(review): this rewrite is not idempotent - int(x) fails on an id that
# has already been rewritten if the cell is run twice.
if data_loader.data_source == "qa":
    df_table['image_id'] = df_table['image_id'].apply(lambda x: f"COCO_train2014_{int(x):012d}")

# Encode one text prompt ("Category <name>") per in-distribution category with
# the CLIP text encoder and keep the flattened features as NumPy arrays.
categories_clip = {}
with torch.no_grad():
    for category_name in ind_category:
        prompt_tokens = clip.tokenize(['Category ' + category_name]).to(device)
        categories_clip[category_name] = model.encode_text(prompt_tokens).flatten().cpu().numpy()

def calculate_similarity_score(row, type = "image"):
    """Score one row against every in-distribution category prompt.

    Computes the cosine similarity between the row's CLIP feature vector and
    each entry of the module-level ``categories_clip`` mapping, iterating over
    the module-level ``ind_category`` list.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) holding the feature
            vector under ``'image_clip'`` or ``'dialogue_clip'``.
        type: Which feature to score, ``"image"`` (default) or ``"dialogue"``.
            The name shadows the builtin but is kept because callers pass it
            as a keyword argument.

    Returns:
        Tuple ``(cosine_sim, cosine_sim_max)``: the sum and the maximum of the
        per-category cosine similarities. ``(0, 0)`` when ``ind_category`` is
        empty.

    Raises:
        ValueError: If ``type`` is neither ``"image"`` nor ``"dialogue"``.
    """
    feature_columns = {"image": "image_clip", "dialogue": "dialogue_clip"}
    if type not in feature_columns:
        # Previously an unknown type surfaced later as an UnboundLocalError.
        raise ValueError(f"type must be 'image' or 'dialogue', got {type!r}")

    features = row[feature_columns[type]]
    # The norm of the row's features does not depend on the category.
    features_norm = np.linalg.norm(features)

    similarities = []
    for categories in ind_category:
        text_features = categories_clip[categories]
        similarities.append(
            np.dot(text_features, features) / (np.linalg.norm(text_features) * features_norm)
        )

    if not similarities:
        return 0, 0

    # Take the true maximum: seeding a running max with 0 would report 0
    # whenever every similarity is negative.
    return sum(similarities), np.max(similarities)

# Score every row against the category prompts. Each apply yields a Series of
# (sum, max) tuples, which zip(*...) splits into the two target columns.
image_pairs = df_table.progress_apply(calculate_similarity_score, axis=1, type="image")
df_table['image_score'], df_table['image_score_max'] = zip(*image_pairs)

segment_pairs = dialogue_df_segment.progress_apply(calculate_similarity_score, axis=1, type="dialogue")
dialogue_df_segment['dialogue_score'], dialogue_df_segment['dialogue_score_max'] = zip(*segment_pairs)

dialogue_pairs = df_table.progress_apply(calculate_similarity_score, axis=1, type="dialogue")
df_table['dialogue_score'], df_table['dialogue_score_max'] = zip(*dialogue_pairs)

In [None]:
# Average the segment-level scores per value of the 'index' column and attach
# them to df_table.
# NOTE(review): the assignment is positional (.values), so it assumes the
# groups, sorted by 'index', line up one-to-one with df_table's row order -
# confirm that 'index' refers to df_table row positions.
segment_groups = dialogue_df_segment.groupby('index')
df_table['dialogue_score_segment'] = segment_groups['dialogue_score'].mean().values
df_table['dialogue_score_segment_max'] = segment_groups['dialogue_score_max'].mean().values

In [None]:
import matplotlib.pyplot as plt

# Compare ID and OOD score distributions for the six CLIP-based scores
# computed on df_table. One normalised histogram pair is drawn per panel of a
# 3x2 grid.
fig, axs = plt.subplots(3, 2, figsize=(10, 10))

# Score column, panel title and x-axis label for each panel, in plotting order.
columns = ['image_score', 'image_score_max', 'dialogue_score', 'dialogue_score_max', 'dialogue_score_segment', 'dialogue_score_segment_max']
titles = ['Image Scores Distribution', 'Image Max Scores Distribution',
          'Dialogue Sum Scores Distribution', 'Dialogue Max Scores Distribution',
          'Dialogue Segment Sum Scores Distribution', 'Dialogue Segment Max Scores Distribution']
x_labels = ['Image Score', 'Image Max Score', 'Dialogue Score', 'Dialogue Max Score', 'Dialogue Segment Score', 'Dialogue Segment Max Score']

for ax, column, title, x_label in zip(axs.flatten(), columns, titles, x_labels):
    ood_scores = df_table.loc[df_table['OOD'] == 1, column]
    non_ood_scores = df_table.loc[df_table['OOD'] == 0, column]
    # ID is drawn first so the OOD histogram is overlaid on top of it.
    ax.hist(non_ood_scores, bins=80, alpha=0.5, label='ID', density=True)
    ax.hist(ood_scores, bins=80, alpha=0.5, label='OOD', density=True)
    ax.legend(loc='upper right')
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Density')

plt.tight_layout()
plt.show()

In [None]:
import importlib
# Re-import the utils.evaluation module (bound to `ev`) so that edits made to
# it on disk are picked up without restarting the kernel.
importlib.reload(ev)

In [None]:
def image_text_similarity(row):
    """Return the cosine similarity between a row's dialogue and image features.

    Args:
        row: Mapping-like object providing the ``'dialogue_clip'`` and
            ``'image_clip'`` feature vectors.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms.
    """
    dialogue_vec, image_vec = row['dialogue_clip'], row['image_clip']
    norm_product = np.linalg.norm(dialogue_vec) * np.linalg.norm(image_vec)
    return np.dot(dialogue_vec, image_vec) / norm_product
# Per-row image/dialogue agreement, then an overall score that scales the
# image max score (plus a 0.0001-weighted dialogue max score) by that agreement.
df_table['image_text_similarity'] = df_table.apply(image_text_similarity, axis=1)
weighted_max_score = df_table['image_score_max'] + df_table['dialogue_score_max'] * 0.0001
df_table['overall_simialrity'] = df_table['image_text_similarity'] * weighted_max_score

In [None]:
# Print ev.fpr_evaluation (third argument 0.95) for each CLIP-based score on
# the full table. Scores are negated before being passed to the evaluation
# helper.
fpr_report = [
    ('Image Max:', 'image_score_max'),
    ('Image Sum:', 'image_score'),
    ('Dialogue Max:', 'dialogue_score_max'),
    ('Dialogue Sum:', 'dialogue_score'),
    ('Dialogue Segment Max:', 'dialogue_score_segment_max'),
    ('Dialogue Segment Sum:', 'dialogue_score_segment'),
    ('Overall Max:', 'overall_simialrity'),
]
for report_label, score_column in fpr_report:
    print(report_label, ev.fpr_evaluation(df_table['OOD'].values, -df_table[score_column].values, 0.95))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
# Import the DNN module under an alias. A bare `model` would rebind the name
# that already holds the CLIP model returned by clip.load, so any earlier cell
# calling model.encode_text (or passing `model` to the data loader) would break
# when re-run after this cell.
from models.DNN import model as dnn_model

# One loader per modality, both trained for 15 epochs. Only the dialogue
# loader sets learning_rate explicitly; the image loader uses model_loader's
# default.
image_model_loader = dnn_model.model_loader(logger=logger,
                                            num_epochs=15)

dialogue_model_loader = dnn_model.model_loader(logger=logger,
                                               num_epochs=15,
                                               learning_rate=0.001)

In [None]:
# Build the train/test frames and the per-modality feature/label splits.
# The image loader's create_dataset supplies the splits for both modalities.
(df_ind_train, df_test,
 X_train_image, X_test_image,
 X_train_dialogue, X_test_dialogue,
 Y_train, Y_test) = image_model_loader.create_dataset(data_loader, df_table)


In [None]:
# Train the image model on the image training features and labels.
# NOTE(review): no random seed is set anywhere in the visible notebook, so
# results may differ between runs - confirm whether model_loader seeds itself.
image_model_loader.train_model(X_train_image, Y_train)

In [None]:
# Train the dialogue model on the dialogue training features and the same
# labels used for the image model.
dialogue_model_loader.train_model(X_train_dialogue, Y_train)

In [None]:
# Score the test set with both trained models under every scoring rule and
# store the results as '<score_type>_*' columns of df_test.
# The loop variable must stay named `score_type`: later plotting cells read the
# value it holds after the loop finishes.
score_type_list = ["mp", "energy", "maxlogits", "msp"]

for score_type in score_type_list:
    image_score_sum, image_score_max = image_model_loader.evaluate_on_test(
        X_test_image, Y_test, score_type=score_type, return_score=True
    )
    dialogue_score_sum, dialogue_score_max = dialogue_model_loader.evaluate_on_test(
        X_test_dialogue, Y_test, score_type=score_type, return_score=True
    )

    df_test[f'{score_type}_sum_image'] = image_score_sum
    df_test[f'{score_type}_max_image'] = image_score_max
    df_test[f'{score_type}_sum_dialogue'] = dialogue_score_sum
    df_test[f'{score_type}_max_dialogue'] = dialogue_score_max

    # Combined score: image/dialogue agreement times the sum of both max
    # scores (unweighted here, unlike the CLIP-based overall score).
    combined_max_score = df_test[f'{score_type}_max_image'] + df_test[f'{score_type}_max_dialogue']
    df_test[f'{score_type}_overall_simialrity'] = df_test['image_text_similarity'] * combined_max_score

In [None]:
import matplotlib.pyplot as plt

# Histogram of the image model's max score for ID vs OOD test rows.
# Choose the plotted score explicitly: relying on `score_type` would use the
# last value left behind by the scoring loop ("msp"), which contradicts the
# "Energy" title.
plot_score_type = "energy"
image_max_column = f'{plot_score_type}_max_image'

ood_scores = df_test[df_test['OOD'] == 1][image_max_column]
non_ood_scores = df_test[df_test['OOD'] == 0][image_max_column]

plt.hist(non_ood_scores, bins=50, alpha=0.5, label='ID', density = 1)
plt.hist(ood_scores, bins=50, alpha=0.5, label='OOD', density = 1)
plt.legend(loc='upper right')

plt.title(f'{plot_score_type.capitalize()} Distribution')
plt.xlabel('Image Score')
# The histograms are normalised (density=1), so the y-axis is a density.
plt.ylabel('Density')

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the dialogue model's max score for ID vs OOD test rows.
# Choose the plotted score explicitly: relying on `score_type` would use the
# last value left behind by the scoring loop ("msp"), which contradicts the
# "Energy" title.
plot_score_type = "energy"
dialogue_max_column = f'{plot_score_type}_max_dialogue'

ood_scores = df_test[df_test['OOD'] == 1][dialogue_max_column]
non_ood_scores = df_test[df_test['OOD'] == 0][dialogue_max_column]

plt.hist(non_ood_scores, bins=50, alpha=0.5, label='ID', density = 1)
plt.hist(ood_scores, bins=50, alpha=0.5, label='OOD', density = 1)
plt.legend(loc='upper right')

plt.title(f'{plot_score_type.capitalize()} Distribution')
# This panel shows dialogue scores, not image scores.
plt.xlabel('Dialogue Score')
plt.ylabel('Density')

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the combined (image + dialogue) score for ID vs OOD test rows.
# Choose the plotted score explicitly: relying on `score_type` would use the
# last value left behind by the scoring loop ("msp"), which contradicts the
# "Energy" title.
plot_score_type = "energy"
overall_column = f'{plot_score_type}_overall_simialrity'

ood_scores = df_test[df_test['OOD'] == 1][overall_column]
non_ood_scores = df_test[df_test['OOD'] == 0][overall_column]

plt.hist(non_ood_scores, bins=50, alpha=0.5, label='ID', density = 1)
plt.hist(ood_scores, bins=50, alpha=0.5, label='OOD', density = 1)
plt.legend(loc='upper right')

plt.title(f'{plot_score_type.capitalize()} Distribution')
# This panel shows the combined similarity score, not image scores.
plt.xlabel('Overall Similarity Score')
plt.ylabel('Density')

plt.show()

In [None]:
# Log three metrics for each CLIP-based score on the test split, in the order
# fpr_evaluation (third argument 0.95), auroc_evaluation, aupr_evaluation.
# Scores are negated before being passed to the evaluation helpers.
clip_score_columns = (
    'image_score_max',
    'image_score',
    'dialogue_score_max',
    'dialogue_score',
    'dialogue_score_segment_max',
    'dialogue_score_segment',
    'overall_simialrity',
)
for clip_score_column in clip_score_columns:
    logger.info(ev.fpr_evaluation(df_test['OOD'].values, -df_test[clip_score_column].values, 0.95))
    logger.info(ev.auroc_evaluation(df_test['OOD'].values, -df_test[clip_score_column].values))
    logger.info(ev.aupr_evaluation(df_test['OOD'].values, -df_test[clip_score_column].values))

In [None]:

# Log three metrics (fpr_evaluation, AUROC, aupr_evaluation) for every DNN
# score column on the test split. Scores are negated before evaluation.
# The AUROC helper is called as `ev.auroc_evaluation`, matching the call used
# for the CLIP-based scores; this cell previously spelled it
# `ev.auoc_evaluation`.
# NOTE(review): utils.evaluation is not visible here - confirm that
# `auroc_evaluation` is the name the module actually defines.
for score_type in score_type_list:
    score_suffixes = ['max_image', 'max_dialogue', 'overall_simialrity']
    if score_type == "energy":
        # The summed scores are only reported for the energy score, ahead of
        # the max-based scores.
        score_suffixes = ['sum_image', 'sum_dialogue'] + score_suffixes

    for score_suffix in score_suffixes:
        dnn_score_column = f'{score_type}_{score_suffix}'
        logger.info(ev.fpr_evaluation(df_test['OOD'].values, -df_test[dnn_score_column].values, 0.95))
        logger.info(ev.auroc_evaluation(df_test['OOD'].values, -df_test[dnn_score_column].values))
        logger.info(ev.aupr_evaluation(df_test['OOD'].values, -df_test[dnn_score_column].values))