In [6]:
# Here, we compute the intrinsic evaluation metric:
# - mean inter-type cosine similarity (DiffSim ↓; lower is better)

In [1]:
import json
import os
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from scipy import spatial
from sklearn.metrics.pairwise import cosine_distances
from torch.multiprocessing import set_start_method
from tqdm import tqdm, trange
from transformers import BartModel

# Project-local imports (original relative order preserved; the star import
# stays after the third-party imports so any name it re-exports still wins).
from src.utils.data_util import DataHandlerCLS
from src.train_valid_test_step import *
from config import Config as config

In [2]:
    
def write_json_file(path, data):
    """Serialize ``data`` as JSON and write it to ``path``.

    The file is opened in text write mode, so any existing content at
    ``path`` is replaced.

    Args:
        path: Location of the output file.
        data: Any object that ``json.dump`` can serialize.

    Returns:
        None.
    """
    with open(path, 'w') as out_file:
        json.dump(data, out_file)
    return None

In [3]:
def mean_pooling(token_embeddings, attention_mask):
    """Average token embeddings over the positions selected by a mask.

    Args:
        token_embeddings: Tensor of shape [batch_size, max_seq_len, hidden_dim].
        attention_mask: Tensor that is expanded to the shape of
            ``token_embeddings`` after a trailing dimension is added
            (i.e. expected as [batch_size, max_seq_len]); its values weight
            each token position, with 0 excluding the position.

    Returns:
        Tensor of shape [batch_size, hidden_dim] holding the masked mean of
        the token embeddings along the sequence dimension.
    """
    # Broadcast the mask across the hidden dimension so it weights every feature.
    mask_weights = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    masked_sum = (token_embeddings * mask_weights).sum(dim=1)
    # Clamp the denominator so an all-zero mask yields zeros instead of NaN.
    weight_totals = mask_weights.sum(dim=1).clamp(min=1e-9)
    return masked_sum / weight_totals

In [4]:
# Build the data handler. Its `config` attribute is what the model below is
# constructed from, and its test/validation generators are consumed later on.
data_handler = DataHandlerCLS()

# Manage and initialize model
# ---------------------------------------------------------------------------------
# Initialize the combined adapter model. Per the log printed under this cell,
# this loads facebook/bart-base plus a non-compositional adapter and an
# adapter-fusion module from checkpoint directories.
model = BartAdapterCombined(data_handler.config)
# Move the model to the device named in the project config.
model.to(config.DEVICE)
# Put the model in evaluation mode (presumably the standard nn.Module.eval()).
# As the last expression of the cell, its return value is what the notebook
# displays, which is why the module repr appears in the output.
model.eval()

Number of idioms: 1521
Training dataset size: 32693
Validation dataset size: 4102
Testing dataset size: 4080
Load base model from facebook/bart-base
=> Initializing Adapters with Fusion Module...

[COMPOSITIONAL MODULE]: 
==> Using BART output as compositional embedding!

[NON-COMPOSITIONAL MODULE]: 
==> Loading Adapter from /home/zzeng/workspace/UIUC_research/RepresentationLearning/models/PIER/checkpoints/bart-adapters_non-compositional_magpie_random-GIEA/best/
==> Non-compositional adapter loaded!

[COMBINED MODULE]: 
==> Adding fusion module!
Fuse[compositional, non-compositional]
==> Loading Adapter Fusion from /home/zzeng/workspace/UIUC_research/RepresentationLearning/models/PIER/checkpoints/bart-adapters_fusion_magpie_random-PIER/best/
==>  Adapter Fusion Loaded!


BartAdapterCombined(
  (model): BartForConditionalGeneration(
    (model): BartModel(
      (shared): Embedding(50265, 768, padding_idx=1)
      (encoder): BartEncoder(
        (invertible_adapters): ModuleDict()
        (embed_tokens): Embedding(50265, 768, padding_idx=1)
        (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
        (layers): ModuleList(
          (0): BartEncoderLayer(
            (self_attn): BartAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=

In [5]:
# Report which adapter configuration and data splits this run evaluates.
config_summary = [
    'Adapter Name: {}'.format(config.ADAPTER_NAME),
    'Adapter Split: {}'.format(config.SPLIT),
    'Task Split: {}'.format(config.CLS_TYPE),
]
print('\n'.join(config_summary))

Adapter Name: fusion
Adapter Split: random
Task Split: random


In [6]:
# Embed every sentence of the test AND validation sets, then group the pooled
# phrase embeddings by idiom and by usage type (idiomatic vs. literal).
idioms2embed = {}


def collect_phrase_embeddings(model, batches, num_batches, store):
    """Run the model over ``batches`` and file pooled phrase embeddings into ``store``.

    Args:
        model: Object exposing ``model.model(**inputs)`` (whose output has a
            ``last_hidden_state``) and ``model.mean_pooling(hidden, mask)``.
        batches: Iterable of batch dicts with the keys 'inputs' (keyword
            arguments for the forward pass), 'phrase_masks', 'idioms' and
            'labels'.
        num_batches: Number of batches, used only as the progress-bar total.
        store: Dict mutated in place, mapping each idiom to
            ``{'idiomatic': [...], 'literal': [...]}`` lists of embeddings.
            A label equal to 1 marks an idiomatic usage; anything else is
            filed as literal.

    Returns:
        None. Results are accumulated in ``store`` (returning nothing also
        keeps the notebook cell from echoing the whole dict).
    """
    for batch in tqdm(batches, ncols=100, leave=False, total=num_batches):
        # Inference only: neither the forward pass nor the pooling needs gradients.
        with torch.no_grad():
            outputs = model.model(**batch['inputs'])
            embeds = model.mean_pooling(outputs.last_hidden_state, batch['phrase_masks'])
        embeds = embeds.detach().cpu().numpy()
        labels = batch['labels']
        for i, idiom in enumerate(batch['idioms']):
            # Both usage buckets are always created together, so later code
            # can rely on the two keys being present for every idiom.
            buckets = store.setdefault(idiom, {'idiomatic': [], 'literal': []})
            usage = 'idiomatic' if labels[i] == 1 else 'literal'
            buckets[usage].append(embeds[i])


# Make sure the model is in evaluation mode before extracting embeddings.
model.eval()

collect_phrase_embeddings(
    model,
    data_handler.testset_generator,
    data_handler.config.num_batch_test,
    idioms2embed,
)
collect_phrase_embeddings(
    model,
    data_handler.validset_generator,
    data_handler.config.num_batch_valid,
    idioms2embed,
)
    

                                                                                                    

In [8]:
# Keep only the idioms that were collected with at least one idiomatic AND at
# least one literal sentence, and represent each usage type by the mean of its
# sentence embeddings (stored as plain lists).
idioms2embed_keep = {}
count = 0  # total number of sentence embeddings behind the kept idioms
for idiom, usages in idioms2embed.items():
    idiomatic_embeds = usages['idiomatic']
    literal_embeds = usages['literal']
    if len(idiomatic_embeds) == 0 or len(literal_embeds) == 0:
        continue
    count += len(idiomatic_embeds)
    count += len(literal_embeds)
    idioms2embed_keep[idiom] = {
        'idiomatic': np.mean(idiomatic_embeds, 0).tolist(),
        'literal': np.mean(literal_embeds, 0).tolist(),
    }

# Cosine similarity between the idiomatic and literal mean embeddings of each
# kept idiom; DiffSim is the average of these per-idiom similarities.
idioms_embed_cossim = []
idiom_to_cosim = {}
for idiom, mean_embeds in idioms2embed_keep.items():
    cos_distance = spatial.distance.cosine(mean_embeds['idiomatic'], mean_embeds['literal'])
    score = 1 - cos_distance
    idiom_to_cosim[idiom] = score
    idioms_embed_cossim.append(score)
print("mean inter-type cosine similarity (DiffSim):")
print(np.mean(idioms_embed_cossim))

mean inter-type cosine similarity (DiffSim):
0.32302344495937846
