In [114]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="4"

In [115]:
from tensorflow.python.client import device_lib
print (device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6543432497992315206
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 23551737856
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 5205972013172701355
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:c1:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


2023-12-13 18:11:03.926497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /device:GPU:0 with 22460 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:c1:00.0, compute capability: 8.6


In [116]:
import time
import os
#import neccesary packages
#import tensorflow_hub as hub
import tensorflow as tf
import pickle
from keras import backend as K
import numpy as np
from sklearn_extra.cluster import KMedoids
from tensorflow import keras
from tensorflow.keras.layers import Concatenate, Dense, Input, LSTM, Embedding, Dropout, Activation, GRU, Flatten
from datetime import datetime
from scipy.spatial import distance_matrix
import sys

In [117]:
from transformers import BertTokenizer, TFBertModel

In [118]:
import keras.backend as K
import operator


In [152]:

from tensorflow.keras import layers, Model, regularizers

def make_variables(tf_name, k1, k2, initializer):
     
    return tf.Variable(initializer(shape=[k1, k2], dtype=tf.float32), trainable=True, name=tf_name)

#prototype layer
class prototypeLayer(keras.layers.Layer):
    def __init__(self, k_protos, vect_size, k_cents):
        super(prototypeLayer, self).__init__(name='proto_layer')
        self.n_protos = k_protos
        self.vect_size = vect_size
        self.prototypes = make_variables("prototypes", k_protos, vect_size,
                                         initializer=tf.constant_initializer(k_cents))
    
    @tf.function
    def call(self, inputs):
        print("inputs ", inputs.shape)
        tmp1 = tf.expand_dims(inputs, 2)
        print("tmp1 ", tmp1.shape)
        tmp1 = tf.broadcast_to(tmp1, [tf.shape(tmp1)[0], tf.shape(tmp1)[1], self.n_protos, self.vect_size])

        print("tmp1 ", tmp1.shape)
        print("prototypes ", self.prototypes)
        tmp2 = tf.broadcast_to(self.prototypes,
                               [tf.shape(tmp1)[0], tf.shape(tmp1)[1], self.n_protos, self.vect_size])

        print("tmp2 ", tmp2.shape)
        tmp3 = tmp1 - tmp2
        tmp4 = tmp3 * tmp3
        distances = tf.reduce_sum(tmp4, axis=3)
        print("distances ", distances.shape)
        
        return distances, self.prototypes

#distance layer: to convert the full distance matrix to sparse similarity matrix
class distanceLayer(keras.layers.Layer):
    def __init__(self):
        super(distanceLayer, self).__init__(name='distance_layer')
        self.a = 0.1
        self.beta = 1e6

    def e_func(self, x, e=2.7182818284590452353602874713527):
        return tf.math.pow(e, -(self.a * x))

    @tf.function
    def call(self, full_distances):
        min_dist_ind = tf.nn.softmax(-full_distances * self.beta)
        e_dist = self.e_func(full_distances) + 1e-8
        dist_hot_vect = min_dist_ind * e_dist
        return dist_hot_vect
    
    
class PrototypeCNN_Bert(Model):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
    """
    def __init__(self, sequence_length, num_classes, tokenizer,bert_model, embedding_size, filter_sizes, num_filters, l2_reg_lambda, dropout_keep_prob, k_protos, vect_size):

        
        super(PrototypeCNN_Bert, self).__init__()
        self.k_protos = k_protos
        self.vect_size = vect_size
        self.full_distences = None
        self.full_onehot_distances = None
        self.embedding = bert_model
        self.tokenizer = tokenizer
        self.max_l =sequence_length
        
        RNN_CELL_SIZE = 128
        self.convs = []
        
        for filter_size in filter_sizes:
            conv_block = tf.keras.Sequential([
                layers.Conv2D(num_filters, (filter_size, embedding_size), 
                              padding='valid', activation='relu'),
                layers.MaxPooling2D(pool_size=(sequence_length - filter_size + 1, 1), 
                                    strides=(1,1), padding='valid')])
            self.convs.append(conv_block)

        self.flatten = layers.Flatten()
        self.distance_layer = distanceLayer()
        self.LSTM = LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True)
        self.dropout = layers.Dropout(dropout_keep_prob)  # keep_prob will be supplied by call argument
        self.fc = layers.Dense(num_classes, 
                               kernel_regularizer=regularizers.l2(l2_reg_lambda), 
                               activation='softmax')

    def init_prototypelayer(self, k_cents):
        self.proto_layer = prototypeLayer(self.k_protos, self.vect_size, k_cents)
        
    def call(self, x):
        
         
        # Embedding layer
       
        x = self.tokenizer(x, padding = "max_length", max_length=self.max_l, return_tensors ="tf",truncation = True )
        outputs = self.embedding(input_ids = x["input_ids"], attention_mask = x["attention_mask"], output_hidden_states =True)
        x = list(outputs.hidden_states)[-1]
        x = tf.expand_dims(x, -1)

        print("embedding ", x.shape)
        
        
        # batch * 768
        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)
            print(c.shape)

        # Combine all the pooled features
        x = tf.concat(pooled_outputs, axis=-1)
        print(x.shape)
        x = self.flatten(x)
        print(x.shape)
        
    
        x = tf.expand_dims(x, axis=0)
        
        full_distances, protos = self.proto_layer(x)
         
        dist_hot_vect = self.distance_layer(full_distances)
        
        # 1*batch_size*10
        
        
#         lstmop, forward_h, forward_c = self.LSTM(tf.squeeze(dist_hot_vect))
#         z1 = self.fc(lstmop[:, -1, :])
#         z = tf.squeeze(z1, axis=0)
        
        x = self.dropout(dist_hot_vect)
        x = self.fc(x)
        x = tf.squeeze(x, axis=0)
        


        #return x, self.fc.weights[0], self.fc.weights[1]

       
        
        return x, full_distances, protos
    
    def embed(self,x):
        # Embedding layer
        
        x = self.tokenizer(x, padding = "max_length", max_length=self.max_l, return_tensors ="tf",truncation = True  )
        x = self.embedding(input_ids = x["input_ids"], attention_mask = x["attention_mask"])[0]
        x = tf.expand_dims(x, -1) #2*200*768*
        

        pooled_outputs = []
        for conv in self.convs:
            #print(x.shape)
            
            c = conv(x)
            
            pooled_outputs.append(c)

        # Combine all the pooled features
        x = tf.concat(pooled_outputs, axis=-1)
        x = self.flatten(x)
        
        return x
    
    def full_distance(self, x):
        
        x = tokenizer(x, padding = "max_length", max_length=200, return_tensors ="tf"  )
        x = self.embedding(x)
        x = tf.expand_dims(x, -1) #2*200*768*1
  
        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)

        # Combine all the pooled features
        x = tf.concat(pooled_outputs, axis=-1)
        x = self.flatten(x)
        
        x = tf.expand_dims(x, axis=0)
        full_distances, protos = self.proto_layer(x)
        
        return full_distances
    
    def one_hot_distance(self, x):
        
         # Embedding layer
        x = self.embedding(x)
        x = tf.expand_dims(x, -1)
  
        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)
        return pooled_outputs

        # # Combine all the pooled features
        # x = tf.concat(pooled_outputs, axis=-1)
        # x = self.flatten(x)
        
        # x = tf.expand_dims(x, axis=0)
        # full_distances, protos = self.proto_layer(x)
       
        # dist_hot_vect = self.distance_layer(full_distances)
        
        # return dist_hot_vect
    

In [120]:
#this method simple project prototypes to the closest sentences in
#sample_sent_vects
def projection(sample_sentences,sample_sent_vects,data_size=10000):
    prototypes = ProtoCNN.proto_layer.prototypes
    d_pos = {}
    #for each prototype
    for p_count, p in enumerate(prototypes):
        print('[db] p_count = ', p_count)
        s_count = 0
        d_pos[p_count] = {}
        #find its distances to all sample sentences
        for i, s in enumerate(sample_sent_vects[:data_size]):
            if len(sample_sentences[i]) < 5 or len(sample_sentences[i]) > 100:
                continue
            d_pos[p_count][i] = np.linalg.norm(sample_sent_vects[i] - p)
            s_count += 1
    #sort those distances, then assign the closest ones to new prototypes
    new_protos = []
    for p_count, p in enumerate(prototypes):
        sorted_d = sorted(d_pos[p_count].items(), key=operator.itemgetter(1))
        new_protos.append(sample_sent_vects[sorted_d[0][0]])
    #return these values

    return new_protos

In [121]:
#show the list of prototypes
def showPrototypes(sample_sentences,sample_sent_vects, sample_y, k_protos=10,printOutput=False, k_closest_sents = 20):
    
    prototypes = ProtoCNN.proto_layer.prototypes.numpy()
    #data_size = 10000
    d_pos = {}
    data_size = 150000
    for p_count, p in enumerate(prototypes):
       
        s_count = 0
        d_pos[p_count] = {}
        for i, s in enumerate(sample_sent_vect[:data_size]):
            #if len(sample_sentences[i]) < 20 or len(sample_sentences[i]) > 100:
            if len(sample_sentences[i]) < 30 or sample_y[i][1]==0:
                continue
            d_pos[p_count][i] = np.linalg.norm(sample_sent_vect[i] - p)
            s_count += 1
 

    mappedPrototypes = {}    
   
    recorded_protos_score = {}
    print("Prototypes: ")
    for l in range(k_protos):
        # print("prototype index = ", l)
        recorded_protos_score[l] = {}
        sorted_d = sorted(d_pos[l].items(), key=operator.itemgetter(1))
        print(l)
        mappedPrototypes[l]=[]
        for k in range(k_closest_sents):
            i = sorted_d[k][0]
            score = sorted_d[k][1]
            # print("[db] sorted_d ",sorted_d[0])
            # print("[db] sample_sentences[sorted_d[0][0]]: ",sample_sentences[sorted_d[0][0]])
            mappedPrototypes[l].append((sample_sentences[i].strip(), score, sample_y[i][1]))
            if k<10:
                print(sorted_d[k], sample_sentences[i],sample_y[i][1])
        #print(mappedPrototypes[l])

    
    return mappedPrototypes

In [122]:
#method to generate the number of closest sentences to each prototype
def protoFreq(self,sample_sent_vect):
    d = {}
    for sent in sample_sent_vect:
        sent_dist = {}
        for i, p in enumerate(self.prototypes):
            sent_dist[i] = np.linalg.norm(sent - p)
            if i not in d:
                d[i] = 0
        sorted_sent_d = sorted(sent_dist.items(), key=operator.itemgetter(1))
        # print(sorted_sent_d)
        picked_protos = sorted_sent_d[0][0]
        d[picked_protos] += 1
    print("Prototype freq = ", d)
    x = sorted(d.items(), key=lambda item: item[1], reverse=True)
    print("sorted :",x)

#re-train the model with new pruned prototype



In [123]:
def pruningTrain(self,new_k_protos,x_train,y_train,x_test,y_test):
    #print("[db] self prototypes: ",self.prototypes)
    k_cents = self.prototypes[:new_k_protos]
    k_cents = [p.numpy() for p in k_cents]
    #print("[db] k_cents = ",k_cents)
    self.createModel(k_cents=k_cents,k_protos=new_k_protos)
    self.train(x_train,y_train,x_test,y_test)

# generate the sentence value for each prototype
# and 10 closest sentences to it


In [124]:
def showTrajectory(self,input,sample_sentences,sample_vect):
    if len(self.mappedPrototypes) == 0:
        self.showPrototypes(sample_sentences,sample_vect,printOutput=False)
    prototypes = [self.mappedPrototypes[k].strip() for k in self.mappedPrototypes]
    vP, vS = self.embed(prototypes), self.embed(input)
    dStoP = {}
    for sCount, s in enumerate(vS):
        dStoP[sCount] = {}
        for i, p in enumerate(vP):
            dStoP[sCount][i] = np.linalg.norm(vS[sCount] - p)

    mappedProtos, mappedScore, mappedDist = [], [], []
    for sCount, s in enumerate(vS):
        sorted_d = sorted(dStoP[sCount].items(), key=operator.itemgetter(1))
        mappedProtos.append(prototypes[sorted_d[0][0]])

    #for small dataset, we use a pretrained sentiment model. We can use any
    #model for sentiment scores
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    sid_obj = SentimentIntensityAnalyzer()
    print("[db] mappedProtos ", mappedProtos)
    scores = []
    for s in mappedProtos:
        # sentiment_dict = sid_obj.polarity_scores(s)
        scores.append(0.5 + sid_obj.polarity_scores(s)['compound'] / 2)
    return scores

In [125]:
dev_sample_percentage = .1


# Model Hyperparameters
embedding_dim = 768
filter_sizes ="3,4,5"
num_filters = 128
dropout_keep_prob = 0.5
l2_reg_lambda = 0.5
max_l =100
# Training parameters
batch_size = 4096
num_epochs = 100
evaluate_every = 100
checkpoint_everyt = 100
num_checkpoints = 5

# Misc Parameters
allow_soft_placement = True
log_device_placement = False

# Data preprocessing

In [154]:
timestamp = str(int(time.time()))

out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("output directory: ", out_dir)
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
# Data Preparation
# ==================================================

# Load data

print("loading data...")
x = pickle.load(open("./mainbalancedpickle.p","rb"))
revs, W, W2, word_idx_map, vocab, max_l = x[0], x[1], x[2], x[3], x[4], x[5]
print("data loaded!")# Load data


output directory:  /big/xw384/schoolwork/NLP+DEEP LEARNING/Project/CASCADE/src/runs/1702670078
loading data...
data loaded!


In [128]:
len(revs)

219368

In [127]:
revs[0]

{'y': 1,
 'id': 'c07fd66',
 'text': 'religion must have the answer',
 'author': 'Reedzit',
 'topic': 'science',
 'label': [0, 1],
 'num_words': 5,
 'split': 1}

In [80]:
bert_model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = TFBertModel.from_pretrained(bert_model_name)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [153]:

max_l = 100

x_text = []
y = []

test_x = []
test_y = []

for i in range(len(revs)):
    if revs[i]['split']==1:
        x_text.append(revs[i]['text'])
        y.append(revs[i]['label'])
    else:
        test_x.append(revs[i]['text'])
        test_y.append(revs[i]['label'])  

y = np.asarray(y)
y_test = np.asarray(test_y)



In [82]:
x_text[:5]

['religion must have the answer',
 "it 's obviously tracks from a giant water tractor , farming for giant arctic sea prawn !",
 'wow he smoked pot oh lord hes such a horrible person now',
 "wow , his girlfriend is uhm ah fuck it , he 's an olympic champion , who am i to pass judgement",
 'i think the government should track every mormon in the country for subversive activity']

In [83]:
np.random.seed(42)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = np.asarray(x_text)[shuffle_indices]
y_shuffled = np.asarray(y)[shuffle_indices]

# Split train/test set
# TODO: This is very crude, should use cross-validation

dev_sample_index = -1 * int(dev_sample_percentage * float(len(y)))
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

x_train = np.asarray(x_train)
x_dev = np.asarray(x_dev)
y_train = np.asarray(y_train)
y_dev = np.asarray(y_dev)


Train/Dev split: 139232/15470


In [84]:
x_train[:1]

array(["why ca n't i have friends like this"], dtype='<U1720')

In [85]:
x_train[0],y_train [0]

("why ca n't i have friends like this", array([1, 0]))

In [86]:
for layer in bert_model.layers:
    layer.trainable = False

In [87]:
word_idx_map["@"] = 0
rev_dict = {v: k for k, v in word_idx_map.items()}

In [88]:
k_protos, vect_size = 10, 384

In [155]:
ProtoCNN = PrototypeCNN_Bert(sequence_length=max_l,
    num_classes=len(y_train[0]),
    tokenizer = tokenizer,
    bert_model = bert_model,
    embedding_size=embedding_dim,
    filter_sizes=list(map(int, filter_sizes.split(","))),
    num_filters=num_filters,
    l2_reg_lambda=l2_reg_lambda,
    dropout_keep_prob = dropout_keep_prob,
    k_protos = k_protos,
    vect_size = vect_size)

In [156]:
data = x_text[:2]
y = ProtoCNN.embed(data)
print(y.shape)

(2, 384)


In [101]:
import random
import copy

In [102]:
#random.shuffle(x_text)
sample_sentences = x_text[:15000]
sample_sentences_vects = []
for i in range(300):
    batch = sample_sentences[i*50:(i+1)*50]
    vect = ProtoCNN.embed(batch)
    sample_sentences_vects.append(vect.numpy())

In [93]:
sample_sentences_vect = np.concatenate(sample_sentences_vects, axis=0)

In [94]:
sample_sentences_vect.shape

(15000, 384)

In [95]:
k_protos = 10
kmedoids = KMedoids(n_clusters=k_protos, random_state=0).fit(sample_sentences_vect)
k_cents = kmedoids.cluster_centers_
print(k_cents.shape)

(10, 384)


In [157]:
ProtoCNN.init_prototypelayer(k_cents)

In [159]:
y, dist, protos = ProtoCNN(x_text[:2])

embedding  (2, 100, 768, 1)
(2, 1, 1, 128)
(2, 1, 1, 128)
(2, 1, 1, 128)
(2, 1, 1, 384)
(2, 384)


In [160]:
print(y.shape, dist.shape, protos.shape)

(2, 2) (1, 2, 10) (10, 384)


In [161]:
def make_variables(tf_name, k1, k2, initializer):
     
    return tf.Variable(initializer(shape=[k1, k2], dtype=tf.float32), trainable=True, name=tf_name)


def pw_distance(A):
    r = tf.reduce_sum(A * A, 1)
    r = tf.reshape(r, [-1, 1])
    D = r - 2 * tf.matmul(A, tf.transpose(A)) + tf.transpose(r)
    return D

def tight_pos_sigmoid_offset(x, offset, e=2.7182818284590452353602874713527):
    return 1 / (1 + tf.math.pow(e, (1 * (offset * x - 0.5))))


In [162]:
cost2 = tf.reduce_sum(tf.reduce_min(dist, axis=1))          

In [164]:
d= pw_distance(protos)
diag_ones = tf.convert_to_tensor(np.eye(k_protos, dtype=float))
diag_ones = tf.dtypes.cast(diag_ones, tf.float32)
d1 = d + diag_ones * tf.reduce_max(d)
d2 = tf.reduce_min(d1, axis=1)
min_d2_dist = tf.reduce_min(d2)
# the third loss term
cost3 = tight_pos_sigmoid_offset(min_d2_dist, 1) + 1e-8

In [165]:
tf.reduce_min(dist, axis=1).shape

TensorShape([1, 10])

In [166]:
cost2.shape

TensorShape([])

In [57]:
ProtoCNN(x_text[:2], training=False)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.47345367, 0.5265463 ],
       [0.76886785, 0.23113218]], dtype=float32)>

In [58]:
ProtoCNN.summary()

Model: "prototype_cnn__bert"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tf_bert_model_1 (TFBertMode  multiple                 109482240 
 l)                                                              
                                                                 
 sequential (Sequential)     (None, 1, 1, 128)         295040    
                                                                 
 sequential_1 (Sequential)   (None, 1, 1, 128)         393344    
                                                                 
 sequential_2 (Sequential)   (None, 1, 1, 128)         491648    
                                                                 
 flatten (Flatten)           multiple                  0         
                                                                 
 distance_layer (distanceLay  multiple                 0         
 er)                                           

# Model training and testing

In [59]:
batch_size = 64
accumulated_steps = 70

In [61]:

timestamp = str(int(time.time()))
# Output directory for models and summaries
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))

# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
    
checkpoint_prefix = os.path.join(checkpoint_dir, "model")

Writing to /big/xw384/schoolwork/NLP+DEEP LEARNING/Project/CASCADE/src/runs/1698891988



In [62]:
x_train.shape

(139232,)

In [63]:
y_train.shape

(139232, 2)

In [64]:
#ProtoCNN = tf.keras.models.load_model(os.path.join(out_dir,"my_weights-finetune.pt"))

In [65]:
#We use Adam optimizer with default learning rate 0.0001.
#Change this value based on your preference
out_dir = "/big/xw384/schoolwork/NLP+DEEP LEARNING/Project/CASCADE/src/runs/"+timestamp
opt = tf.keras.optimizers.Adam(learning_rate=.0001)
#ProtoCNN.compile(optimizer=opt, loss='categorical_crossentropy',metrics=['accuracy'])
criterion = keras.losses.CategoricalCrossentropy(from_logits=False, reduction=keras.losses.Reduction.SUM)

In [66]:
#loaded_object = pickle.load(open(os.path.join(out_dir,"optimizer.pt"), 'rb'))
#ProtoCNN.optimizer.set_weights(loaded_object)

In [67]:

# i = 0

# maxEvalRes = 0

# checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#     filepath = checkpoint_dir,  # Specify the path to save the checkpoints
#     save_weights_only=True,  # Save only the model weights
#     monitor='val_loss',  # Monitor the validation loss for saving the best weights
#     save_best_only=True,  # Save only the best weights based on the monitored metric
#     verbose=1  # Print a message when a checkpoint is saved
# )    
       

In [68]:
class DataLoader:
    def __init__(self, data, labels, batch_size=200, shuffle=True):
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(data))

       
        
    def __len__(self):
        # Returns the number of batches
        return int(np.ceil(len(self.data) / self.batch_size))
    
    def __iter__(self):
        # Shuffles the indexes if required
        if self.shuffle:
            np.random.shuffle(self.indexes)
        
        # Yield batches
        for i, start_idx in enumerate(range(0, len(self.data), self.batch_size)):
            end_idx = min(start_idx + self.batch_size, len(self.data))
            batch_indexes = self.indexes[start_idx:end_idx]
            yield i, self.data[batch_indexes].tolist(), self.labels[batch_indexes]

In [69]:
batch_size = 64
accumulation_steps = 70

train_accuracy_metric = tf.keras.metrics.CategoricalAccuracy(name='accuracy')
valid_accuracy_metric = tf.keras.metrics.CategoricalAccuracy(name='accuracy')

In [185]:
train_loader = DataLoader(x_train, y_train,batch_size=64)

In [186]:
len(train_loader)

2176

In [46]:
EPOCHS = 10

In [47]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
for epoch in range(EPOCHS):
    epoch_loss = 0
    accumulated_gradients = [tf.zeros_like(var) for var in ProtoCNN.trainable_variables]
    accumulated_loss = 0
    train_accuracy_metric.reset_state()
    for i, inputs, labels in train_loader: 
       
        
        with tf.GradientTape() as tape:
        
            predictions = ProtoCNN(inputs, training=True) 
            loss = criterion(labels, predictions)
            gradients = tape.gradient(loss, ProtoCNN.trainable_weights)

            break;

            # Accumulate gradients
            accumulated_gradients = [acc_grad + grad for acc_grad, grad in zip(accumulated_gradients, gradients)]
            accumulated_loss +=loss
            train_accuracy_metric.update_state(labels, predictions)
            # Apply gradients every accumulation_steps or at the last batch
        if (i + 1) % accumulation_steps == 0 or i == len(train_loader) - 1:
            accumulated_gradients = [grad / accumulation_steps for grad in accumulated_gradients]
            opt.apply_gradients(zip(accumulated_gradients, ProtoCNN.trainable_weights))
            accumulated_gradients = [tf.zeros_like(var) for var in ProtoCNN.trainable_variables]

            print(f"Epoch: {epoch}, Loss: {accumulated_loss.numpy()/(batch_size*accumulation_steps)} {batch_size*(i+1)}/139232 accuracy:{train_accuracy_metric.result().numpy()}")
            accumulated_loss = 0
        epoch_loss += loss


    valid_loss = 0
    y_true = None
    y_pred = None
    valid_accuracy_metric.reset_state()
    for i, inputs, labels in dev_loader: 
       
        
        with tf.GradientTape() as tape:
        
            predictions = ProtoCNN(inputs, training=True)
            loss = criterion(labels, predictions)
            #labels = tf.reshape(labels,[1,2])
            accumulated_loss +=loss
            valid_loss += loss
            valid_accuracy_metric.update_state(labels, predictions)
            
        if (i + 1) % accumulation_steps == 0 or i == len(train_loader) - 1:
           
            print(f"Epoch: {epoch}, Loss: {accumulated_loss.numpy()/(batch_size*accumulation_steps)} {batch_size*(i+1)}/139232 accuracy:{valid_accuracy_metric.result().numpy()}")
            accumulated_loss = 0
        epoch_loss += loss
                  
       

In [None]:
#ProtoCNN.fit(["I am a boy","She is a girl"], y_train, batch_size = 4096, epochs=2000, verbose=1, validation_data= (x_dev, y_dev))

In [None]:
#pickle.dump(opt.get_weights(), open(os.path.join(out_dir, 'optimizer.pt'), 'wb+'))

In [None]:
ProtoCNN.save_weights(os.path.join(out_dir,"my_weights-finetune.model"))

In [201]:
ProtoCNN.save(os.path.join(out_dir,"my_weights-finetune.pt"))

INFO:tensorflow:Assets written to: /big/xw384/schoolwork/NLP+DEEP LEARNING/Project/CASCADE/src/runs/1686708033/my_weights-finetune.pt/assets


In [None]:
ProtoCNN.load_weights(os.path.join(out_dir,"my_weights-finetune.model"))

In [81]:
ProtoCNN.compile()

In [178]:
ProtoCNN(x_text[:5])

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.3167334 , 0.68326664],
       [0.5730032 , 0.4269968 ],
       [0.31576294, 0.68423706],
       [0.51213247, 0.4878675 ],
       [0.588644  , 0.411356  ]], dtype=float32)>

In [57]:
def dev_step(x_batch, y_batch):
    """
    Evaluates model on a dev set
    """
    logits= ProtoCNN.predict(x_batch)

   

    prediction_losses = tf.keras.losses.categorical_crossentropy(y_batch, tf.nn.softmax(logits))

    loss =  prediction_losses 


    predictions = tf.argmax(logits, 1)
    correct_predictions = tf.equal(predictions, tf.argmax(y_batch, 1))
    
    return loss, correct_predictions
    

    

In [58]:
correct_predictions_test = None

In [54]:
from tqdm import tqdm

In [91]:

# Create testing dataset
test_loader = tf.data.Dataset.from_tensor_slices((x_test, y_test))


NameError: name 'x_test' is not defined

In [179]:
ProtoCNN.load_weights("runs/PROTOCNN/epoch_788/best_classifier.ckpt")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f4b40613340>

In [175]:
test_loader = DataLoader(np.array(test_x),np.array(test_y),batch_size=64)

In [74]:
test_accuracy_metric = tf.keras.metrics.CategoricalAccuracy(name='accuracy')

In [124]:
len(test_loader)

1011

In [123]:
accumulation_steps

70

In [118]:
test_loss = 0
test_accuracy_metric.reset_state()
for i, inputs, labels in test_loader: 
  
    
    predictions = ProtoCNN(inputs, training=False)
 
    test_accuracy_metric.update_state(labels, predictions)
        
    if (i + 1) % accumulation_steps == 0 or i == len(test_loader) - 1:
       
      print(test_accuracy_metric.result().numpy())
    

0.65870535
0.65982145
0.66220236
0.6640067
0.6635268
0.66343004
0.6619898
0.66330916
0.6624504
0.6618973
0.6627638
0.6635231
0.6644918
0.6643335
0.66472334


In [139]:
test_loss = 0
test_accuracy_metric.reset_state()
for i, inputs, labels in test_loader: 
  
    
    predictions = ProtoCNN(inputs, training=False)
 
    test_accuracy_metric.update_state(labels, predictions)
        
    if (i + 1) % accumulation_steps == 0 or i == len(test_loader) - 1:
       
      print(test_accuracy_metric.result().numpy())
    

0.63058037
0.634375
0.6334077
0.63097095
0.62995535
0.6307664
0.63115436
0.6301897
0.63149804
0.6321875
0.6326299
0.63238466
0.63174796
0.6328922
0.63286734


In [85]:
class CNN_Bert(Model):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
    """
    def __init__(self, sequence_length, num_classes, tokenizer,bert_model, embedding_size, filter_sizes, num_filters, l2_reg_lambda, dropout_keep_prob, k_protos, vect_size):


        super(CNN_Bert, self).__init__()
        self.k_protos = k_protos
        self.vect_size = vect_size
        self.full_distences = None
        self.full_onehot_distances = None
        self.embedding = bert_model
        self.tokenizer = tokenizer
        self.max_l =sequence_length

        RNN_CELL_SIZE = 128
        self.convs = []

        for filter_size in filter_sizes:
            conv_block = tf.keras.Sequential([
                layers.Conv2D(num_filters, (filter_size, embedding_size),
                              padding='valid', activation='relu'),
                layers.MaxPooling2D(pool_size=(sequence_length - filter_size + 1, 1),
                                    strides=(1,1), padding='valid')])
            self.convs.append(conv_block)

        self.flatten = layers.Flatten()
        self.distance_layer = distanceLayer()
        self.LSTM = LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True)
        self.dropout = layers.Dropout(dropout_keep_prob)  # keep_prob will be supplied by call argument
        self.fc = layers.Dense(num_classes,
                               kernel_regularizer=regularizers.l2(l2_reg_lambda),
                               activation='softmax')

    def init_prototypelayer(self, k_cents):
        self.proto_layer = prototypeLayer(self.k_protos, self.vect_size, k_cents)

    def call(self, x):


        # Embedding layer
        x = self.tokenizer(x, padding = "max_length", max_length=self.max_l, return_tensors ="tf",truncation = True )
        x = self.embedding(input_ids = x["input_ids"], attention_mask = x["attention_mask"], output_hidden_states =True)[0]
        x = tf.expand_dims(x, -1)


        # batch * 768
        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)

        # Combine all the pooled features


        x = tf.concat(pooled_outputs, axis=3)
        x = self.flatten(x)
        x = self.dropout(x, training=True)
        x = self.fc(x)


        return x

    def embed(self,x):
        # Embedding layer

        x = self.tokenizer(x, padding = "max_length", max_length=self.max_l, return_tensors ="tf",truncation = True  )
        x = self.embedding(input_ids = x["input_ids"], attention_mask = x["attention_mask"])[0]
        x = tf.expand_dims(x, -1) #2*200*768*


        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)

            pooled_outputs.append(c)

        # Combine all the pooled features
        x = tf.concat(pooled_outputs, axis=-1)
        x = self.flatten(x)

        return x

    def full_distance(self, x):

        x = tokenizer(x, padding = "max_length", max_length=200, return_tensors ="tf"  )
        x = self.embedding(x)
        x = tf.expand_dims(x, -1) #2*200*768*1

        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)

        # Combine all the pooled features
        x = tf.concat(pooled_outputs, axis=-1)
        x = self.flatten(x)

        x = tf.expand_dims(x, axis=0)
        full_distances, protos = self.proto_layer(x)

        return full_distances

    def one_hot_distance(self, x):

         # Embedding layer
        x = self.embedding(x)
        x = tf.expand_dims(x, -1)

        pooled_outputs = []
        for conv in self.convs:
            c = conv(x)
            pooled_outputs.append(c)
        return pooled_outputs

        # # Combine all the pooled features
        # x = tf.concat(pooled_outputs, axis=-1)
        # x = self.flatten(x)

        # x = tf.expand_dims(x, axis=0)
        # full_distances, protos = self.proto_layer(x)

        # dist_hot_vect = self.distance_layer(full_distances)

        # return dist_hot_vect

In [86]:
CNN = CNN_Bert(sequence_length=max_l,
    num_classes=len(y_train[0]),
    tokenizer = tokenizer,
    bert_model = bert_model,
    embedding_size=embedding_dim,
    filter_sizes=list(map(int, filter_sizes.split(","))),
    num_filters=num_filters,
    l2_reg_lambda=l2_reg_lambda,
    dropout_keep_prob = dropout_keep_prob,
    k_protos = k_protos,
    vect_size = vect_size)

In [101]:
CNN.load_weights("runs/10_31_baseline_bert/best_classifier.ckpt")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f4b3b082f20>

In [102]:
test_loss = 0
test_accuracy_metric.reset_state()
for i, inputs, labels in test_loader: 
  
    
    predictions = CNN(inputs, training=False)
 
    test_accuracy_metric.update_state(labels, predictions)
        
    if (i + 1) % accumulation_steps == 0 or i == len(test_loader) - 1:
       
      print(test_accuracy_metric.result().numpy())
    

0.68794644
0.68415177
0.68541664
0.6876674
0.68857145
0.6886533
0.6878508
0.6873884
0.6875
0.6887054
0.6885958
0.68816966
0.6878434
0.688345
0.68881637


In [167]:
file_path = 'losses.pkl'

# Open the file in binary read mode
with open(file_path, 'rb') as file:
    # Load the contents from the file and assign it to a variable
    # This will deserialize the object contained in the file
    your_data = pickle.load(file)


In [168]:
train_loss, dev_loss, train_acc, dev_acc,test_acc  = your_data

In [169]:
train_acc[0]

0.67638904

In [140]:
dev_acc[0]

0.78642535

In [170]:
test_acc

[(1, 0.7893638),
 (2, 0.79043084),
 (3, 0.7911731),
 (4, 0.7915133),
 (5, 0.79154426),
 (6, 0.79152876),
 (7, 0.79152876),
 (8, 0.79152876),
 (9, 0.79152876),
 (10, 0.79152876),
 (11, 0.79152876),
 (12, 0.79152876),
 (13, 0.79152876),
 (14, 0.79152876),
 (15, 0.79152876),
 (16, 0.79152876),
 (17, 0.79152876),
 (18, 0.79152876),
 (19, 0.79152876),
 (20, 0.79152876),
 (21, 0.79152876),
 (22, 0.79152876),
 (24, 0.79152876),
 (25, 0.79152876),
 (26, 0.79152876),
 (27, 0.7915133),
 (28, 0.7936783),
 (29, 0.7985804),
 (30, 0.80342066),
 (31, 0.805632),
 (32, 0.8077815),
 (33, 0.80897224)]

In [135]:
result = [x.numpy() for x in your_data[1]]

AttributeError: 'int' object has no attribute 'numpy'

In [197]:
min(result)

9611.796

In [198]:
np.argmax(np.asarray(result))

119

In [199]:
len(result)

1407

In [None]:
for x_batch, y_batch in tqdm(test_loader.batch(4096)):    
    test_loss, correct_predictions = dev_step(x_batch, y_batch)  
    if correct_predictions_test is None:
        correct_predictions_test = correct_predictions
    else:
        correct_predictions_test = tf.concat((correct_predictions_test, correct_predictions), axis=0)

test_accuracy = tf.reduce_mean(tf.cast(correct_predictions_test, tf.float32))
print("test accuracy {}".format(test_accuracy))

In [166]:
sarc_comments = [ rev['text'] for rev in revs if rev['label'][1]==1 ]

In [167]:
sarc_comments[500:600]

['looks like its time to convince the chineese that siberian tiger bones improve boners',
 'shutting down the government and the military at the same time !',
 "yay , now i do n't have to pay taxes !",
 'a modern day shakespeare',
 'god im done with this sub fuckin circle jerk shit',
 'looks like a fun format',
 'this film is a serious game changer !',
 "well , if you do n't have anything to hide then there is nothing to fear",
 'admissions are great , i get to skip the whole investigation and just ban you !',
 'as we all know , apple are the only ones who can innovate',
 'hahahaha handicapped people omg lol',
 "it 's worth every penny to bring jeeezus back",
 'i hear mien kamph is a very popular book',
 "well i'm glad to know that the government still considers shutting down voluntary marketplaces an essential service during the shutdown",
 'trade geno and sid for miller sounds like a great trade',
 'wow , never saw that coming',
 "if english was good enough for jesus , then it 's goo

In [170]:
#test giving a prediction value to an input
testS = ["i guess no one at google 's ever been on a plane and wanted to listen to their music library",
         "it 's like windows phone 7 and that worked great",
        'religion must have the answer',
        'until a republican does it'
        ]


In [171]:
x= ProtoCNN.embed(testS )

In [180]:
ProtoCNN(testS, training= False)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[0.38821465, 0.6117853 ],
       [0.46638945, 0.5336105 ],
       [0.3167334 , 0.68326664],
       [0.5046599 , 0.4953401 ]], dtype=float32)>

In [169]:
ProtoCNN.full_distance(x)

<tf.Tensor: shape=(1, 4, 10), dtype=float32, numpy=
array([[[ 2.7111988 ,  2.8435276 ,  2.8580358 ,  2.4625027 ,
          2.8837852 ,  3.2298265 ,  2.8851266 ,  2.779249  ,
          2.7834678 ,  2.695945  ],
        [13.555579  , 13.56708   , 13.608713  , 13.089714  ,
         13.671051  , 14.761423  , 14.198931  , 13.693032  ,
         13.776615  , 13.339384  ],
        [ 8.140151  ,  8.293659  ,  8.2329035 ,  7.875981  ,
          8.249426  ,  8.662914  ,  8.463434  ,  8.189972  ,
          8.333049  ,  7.9773016 ],
        [ 0.99792427,  1.0499238 ,  1.040509  ,  0.7297432 ,
          1.0358859 ,  1.2475713 ,  1.0940286 ,  1.0371547 ,
          1.0157193 ,  0.9604418 ]]], dtype=float32)>

In [170]:
ProtoCNN.one_hot_distance(x)

<tf.Tensor: shape=(1, 4, 10), dtype=float32, numpy=
array([[[0.        , 0.        , 0.        , 0.78172654, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.27009773, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.4549362 , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.9296246 , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ]]],
      dtype=float32)>

# Show prototypes

In [183]:
len(x_train)

139232

In [None]:
 vect = ProtoCNN.embed(inputs) 

In [191]:
sample_sentences_vects = []
sample_labels = []
for i, inputs, labels in train_loader: 
   
    vect = ProtoCNN.embed(inputs) 
    sample_sentences_vects.append(vect.numpy())
    sample_labels.extend(labels.tolist())

    if i ==200:
        break
       

In [193]:
sample_sent_vect = tf.concat(sample_sentences_vects, axis=0)

In [194]:
mapped_prototype = showPrototypes(sample_sentences,sample_sent_vect, sample_labels, k_protos=10,printOutput=False, k_closest_sents = 100)

NameError: name 'sample_y' is not defined

In [109]:
mapped_prototype[3]

[('these kids are going places in life', 0.3135772, 1),
 ('i hear there is even an uk one', 0.3172326, 1),
 ('who let her leave the house line that', 0.32124335, 1),
 ('that seems like someone i wanna hang out with', 0.32230243, 1),
 ("you could n't make up a title like that", 0.3228807, 1),
 ("someone 's moving house and home", 0.32551184, 1),
 ("he looks exactly like what i 'd imagine someone like this to look like",
  0.32663426,
  1),
 ('robin has been in every fe game since the first one', 0.32718822, 1),
 ('obby nothing gets me hot like o o', 0.32777935, 1),
 ('something for meth or something like that', 0.32828987, 1),
 ('i just love that it takes the video that long to get to the point',
  0.3287512,
  1),
 ("that looks more like it 's next to point", 0.32890695, 1),
 ('she really seems to be enjoying it', 0.33070856, 1),
 ('the one with the bear , i think', 0.33257347, 1),
 ('i think i had a seizure while reading this', 0.33272243, 1),
 ('funniest thing ive ever seen in my ent

In [110]:
mapped_prototype_150000 = mapped_prototype 

In [125]:
def find_similar_sentences(sent, mapped_prototype):
    sentence_embed = ProtoCNN.embed(np.expand_dims(sent, 0))
   
    protos = [x[0] for x in mapped_prototype]
    proto_embed = ProtoCNN.embed(encode(protos))
    distances = [(index, np.linalg.norm(embed - sentence_embed)) for index, embed in enumerate(proto_embed)]
    output = [mapped_prototype[x[0]] for x in sorted(distances, key= lambda x: x[1])]
    
    return output[:10]

In [150]:
find_similar_sentences(x[0], mapped_prototype[3])

[('better let as many of them into europe as possible', 0.3523066, 1),
 ('onlinebots living up to their name', 0.3604018, 1),
 ('everything happens for a reason', 0.3676815, 1),
 ("you could n't make up a title like that", 0.3228807, 1),
 ('why not put the full thing on it', 0.34032157, 1),
 ('really terrible stuff in there', 0.35132548, 1),
 ('you know i think this guy has a chance of making it', 0.34213576, 1),
 ('i came here to cringe not to think', 0.3506609, 1),
 ('deserves to crash with a shirt like that', 0.35434902, 1),
 ('your not supposed to mention that', 0.36335245, 1)]

In [149]:
find_similar_sentences(x[1], mapped_prototype[3])

[("i 've been thinking about this all day", 0.35759634, 1),
 ('by doing the same thing i do every night and day nothing', 0.3557903, 1),
 ('now that looks like a president i could have a beer with', 0.3606794, 1),
 ('this person is going to go far in life', 0.33753684, 1),
 ('a superior phone , like say , a galaxy would have been able to take that',
  0.36363792,
  1),
 ('burning man became terrible exactly the year after i went that one time',
  0.35728908,
  1),
 ('i cant stay and work here for ever', 0.35292253, 1),
 ('better let as many of them into europe as possible', 0.3523066, 1),
 ('sounds like jesus himself said this', 0.3607124, 1),
 ('should have shot him or strung him up from a tree just in case',
  0.35523936,
  1)]

In [147]:
find_similar_sentences(x[2], mapped_prototype[3])

[('can i request one for my school', 0.35782492, 1),
 ('your not supposed to mention that', 0.36335245, 1),
 ('i cant stay and work here for ever', 0.35292253, 1),
 ('sad thing is , i can actually belive this', 0.34167826, 1),
 ("you go first and then i 'll think about it", 0.36763626, 1),
 ('funniest thing ive ever seen in my entire life', 0.33317474, 1),
 ('you know i think this guy has a chance of making it', 0.34213576, 1),
 ('i just love that it takes the video that long to get to the point',
  0.3287512,
  1),
 ('everything happens for a reason', 0.3676815, 1),
 ("i do n't think i 'd take my salt any other way", 0.35186923, 1)]

In [174]:
testS[3]

'until a republican does it'

In [171]:
find_similar_sentences(x[3], mapped_prototype[3])

[('right that almost the same thing', 0.35840473, 1),
 ('while i read this during a shit', 0.34476843, 1),
 ('your not supposed to mention that', 0.36335245, 1),
 ('just the fucking way i like it', 0.3665198, 1),
 ("you could n't make up a title like that", 0.3228807, 1),
 ('sad thing is , i can actually belive this', 0.34167826, 1),
 ('something for meth or something like that', 0.32828987, 1),
 ('she really seems to be enjoying it', 0.33070856, 1),
 ('see what happens when you do spinning shit', 0.36211467, 1),
 ('that was me , i got high and started drawing on shit', 0.344531, 1)]

In [326]:
distances[1][0]

1

In [318]:
mapped_prototype[3][63]

("i 've been thinking about this all day", 0.35759634, 1)

In [294]:
proto3_embed list_c.index(max_val)

<tf.Tensor: shape=(100, 384), dtype=float32, numpy=
array([[0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.01237033],
       [0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.01237033],
       [0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.01237033],
       ...,
       [0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.01237033],
       [0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.01237033],
       [0.        , 0.        , 0.        , ..., 0.01747742, 0.        ,
        0.05296943]], dtype=float32)>

In [319]:
mapped_prototype[3]

[('these kids are going places in life', 0.3135772, 1),
 ('i hear there is even an uk one', 0.3172326, 1),
 ('who let her leave the house line that', 0.32124335, 1),
 ('that seems like someone i wanna hang out with', 0.32230243, 1),
 ("you could n't make up a title like that", 0.3228807, 1),
 ("someone 's moving house and home", 0.32551184, 1),
 ("he looks exactly like what i 'd imagine someone like this to look like",
  0.32663426,
  1),
 ('robin has been in every fe game since the first one', 0.32718822, 1),
 ('obby nothing gets me hot like o o', 0.32777935, 1),
 ('something for meth or something like that', 0.32828987, 1),
 ('i just love that it takes the video that long to get to the point',
  0.3287512,
  1),
 ("that looks more like it 's next to point", 0.32890695, 1),
 ('she really seems to be enjoying it', 0.33070856, 1),
 ('the one with the bear , i think', 0.33257347, 1),
 ('i think i had a seizure while reading this', 0.33272243, 1),
 ('funniest thing ive ever seen in my ent

In [279]:
mapped_prototype[0]

[('shrek , especially the first one', 0.5935609, 0),
 ("holy shit it 's already been a year \\?", 0.6024625, 0),
 ('brazilian in britain , probably both', 0.6050657, 0),
 ('the rifle from a regular store and my sister painted it', 0.60792977, 0),
 ('do you mind giving me the demo of that \\?', 0.60851675, 0),
 ('none of them can stop the time', 0.6088037, 0),
 ('anyone have a video of the incident \\?', 0.60930187, 0),
 ('one of the names is visible towards the bottom', 0.6107298, 0),
 ('give portland some love for once', 0.61119914, 0),
 ('make it twice as thick with a battery that lasts twice as long',
  0.6117877,
  0),
 ('he just got on a list by searching for that', 0.61233556, 0),
 ('16 is a worryingly high number', 0.6130593, 0),
 ("what 's the source for this \\?", 0.61373526, 0),
 ('hope you enjoy it other people are paying for it', 0.61425763, 0),
 ('did you even watch the video \\?', 0.6157763, 0),
 ('they were not going to gain anything anyway', 0.6159876, 0),
 ('the us cou

In [31]:
prototype_class_vec[:2,1]=1

In [32]:
prototype_class_vec[2:,0]=1

In [33]:
prototype_class_vec

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [23]:
# Case 1: labels with valid indices, but K > N
labels = torch.tensor([0, 0, 0, 1, 0, 0, 1])  # 7 indices, all within range


In [24]:
selected_prototypes = prototype_class_vec[:, labels]

In [25]:
selected_prototypes

tensor([[0., 0., 0., 1., 0., 0., 1.],
        [0., 0., 0., 1., 0., 0., 1.],
        [1., 1., 1., 0., 1., 1., 0.],
        [1., 1., 1., 0., 1., 1., 0.]])

In [34]:
prototypes_of_correct_class = tf.gather(prototype_class_vec, labels, axis=1)


In [35]:
prototypes_of_correct_class 

<tf.Tensor: shape=(4, 7), dtype=float64, numpy=
array([[0., 0., 0., 1., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 1.],
       [1., 1., 1., 0., 1., 1., 0.],
       [1., 1., 1., 0., 1., 1., 0.]])>

In [None]:
prototypes_of_correct_class = tf.transpose(prototypes_of_correct_class)

# Step 3: Calculate prototypes of wrong class
prototypes_of_wrong_class = 1 - prototypes_of_correct_class

In [37]:
distances = torch.rand(4, 7)

In [38]:
distances

tensor([[0.9815, 0.3509, 0.6898, 0.0900, 0.0014, 0.8479, 0.1243],
        [0.7572, 0.7420, 0.4903, 0.2274, 0.6909, 0.9754, 0.2643],
        [0.6819, 0.6446, 0.6939, 0.3473, 0.2511, 0.8586, 0.8949],
        [0.2355, 0.7581, 0.0550, 0.3438, 0.3973, 0.6611, 0.3795]])

In [39]:
prototypes_of_correct_class * distances

<tf.Tensor: shape=(4, 7), dtype=float64, numpy=
array([[0.        , 0.        , 0.        , 0.09001052, 0.        ,
        0.        , 0.12433195],
       [0.        , 0.        , 0.        , 0.22741288, 0.        ,
        0.        , 0.26425809],
       [0.68193752, 0.64455527, 0.69392854, 0.        , 0.25112051,
        0.85856164, 0.        ],
       [0.2355209 , 0.75813454, 0.0550034 , 0.        , 0.39734125,
        0.66105431, 0.        ]])>