In [None]:
!pip install pyarrow
! pip install datasets evaluate
!pip install transformers==4.28.0
!pip install timm
!pip install thefuzz
!pip install dedupe
!pip install pandas_dedupe

In [21]:
from huggingface_hub import notebook_login
import pandas as pd
from datasets import Dataset as HFDataset
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from google.colab import drive
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np
from thefuzz import fuzz
from thefuzz import process
import dedupe
import pandas_dedupe
import json
import warnings
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import Dataset, random_split
from torch.utils.data import ConcatDataset
from transformers import AdamW

In [3]:
complete_food_data = pd.read_excel('/content/data_bulk_raw.xlsx')

In [4]:
complete_food_data = complete_food_data.dropna(subset=['Food Product Category'])

In [5]:
columns_list = ['Product Type', 'Food Product Category']
renaming_mapping = {'Product Type': 'text', 'Food Product Category': 'labels'}
fpg_add_list =[]
fpg_rm_list =['turkey', 'bread grains & legumes']

columns_list1 = ['Product Type', 'Primary Food Product Category']
renaming_mapping1 = {'Product Type': 'text', 'Primary Food Product Category': 'labels'}
add_list1=['meat']
rm_list1=['meats', 'turkey']

columns_list2 = ['Product Type', 'Food Product Category', 'Basic Type']
renaming_mapping2 = {'Product Type': 'text', 'Food Product Category': 'food_category', 'Basic Type': 'labels'}
food_add_list =[]
food_rm_list =['turkey']




In [9]:
def preprocess_function(examples, max_length=128):
    """
    Preprocesses the input examples using the tokenizer with padding.

    Args:
        examples (dict): A dictionary containing the input examples.
        max_length (int): The maximum length to which sequences will be padded.

    Returns:
        dict: A dictionary containing the preprocessed examples.
    """
    # Tokenize and truncate the text data
    tokenized_inputs = tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_length,
        padding="max_length",  # Pad to max_length
        return_tensors="pt"  # Return PyTorch tensors
    )

    return tokenized_inputs

def compute_metrics(eval_pred):
    """
    Computes the evaluation metrics based on the evaluation predictions.

    Args:
        eval_pred (tuple): A tuple containing the evaluation predictions and labels.

    Returns:
        dict: A dictionary containing the computed evaluation metrics.
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

class CustomDataSet():

    def __init__(self, dataframe):
        '''
        attributes:
        self.data (dataframe)
        self.dataset_observations (int)
        self.matching_fucntion (function)
        self.match_threshold (int)
        self.labels_grouping (dict)
        self.labels_set (set)
        self.columns_list (list)
        self.renaming_mapping (dict)
        self.add_list (list)
        self.rm_list (list)
        self.label_correspondence (dict)
        self.reversed_label_correspondence (dict)
        self.label_mapping (???)
        self.id2label (json)
        self.label2id (json)
        '''
        self.data = dataframe
        self.dataset_observations = len(self.data)
        self.name = None

    def create_ss_column(self, new_column, extract_column):
        #complete_food_data[new_column] = complete_food_data[extract_column].str.lower().str.split(',').str[-1].str.strip().apply(lambda x: 'SS' if x == 'ss' else 'NA')
        complete_food_data[new_column] = complete_food_data[extract_column].apply(lambda x: 'SS' if isinstance(x, str) and (' ss' in x.lower() or ' ss ' in x.lower()) else 'NA')

    def create_basic_type_column(self, new_column, extract_column):
        self.data[new_column] = self.data[extract_column].str.lower().str.split(',').str[0].str.strip()

    def clean_data(self, columns_list: list, renaming_mapping: dict) -> pd.DataFrame:

        self.columns_list = columns_list
        self.renaming_mapping = renaming_mapping

        self.data = self.data[columns_list]
        self.data.rename(columns=renaming_mapping, inplace=True)

        for column in list(renaming_mapping.values()):
            self.data[column] = self.data[column].str.lower().str.replace(',', '').str.strip()
            self.data[column] = self.data[column].str.replace('prequalified: ', '')
            self.data.dropna(inplace=True)
            self.data.reset_index(drop=True, inplace=True)

        self.data = self.data.drop_duplicates().reset_index(drop=True)

    def group_labels(self, category: str, category_label: str, labels_column = 'labels', matching_function= fuzz.ratio, thresh = 75, topset = False) -> dict:

        self.matching_function = matching_function
        self.match_threshold = thresh

        #subset dataframe by category
        if category == None or category_label == None:
            column_name = self.data[[labels_column]]
        else:
            column_name = self.data[self.data[category] == category_label][[labels_column]]

        column_name['value_counts_column'] = column_name[labels_column].map(column_name[labels_column].value_counts())
        column_name = column_name.sort_values('value_counts_column', ascending=False).reset_index(drop=True)

        column_name = column_name[[labels_column]].values.flatten().tolist()
        print('number of non unique entries in column', len(column_name))

        #Create list of dictionaries of subgroups
        label_groups = {}
        for label_entry in column_name:
            counter = 0
            for label_group_key in label_groups.keys():
                if matching_function(label_entry, label_group_key) >= thresh:
                    counter += 1
                    if label_entry != label_group_key:
                        label_groups[label_group_key].append(label_entry)
            if counter == 0:
                label_groups[label_entry] = []

        if topset:
            return label_groups
        else:
            self.labels_grouping = label_groups

    def create_label_groupings_dictionary(self, topset_column_name='food_category'):
        self.label_groupings_dictionary = {}

        for category_label in self.data[topset_column_name].unique():
            self.label_groupings_dictionary[category_label] = self.group_labels(category=topset_column_name, category_label=category_label, topset=True)

    def clean_labels(self, add_list, rm_list, labels_to_correct ='food_category', inforich= False):

        def alter_food_set(labels_set, add_list, rm_list):
            print('number of elements in food set before additions', len(labels_set))
            labels_set.update(add_list)
            labels_set.difference_update(rm_list)

            print('number of elements in food set after additions', len(labels_set))

        def add_and_remove_keys_from_dict(input_dict, keys_to_add, keys_to_remove, default_value = []):
            for key in keys_to_remove:
                if key in input_dict:
                    del input_dict[key]

            for key in keys_to_add:
                if key not in input_dict:
                    input_dict[key] = default_value

        def find_best_match(misspelled_value, choices):
            best_match = process.extractOne(misspelled_value, choices)
            return best_match[0]

        self.add_list = add_list
        self.rm_list = rm_list
        self.labels_set = set(self.labels_grouping)

        alter_food_set(self.labels_set, self.add_list, self.rm_list)
        add_and_remove_keys_from_dict(self.labels_grouping, self.add_list, self.rm_list)

        self.data['corrected_labels'] = self.data['labels'].apply(lambda x: find_best_match(x, self.labels_set))

        if inforich:
            self.data[labels_to_correct] = self.data[labels_to_correct].apply(lambda x: find_best_match(x, self.labels_set))
            self.data = self.data[['text', 'food_category', 'labels']]
        else:
            self.data['corrected_labels'] = self.data['labels'].apply(lambda x: find_best_match(x, self.labels_set))
            self.data = self.data[['text', 'corrected_labels']]
            self.data.rename(columns={'corrected_labels': 'labels'}, inplace=True)

    def factorize_dataset(self):
        self.data['labels'], self.label_mapping = pd.factorize(self.data['labels'])
        label_correspondence = dict(zip(self.data['labels'].unique(), self.label_mapping))
        self.id2label = {int(key): value for key, value in label_correspondence.items()}
        self.label2id = {value: key for key, value in self.id2label.items()}

    def shuffle_dataset(self):
        self.data = shuffle(self.data)
        self.data.reset_index(drop=True, inplace=True)

def sweep_data(dataframe, add_list, rm_list, columns_list, renaming_mapping, category=None, category_label=None, thresh=85, match_function=fuzz.ratio, labels_column = 'labels', new=False):
    dataset = CustomDataSet(dataframe)

    if new == 'Basic Type':
        dataset.create_basic_type_column('Basic Type', 'Product Name')

    if new == 'Serving Size':
        dataset.create_ss_column('Serving Size', 'Product Name')

    dataset.clean_data(columns_list, renaming_mapping)
    dataset.group_labels(category=category ,category_label=category_label, thresh=thresh, matching_function=match_function, labels_column=labels_column)
    dataset.clean_labels(add_list, rm_list)
    dataset.factorize_dataset()
    return dataset

def create_food_dataset(dataframe=complete_food_data, columns_list=columns_list2, renaming_mapping=renaming_mapping2, add_list=food_add_list, rm_list = food_rm_list, category=None, category_label=None, thresh=85, match_function=fuzz.ratio, labels_column= 'food_category'):

    food_dataset = CustomDataSet(dataframe)
    food_dataset.clean_data(columns_list, renaming_mapping)
    food_dataset.group_labels(category=category ,category_label=category_label, thresh=thresh, matching_function=match_function, labels_column= labels_column)
    food_dataset.clean_labels(add_list, rm_list, inforich=True)

    return food_dataset

def create_datasets(food_dataset):
    DataSets_list = []
    DataSet2id = {}
    for index, name in enumerate(food_dataset.labels_set):
        dataset = CustomDataSet(food_dataset.data[food_dataset.data['food_category'] == list(food_dataset.labels_set)[index]])
        dataset.name = name
        dataset.data.name = name
        DataSets_list.append(dataset)
        DataSet2id[name] = index

    return DataSets_list, DataSet2id

def clean_datasets(DataSets_list, DataSet2id, total_add_list, total_rm_list, new):
    cleaned_datasets = []
    ClnDataSet2id = {}
    for index, dataset in enumerate(DataSets_list):
        test_columns_list = ['text', 'labels']
        test_renaming_mapping = {}
        cleaned_dataset = sweep_data(dataset.data, total_add_list[DataSet2id[dataset.name]], total_rm_list[DataSet2id[dataset.name]], test_columns_list, test_renaming_mapping, new)
        cleaned_dataset.name = dataset.name
        cleaned_dataset.data.name = dataset.name
        cleaned_datasets.append(cleaned_dataset)
        ClnDataSet2id[dataset.name] = index

    return cleaned_datasets, ClnDataSet2id



In [10]:
ss_dataset = CustomDataSet(complete_food_data)
ss_dataset.create_ss_column('Serving Size', 'Product Name')
ss_dataset.clean_data(['Product Type', 'Serving Size'], {'Product Type': 'text', 'Serving Size': 'labels'})
ss_dataset.factorize_dataset()
ss_dataset.shuffle_dataset()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data.rename(columns=renaming_mapping, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column] = self.data[column].str.lower().str.replace(',', '').str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column] = self.data[column].str.replace('prequalified: ', '')
A value is trying to be set on a copy of a slice f

In [11]:
fpg = sweep_data(complete_food_data, fpg_add_list, fpg_rm_list, columns_list, renaming_mapping, new='Basic Type')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data.rename(columns=renaming_mapping, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column] = self.data[column].str.lower().str.replace(',', '').str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column] = self.data[column].str.replace('prequalified: ', '')
A value is trying to be set on a copy of a slice f

number of non unique entries in column 80894
number of elements in food set before additions 27
number of elements in food set after additions 26


In [12]:
data_min_len = min(len(fpg.data), len(ss_dataset.data))
data_min_len

80894

Don't drop empty rows yet.

In [15]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [48]:
food_dataset = HFDataset.from_pandas(fpg.data.head(data_min_len))
food_dataset = food_dataset.shuffle()
food_dataset = food_dataset.train_test_split(test_size=0.15)
tokenized_fds = food_dataset.map(preprocess_function, batched=True)
tokenized_fds = tokenized_fds.remove_columns(['text'])
tokenized_fds.set_format("torch")

single_dataset = HFDataset.from_pandas(ss_dataset.data.head(data_min_len))
single_dataset = single_dataset.shuffle()
single_dataset = single_dataset.train_test_split(test_size=0.15)
tokenized_single = single_dataset.map(preprocess_function, batched=True)
tokenized_single = tokenized_single.remove_columns(['text'])
tokenized_single.set_format("torch")

Map:   0%|          | 0/68759 [00:00<?, ? examples/s]

Map:   0%|          | 0/12135 [00:00<?, ? examples/s]

Map:   0%|          | 0/68759 [00:00<?, ? examples/s]

Map:   0%|          | 0/12135 [00:00<?, ? examples/s]

In [31]:
len(tokenized_fds['train']) == len(tokenized_single['train'])


True

In [29]:
# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased', )

# Freeze the BERT encoder layers
for param in bert_model.parameters():
    param.requires_grad = False

# Define two separate classification heads
class ClassificationHead(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ClassificationHead, self).__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        return self.fc(x)

# Combine BERT encoder and classification heads
class BertWithTwoHeads(nn.Module):
    def __init__(self, bert_model, classification_head1, classification_head2):
        super(BertWithTwoHeads, self).__init__()
        self.bert = bert_model
        self.classification_head1 = classification_head1
        self.classification_head2 = classification_head2

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output  # Use the [CLS] token representation

        logits1 = self.classification_head1(pooled_output)
        logits2 = self.classification_head2(pooled_output)

        return logits1, logits2

# Define the number of classes for each classification task
fpg_num_classes = int(len(fpg.id2label))
ss_num_classes = int(len(ss_dataset.id2label))

# Create instances of the classification heads
fpg_classification_head = ClassificationHead(bert_model.config.hidden_size, fpg_num_classes)
ss_classification_head = ClassificationHead(bert_model.config.hidden_size, ss_num_classes)

# Create the final model
bert_with_two_heads = BertWithTwoHeads(bert_model, fpg_classification_head, ss_classification_head)

# Example input
input_text = "beef patty 2 oz ss"
input_ids = tokenizer.encode(input_text, add_special_tokens=True, padding='max_length', max_length=128, truncation=True, return_tensors='pt')
attention_mask = input_ids != tokenizer.pad_token_id

# Forward pass through the model
logits1, logits2 = bert_with_two_heads(input_ids, attention_mask)

print("Logits for Classification Head 1:", logits1)
print("Logits for Classification Head 2:", logits2)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Logits for Classification Head 1: tensor([[-0.3851,  0.4425,  0.1774, -0.3909, -0.1325, -0.0766, -0.5688,  0.3749,
          0.0725,  0.2132, -0.1242, -0.2107,  0.5079, -0.0280, -0.1563,  0.4236,
         -0.0584,  0.1004,  0.2153, -0.5839,  0.1580, -0.2079, -0.3218, -0.2741,
         -0.6136, -0.2210]], grad_fn=<AddmmBackward0>)
Logits for Classification Head 2: tensor([[-0.4837, -0.1625]], grad_fn=<AddmmBackward0>)


In [58]:
from transformers import BertModel, BertTokenizer
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import Dataset, random_split
from torch.utils.data import ConcatDataset
from transformers import AdamW

# Define your datasets
train_dataset1 = tokenized_fds['train']
train_dataset2 = tokenized_single['train']
val_dataset1 = tokenized_fds['test']
val_dataset2 = tokenized_single['test']

# # Calculate the number of dummy examples needed for padding the training datasets
# max_train_dataset_size = max(len(train_dataset1), len(train_dataset2))
# num_dummy_examples1 = max_train_dataset_size - len(train_dataset1)
# num_dummy_examples2 = max_train_dataset_size - len(train_dataset2)

# # Calculate the number of dummy examples needed for padding the validation datasets
# max_val_dataset_size = max(len(val_dataset1), len(val_dataset2))
# val_num_dummy_examples1 = max_val_dataset_size - len(val_dataset1)
# val_num_dummy_examples2 = max_val_dataset_size - len(val_dataset2)

# # Create a Dataset class for padding
# class PaddingDataset(Dataset):
#     def __init__(self, original_dataset, num_dummy_examples):
#         self.original_dataset = original_dataset
#         self.num_dummy_examples = num_dummy_examples

#     def __len__(self):
#         return self.num_dummy_examples

#     def __getitem__(self, idx):
#         # You can return any dummy example here as needed
#         # For simplicity, we'll return zeros for input_ids, attention_mask, and labels
#         return {'text': '0', 'token_type_ids': [0] * max_length, 'input_ids': [0] * max_length, 'attention_mask': [0] * max_length, 'labels': 0} ##### Check if this 0 causes issues later

# # Create padded datasets for training
# padded_train_dataset1 = ConcatDataset([train_dataset1, PaddingDataset(train_dataset1, num_dummy_examples1)])
# padded_train_dataset2 = ConcatDataset([train_dataset2, PaddingDataset(train_dataset2, num_dummy_examples2)])

# # Create padded datasets for validation
# padded_val_dataset1 = ConcatDataset([val_dataset1, PaddingDataset(val_dataset1, val_num_dummy_examples1)])
# padded_val_dataset2 = ConcatDataset([val_dataset2, PaddingDataset(val_dataset2, val_num_dummy_examples2)])

# # Create DataLoaders with the same batch size for training and validation
# batch_size = 32  # Set your desired batch size
# train_dataloader1 = DataLoader(padded_train_dataset1, batch_size=batch_size, shuffle=True)
# train_dataloader2 = DataLoader(padded_train_dataset2, batch_size=batch_size, shuffle=True)
# val_dataloader1 = DataLoader(padded_val_dataset1, batch_size=batch_size, shuffle=False)
# val_dataloader2 = DataLoader(padded_val_dataset2, batch_size=batch_size, shuffle=False)
max_length = 128

batch_size = 32
#data_collator = DataCollatorWithPadding(tokenizer, max_length=max_length, padding=True)
train_dataloader1 = DataLoader(train_dataset1, batch_size=batch_size, shuffle=True)
train_dataloader2 = DataLoader(train_dataset2, batch_size=batch_size, shuffle=True)
val_dataloader1 = DataLoader(val_dataset1, batch_size=batch_size, shuffle=True)
val_dataloader2 = DataLoader(val_dataset2, batch_size=batch_size, shuffle=True)

# Define the model and optimizer
model = bert_with_two_heads  # Your previously defined model
optimizer = AdamW(model.parameters(), lr=2e-5)  # You can adjust the learning rate

# Loss function for each classification task
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.CrossEntropyLoss()

# Training loop
num_epochs = 5  # You can adjust the number of epochs

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss1 = 0.0
    total_loss2 = 0.0

    for batch1, batch2 in zip(train_dataloader1, train_dataloader2):

          input_ids1 = batch1['input_ids'].to(device)
          attention_mask1 = batch1['attention_mask'].to(device)
          labels1 = batch1['labels'].to(device)

          input_ids2 = batch2['input_ids'].to(device)
          attention_mask2 = batch2['attention_mask'].to(device)
          labels2 = batch2['labels'].to(device)

          optimizer.zero_grad()

          logits1, logits2 = model(input_ids1, attention_mask1)

          loss1 = criterion1(logits1, labels1)
          loss2 = criterion2(logits2, labels2)

          total_loss1 += loss1.item()
          total_loss2 += loss2.item()

          loss = loss1 + loss2
          loss.backward()

          optimizer.step()

          # print('BATCH 1', batch1)
          # print('BATCH 2', batch2)

    avg_loss1 = total_loss1 / len(train_dataloader1)
    avg_loss2 = total_loss2 / len(train_dataloader2)

    print(f"Epoch {epoch + 1}/{num_epochs}")
    print(f"Classification 1 Loss: {avg_loss1:.4f}")
    print(f"Classification 2 Loss: {avg_loss2:.4f}")




Epoch 1/5
Classification 1 Loss: 2.4053
Classification 2 Loss: 0.3818
Epoch 2/5
Classification 1 Loss: 2.3894
Classification 2 Loss: 0.3817


KeyboardInterrupt: ignored

In [62]:
#Evaluation loop
model.eval()
with torch.no_grad():
    # Define evaluation metrics for each task
    correct1 = 0
    total1 = 0
    correct2 = 0
    total2 = 0

    for batch1, batch2 in zip(val_dataloader1, val_dataloader2):
        input_ids1 = batch1['input_ids'].to(device)
        attention_mask1 = batch1['attention_mask'].to(device)
        labels1 = batch1['labels'].to(device)

        input_ids2 = batch2['input_ids'].to(device)
        attention_mask2 = batch2['attention_mask'].to(device)
        labels2 = batch2['labels'].to(device)

        logits1, logits2 = model(input_ids1, attention_mask1)

        _, predicted1 = torch.max(logits1, 1)
        _, predicted2 = torch.max(logits2, 1)

        total1 += labels1.size(0)
        correct1 += (predicted1 == labels1).sum().item()

        total2 += labels2.size(0)
        correct2 += (predicted2 == labels2).sum().item()

    accuracy1 = correct1 / total1
    accuracy2 = correct2 / total2

    print(f"Classification 1 Accuracy: {accuracy1:.4f}")
    print(f"Classification 2 Accuracy: {accuracy2:.4f}")

Classification 1 Accuracy: 0.2966
Classification 2 Accuracy: 0.8689


In [63]:
model.parameters

<bound method Module.parameters of BertWithTwoHeads(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerN

In [73]:
from torch.nn.functional import softmax

# Example input
input_text = "Salmon single serve"
input_ids = tokenizer.encode(input_text, add_special_tokens=True, padding='max_length', max_length=128, truncation=True, return_tensors='pt').to(device)
attention_mask = (input_ids != tokenizer.pad_token_id).to(device)

# Forward pass through the model
logits1, logits2 = model(input_ids, attention_mask)

print("Logits for Classification Head 1:", logits1)
print("Logits for Classification Head 2:", logits2)

# Apply softmax to logits to get probabilities
probs1 = softmax(logits1, dim=1)
probs2 = softmax(logits2, dim=1)

# Get the predicted class for each head
_, predicted_class1 = torch.max(probs1, dim=1)
_, predicted_class2 = torch.max(probs2, dim=1)

# Print the predicted classes
print("Predicted Class for Classification Head 1:", predicted_class1.item())
print("Predicted Class for Classification Head 2:", predicted_class2.item())

Logits for Classification Head 1: tensor([[-0.1957,  0.3841,  1.5266, -0.0807,  0.7579, -0.3416, -4.2230,  0.2675,
         -0.7639,  0.5745,  0.7775, -3.3818, -1.1717, -1.0990, -1.7333, -2.0349,
         -1.5744, -0.9057, -1.3449, -2.6572, -1.7468, -1.6396, -1.0362, -0.0685,
         -2.4569, -3.8841]], device='cuda:0', grad_fn=<AddmmBackward0>)
Logits for Classification Head 2: tensor([[ 0.7970, -1.2909]], device='cuda:0', grad_fn=<AddmmBackward0>)
Predicted Class for Classification Head 1: 2
Predicted Class for Classification Head 2: 0


In [74]:
print(fpg.id2label[predicted_class1.item()])
print(ss_dataset.id2label[predicted_class2.item()])



condiments & snacks
na
