<a href="https://colab.research.google.com/github/vishalraut88/BERT_Models/blob/main/Huggingface_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

In [None]:
import pandas as pd
from tqdm.notebook import tqdm
import numpy as np

In [None]:
# Read the annotation file with explicit column names and use the 'id'
# column as the row index.
df = (
    pd.read_csv('smileannotationsfinal.csv', names=['id', 'text', 'category'])
    .set_index('id')
)

In [None]:
# Drop rows whose category contains a '|' (presumably rows annotated with
# several categories at once -- confirm against the dataset description).
# The pattern is a raw string so the backslash reaches the regex engine as an
# escaped pipe instead of being an invalid Python string escape.
df = df[~df.category.str.contains(r'\|')]

# Drop rows whose category is the literal 'nocode'.
df = df[df.category != 'nocode']

# Display how many rows remain for each category.
df.category.value_counts()

happy           1137
not-relevant     214
angry             57
surprise          35
sad               32
disgust            6
Name: category, dtype: int64

In [None]:
# Map every category name to an integer class id. Ids are assigned in the
# order the names are listed below (0 for 'happy' ... 5 for 'disgust').
label_dict = {
    category: class_id
    for class_id, category in enumerate(
        ('happy', 'not-relevant', 'angry', 'surprise', 'sad', 'disgust')
    )
}

In [None]:
# Add an integer 'label' column by looking each category up in label_dict;
# a category missing from label_dict raises KeyError.
df["label"] = df["category"].apply(label_dict.__getitem__)

In [None]:
from sklearn.model_selection import train_test_split

# Split the row ids (the DataFrame index) into train and validation sets,
# holding out 15% for validation. The split is stratified on the integer
# label and seeded so it is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    df.index.values,
    df["category"].values,
    test_size=0.15,
    random_state=17,
    stratify=df.label.values,
)

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.1-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m95.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m108.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.2-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.2/199.2 KB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.2 tokenizers-0.13.2 transformers-4.27.1


**Loading tokenizer and encoding the data**

In [None]:
from transformers import BertTokenizer

# Tokenizer for the 'bert-base-uncased' checkpoint, with lower-casing enabled.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
def encode_texts(texts, max_length=256):
    """Tokenize a batch of strings into padded PyTorch tensors.

    Args:
        texts: iterable of strings to encode.
        max_length: maximum number of tokens kept per example. Longer inputs
            are truncated; without ``truncation=True`` the tokenizer would
            not enforce this limit.

    Returns:
        The tokenizer's batch encoding, from which "input_ids" and
        "attention_mask" are read later in the notebook.
    """
    return tokenizer.batch_encode_plus(
        list(texts),
        add_special_tokens=True,
        padding=True,  # pad every example to the longest one in this batch
        truncation=True,
        max_length=max_length,
        return_attention_mask=True,
        return_tensors="pt",
    )


# Encode each split separately; rows are selected by the ids from the split.
train_encode = encode_texts(df.loc[X_train].text.values)
val_encode = encode_texts(df.loc[X_val].text.values)




In [None]:
from torch.utils.data.dataset import TensorDataset

# Training split: token ids, attention mask and integer labels, bundled so
# that each dataset item is an (input_ids, attention_mask, label) triple.
input_ids, attention_mask = train_encode["input_ids"], train_encode["attention_mask"]
label_id = torch.tensor(df.loc[X_train, "label"].values)
train_data = TensorDataset(input_ids, attention_mask, label_id)

# Validation split, built the same way (the helper names are reused).
input_ids, attention_mask = val_encode["input_ids"], val_encode["attention_mask"]
label_id = torch.tensor(df.loc[X_val, "label"].values)
val_data = TensorDataset(input_ids, attention_mask, label_id)


Initialize the BERT model and set the number of output labels

In [None]:
from transformers import BertForSequenceClassification

# Load the pretrained 'bert-base-uncased' weights into a sequence classifier
# with one output per entry of label_dict. Attention maps and hidden states
# are not returned.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    output_hidden_states=False,
    output_attentions=False,
    num_labels=len(label_dict),
)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Create DataLoaders to feed the training and validation data in batches

In [None]:
from torch.utils.data import DataLoader,RandomSampler,SequentialSampler

# Training batches are drawn in random order.
dataloader_train = DataLoader(
    dataset=train_data,
    sampler=RandomSampler(train_data),
    batch_size=4,
)

# Validation batches are read in dataset order: shuffling is not needed for
# evaluation, and a fixed order keeps predictions aligned with val_data.
dataloader_val = DataLoader(
    dataset=val_data,
    sampler=SequentialSampler(val_data),
    batch_size=32,
)

Optimizer and learning-rate scheduler

In [None]:
from transformers import AdamW,get_linear_schedule_with_warmup

In [None]:
# Number of training epochs. It is assigned here because the scheduler below
# needs it, and the notebook otherwise first assigns it in a later cell,
# which makes this cell fail with NameError on a fresh kernel.
epochs = 10

optimizer = AdamW(
    model.parameters(),
    lr=1e-5,
    eps=1e-8,
)

# Linear learning-rate schedule with no warmup steps, spanning every
# optimizer step of the run (batches per epoch * number of epochs).
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=len(dataloader_train) * epochs,
)



Training loop

In [None]:
import random

epochs = 10
seed_val = 17

# Seed every random number generator used here with the same value:
# Python's random, NumPy, PyTorch (CPU) and PyTorch (all CUDA devices).
for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed_val)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model's parameters to the selected device and report which one.
model.to(device)
print(device)

cuda


In [None]:
epochs = 10  # number of full passes the training loop makes over dataloader_train

In [None]:
# Fine-tune the model for `epochs` passes over the training data. After each
# epoch a checkpoint of the weights is saved and the mean training loss is
# reported.
for epoch in tqdm(range(1,epochs+1)):
  model.train()

  # Running sum of per-batch losses for this epoch.
  loss_train_total=0

  progress_bar =tqdm(dataloader_train,desc="Epoch"+str(epoch),
                     leave=False,
                     disable=False)

  for batch in progress_bar:
    # Clear gradients left over from the previous step.
    model.zero_grad()

    # Each batch is an (input_ids, attention_mask, labels) triple; move all
    # three tensors to the training device.
    batch=tuple(b.to(device) for b in batch)

    inputs={"input_ids":batch[0],
            "attention_mask":batch[1],
            "labels":batch[2]
            }

    outputs=model(**inputs)

    # The first output is used as the training loss.
    loss=outputs[0]
    batch_loss=loss.item()

    loss_train_total+=batch_loss

    loss.backward()

    # Clip the gradient norm to 1.0. The previous call used the deprecated
    # clip_grad_norm with arguments (1, 0), i.e. max_norm=1 and norm_type=0,
    # instead of a max_norm of 1.0 with the default norm type.
    torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)

    optimizer.step()
    scheduler.step()

    # Show this batch's loss as-is. It was previously divided by len(batch),
    # which is the length of the 3-element tuple, not the batch size.
    progress_bar.set_postfix({'training_loss':'{:.3f}'.format(batch_loss)})

  # Save the weights reached at the end of this epoch.
  torch.save(model.state_dict(), f'Model bert_ft{epoch}.model')

  tqdm.write(f'\n Epoch {epoch}')

  # Mean loss per batch over the epoch.
  loss_train_avg=loss_train_total/len(dataloader_train)  

  tqdm.write(f'Training loss:{loss_train_avg}')

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch1:   0%|          | 0/315 [00:00<?, ?it/s]

  torch.nn.utils.clip_grad_norm(model.parameters(),1,0)



 Epoch {epoch}
Training loss:0.7822221194467848


Epoch2:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.42901212840917563


Epoch3:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.2556466587094797


Epoch4:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.1620724475247756


Epoch5:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.10710600881674696


Epoch6:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.07725795255678278


Epoch7:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.05685019444171635


Epoch8:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.04900992205292578


Epoch9:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.04386781466725682


Epoch10:   0%|          | 0/315 [00:00<?, ?it/s]


 Epoch {epoch}
Training loss:0.042682409210367096
