In [2]:
import transformers 
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments,get_scheduler
from torch import nn, optim 
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
import os
import torch
from tqdm import tqdm
from accelerate import Accelerator

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = AutoModelForCausalLM.from_pretrained("heegyu/gpt2-emotion")

In [4]:
tokenizer = AutoTokenizer.from_pretrained('heegyu/gpt2-emotion')

In [5]:
class movRevDataSet(Dataset):
    def __init__(self,root_dir,transform = None):
        self.root_dir = root_dir 
        self.transform = transform
        self.file_list = os.listdir(root_dir)
    def __len__(self):
        return len(self.file_list)
    def __getitem__(self, idx):
        file_name = self.file_list[idx]
        file_path = os.path.join(self.root_dir, file_name)
        with open(file_path, 'r', encoding = 'utf-8') as file:
            txt = file.read()
#         label = int(file_name.split('_')[1].split('.')[0])
        return txt

In [6]:
trainDataRoot = './aclImdb/train/merged'
testDataRoot = './aclImdb/test/merged'

In [7]:
movRevTrainDS = movRevDataSet(root_dir=trainDataRoot)
movRevTrainDS, movRevValDS = torch.utils.data.random_split(movRevTrainDS,[0.8,0.2])
movRevTestDS = movRevDataSet(root_dir= testDataRoot)

In [8]:
movRevTrainDL= DataLoader(movRevTrainDS, batch_size = 8, shuffle= True, )
movRevValDL = DataLoader(movRevValDS, batch_size =8, shuffle = True, )
movRevTestDL = DataLoader(movRevTestDS, batch_size = 8,)

In [9]:
for i, (data) in enumerate(movRevTrainDL):
        print(f"Sample {i+1}:")
        print(data)
#         print("Label:", label)
        if i == 4:
            break

Sample 1:
['A missed train. A wrong phone number. An extra cup of coffee. What happens to those around you when you make a seemingly innocuous decision? Most people don\'t give it a thought as they absorbed in their own thoughts and actions.<br /><br />"Happenstance" tells the story of the interrelations and cause-and-effect of the mundane as it pertains to a group of normal Parisian folk. It has all the components of what passes for contemporary theater, with the full cast of the dysfunctional and disillusioned.<br /><br />There\'s a cheating husband, an illegal immigrant, a classic slacker, a pickpocket, a crazy grandmother, an annoying girlfriend, a selfish roommate, and a homeless man. Audrey Tautou serves as the erstwhile protagonist (in the sense that she\'s on camera as much as anyone else and opens and closes the film) and normal girl who just can\'t seem to find the right rhythm in her life.<br /><br />She learns at the beginning of her day from a stranger on a train what her 

In [10]:
optimizer = AdamW(model.parameters(),lr=2e-5)
num_epochs = 30
num_training_steps = num_epochs * len(movRevTrainDL)
lr_scheduler = get_scheduler(
    "linear",
    optimizer = optimizer,
    num_warmup_steps=1000,
    num_training_steps=num_training_steps
)

In [11]:
progress_bar = tqdm(range(num_training_steps))

  0%|                                                 | 0/75000 [00:00<?, ?it/s]

In [12]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
model.to(device)
print(model.device)

mps:0


In [13]:
model.train()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [18]:
pad_idx = tokenizer.pad_token_id
criterion = nn.CrossEntropyLoss(ignore_index = pad_idx)

def loss_func(model, DL, criterion, optimizer = None, scheduler = None):
    N = len(DL.dataset)
    rloss = 0 
    for texts in tqdm(DL, leave=False):
        texts = [s for s in texts]
        batch_tokenized = tokenizer(texts ,max_length=1024,padding="max_length", truncation = True, return_tensors='pt').input_ids.to(device)
        batch_tokenized[:, -1] = 50256
        # print(batch_tokenized)
        output = model(batch_tokenized[:,:-1])
        logit= outputs.logits
        predicted_token_ids = torch.argmax(logits, dim = -1)
        print(predicted_token_ids)
        

loss_func(model, movRevTrainDL, criterion)
        




        
    


  0%|                                                  | 0/2500 [00:00<?, ?it/s][A
                                                                                [A

RuntimeError: MPS backend out of memory (MPS allocated: 81.29 GB, other allocations: 174.73 MB, max allowed: 81.60 GB). Tried to allocate 383.25 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [None]:

for epoch in range(num_epochs):
    for batch in movRevTrainDL:
        input_tokens = batch[0].to(device)
        attention_mask = batch[1].to(device)
        outputs = model(input_tokens, attention_mask=attention_mask)
        print(outputs)

        