In [2]:

# import torch
# print(f"CUDA available: {torch.cuda.is_available()}")
# print(f"Number of GPUs: {torch.cuda.device_count()}")
# print(f"Current GPU: {torch.cuda.current_device()}")
# print(f"GPU Name: {torch.cuda.get_device_name(0)}")
import os
# Put the allocator option into the process environment *before* torch is imported,
# so it is already present when torch sets up CUDA.
# NOTE(review): presumably torch reads this variable when its CUDA allocator is
# initialised - confirm against the PyTorch memory-management notes.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
# Ask torch to release any cached CUDA memory before the heavy cells below run.
torch.cuda.empty_cache()
# Echo the value back to confirm it is really set in this kernel's environment.
print(os.getenv("PYTORCH_CUDA_ALLOC_CONF"))


expandable_segments:True


In [3]:
# Show the directory the kernel starts in (needed to reason about the relative chdir below).
%pwd

'd:\\Text_summarization\\notebook'

In [4]:
os.chdir("../")
%pwd

'd:\\Text_summarization'

In [5]:
# One-time environment setup, intentionally left disabled:
# !pip install accelerate
# !pip install transformers accelerate
# Import accelerate and print its version so the environment used for this run is recorded.
import accelerate
print(accelerate.__version__)

  from .autonotebook import tqdm as notebook_tqdm


1.6.0


In [6]:
# Re-check the working directory before the project-relative imports below.
%pwd
# Environment maintenance commands, intentionally left disabled:
# !pip install --upgrade transformers
# !pip uninstall transformers



'd:\\Text_summarization'

In [7]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle consumed by ``ModelTrainer``.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    The first three fields locate inputs/outputs; the remaining fields are
    forwarded by ``ModelTrainer.train`` to ``TrainingArguments`` under the same
    names.
    """
    root_dir: Path  # used as TrainingArguments.output_dir and as parent of the saved model/tokenizer
    data_path: Path  # dataset location handed to datasets.load_from_disk
    model_ckpt: Path  # checkpoint identifier handed to from_pretrained for tokenizer and model
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    weight_decay: float
    logging_steps: int
    eval_steps: int  # forwarded as TrainingArguments.eval_steps
    evaluation_strategy: str  # forwarded as TrainingArguments.evaluation_strategy
    save_steps: int  # forwarded as TrainingArguments.save_steps (typed int here, not float)
    gradient_accumulation_steps: int

In [8]:

from src.TEXT_SUMMARIZATION.constants import *
from src.TEXT_SUMMARIZATION.common import read_yaml, create_directories

In [9]:

class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs.

    Both files are parsed with ``read_yaml``; the parsed objects are used with
    attribute access (``self.config.artifacts_root`` etc.). On construction the
    artifacts root listed in the config is passed to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Parse both YAML files and prepare the artifacts root.

        Args:
            config_filepath: Path of the config YAML (defaults to ``CONFIG_FILE_PATH``).
            params_filepath: Path of the params YAML (defaults to ``PARAMS_FILE_PATH``).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Paths come from the ``model_trainer`` section of the config file and
        hyper-parameters from the ``TrainingArguments`` section of the params
        file. The stage's ``root_dir`` is passed to ``create_directories``
        before the config object is built.

        Returns:
            ModelTrainerConfig: populated, frozen configuration object.
        """
        stage_cfg = self.config.model_trainer
        train_params = self.params.TrainingArguments

        create_directories([stage_cfg.root_dir])

        # Location settings, read from the config section.
        path_kwargs = {
            "root_dir": stage_cfg.root_dir,
            "data_path": stage_cfg.data_path,
            "model_ckpt": stage_cfg.model_ckpt,
        }

        # Hyper-parameters share their name between the params file and the dataclass,
        # so they can be copied across by name (same order as the dataclass fields).
        hyperparam_names = (
            "num_train_epochs",
            "warmup_steps",
            "per_device_train_batch_size",
            "weight_decay",
            "logging_steps",
            "eval_steps",
            "evaluation_strategy",
            "save_steps",
            "gradient_accumulation_steps",
        )
        hyperparam_kwargs = {
            name: getattr(train_params, name) for name in hyperparam_names
        }

        return ModelTrainerConfig(**path_kwargs, **hyperparam_kwargs)

In [10]:
# !pip uninstall transformers
# !pip install transformers

from transformers import TrainingArguments,Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset,load_from_disk

In [11]:
class ModelTrainer:
    """Fine-tunes a seq2seq checkpoint on a dataset previously saved to disk.

    All settings come from a ``ModelTrainerConfig``; see ``train`` for the
    sequence of steps and the artefacts written.
    """

    def __init__(self,config:ModelTrainerConfig):
        """Store the training configuration.

        Args:
            config: Paths and hyper-parameters for this training run.
        """
        self.config = config

    def train(self):
        """Run fine-tuning and save the resulting model and tokenizer.

        Steps:
            1. Load the tokenizer and seq2seq model from ``config.model_ckpt``.
            2. Load the dataset from ``config.data_path`` (its ``"train"`` and
               ``"validation"`` splits are used).
            3. Train with ``transformers.Trainer`` using the configured arguments.
            4. Save the model to ``<root_dir>/pegasus-samsum-model`` and the
               tokenizer to ``<root_dir>/tokenizer``.

        Mixed precision (fp16) is enabled only when CUDA is available, so the
        same code also runs, in full precision, on a CPU-only machine.
        """
        # Function-scope import keeps this cell self-contained.
        import torch

        # Pick the device from what is actually available instead of hard-coding it;
        # fp16 mixed precision needs a CUDA device, so both decisions share one flag.
        use_cuda = torch.cuda.is_available()
        device = "cuda" if use_cuda else "cpu"

        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        seq2seq_data_collator = DataCollatorForSeq2Seq(tokenizer, model=model_pegasus)

        # loading data
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        trainer_args = TrainingArguments(
                output_dir=self.config.root_dir,
                num_train_epochs=self.config.num_train_epochs,
                warmup_steps=self.config.warmup_steps,
                per_device_train_batch_size=self.config.per_device_train_batch_size,
                weight_decay=self.config.weight_decay,
                logging_steps=self.config.logging_steps,
                evaluation_strategy=self.config.evaluation_strategy,
                eval_steps=self.config.eval_steps,
                save_steps=self.config.save_steps,
                gradient_accumulation_steps=self.config.gradient_accumulation_steps,
                fp16 = use_cuda)

        trainer = Trainer(model=model_pegasus, args=trainer_args,
                  tokenizer=tokenizer, data_collator=seq2seq_data_collator,
                  train_dataset=dataset_samsum_pt["train"],
                  eval_dataset=dataset_samsum_pt["validation"])

        trainer.train()

        ## Save model
        model_pegasus.save_pretrained(os.path.join(self.config.root_dir,"pegasus-samsum-model"))
        ## Save tokenizer
        tokenizer.save_pretrained(os.path.join(self.config.root_dir,"tokenizer"))

In [12]:
from src.TEXT_SUMMARIZATION.exception import CustomException
import sys
# Build the configuration, then run the training stage end to end.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # Keep the trainer under its own name so `model_trainer_config` stays the config object.
    model_trainer = ModelTrainer(config = model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise CustomException(e,sys) from e

  return t.to(
  0%|          | 0/613 [00:00<?, ?it/s]You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  2%|▏         | 10/613 [00:47<45:55,  4.57s/it] 

{'loss': 3.2334, 'learning_rate': 9e-07, 'epoch': 0.02}


  3%|▎         | 20/613 [01:29<37:42,  3.82s/it]

{'loss': 3.252, 'learning_rate': 1.9e-06, 'epoch': 0.03}


  5%|▍         | 30/613 [02:16<45:58,  4.73s/it]

{'loss': 3.2067, 'learning_rate': 2.9e-06, 'epoch': 0.05}


  7%|▋         | 40/613 [02:57<40:48,  4.27s/it]

{'loss': 3.1919, 'learning_rate': 3.8e-06, 'epoch': 0.07}


  8%|▊         | 50/613 [03:40<37:53,  4.04s/it]

{'loss': 3.1515, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.08}


 10%|▉         | 60/613 [04:20<38:28,  4.17s/it]

{'loss': 3.1133, 'learning_rate': 5.8e-06, 'epoch': 0.1}


 11%|█▏        | 70/613 [05:04<39:34,  4.37s/it]

{'loss': 3.0521, 'learning_rate': 6.800000000000001e-06, 'epoch': 0.11}


 13%|█▎        | 80/613 [05:46<35:41,  4.02s/it]

{'loss': 2.9431, 'learning_rate': 7.7e-06, 'epoch': 0.13}


 15%|█▍        | 90/613 [06:29<39:30,  4.53s/it]

{'loss': 2.8766, 'learning_rate': 8.7e-06, 'epoch': 0.15}


 16%|█▋        | 100/613 [07:15<38:39,  4.52s/it]

{'loss': 2.7153, 'learning_rate': 9.7e-06, 'epoch': 0.16}


 18%|█▊        | 110/613 [07:58<35:47,  4.27s/it]

{'loss': 2.7205, 'learning_rate': 1.0700000000000001e-05, 'epoch': 0.18}


 20%|█▉        | 120/613 [08:43<37:12,  4.53s/it]

{'loss': 2.7164, 'learning_rate': 1.1700000000000001e-05, 'epoch': 0.2}


 21%|██        | 130/613 [09:25<37:21,  4.64s/it]

{'loss': 2.5685, 'learning_rate': 1.27e-05, 'epoch': 0.21}


 23%|██▎       | 140/613 [10:07<31:08,  3.95s/it]

{'loss': 2.5047, 'learning_rate': 1.3700000000000001e-05, 'epoch': 0.23}


 24%|██▍       | 150/613 [10:50<32:47,  4.25s/it]

{'loss': 2.4804, 'learning_rate': 1.47e-05, 'epoch': 0.24}


 26%|██▌       | 160/613 [11:34<34:08,  4.52s/it]

{'loss': 2.5234, 'learning_rate': 1.5700000000000002e-05, 'epoch': 0.26}


 28%|██▊       | 170/613 [12:18<34:39,  4.69s/it]

{'loss': 2.4699, 'learning_rate': 1.6700000000000003e-05, 'epoch': 0.28}


 29%|██▉       | 180/613 [13:02<30:48,  4.27s/it]

{'loss': 2.4318, 'learning_rate': 1.77e-05, 'epoch': 0.29}


 31%|███       | 190/613 [13:48<31:42,  4.50s/it]

{'loss': 2.3957, 'learning_rate': 1.87e-05, 'epoch': 0.31}


 33%|███▎      | 200/613 [14:35<30:52,  4.49s/it]

{'loss': 2.2954, 'learning_rate': 1.97e-05, 'epoch': 0.33}


 34%|███▍      | 210/613 [15:19<28:23,  4.23s/it]

{'loss': 2.3249, 'learning_rate': 2.07e-05, 'epoch': 0.34}


 36%|███▌      | 220/613 [16:00<26:57,  4.12s/it]

{'loss': 2.3436, 'learning_rate': 2.1700000000000002e-05, 'epoch': 0.36}


 38%|███▊      | 230/613 [16:44<28:49,  4.52s/it]

{'loss': 2.2849, 'learning_rate': 2.2700000000000003e-05, 'epoch': 0.37}


 39%|███▉      | 240/613 [17:27<25:58,  4.18s/it]

{'loss': 2.2456, 'learning_rate': 2.37e-05, 'epoch': 0.39}


 41%|████      | 250/613 [18:11<28:48,  4.76s/it]

{'loss': 2.2613, 'learning_rate': 2.47e-05, 'epoch': 0.41}


 42%|████▏     | 260/613 [18:54<24:39,  4.19s/it]

{'loss': 2.3257, 'learning_rate': 2.57e-05, 'epoch': 0.42}


 44%|████▍     | 270/613 [19:35<24:28,  4.28s/it]

{'loss': 2.1215, 'learning_rate': 2.6700000000000002e-05, 'epoch': 0.44}


 46%|████▌     | 280/613 [20:18<26:01,  4.69s/it]

{'loss': 2.2589, 'learning_rate': 2.7700000000000002e-05, 'epoch': 0.46}


 47%|████▋     | 290/613 [21:10<27:30,  5.11s/it]

{'loss': 2.1684, 'learning_rate': 2.87e-05, 'epoch': 0.47}


 49%|████▉     | 300/613 [22:06<30:28,  5.84s/it]

{'loss': 2.203, 'learning_rate': 2.97e-05, 'epoch': 0.49}



 49%|████▉     | 300/613 [23:21<30:28,  5.84s/it]

{'eval_loss': 2.0000059604644775, 'eval_runtime': 74.5382, 'eval_samples_per_second': 10.974, 'eval_steps_per_second': 1.382, 'epoch': 0.49}


 51%|█████     | 310/613 [24:09<29:26,  5.83s/it]  

{'loss': 2.1843, 'learning_rate': 3.07e-05, 'epoch': 0.5}


 52%|█████▏    | 320/613 [25:03<25:12,  5.16s/it]

{'loss': 2.1576, 'learning_rate': 3.1700000000000005e-05, 'epoch': 0.52}


 54%|█████▍    | 330/613 [25:55<25:57,  5.50s/it]

{'loss': 2.1354, 'learning_rate': 3.27e-05, 'epoch': 0.54}


 55%|█████▌    | 340/613 [26:43<21:11,  4.66s/it]

{'loss': 2.1942, 'learning_rate': 3.3700000000000006e-05, 'epoch': 0.55}


 57%|█████▋    | 350/613 [27:23<17:41,  4.04s/it]

{'loss': 2.2168, 'learning_rate': 3.4699999999999996e-05, 'epoch': 0.57}


 59%|█████▊    | 360/613 [28:09<18:43,  4.44s/it]

{'loss': 2.1697, 'learning_rate': 3.57e-05, 'epoch': 0.59}


 60%|██████    | 370/613 [28:50<16:12,  4.00s/it]

{'loss': 2.197, 'learning_rate': 3.6700000000000004e-05, 'epoch': 0.6}


 62%|██████▏   | 380/613 [29:35<16:21,  4.21s/it]

{'loss': 2.1611, 'learning_rate': 3.77e-05, 'epoch': 0.62}


 64%|██████▎   | 390/613 [30:21<15:12,  4.09s/it]

{'loss': 2.2073, 'learning_rate': 3.8700000000000006e-05, 'epoch': 0.64}


 65%|██████▌   | 400/613 [31:03<14:56,  4.21s/it]

{'loss': 2.2156, 'learning_rate': 3.97e-05, 'epoch': 0.65}


 67%|██████▋   | 410/613 [31:45<14:04,  4.16s/it]

{'loss': 2.2005, 'learning_rate': 4.07e-05, 'epoch': 0.67}


 69%|██████▊   | 420/613 [32:30<13:43,  4.27s/it]

{'loss': 2.1527, 'learning_rate': 4.17e-05, 'epoch': 0.68}


 70%|███████   | 430/613 [33:12<11:55,  3.91s/it]

{'loss': 2.178, 'learning_rate': 4.27e-05, 'epoch': 0.7}


 72%|███████▏  | 440/613 [33:52<12:07,  4.21s/it]

{'loss': 2.1053, 'learning_rate': 4.3700000000000005e-05, 'epoch': 0.72}


 73%|███████▎  | 450/613 [34:33<10:17,  3.79s/it]

{'loss': 2.1065, 'learning_rate': 4.47e-05, 'epoch': 0.73}


 75%|███████▌  | 460/613 [35:17<12:13,  4.80s/it]

{'loss': 2.0824, 'learning_rate': 4.5700000000000006e-05, 'epoch': 0.75}


 77%|███████▋  | 470/613 [36:03<10:44,  4.51s/it]

{'loss': 2.1374, 'learning_rate': 4.6700000000000003e-05, 'epoch': 0.77}


 78%|███████▊  | 480/613 [36:46<09:36,  4.33s/it]

{'loss': 2.1301, 'learning_rate': 4.77e-05, 'epoch': 0.78}


 80%|███████▉  | 490/613 [37:30<09:06,  4.44s/it]

{'loss': 2.0638, 'learning_rate': 4.87e-05, 'epoch': 0.8}


 82%|████████▏ | 500/613 [38:15<08:25,  4.48s/it]

{'loss': 2.1168, 'learning_rate': 4.97e-05, 'epoch': 0.81}


 83%|████████▎ | 510/613 [38:54<06:48,  3.96s/it]

{'loss': 2.1537, 'learning_rate': 4.690265486725664e-05, 'epoch': 0.83}


 85%|████████▍ | 520/613 [39:41<07:07,  4.60s/it]

{'loss': 2.095, 'learning_rate': 4.247787610619469e-05, 'epoch': 0.85}


 86%|████████▋ | 530/613 [40:24<06:06,  4.42s/it]

{'loss': 2.0705, 'learning_rate': 3.8053097345132744e-05, 'epoch': 0.86}


 88%|████████▊ | 540/613 [41:08<05:30,  4.52s/it]

{'loss': 2.093, 'learning_rate': 3.3628318584070804e-05, 'epoch': 0.88}


 90%|████████▉ | 550/613 [41:52<04:15,  4.05s/it]

{'loss': 2.1517, 'learning_rate': 2.9203539823008852e-05, 'epoch': 0.9}


 91%|█████████▏| 560/613 [42:35<03:58,  4.50s/it]

{'loss': 2.0854, 'learning_rate': 2.4778761061946905e-05, 'epoch': 0.91}


 93%|█████████▎| 570/613 [43:17<02:59,  4.16s/it]

{'loss': 2.084, 'learning_rate': 2.0353982300884957e-05, 'epoch': 0.93}


 95%|█████████▍| 580/613 [43:58<02:06,  3.84s/it]

{'loss': 2.0964, 'learning_rate': 1.592920353982301e-05, 'epoch': 0.94}


 96%|█████████▌| 590/613 [44:39<01:31,  3.98s/it]

{'loss': 2.0728, 'learning_rate': 1.1504424778761062e-05, 'epoch': 0.96}


 98%|█████████▊| 600/613 [45:23<01:01,  4.70s/it]

{'loss': 2.0927, 'learning_rate': 7.079646017699115e-06, 'epoch': 0.98}


                                                 
 98%|█████████▊| 600/613 [46:22<01:01,  4.70s/it]

{'eval_loss': 1.8796818256378174, 'eval_runtime': 59.1282, 'eval_samples_per_second': 13.834, 'eval_steps_per_second': 1.742, 'epoch': 0.98}


100%|█████████▉| 610/613 [47:07<00:16,  5.44s/it]

{'loss': 2.1003, 'learning_rate': 2.6548672566371683e-06, 'epoch': 0.99}


100%|██████████| 613/613 [47:19<00:00,  4.63s/it]


{'train_runtime': 2839.3783, 'train_samples_per_second': 5.188, 'train_steps_per_second': 0.216, 'train_loss': 2.368479538119442, 'epoch': 1.0}


In [12]:
# Print where torch was imported from, to confirm which environment the kernel is using.
print(torch.__file__)


d:\Text_summarization\myenv\lib\site-packages\torch\__init__.py
