# BART Model

A transformer model combining BERT's text comprehension and GPT's text generation, ideal for summarization, translation, and question-answering tasks.

In [18]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Initialize the BART tokenizer
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')

# Preprocessing function for tokenization
def preprocess_function(examples):
    inputs = []
    targets = []

    # Iterate over the batched data
    for i in range(len(examples['context'])):
        context = examples['context'][i]  # Access each context in the batch
        question = examples['question'][i]  # Access each question in the batch
        answer = examples['answer'][i]  # Access each answer in the batch

        # Combine the context and question as input
        inputs.append(f"Context: {context} Question: {question}")
        targets.append(answer)  # Use the answer as the target

    # Tokenize the inputs (contexts + questions) with padding and truncation
    model_inputs = tokenizer(
        inputs, max_length=1024, truncation=True, padding='max_length'  # Ensure uniform length
    )

    # Tokenize the targets (answers) with padding and truncation
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            targets, max_length=128, truncation=True, padding='max_length'  # Ensure uniform length
        )

    # Fix the nested list issue by using 'input_ids' directly
    model_inputs['labels'] = labels['input_ids']
    return model_inputs




In [19]:
from datasets import load_dataset

# Load the dataset and ensure caching is disabled
dataset = load_dataset('json', data_files={'train': 'sb1.json'})
# Apply the preprocessing function with caching completely disabled
tokenized_datasets = dataset.map(
    preprocess_function, 
    batched=True, 
    load_from_cache_file=False, 
    keep_in_memory=True  # Ensure the dataset is processed in memory without cache files
)



Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/47 [00:00<?, ? examples/s]



In [20]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Load the model and tokenizer from local paths
tokenizer = BartTokenizer.from_pretrained('bart-large')
model = BartForConditionalGeneration.from_pretrained('bart-large')




In [21]:
from transformers import Trainer, TrainingArguments

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',              # Output directory
    eval_strategy='epoch',         # Evaluate once per epoch
    learning_rate=5e-5,                  # Learning rate
    per_device_train_batch_size=4,       # Training batch size
    per_device_eval_batch_size=4,        # Evaluation batch size
    num_train_epochs=3,                  # Number of epochs
    weight_decay=0.01,                   # Weight decay
    save_steps=500,                      # Save every 500 steps
    logging_dir='./logs',                # Logging directory
    logging_steps=100                    # Log every 100 steps
)

# Initialize the Trainer with the model, tokenizer, and the training arguments
trainer = Trainer(
    model=model,                         # Use the model defined in your code
    args=training_args,                  # Use the training arguments defined above
    train_dataset=tokenized_datasets['train'],  # Use your tokenized training dataset
    eval_dataset=tokenized_datasets['train'],   # Evaluation dataset (could replace with a separate dataset)
    tokenizer=tokenizer                  # Use your tokenizer
)

# Start training the model
trainer.train()

# Save the fine-tuned model and tokenizer for future use
model.save_pretrained('./fine_tuned_bart_model')  # Save the model to the specified directory
tokenizer.save_pretrained('./fine_tuned_bart_model')  # Save the tokenizer

print("Training completed, and the fine-tuned BART model has been saved.")


Epoch,Training Loss,Validation Loss
1,No log,9.661262
2,No log,6.458136
3,No log,6.066216


Training completed, and the fine-tuned BART model has been saved.


Chat with your BART model! (type 'exit' to stop)


Context:  Which NFL team represented the AFC at Super Bowl 50?
Question:  Which NFL team represented the AFC at Super Bowl 50?


Answer: 



Context:  Superbowl
Question:  when is the superbowl?


Answer: 



Context:  exit
Question:  exit


Exiting chat.


In [5]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Step 1: Load the fine-tuned BART model and tokenizer
model = BartForConditionalGeneration.from_pretrained('./fine_tuned_bart_model')
tokenizer = BartTokenizer.from_pretrained('./fine_tuned_bart_model')

# Step 2: Define a chat function with debugging
def chat_with_model():
    print("Chat with your BART model! (type 'exit' to stop)")

    while True:
        # Get user input
        context = input("Context: ")
        question = input("Question: ")
        
        if context.lower() == 'exit' or question.lower() == 'exit':
            print("Exiting chat.")
            break
        
        # Prepare the input for the model
        input_text = f"Context: {context} Question: {question}"
        # print(f"Input Text: {input_text}")  # Debugging to check input format
        inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
        
        # Check if the inputs are tokenized correctly
        # print(f"Tokenized Input: {inputs}")  # Debugging to check tokenization

        # Generate the model's response
        outputs = model.generate(inputs['input_ids'], max_length=50, num_beams=5, early_stopping=True)
        
        # Decode and print the model's response
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Answer: {answer}\n")

# Step 3: Start the chat
chat_with_model()


Chat with your BART model! (type 'exit' to stop)


Context:  Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50
Question:  Which NFL team represented the AFC at Super Bowl 50?


Answer: Denver Broncos



Context:  Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.
Question:  which team represented AFC 


Answer: Denver Broncos



Context:  "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50."
Question:  What venue did Super Bowl 50 take place in?


Answer: Denver Broncos



Context:  Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50."
Question:  What venue did Super Bowl 50 take place in?


Answer: 



Context:  "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50."
Question:  What year did the Denver Broncos secure a Super Bowl title for the third time?"


Answer: Denver Broncos



Context:  "On May 21, 2013, NFL owners at their spring meetings in Boston voted and awarded the game to Levi's Stadium. The $1.2 billion stadium opened in 2014. It is the first Super Bowl held in the San Francisco Bay Area since Super Bowl XIX in 1985, and the first in California since Super Bowl XXXVII took place in San Diego in 2003."
Question:  Where did the spring meetings of the NFL owners take place?


Answer: 2014



Context:  exit
Question:  exit


Exiting chat.
