In [None]:
pip install pandas



In [None]:
pip install transformers

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
import re
import string
from collections import Counter  # Import the Counter class

import pandas as pd
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer, pipeline

# Alternative data source: the SQuAD dataset hosted on Hugging Face.
# This notebook reads local CSV files instead.
# dataset = load_dataset("squad")

# Read the train / test / validation splits from CSV into dataframes.
train_df, test_df, validation_df = (
    pd.read_csv(csv_path)
    for csv_path in ("train.csv", "test.csv", "validation.csv")
)

# Pre-trained DistilBERT question-answering checkpoint; the tokenizer and the
# model weights are both loaded from the same checkpoint name.
model_name = "distilbert-base-cased-distilled-squad"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForQuestionAnswering.from_pretrained(model_name)

# Bundle the model and tokenizer into a question-answering pipeline; this is
# the object evaluate_model calls to produce predictions.
question_answering = pipeline(
    task="question-answering",
    model=model,
    tokenizer=tokenizer,
)

def evaluate_model(model, eval_df, context_col="context", qa_pipeline=None):
    """Score question-answering predictions against the reference answers.

    Each question is answered from that row's own context passage (not from a
    fixed string), and the predicted answer is compared with the reference
    answer using ``squad_f1`` and ``squad_em``. The averages over all rows are
    printed and returned.

    Args:
        model: Kept so existing ``evaluate_model(model, df)`` calls still work;
            it is not used directly. Predictions come from ``qa_pipeline``.
        eval_df: DataFrame with ``question`` and ``answer_text`` columns plus
            the context column named by ``context_col``.
        context_col: Name of the column holding the passage to extract the
            answer from. NOTE(review): the default ``"context"`` is an
            assumption about the CSV header; confirm or pass the real name.
        qa_pipeline: Callable invoked as
            ``qa_pipeline(question=..., context=...)`` that returns a mapping
            with an ``"answer"`` key. Defaults to the module-level
            ``question_answering`` pipeline.

    Returns:
        Tuple ``(avg_f1, avg_em)``; ``(0.0, 0.0)`` when ``eval_df`` has no rows.

    Raises:
        KeyError: If ``eval_df`` lacks one of the required columns.
    """
    required_cols = ("question", "answer_text", context_col)
    missing_cols = [col for col in required_cols if col not in eval_df.columns]
    if missing_cols:
        # Fail before running any inference rather than part-way through the loop.
        raise KeyError(f"eval_df is missing required column(s): {missing_cols}")

    total_examples = len(eval_df)
    if total_examples == 0:
        # Nothing to average; avoid dividing by zero below.
        print("No examples to evaluate.")
        return 0.0, 0.0

    if qa_pipeline is None:
        qa_pipeline = question_answering

    total_f1 = 0.0
    total_em = 0.0
    rows = zip(eval_df["question"], eval_df[context_col], eval_df["answer_text"])
    for question, context, answer in rows:
        # Answer the question from this row's passage.
        prediction = qa_pipeline(question=question, context=context)

        total_f1 += squad_f1(prediction["answer"], answer)
        total_em += squad_em(prediction["answer"], answer)

    avg_f1 = total_f1 / total_examples
    avg_em = total_em / total_examples

    print(f"Average F1 Score: {avg_f1}")
    print(f"Average EM Score: {avg_em}")
    return avg_f1, avg_em

def _normalize_answer(text):
    """Lowercase, strip punctuation, drop the articles a/an/the, collapse whitespace."""
    text = str(text).lower()
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())


def squad_f1(pred, truth):
    """Token-level F1 between a predicted and a reference answer.

    Both strings are normalized first (case, punctuation, articles and extra
    whitespace are ignored), then F1 is computed over the multiset overlap of
    their whitespace-separated tokens.

    Args:
        pred: Predicted answer text.
        truth: Reference answer text.

    Returns:
        A float in ``[0.0, 1.0]``. If either answer has no tokens after
        normalization, the score is 1.0 when both are empty and 0.0 otherwise.
    """
    pred_tokens = _normalize_answer(pred).split()
    truth_tokens = _normalize_answer(truth).split()

    if not pred_tokens or not truth_tokens:
        # Precision/recall are undefined here; agree-on-empty counts as a match.
        return float(pred_tokens == truth_tokens)

    # Counter intersection keeps the minimum count of each shared token.
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())
    if num_common == 0:
        return 0.0

    precision = num_common / len(pred_tokens)
    recall = num_common / len(truth_tokens)
    return (2 * precision * recall) / (precision + recall)


def squad_em(pred, truth):
    """Exact match: 1 if the normalized answers are identical, else 0.

    Args:
        pred: Predicted answer text.
        truth: Reference answer text.

    Returns:
        ``1`` or ``0`` as an int.
    """
    return int(_normalize_answer(pred) == _normalize_answer(truth))

# Run the evaluation over the held-out splits, announcing each one first.
for banner, split_df in (
    ("Testing the model on the test dataset:", test_df),
    ("Testing the model on the validation dataset:", validation_df),
):
    print(banner)
    evaluate_model(model, split_df)

# Persist the model and tokenizer side by side in one directory so they can be
# reloaded together later. The tokenizer call stays last so the cell displays
# the paths it wrote.
model.save_pretrained("custom_qa_model")
tokenizer.save_pretrained("custom_qa_model")


Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

Testing the model on the test dataset:
Average F1 Score: 0.0006535947712418301
Average EM Score: 0.0
Testing the model on the validation dataset:
Average F1 Score: 0.0017094017094017096
Average EM Score: 0.0


('custom_qa_model/tokenizer_config.json',
 'custom_qa_model/special_tokens_map.json',
 'custom_qa_model/vocab.txt',
 'custom_qa_model/added_tokens.json')