In [7]:
import requests
from bs4 import BeautifulSoup, NavigableString
import re
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from datasets import load_dataset
import tkinter as tk
from tkinter import messagebox
from tabulate import tabulate

In [8]:
def get_paragraph_text(p):
    paragraph_text = ''
    for tag in p.children:
        if isinstance(tag, NavigableString):
            paragraph_text += tag.string
        else:
            paragraph_text += tag.text
    
    return paragraph_text

In [9]:
def get_wiki_extract(url):
    page = requests.get(url)    
    soup = BeautifulSoup(page.content , 'html.parser')
    headers = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    wiki_extract = []
    for tag in soup.find_all():
        if tag.name in headers and tag.text != 'Contents':
            p = ''
            for ne in tag.next_elements:
                if ne.name == 'p':
                    p = p + get_paragraph_text(ne)
                if ne.name in headers:
                    break
            if p != '':
                section = [re.sub(r'\[\d+\]|\[edit\]', '', tag.text), tag.name, re.sub(r'\[\d+\]|\[edit\]', '', p)]
                wiki_extract.append(section)
    return wiki_extract

In [10]:
def get_final_answer(question, url):
    answers = get_answers(question, url)
    df = pd.DataFrame(answers)
    n_sections = len(df)
    if n_sections <= 1:
        df_answers = df.nlargest(n_sections, 'score')
    else:
        df_answers = df.nlargest(1, 'score')

    for index, row in df_answers.iterrows():
        return row['answer']

In [13]:
def get_answers(question, url):
    #model_name = "deepset/roberta-base-squad2"
    model_name = "PremalMatalia/roberta-base-best-squad2"
    #model_name = "deepset/deberta-v3-large-squad2"
    #model = AutoModelForQuestionAnswering.from_pretrained("./models/bert/bbu_squad2") 
    question_answerer = pipeline("question-answering", model=model_name)
    wiki_extract = get_wiki_extract(url)    
    answers = []
    for section in wiki_extract:
        result = question_answerer(question=question, context=section[2])
        answer = {'title': section[0], 'title_tag': section[1], 'paragraph': section[2], **result }
        answers.append(answer)
    return answers

In [14]:
url = "https://en.wikipedia.org/wiki/Fox"
#wiki_extract = get_wiki_extract(url)
#print(wiki_extract)
question = "What does the fox say?"
answer = get_final_answer(question=question,url=url)
print(answer)

config.json: 100%|██████████| 740/740 [00:00<?, ?B/s] 
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
pytorch_model.bin: 100%|██████████| 496M/496M [00:29<00:00, 16.7MB/s] 
  return self.fget.__get__(instance, owner)()
tokenizer_config.json: 100%|██████████| 1.40k/1.40k [00:00<?, ?B/s]
vocab.json: 100%|██████████| 798k/798k [00:00<00:00, 2.66MB/s]
merges.txt: 100%|██████████| 456k/456k [00:00<00:00, 3.19MB/s]
tokenizer.json: 100%|██████████| 1.36M/1.36M [00:00<00:00, 4.07MB/s]
special_tokens_map.json: 100%|██████████| 772/772 [00:00<?, ?B/s] 


whining


In [None]:
class WikipediaQA:
    def __init__(self, master):
        self.master = master
        self.master.title("Wikipedia Q&A")

        self.mode = tk.StringVar(value='light')
        self.create_widgets()

    def create_widgets(self):
        self.url_label = tk.Label(self.master, text="Wikipedia URL:")
        self.url_label.pack()
        self.url_entry = tk.Entry(self.master, width=50)
        self.url_entry.pack()

        self.question_label = tk.Label(self.master, text="Question:")
        self.question_label.pack()
        self.question_entry = tk.Entry(self.master, width=50)
        self.question_entry.pack()

        self.output_label = tk.Label(self.master, text="Answer:")
        self.output_label.pack()
        self.output_text = tk.Text(self.master, height=5, width=50, state=tk.DISABLED)
        self.output_text.pack()

        self.get_answer_button = tk.Button(self.master, text="Get Answer", command=self.get_answer)
        self.get_answer_button.pack()

        self.mode_button = tk.Button(self.master, text="Switch to Dark Mode", command=self.toggle_mode)
        self.mode_button.pack()
        
        self.url_label.pack(padx=10, pady=10)
        self.url_entry.pack(padx=10, pady=10)
        self.question_label.pack(padx=10, pady=10)
        self.question_entry.pack(padx=10, pady=10)
        self.output_label.pack(padx=10, pady=10)
        self.output_text.pack(padx=10, pady=10)
        self.get_answer_button.pack(padx=10, pady=10)
        self.mode_button.pack(padx=10, pady=10)


    def get_answer(self):
        url = self.url_entry.get()
        question = self.question_entry.get()

        if not url or not question:
            messagebox.showerror("Error", "Please enter both a Wikipedia URL and a question.")
            return

        match = re.search(r"en\.wikipedia\.org\/wiki\/(.+)", url)
        if not match:
            messagebox.showerror("Error", "Please enter a valid Wikipedia URL.")
            return

        try:
            answer = get_final_answer(question, url)
            self.output_text.configure(state=tk.NORMAL)
            self.output_text.delete("1.0", tk.END)
            self.output_text.insert(tk.END, answer)
            self.output_text.configure(state=tk.DISABLED) 
        except:
            messagebox.showerror("Error", "An error occurred. Please try again.")

    def toggle_mode(self):
        if self.mode.get() == 'light':
            self.mode.set('dark')
            self.master.configure(background='black')
            self.url_label.configure(background='black', foreground='white')
            self.question_label.configure(background='black', foreground='white')
            self.output_label.configure(background='black', foreground='white')
            self.output_text.configure(background='black', foreground='white')
            self.get_answer_button.configure(background='white', foreground='black')
            self.mode_button.configure(text='Switch to Light Mode')
        else:
            self.mode.set('light')
            self.master.configure(background='white')
            self.url_label.configure(background='white', foreground='black')
            self.question_label.configure(background='white', foreground='black')
            self.output_label.configure(background='white', foreground='black')
            self.output_text.configure(background='white', foreground='black')
            self.get_answer_button.configure(background='black', foreground='white')
            self.mode_button.configure(text='Switch to Dark Mode')

In [None]:
root = tk.Tk()
app = WikipediaQA(root)
root.mainloop()

In [10]:
dataset = load_dataset("squad_v2",split="validation")

In [11]:
df = pd.DataFrame(dataset)

In [12]:
df = df.drop("id", axis=1)
df = df.drop("context",axis=1)
df

Unnamed: 0,title,question,answers
0,Normans,In what country is Normandy located?,"{'text': ['France', 'France', 'France', 'Franc..."
1,Normans,When were the Normans in Normandy?,"{'text': ['10th and 11th centuries', 'in the 1..."
2,Normans,From which countries did the Norse originate?,"{'text': ['Denmark, Iceland and Norway', 'Denm..."
3,Normans,Who was the Norse leader?,"{'text': ['Rollo', 'Rollo', 'Rollo', 'Rollo'],..."
4,Normans,What century did the Normans first gain their ...,"{'text': ['10th century', 'the first half of t..."
...,...,...,...
11868,Force,What is the seldom used force unit equal to on...,"{'text': ['sthène', 'sthène', 'sthène', 'sthèn..."
11869,Force,What does not have a metric counterpart?,"{'text': [], 'answer_start': []}"
11870,Force,What is the force exerted by standard gravity ...,"{'text': [], 'answer_start': []}"
11871,Force,What force leads to a commonly used unit of mass?,"{'text': [], 'answer_start': []}"


In [13]:
df['text'] = df['answers'].apply(lambda x: x['text'])
df = df.drop(df[df['text'].apply(lambda x: len(x) == 0)].index)
df = df.drop('answers', axis=1)
df.rename(columns = {'text':'answers'}, inplace = True)
df

Unnamed: 0,title,question,answers
0,Normans,In what country is Normandy located?,"[France, France, France, France]"
1,Normans,When were the Normans in Normandy?,"[10th and 11th centuries, in the 10th and 11th..."
2,Normans,From which countries did the Norse originate?,"[Denmark, Iceland and Norway, Denmark, Iceland..."
3,Normans,Who was the Norse leader?,"[Rollo, Rollo, Rollo, Rollo]"
4,Normans,What century did the Normans first gain their ...,"[10th century, the first half of the 10th cent..."
...,...,...,...
11864,Force,What is the metric term less used than the New...,"[kilogram-force, pound-force, kilogram-force (..."
11865,Force,What is the kilogram-force sometimes reffered ...,"[kilopond, kilopond, kilopond, kilopond, kilop..."
11866,Force,What is a very seldom used unit of mass in the...,"[slug, metric slug, metric slug, metric slug, ..."
11867,Force,What seldom used term of a unit of force equal...,"[kip, kip, kip, kip, kip]"


In [11]:
df2 = df.sample(300)

In [12]:
answers = []
for index, row in df2.iterrows():
    url = "https://en.wikipedia.org/wiki/" + row["title"]
    #print(url)
    question = row["question"]
    #print(question)
    answer = get_final_answer(question=question,url=url)
    answers.append(answer)

KeyboardInterrupt: 

In [None]:
correct_answers = pd.DataFrame(df2["answers"]).values

In [None]:
def calculate_metrics(true_answers, predicted_answers):
    """
    Calculates the F1 score given the true answers and predicted answers.
    Both true_answers and predicted_answers are lists of strings.
    """
    # Convert predicted_answers list to a set of tuples
    predicted_answers_set = set(predicted_answers)

    # Calculate precision
    if len(predicted_answers_set) == 0:
        precision = 0
    else:
        true_positives = 0
        for ans_p in predicted_answers_set:
            for ans_t in true_answers:
                if all([a in ans_p for a in ans_t.split()]):
                    true_positives += 1
                    break  # go to next predicted answer if a true answer is found
        precision = true_positives / len(predicted_answers_set)

    # Calculate recall
    true_positives = 0
    for ans_t in true_answers:
        for ans_p in predicted_answers_set:
            if all([a in ans_t for a in ans_p]):
                true_positives += 1
                break  # go to next true answer if a predicted answer is found
    recall = true_positives / len(true_answers)

    # Calculate F1 score
    if precision == 0 and recall == 0:
        f1_score = 0
    else:
        f1_score = 2 * precision * recall / (precision + recall)

    return precision, recall, f1_score

In [None]:
pos = 0
neg = 0
for i in range(0,len(answers)):
    answer = answers[i]
    correct_answer = correct_answers[i]
    #print(answer)
    #print(correct_answer)
    for inner_list in correct_answer:
        for element in inner_list:
            if answer in element or element in answer:
                pos+=1
                break
            else:
                continue
            break
        else:
            neg+=1
acc_rate = 100*(pos/(pos+neg))
print("accuracy rate = ",acc_rate,"%")

accuracy rate =  60.0 %


In [None]:
true_positives = 0
false_positives = 0
false_negatives = 0

for i in range(0, len(answers)):
    answer = answers[i]
    correct_answer = correct_answers[i]
    is_match = False
    for inner_list in correct_answer:
        for element in inner_list:
            if answer in element or element in answer:
                is_match = True
                break
        if is_match:
            break
    if is_match:
        true_positives += 1
    else:
        false_negatives += 1

    is_match = False
    for inner_list in correct_answer:
        for element in inner_list:
            #print(element)
            #print(str(correct_answer))
            if element in correct_answer or str(correct_answer) in element:
                is_match = True
                print(element)
                print(str(correct_answer))
                break
        if is_match:
            break
    if not is_match:
        false_positives += 1

precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

print(true_positives)
print(false_positives)
print(false_negatives)

print("precision = ", precision)
print("recall = ", recall)
print("F1 score = ", f1_score)

18
30
12
precision =  0.375
recall =  0.6
F1 score =  0.4615384615384615


In [14]:
triviaqa = pd.read_json("training.json")

In [15]:
triviaqa = triviaqa.drop("version",axis=1)
#triviaqa

In [16]:
triviaqa['title'] = triviaqa['data'].apply(lambda x: x['title'])
triviaqa = triviaqa.drop(df[df['title'].apply(lambda x: len(x) == 0)].index)
triviaqa['paragraphs'] = triviaqa['data'].apply(lambda x: x['paragraphs'][0])
triviaqa['qas'] = triviaqa['paragraphs'].apply(lambda x: x['qas'][0])
triviaqa['question'] = triviaqa['qas'].apply(lambda x: x['question'])
triviaqa['answers'] = triviaqa['qas'].apply(lambda x: x['answers'])
triviaqa = triviaqa.drop(triviaqa[triviaqa['answers'].apply(lambda x: len(x) == 0)].index)
triviaqa['answers'] = triviaqa['qas'].apply(lambda x: x['answers'][0])
triviaqa['answer'] = triviaqa['answers'].apply(lambda x: x['text'])
triviaqa = triviaqa.drop(['data','paragraphs','qas','answers'], axis=1)
triviaqa

Unnamed: 0,title,question,answer
0,Beyoncé,When did Beyonce start becoming popular?,in the late 1990s
1,Frédéric_Chopin,What was Frédéric's nationalities?,Polish and French
2,Sino-Tibetan_relations_during_the_Ming_dynasty,Who were Wang Jiawei and Nyima Gyaincain?,Mainland Chinese scholars
3,IPod,Which company produces the iPod?,Apple
4,The_Legend_of_Zelda:_Twilight_Princess,What category of game is Legend of Zelda: Twil...,action-adventure
...,...,...,...
435,"Punjab,_Pakistan",What does Punjab mean?,five waters
436,"Richmond,_Virginia",How many miles east of Richmond is Williamsburg?,44
437,Infection,"Of the huge amount of microorganisms, how many...",relatively few
438,Hunting,What is the practice of killing or trapping an...,Hunting


In [19]:
df3 = triviaqa.sample(10)
#df3

In [None]:
'''
df4 = triviaqa.sample(250)
incorrect_questions = []
incorrect_topics = []
for index, row in df4.iterrows():
    url = "https://en.wikipedia.org/wiki/" + row["title"]
    question = row["question"]
'''

In [21]:
answers2 = []
questions2 = []
correct_answers2 = []
for index, row in df3.iterrows():
    #print(row["title"])
    #print(row["question"])
    url = "https://en.wikipedia.org/wiki/" + row["title"]
    #print(url)
    question = row["question"]
    #print(question)
    answer = get_final_answer(question=question,url=url)
    correct_answer = row['answer']
    correct_answers2.append(correct_answer)
    questions2.append(question)
    answers2.append(answer)

In [22]:
print(questions2)
print(answers2)
print(correct_answers2)

['What does child labour deprive children of?', "How much a  Myanmar's boundaries are encompassed by beachfront lands?", 'What does Florida stand for ', 'What is the oldest city in South Carolina?', 'The Times is based in what major British city?', 'What is commonly contrasted with empiricism?', 'What is the Cherokee name for Oklahoma?', 'What is the abbreviation by which the University of Kansas is known?', 'What is the largest private foundation in the world?', 'NARA is responsible for what collection of archives?']
[' pay', ' one quarter of its total perimeter.', '\nFlorida has teams in all five American major league sports.', ' Charleston', ' London', ' innate ideas or traditions.', ' Okalahoma,', ' (KU)', ' Bill & Melinda Gates Foundation', ' historical records']
['deprives children of their childhood', '(1,200 miles) forms an uninterrupted coastline', 'Florida i/ˈflɒrɪdə/ (Spanish for "flowery land")', 'Charleston', 'London', 'rationalism', 'Asgaya gigageyi', 'KU', 'Bill & Melind

In [None]:
url = "https://en.wikipedia.org/wiki/" + "Central_Intelligence_Agency"
#print(url)
question = "What is the CIA's main focus?"
#print(question)
answer = get_final_answer(question=question,url=url)
print(answer)

 HUMINT


In [27]:
incorrect_questions = []
i = 0

for index, row in df3.iterrows():
    answer = row['answer']
    correct_answer = correct_answers2[i][0]

    column_value = row['question']

    if answer not in correct_answer or str(correct_answer) not in answer:
        incorrect_questions.append(column_value)
    
    i+=1

1. What does child labour deprive children of?
2. How much a  Myanmar's boundaries are encompassed by beachfront lands?
3. What does Florida stand for 
4. What is the oldest city in South Carolina?
5. The Times is based in what major British city?
6. What is commonly contrasted with empiricism?
7. What is the Cherokee name for Oklahoma?
8. What is the abbreviation by which the University of Kansas is known?
9. What is the largest private foundation in the world?
10. NARA is responsible for what collection of archives?


In [30]:
# Combine lists into a list of tuples for tabulate
table_data = list(zip(questions2, answers2, correct_answers2))

# Define table headers
headers = ['Questions', 'Answers', 'Correct Answers']

# Print the table
print(tabulate(table_data, headers=headers, tablefmt='grid'))

+-----------------------------------------------------------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------+
| Questions                                                             | Answers                                                     | Correct Answers                                                                                                               |
| What does child labour deprive children of?                           | pay                                                         | deprives children of their childhood                                                                                          |
+-----------------------------------------------------------------------+-------------------------------------------------------------+-------------------------------------------------------------------------

In [None]:
pos = 0
neg = 0
for i in range(0,df3.shape[0]):
    answer = answers2[i]
    correct_answer = correct_answers2[i][0]
    #print(answer)
    #print(correct_answer)
    if answer in correct_answer or str(correct_answer) in answer:
        pos+=1
    else:
        neg+=1
acc_rate = 100*(pos/(pos+neg))
print("accuracy rate = ",acc_rate,"%")

accuracy rate =  36.666666666666664 %


In [None]:
TP = 0
FP = 0
FN = 0

for i in range(0,df3.shape[0]):
    answer = answers2[i]
    correct_answer = correct_answers2[i]
    if answer in correct_answer or str(correct_answer) in answer:
        if (answer == correct_answer).any():
            TP += 1
        else:
            FP += 1
    else:
        if answer != "":
            FN += 1

precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1_score = 2 * precision * recall / (precision + recall)

print(TP)
print(FP)
print(FN)
print("precision = ",precision)
print("recall = ",recall)
print("f1 score = ",f1_score)

ZeroDivisionError: division by zero

In [None]:
true_positives = 0
false_positives = 0
false_negatives = 0

for i in range(0, len(answers2)):
    answer = answers2[i]
    correct_answer = correct_answers2[i]
    is_match = False
    for inner_list in correct_answer:
        for element in inner_list:
            if answer in element or element in answer:
                is_match = True
                break
        if is_match:
            break
    if is_match:
        true_positives += 1
    else:
        false_negatives += 1

    is_match = False
    for inner_list in correct_answer:
        for element in inner_list:
            #print(element)
            #print(str(correct_answer))
            if element in correct_answer or str(correct_answer) in element:
                is_match = True
                print(element)
                print(str(correct_answer))
                break
        if is_match:
            break
    if not is_match:
        false_positives += 1
true_negatives = 10 - true_positives - false_positives - false_negatives

precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

print("true positive ",true_positives)
print("false positive ",false_positives)
print("false negative ",false_negatives)
print("true negative ",true_negatives)

print("precision = ", precision)
print("recall = ", recall)
print("F1 score = ", f1_score)

30
30
0
precision =  0.5
recall =  1.0
F1 score =  0.6666666666666666


In [3]:
from transformers import squad_convert_examples_to_features
from transformers import SquadV2Processor, squad_convert_examples_to_features
from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor
from transformers import BertTokenizer, BertForQuestionAnswering
import torch
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# Load SQuAD 2.0 dataset
processor = SquadV2Processor()
examples = load_dataset("squad_v2",split="validation")

# Load pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

# Tokenize and convert examples to features
features, dataset = squad_convert_examples_to_features(
    examples=examples,
    tokenizer=tokenizer,
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
    return_dataset="pt",
    threads=1,
)

# Create DataLoader for the dataset
data_loader = DataLoader(dataset, batch_size=8)

# Function to check if a question is unanswerable
def is_unanswerable(scores, start_threshold=0.5, end_threshold=0.5):
    start_prob, start_index = torch.max(scores[0], dim=1)
    end_prob, end_index = torch.max(scores[1], dim=1)
    return start_prob.item() < start_threshold and end_prob.item() < end_threshold

# Extract unanswerable questions
unanswerable_questions = []

# Set model to evaluation mode
model.eval()

with torch.no_grad():
    for batch in tqdm(data_loader, desc="Extracting Unanswerable Questions"):
        inputs = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)

        # Get model outputs
        outputs = model(inputs, attention_mask=attention_mask)
        start_scores, end_scores = outputs.start_logits, outputs.end_logits

        # Check if the question is unanswerable
        for i in range(len(batch['example_id'])):
            if is_unanswerable((start_scores[i], end_scores[i])):
                unanswerable_questions.append(examples[batch['example_id'][i]].question_text)

# Print some extracted unanswerable questions
for i, question in enumerate(unanswerable_questions[:10]):
    print(f"{i+1}. {question}")


Downloading readme: 100%|██████████| 8.18k/8.18k [00:00<?, ?B/s]


Downloading and preparing dataset None/squad_v2 to file://C:/Users/zfkha/.cache/huggingface/datasets/parquet/squad_v2-d8667bb0de9aba6c/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data: 100%|██████████| 16.4M/16.4M [00:01<00:00, 14.0MB/s]
Downloading data: 100%|██████████| 1.35M/1.35M [00:00<00:00, 9.35MB/s]
Downloading data files: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]
Extracting data files: 100%|██████████| 2/2 [00:00<?, ?it/s]
                                                                                          

Dataset parquet downloaded and prepared to file://C:/Users/zfkha/.cache/huggingface/datasets/parquet/squad_v2-d8667bb0de9aba6c/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported.