In [67]:
import re
import json


# Function to extract question-answer pairs from text
def extract_qa_pairs(text):
    # Define regular expression patterns for questions, options, and answer
    qa_pattern = r"(\d+\..*?)(?=(?:\d+\.|$))"
    question_pattern = r"(\d+\..*?)(?=(?:\n[A-D]\.|Answer:)|\Z)"
    option_pattern = r"([A-D]\..*?)(?=(?:\n[A-D]\.|Answer:)|\Z)"
    answer_pattern = r"Answer: ([A-D])"

    # Extract question-answer pairs using regular expressions
    qa_pairs = re.findall(qa_pattern, text, re.DOTALL)

    # Parse each question-answer pair
    parsed_pairs = []
    for qa_pair in qa_pairs:
        question_match = re.search(question_pattern, qa_pair, re.DOTALL)
        question = question_match.group(1).strip() if question_match else ""
        options = re.findall(option_pattern, qa_pair, re.DOTALL)
        answer_match = re.search(answer_pattern, qa_pair)
        answer = answer_match.group(1) if answer_match else ""

        # Construct question-answer pair
        qa_data = {
            "question": question,
            "option1": options[0].strip() if len(options) > 0 else "",
            "option2": options[1].strip() if len(options) > 1 else "",
            "option3": options[2].strip() if len(options) > 2 else "",
            "option4": options[3].strip() if len(options) > 3 else "",
            "correct_answer_text": "",
            "correct_answer_option": answer,
        }
        parsed_pairs.append(qa_data)

    return parsed_pairs


# Sample text containing questions and answers
text = """
61. 101.4 kPa (14.7 psi) is equivalent to __________ atm.
A. 0.5
B. 1
C. 386
D. 760
Answer: B 
"""

# Extract question-answer pairs
qa_pairs = extract_qa_pairs(text)

# Convert to JSON format
json_data = json.dumps(qa_pairs, indent=4)

# Print JSON data
print(json_data)

[
    {
        "question": "61.",
        "option1": "",
        "option2": "",
        "option3": "",
        "option4": "",
        "correct_answer_text": "",
        "correct_answer_option": ""
    },
    {
        "question": "101.4 kPa (",
        "option1": "",
        "option2": "",
        "option3": "",
        "option4": "",
        "correct_answer_text": "",
        "correct_answer_option": ""
    },
    {
        "question": "14.7 psi) is equivalent to __________ atm.",
        "option1": "A.",
        "option2": "",
        "option3": "",
        "option4": "",
        "correct_answer_text": "",
        "correct_answer_option": ""
    },
    {
        "question": "0.5",
        "option1": "B. 1",
        "option2": "C. 386",
        "option3": "D. 760",
        "option4": "",
        "correct_answer_text": "",
        "correct_answer_option": "B"
    }
]


In [1]:
import json


# Function to extract question-answer pairs from text file
def extract_qa_pairs_from_file(file_path):
    with open(file_path, "r") as file:
        lines = file.readlines()

    # Initialize variables
    qa_pairs = []
    question = ""
    options = []

    # Iterate through each line in the file
    for line in lines:
        line = line.strip()
        if line == "":
            # End of options, construct question-answer pair
            qa_data = {
                "question": question,
                "option1": options[0],
                "option2": options[1],
                "option3": options[2],
                "option4": options[3],
            }
            qa_pairs.append(qa_data)
            question = ""  # Reset question
            options = []  # Reset options for the next question
        elif line.startswith(("A.", "B.", "C.", "D.")):
            # Add options without option names
            options.append(line[3:])
        else:
            # Add to the current question
            question += " " + line

    return qa_pairs


# Path to the text file containing questions and answers
file_path = "test.txt"

# Extract question-answer pairs from the file
qa_pairs = extract_qa_pairs_from_file(file_path)

# Convert to JSON format
json_data = json.dumps(qa_pairs, indent=4)

# Print JSON data
print(json_data)

[
    {
        "question": " 61. 101.4 kPa (14.7 psi) is equivalent to __________ atm.",
        "option1": "0.5",
        "option2": "1",
        "option3": "386",
        "option4": "760"
    },
    {
        "question": " 62. Radiation that breaks chemical bonds is called __________ radiation.",
        "option1": "non-polarizing",
        "option2": "polarizing",
        "option3": "non-ionizing",
        "option4": "ionizing"
    },
    {
        "question": " 63. When a chemical has been released and physically comes in contact with people, the environment, and everything around it, either intentionally or unintentionally, the residue of that chemical is called:",
        "option1": "contamination.",
        "option2": "corrosive.",
        "option3": "radiation.",
        "option4": "polymerization."
    }
]
