In [74]:
import re
import ollama
from src.data_loading import load_data
from src.parsing import parse_options
from src.rationale_generation import generate_rationale_and_answer
from src.utils import is_rationale_correct
from src.rationalization import rationalize
from src.prompt_generation import create_prompt_examples, create_prompt_set, clean_options, get_correct_answer_text

In [113]:
# Count passed to create_prompt_examples / create_prompt_set; the first
# this-many rows of ds_train are also excluded from the processing pool.
NUM_PROMPT_EXAMPLES = 5
# Number of rows taken from the shuffled pool and run through the evaluation loop.
NUM_EXAMPLES_TO_PROCESS = 7

### Load Dataset

In [114]:
ds_train = load_data()

# Step 1: Drop the first NUM_PROMPT_EXAMPLES rows so the processing pool starts
# after them (presumably those rows are the few-shot prompt examples —
# confirm against create_prompt_examples).
dataset_D = ds_train.select(range(NUM_PROMPT_EXAMPLES, len(ds_train)))

# Step 2: Shuffle with a fixed seed so Restart & Run All selects the same questions.
dataset_D_shuffled = dataset_D.shuffle(seed=42)

# Step 3: Select a subset of examples to process. Clamp the size so a pool
# smaller than NUM_EXAMPLES_TO_PROCESS does not make select() index out of range.
num_to_process = min(NUM_EXAMPLES_TO_PROCESS, len(dataset_D_shuffled))
dataset_D_subset = dataset_D_shuffled.select(range(num_to_process))

print(dataset_D_subset)

Dataset({
    features: ['question', 'options', 'rationale', 'correct'],
    num_rows: 7
})


### Create prompt examples

In [115]:
# Build the few-shot examples from ds_train; the output below shows each entry
# as a dict with 'question', 'rationale' and 'answer' keys.
prompt_examples = create_prompt_examples(ds_train, NUM_PROMPT_EXAMPLES)

# Bare expression so the notebook displays the list.
prompt_examples

[{'question': "Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?",
  'rationale': 'If Q complete x kilometers, then P completes 1.15x kilometers.\nx + 1.15x = 43\n2.15x=43\nx = 43/2.15 = 20\nThen P will have have walked 1.15*20=23 km.\nThe answer is E.',
  'answer': 'E'},
 {'question': 'In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively?',
  'rationale': 'Line k passes through the origin and has slope 1/5 means that its equation is y=1/5*x.\nThus: (x, 1)=(5, 1) and (5, y) = (5,1) -->x=5 and y=1\nAnswer: C',
  'answer': 'C'},
 {'question': 'For all numbers p and q, the operation @ is defined by p@q = p^2 - pq. If xy ≠ 0, then which of the following can be equal to zero?\nI. x@y\nII. (xy)@y\nIII. x@

### Create Prompt Sets

In [116]:
# Build the few-shot prompt passed to generate_rationale_and_answer and
# rationalize later in the notebook; printed below for inspection.
prompt_set = create_prompt_set(ds_train, NUM_PROMPT_EXAMPLES)
print(prompt_set)

Question: Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?
Answer Explanation: If Q complete x kilometers, then P completes 1.15x kilometers.\nx + 1.15x = 43\n2.15x=43\nx = 43/2.15 = 20\nThen P will have have walked 1.15*20=23 km.\nThe answer is E.
Answer: 23
###
Question: In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively?
Answer Explanation: Line k passes through the origin and has slope 1/5 means that its equation is y=1/5*x.\nThus: (x, 1)=(5, 1) and (5, y) = (5,1) -->x=5 and y=1\nAnswer: C
Answer: 5 and 1
###
Question: For all numbers p and q, the operation @ is defined by p@q = p^2 - pq. If xy ≠ 0, then which of the following can be equal to zero?
I. x@y
II. (xy)@y
III. x@(x + y)
Answer Expl

### Initialize lists to hold correct, incorrect, and unanswered pairs

In [134]:
# Result buckets appended to by the evaluation loop: judged correct, judged
# incorrect, and anything the judge labelled neither way.
# (`unanswered_paris` keeps the spelling used by the rest of the notebook.)
correct_pairs, incorrect_pairs, unanswered_paris = [], [], []

### Iterate over each example in the subset

In [135]:
def extract_answer_text(rationale):
    """
    Extracts the final answer text from the rationale using regex.

    The text after the LAST 'Answer:' marker (case-insensitive) is returned.
    The few-shot prompt format can contain an 'Answer: <letter>' line inside the
    explanation, followed by the final 'Answer: <text>' line, so the first
    match is not necessarily the final answer.

    Args:
        rationale (str): Generated rationale string. None, non-string, or
            empty input is treated as "no answer".

    Returns:
        str or None: Extracted answer text with surrounding whitespace
        removed, or None when no non-empty answer is found.
    """
    if not isinstance(rationale, str) or not rationale:
        return None

    # '.' does not match '\n', so each capture runs to the end of its line.
    candidates = re.findall(r'Answer:\s*(.+)', rationale, re.IGNORECASE)

    # Walk backwards so the last non-blank answer wins.
    for candidate in reversed(candidates):
        cleaned = candidate.strip()  # drops trailing spaces and a stray '\r'
        if cleaned:
            return cleaned
    return None

In [136]:
# Evaluate each selected question: generate a rationale, extract its answer,
# ask a judge model whether it matches the reference answer, and file the
# result into correct_pairs / incorrect_pairs / unanswered_paris.
for idx, example in enumerate(dataset_D_subset):
    question = example['question']
    # Map 'correct' label to answer text
    raw_options = example['options']
    cleaned_options = clean_options(raw_options)
    correct_label = example['correct'].strip().upper()
    correct_answer_text = get_correct_answer_text(cleaned_options, correct_label)

    print(f"correct_answer_text: {correct_answer_text}")
    print(f"clean options: {cleaned_options}")

    if correct_answer_text is None:
        print(f"Skipping example {idx} due to missing correct answer.")
        continue  # Skip this example

    # Generate rationale and answer
    generated_rationale = generate_rationale_and_answer(question, prompt_set)

    # Extract the answer text from the rationale
    extracted_answer = extract_answer_text(generated_rationale)
    print(f"extracted answer: {extracted_answer}")

    # Ask the judge model whether the extracted answer matches the reference.
    response = ollama.chat(model="llama3.1:8b", messages=[
            {
                'role': 'user',
                'content':
                f"""
                    Your task is to compare two numerical answers and determine if they are the same answer, ignoring differences in units or formatting.\n\n
                    Comparison Rules:\n\n
                    - If the answers are the same, for example, First Answer: '90' and Second Answer: '90 miles' or ( km, %, sec, ml, etc) this is a match and return 'correct' in your response.\n
                    - If the answers are different, consider them NOT a match and return 'incorrect' in your response.\n\n
                    Ignore differences in formatting, such as trailing zeros.\n\n

                    Compare the Following Answers:\n
                    First answer: {extracted_answer}\n
                    Second answer: {correct_answer_text}\n\n
                    Respond with:\n
                    "correct" if the two answers are the same\n
                    "incorrect" if the two answers are not the same\n\n
                    Please respond with only one of the above options, without any explanations.
                """
            },
    ])

    decision = response['message']['content'].strip()
    print(decision)

    # Normalise the verdict so replies such as '"correct"' or 'Correct.' are
    # still recognised. Equality (not substring) is used on purpose because
    # "incorrect" contains "correct".
    verdict = decision.strip(" \t\r\n\"'`.*").lower()

    # Categorize based on extracted answer
    if verdict == "correct":
        record = {
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer
        }
        correct_pairs.append(record)
        print('Correct:', record)
    else:
        record = {
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer if extracted_answer else "No Answer Extracted",
            # The rationalization cell reads pair['options'] and
            # pair['correct_answer'], so keep them on the record.
            # NOTE(review): rationalize() may expect parsed options and/or the
            # answer label rather than the answer text — confirm against src.rationalization.
            'options': raw_options,
            'correct_answer': correct_answer_text,
        }
        if verdict == "incorrect":
            incorrect_pairs.append(record)
            print("Incorrect:", record)
        else:
            # The judge returned something other than the two allowed labels.
            unanswered_paris.append(record)
            print("Unanswered:", record)

    # Print progress every example
    print(f"Processed {idx + 1} questions.\n")

correct_answer_text: 125
clean options: ['A) 75', 'B) 100', 'C) 125', 'D) 175', 'E) 225']
extracted answer: 125
correct
Correct: {'question': 'There are 750 male and female participants in a meeting. Half the female participants and one-quarterof the male participants are Democrats. One-third of all the participants are Democrats. How many of the Democrats are female?', 'rationale': "Let's break down the problem step by step:\n\n1. We know that half the female participants and one-quarter of the male participants are Democrats.\n\nLet F be the number of female participants and M be the number of male participants. Since there are 750 total participants, we can write an equation:\n\nF + M = 750\n\n2. According to the problem, half the female participants are Democrats, so the number of Democratic female participants is F/2.\n\nSimilarly, one-quarter of the male participants are Democrats, so the number of Democratic male participants is M/4.\n\n3. We are also told that one-third of all 

In [137]:
# Display the records the judge labelled "correct".
correct_pairs

[{'question': 'There are 750 male and female participants in a meeting. Half the female participants and one-quarterof the male participants are Democrats. One-third of all the participants are Democrats. How many of the Democrats are female?',
  'rationale': "Let's break down the problem step by step:\n\n1. We know that half the female participants and one-quarter of the male participants are Democrats.\n\nLet F be the number of female participants and M be the number of male participants. Since there are 750 total participants, we can write an equation:\n\nF + M = 750\n\n2. According to the problem, half the female participants are Democrats, so the number of Democratic female participants is F/2.\n\nSimilarly, one-quarter of the male participants are Democrats, so the number of Democratic male participants is M/4.\n\n3. We are also told that one-third of all the participants are Democrats. This means that the total number of Democratic participants (both male and female) is 1/3 * 75

In [138]:
# Display the records the judge labelled "incorrect".
incorrect_pairs

[{'question': 'If x is an integer and y = 3x + 4, which of the following CANNOT be a divisor of y?',
  'rationale': "Let's analyze the problem step by step.\n\nWe are given that x is an integer and y = 3x + 4. We need to find a value that cannot be a divisor of y.\n\nStep 1: Let's try to understand how y is related to x. The expression y = 3x + 4 shows that y is obtained by adding 4 to the product of 3 and an integer (x).\n\nStep 2: Since x is an integer, the value of y will be determined by the sum of a multiple of 3 and 4.\n\nStep 3: We need to find a divisor that cannot divide y. To do this, let's analyze the possible divisors of y.\n\nSuppose y has a divisor d. Then, there exists an integer k such that y = kd.\n\nSubstituting y = 3x + 4 into this equation, we get:\n\n3x + 4 = kd\n\nStep 4: Rearranging the equation to isolate x, we have:\n\n3x = kd - 4\n\nSince 3 and d are relatively prime (d is a divisor of y), there exists an integer p such that:\n\n3p ≡ -4 (mod d)\n\nThis implies

In [139]:
# Display the records whose judge reply was neither "correct" nor "incorrect".
unanswered_paris

[]

In [140]:
# Summarise the evaluation run across the three result buckets.
total = len(correct_pairs) + len(incorrect_pairs) + len(unanswered_paris)
# Every example can be skipped (missing reference answer), leaving total == 0;
# report 0% instead of raising ZeroDivisionError.
accuracy = len(correct_pairs) / total * 100 if total else 0.0
print(f"Total questions processed: {total}")
print(f"Correct answers: {len(correct_pairs)}")
print(f"Incorrect answers: {len(incorrect_pairs)}")
print(f"Unanswered answers: {len(unanswered_paris)}")
print(f"Accuracy: {accuracy:.2f}%")

Total questions processed: 7
Correct answers: 6
Incorrect answers: 1
Unanswered answers: 0
Accuracy: 85.71%


In [26]:
# Rationalized examples produced from the incorrectly answered questions.
new_correct_pairs = []

# Process the incorrect answers
for pair in incorrect_pairs:
    question = pair['question']
    # Records that lack 'options' / 'correct_answer' cannot be rationalized;
    # skip them with a message instead of raising KeyError mid-loop.
    options = pair.get('options')
    correct_answer = pair.get('correct_answer')
    if options is None or correct_answer is None:
        print(f"Skipping rationalization (missing options/correct_answer): {question}")
        continue

    # Generate the rationale with the correct answer as a hint
    generated_rationale = rationalize(question, options, correct_answer, prompt_set)

    # Add the rationalized example to new_correct_pairs
    rationalized_pair = {
        'question': question,
        'options': options,
        'rationale': generated_rationale,
        'answer': correct_answer
    }
    new_correct_pairs.append(rationalized_pair)

    print(rationalized_pair)

{'question': 'Rs. 825 becomes Rs. 956 in 3 years at a certain rate of simple interest.If the rate of interest is increased by 4% ,What amount will Rs. 825 become in 3 years ?', 'options': {'A': 'Rs. 1020.80', 'B': 'Rs. 1025', 'C': 'Rs. 1055', 'D': 'Data inadequate', 'E': 'None of these'}, 'rationale': "Let's break down the problem step by step.\n\nGiven:\n\n1. Principal (P) = Rs. 825\n2. Amount after 3 years (A) = Rs. 956\n\nWe need to find the rate of interest (R) at which P becomes A in 3 years. We'll use the formula for simple interest:\n\nSimple Interest (SI) = (P × R × T)/100\n\nwhere T is the time period, which is 3 years here.\n\nFirst, let's calculate the SI for the given scenario:\n\nA = P + SI\n956 = 825 + (SI)\nSI = 131\n\nNow, we can use the formula to find the rate of interest (R):\n\nSI = (P × R × T)/100\n131 = (825 × R × 3)/100\n131 = 24.75R\nR = 131/24.75 ≈ 5.29%\n\nThis is the original rate of interest.\n\nNow, let's consider what happens when the rate of interest is i

### Convert the correct pairs into the conversation structure the Llama 3.1 model expects for fine-tuning

In [141]:
from datasets import Dataset, DatasetDict


def convert_correct_pairs_to_conversations(correct_pairs):
    """
    Turns each pair into a two-turn conversation for fine-tuning.

    Parameters:
        correct_pairs (list): Dictionaries providing the 'question', 'answer'
            and 'rationale' keys; any other keys are ignored.

    Returns:
        list: One conversation per pair, each a list of two dicts:
            {"from": "human", ...} holding the question and
            {"from": "gpt", ...} holding the answer followed by the rationale.
    """
    def _build_conversation(entry):
        # Keys are read in this order so a missing key fails on the same field as before.
        prompt_question = entry['question']
        final_answer = entry['answer']
        explanation = entry['rationale']

        human_turn = {"from": "human", "value": f"Question: {prompt_question}\n"}
        gpt_turn = {"from": "gpt", "value": f"{final_answer}\nRationale: {explanation}"}
        return [human_turn, gpt_turn]

    return [_build_conversation(entry) for entry in correct_pairs]

# conversations = convert_correct_pairs_to_conversations(correct_pairs)

# # Create a Hugging Face Dataset
# dataset = Dataset.from_dict({"conversations": conversations})

# # Create a DatasetDict with only the 'train' split
# dataset_dict = DatasetDict({
#     "train": dataset
# })

# # Optionally, shuffle the dataset
# dataset_dict["train"] = dataset_dict["train"].shuffle(seed=42)

# # Verify the structure
# print(dataset_dict)
# print(dataset_dict["train"][0])  # Print the first conversation

### Save dataset to JSONL format

In [47]:
import json

def save_dataset_to_jsonl(dataset_dict, file_path='finetuning_data.jsonl'):
    """
    Writes every conversation of the 'train' split to a JSONL file.

    The file is opened in write mode, so existing content is replaced. Each
    conversation is serialized with json.dumps on its own line.

    Parameters:
        dataset_dict (DatasetDict): Object indexable as
            dataset_dict["train"]["conversations"].
        file_path (str): The path to the output JSONL file.

    Returns:
        None
    """
    with open(file_path, 'w', encoding='utf-8') as out_file:
        out_file.writelines(
            json.dumps(conversation) + '\n'
            for conversation in dataset_dict["train"]["conversations"]
        )
    print(f"Dataset successfully saved to {file_path}")

# Usage
# NOTE(review): in the visible notebook `dataset_dict` is only built inside the
# commented-out block of the conversion cell, so on a fresh kernel this call
# presumably raises NameError unless that block is restored — confirm.
save_dataset_to_jsonl(dataset_dict, 'finetuning_data.jsonl')

Dataset successfully saved to finetuning_data.jsonl


### Load dataset from JSONL (for appending new data)

In [None]:
def load_dataset_from_jsonl(file_path='finetuning_data.jsonl', validation_split=0.1):
    """
    Reads a JSONL file into a Dataset and splits it into train/validation parts.

    The split uses a fixed seed (42), and the 'test' portion produced by
    train_test_split is exposed under the 'validation' key.

    NOTE(review): the save/append helpers in this notebook write each line as a
    bare JSON list of turns; confirm Dataset.from_json accepts that layout.

    Parameters:
        file_path (str): Path to the JSONL file.
        validation_split (float): Proportion of data to use for validation.

    Returns:
        DatasetDict: A DatasetDict with 'train' and 'validation' splits.
    """
    splits = Dataset.from_json(file_path).train_test_split(
        test_size=validation_split,
        seed=42,
    )
    dataset_dict = DatasetDict({"train": splits["train"], "validation": splits["test"]})
    print(f"Loaded dataset with {len(dataset_dict['train'])} training and {len(dataset_dict['validation'])} validation conversations.")
    return dataset_dict

### Appending New Data

In [142]:
import json

# Convert the judged-correct pairs into conversations ready to append to the JSONL file.
new_conversations = convert_correct_pairs_to_conversations(correct_pairs)

def append_conversations_to_jsonl(conversations, file_path='finetuning_data.jsonl'):
    """
    Adds conversations to the end of a JSONL file, one JSON document per line.

    The file is opened in append mode: existing lines are kept and the file
    is created when it does not exist yet.

    Parameters:
        conversations (list): List of conversations to append.
        file_path (str): Path to the JSONL file.

    Returns:
        None
    """
    with open(file_path, 'a', encoding='utf-8') as jsonl_file:
        for conversation in conversations:
            # print() supplies the trailing newline for each record.
            print(json.dumps(conversation), file=jsonl_file)
    print(f"Successfully appended {len(conversations)} conversations to {file_path}")
    
# Usage
# The helper opens the file in append mode, so re-running this cell writes the
# same conversations to finetuning_data.jsonl again.
append_conversations_to_jsonl(new_conversations, 'finetuning_data.jsonl')

Successfully appended 6 conversations to finetuning_data.jsonl
