In [30]:
import re
import ollama
from src.data_loading import load_data
from src.parsing import parse_options
from src.rationale_generation import generate_rationale_and_answer
from src.utils import is_rationale_correct
from src.rationalization import rationalize
from src.prompt_generation import create_prompt_examples, create_prompt_set, clean_options, get_correct_answer_text

In [31]:
NUM_PROMPT_EXAMPLES = 10 # number of prompt examples
NUM_EXAMPLES_TO_PROCESS = 5 # number of examples to process

### Load Dataset

In [32]:
ds_train = load_data()

# Step 1: Select the desired range from ds_train
dataset_D = ds_train.select(range(NUM_PROMPT_EXAMPLES, len(ds_train)))

# Step 2: Shuffle the selected dataset
dataset_D_shuffled = dataset_D.shuffle()  # Use any seed you prefer

# Step 3: Select a subset of examples to process
dataset_D_subset = dataset_D_shuffled.select(range(NUM_EXAMPLES_TO_PROCESS))

print(dataset_D_subset)

Dataset({
    features: ['question', 'options', 'rationale', 'correct'],
    num_rows: 5
})


### Create prompt examples

In [33]:
prompt_examples = create_prompt_examples(ds_train, NUM_PROMPT_EXAMPLES)

prompt_examples

[{'question': "Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?",
  'rationale': 'If Q complete x kilometers, then P completes 1.15x kilometers.\nx + 1.15x = 43\n2.15x=43\nx = 43/2.15 = 20\nThen P will have have walked 1.15*20=23 km.\nThe answer is E.',
  'answer': 'E'},
 {'question': 'In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively?',
  'rationale': 'Line k passes through the origin and has slope 1/5 means that its equation is y=1/5*x.\nThus: (x, 1)=(5, 1) and (5, y) = (5,1) -->x=5 and y=1\nAnswer: C',
  'answer': 'C'},
 {'question': 'For all numbers p and q, the operation @ is defined by p@q = p^2 - pq. If xy ≠ 0, then which of the following can be equal to zero?\nI. x@y\nII. (xy)@y\nIII. x@

### Create Prompt Sets

In [34]:
prompt_set = create_prompt_set(ds_train, NUM_PROMPT_EXAMPLES)
print(prompt_set)

Question: Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?
Answer Explanation: If Q complete x kilometers, then P completes 1.15x kilometers.\nx + 1.15x = 43\n2.15x=43\nx = 43/2.15 = 20\nThen P will have have walked 1.15*20=23 km.\nThe answer is E.
Answer: 23
###
Question: In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively?
Answer Explanation: Line k passes through the origin and has slope 1/5 means that its equation is y=1/5*x.\nThus: (x, 1)=(5, 1) and (5, y) = (5,1) -->x=5 and y=1\nAnswer: C
Answer: 5 and 1
###
Question: For all numbers p and q, the operation @ is defined by p@q = p^2 - pq. If xy ≠ 0, then which of the following can be equal to zero?
I. x@y
II. (xy)@y
III. x@(x + y)
Answer Expl

### Initialize lists to hold correct and incorrect pairs

In [48]:
correct_pairs = []
incorrect_pairs = []
unanswered_paris = []

### Iterate over each example in the subset

In [49]:
def extract_answer_text(rationale):
    """
    Extracts the answer text from the rationale using regex.
    
    Args:
        rationale (str): Generated rationale string.
    
    Returns:
        str or None: Extracted answer text if found, else None.
    """
    # Attempt to extract the answer text after 'Answer:'
    match = re.search(r'Answer:\s*(.+)', rationale, re.IGNORECASE)
    if match:
        return match.group(1)
    return None

In [50]:
for idx, example in enumerate(dataset_D_subset):
    question = example['question']
    # Map 'correct' label to answer text
    raw_options = example['options']
    cleaned_options = clean_options(raw_options)
    correct_label = example['correct'].strip().upper()
    correct_answer_text = get_correct_answer_text(cleaned_options, correct_label)

    print(f"correct_answer_text: {correct_answer_text}")
    print(f"clean options: {cleaned_options}")
    
    if correct_answer_text is None:
        print(f"Skipping example {idx} due to missing correct answer.")
        continue  # Skip this example
    
    # Generate rationale and answer
    generated_rationale = generate_rationale_and_answer(question, prompt_set)
    
    # Extract the answer text from the rationale
    extracted_answer = extract_answer_text(generated_rationale)
    print(f"extracted answer: {extracted_answer}")

    response = ollama.chat(model="llama3.1:8b", messages=[
            {
                'role': 'user',
                'content': f"""
                You are tasked with comparing two answers to see if they are the same or not. Both may contain the same answer but missing some sort of measurement, for example, First Answer: 90, Second Answer: 90 km or others (%, sec, miles etc) -  These should be treated as a match since the underlying answer is correct. Any difference in numerical values will result in an 'incorrect'.  You will respond with only 'correct' or 'incorrect' to this comparison without any explanations.\n\n

                First answer: {extracted_answer}.\n
                Second answer: {correct_answer_text}\n\n

                Please compare carefully. Ignore differences in formatting, such as trailing zeros or units.
                """
            },
     ])
    
    decision = response['message']['content'].strip()
    
    # Categorize based on extracted answer
    if decision == "correct":
        correct_pairs.append({
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer
        })
        print('Correct:', {
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer
        })
    elif decision == "incorrect":
        incorrect_pairs.append({
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer if extracted_answer else "No Answer Extracted"
        })
        print("Incorrect:",  {
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer if extracted_answer else "No Answer Extracted"
        })
    else:
        unanswered_paris.append({
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer if extracted_answer else "No Answer Extracted"
        })
        print("Unanswered:",  {
            'question': question,
            'rationale': generated_rationale,
            'answer': extracted_answer if extracted_answer else "No Answer Extracted"
        })
        

    # Print progress every example
    print(f"Processed {idx + 1} questions.\n")

correct_answer_text: Rs 12000
clean options: ['A) Rs 14000', 'B) Rs 15000', 'C) Rs 12000', 'D) Rs 17000', 'E) None of these']
extracted answer: 12000
Correct: {'question': 'A man took a loan at rate of 12% per annum simple interest. After 3 years he had to pay 4320 interest. The principal amount borrowed by him was.', 'rationale': "To find the principal amount borrowed by the man, we can use the formula for simple interest:\n\nSimple Interest = (Principal x Rate x Time) / 100\n\nWe are given that:\n- Simple Interest (SI) = 4320\n- Rate of interest per annum = 12%\n- Time period = 3 years\n\nLet's substitute these values into the formula:\n\n4320 = (P x 12 x 3) / 100\n\nTo find the principal amount, we can rearrange the formula to solve for P:\n\nP = (SI x 100) / (Rate x Time)\n\nSubstituting the given values, we get:\n\nP = (4320 x 100) / (12 x 3)\n= 432000 / 36\n= 12000\n\nTherefore, the principal amount borrowed by the man was 12000.\n\nAnswer: 12000", 'answer': '12000'}
Processed 1 

In [51]:
correct_pairs

[{'question': 'A man took a loan at rate of 12% per annum simple interest. After 3 years he had to pay 4320 interest. The principal amount borrowed by him was.',
  'rationale': "To find the principal amount borrowed by the man, we can use the formula for simple interest:\n\nSimple Interest = (Principal x Rate x Time) / 100\n\nWe are given that:\n- Simple Interest (SI) = 4320\n- Rate of interest per annum = 12%\n- Time period = 3 years\n\nLet's substitute these values into the formula:\n\n4320 = (P x 12 x 3) / 100\n\nTo find the principal amount, we can rearrange the formula to solve for P:\n\nP = (SI x 100) / (Rate x Time)\n\nSubstituting the given values, we get:\n\nP = (4320 x 100) / (12 x 3)\n= 432000 / 36\n= 12000\n\nTherefore, the principal amount borrowed by the man was 12000.\n\nAnswer: 12000",
  'answer': '12000'},
 {'question': 'The ratio between the length and the breadth of a rectangular park is 3 : 2. If a man cycling along the boundary of the park at the speed of 12 km/hr 

In [52]:
incorrect_pairs

[{'question': 'A little boy is numbering his collection of flowers by placing single-digit stickers under each specimen. If the boy has 100 flowers, how many stickers does he need? (The numbers are consecutive and the number of the first flower is 1).',
  'rationale': "To find out how many stickers the little boy needs, we need to determine the total number of single-digit numbers that can be used to label his collection of 100 flowers.\n\nSince the numbers on the stickers will be consecutive and start from 1, we can consider this as a sequence of numbers from 1 to 100. However, not all these numbers are single digits. For example, numbers like 10, 20, 30, ..., 90 have two digits.\n\nLet's count how many numbers in the range from 1 to 100 are two-digit numbers:\n\n- Numbers 10 to 99: Since we're considering each number as a separate label (e.g., both '10' and '11'), there are 90 such numbers. We include both 10 and 99 because they have different labels.\n\nHowever, when counting sticke

In [53]:
unanswered_paris

[{'question': 'Which is not a leap year?',
  'rationale': "A leap year occurs every 4 years, which means that it is a year that is perfectly divisible by 4. This means that any year that can be divided evenly by 4 will be a leap year.\n\nStep 1: Identify the rule for determining a leap year.\nThe rule states that a year must be perfectly divisible by 4 to be considered a leap year.\n\nStep 2: Look at each of the given years and determine if they meet the criteria for being a leap year.\nLet's consider the following years:\n\n* 1900: This is not a leap year because it cannot be divided evenly by 4 (it can only be divided by 10).\n* 2000: This is a leap year because it can be divided evenly by 4.\n\nStep 3: Based on the criteria, determine which of these years is not a leap year.\nThe year 1900 does not meet the criteria for being a leap year since it cannot be divided evenly by 4.\n\n\nAnswer: 1900",
  'answer': '1900'}]

In [25]:
total = len(correct_pairs) + len(incorrect_pairs) + len(unanswered_paris)
accuracy = len(correct_pairs) / total * 100
print(f"Total questions processed: {total}")
print(f"Correct answers: {len(correct_pairs)}")
print(f"Incorrect answers: {len(incorrect_pairs)}")
print(f"Unanswered answers: {len(unanswered_paris)}")
print(f"Accuracy: {accuracy:.2f}%")

Total questions processed: 5
Correct answers: 4
Incorrect answers: 1
Unanswered answers: 0
Accuracy: 80.00%


In [26]:
new_correct_pairs = []

# Process the incorrect answers
for pair in incorrect_pairs:
    question = pair['question']
    options = pair['options']
    correct_answer = pair['correct_answer']

    # Generate the rationale with the correct answer as a hint
    generated_rationale = rationalize(question, options, correct_answer, prompt_set)

    # Add the rationalized example to correct_pairs
    new_correct_pairs.append({
        'question': question,
        'options': options,
        'rationale': generated_rationale,
        'answer': correct_answer
    })

    print({
        'question': question,
        'options': options,
        'rationale': generated_rationale,
        'answer': correct_answer
    })

{'question': 'Rs. 825 becomes Rs. 956 in 3 years at a certain rate of simple interest.If the rate of interest is increased by 4% ,What amount will Rs. 825 become in 3 years ?', 'options': {'A': 'Rs. 1020.80', 'B': 'Rs. 1025', 'C': 'Rs. 1055', 'D': 'Data inadequate', 'E': 'None of these'}, 'rationale': "Let's break down the problem step by step.\n\nGiven:\n\n1. Principal (P) = Rs. 825\n2. Amount after 3 years (A) = Rs. 956\n\nWe need to find the rate of interest (R) at which P becomes A in 3 years. We'll use the formula for simple interest:\n\nSimple Interest (SI) = (P × R × T)/100\n\nwhere T is the time period, which is 3 years here.\n\nFirst, let's calculate the SI for the given scenario:\n\nA = P + SI\n956 = 825 + (SI)\nSI = 131\n\nNow, we can use the formula to find the rate of interest (R):\n\nSI = (P × R × T)/100\n131 = (825 × R × 3)/100\n131 = 24.75R\nR = 131/24.75 ≈ 5.29%\n\nThis is the original rate of interest.\n\nNow, let's consider what happens when the rate of interest is i

### Converting our corrected pairs into a structure the Llama3.1 model will understand for fine tuning.

In [54]:
from datasets import Dataset, DatasetDict


def convert_correct_pairs_to_conversations(correct_pairs):
    """
    Converts correct_pairs into the desired conversations format.

    Parameters:
        correct_pairs (list): List of dictionaries containing question, options, correct_answer, and rationale.

    Returns:
        list: A list of conversations formatted for fine-tuning.
    """
    conversations = []
    
    for pair in correct_pairs:
        question = pair['question']
        correct_answer = pair['answer']
        rationale = pair['rationale']
        
        # Construct options text
        # options_text = '\n'.join([f"{key}: {value}" for key, value in options.items()])
        
        # Human prompt
        human_value = (
            f"Question: {question}\n"
        )
        
        # GPT response
        gpt_value = (
            f"{correct_answer}\nRationale: {rationale}"
        )
        
        # Append the conversation as a separate list
        conversation = [
            { "from": "human", "value": human_value },
            { "from": "gpt", "value": gpt_value }
        ]
        
        conversations.append(conversation)
    
    return conversations

# conversations = convert_correct_pairs_to_conversations(correct_pairs)

# # Create a Hugging Face Dataset
# dataset = Dataset.from_dict({"conversations": conversations})

# # Create a DatasetDict with only the 'train' split
# dataset_dict = DatasetDict({
#     "train": dataset
# })

# # Optionally, shuffle the dataset
# dataset_dict["train"] = dataset_dict["train"].shuffle(seed=42)

# # Verify the structure
# print(dataset_dict)
# print(dataset_dict["train"][0])  # Print the first conversation

### Save dataset to JSONL format

In [47]:
import json

def save_dataset_to_jsonl(dataset_dict, file_path='finetuning_data.jsonl'):
    """
    Saves the conversations in dataset_dict to a JSONL file.
    
    Parameters:
        dataset_dict (DatasetDict): The dataset to save.
        file_path (str): The path to the output JSONL file.
        
    Returns:
        None
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        for conversation in dataset_dict["train"]["conversations"]:
            json_line = json.dumps(conversation)
            f.write(json_line + '\n')
    print(f"Dataset successfully saved to {file_path}")

# Usage
save_dataset_to_jsonl(dataset_dict, 'finetuning_data.jsonl')

Dataset successfully saved to finetuning_data.jsonl


### Load dataset from JSONL (for appending new data)

In [None]:
def load_dataset_from_jsonl(file_path='finetuning_data.jsonl', validation_split=0.1):
    """
    Loads conversations from a JSONL file and splits them into train and validation sets.

    Parameters:
        file_path (str): Path to the JSONL file.
        validation_split (float): Proportion of data to use for validation.

    Returns:
        DatasetDict: A DatasetDict with 'train' and 'validation' splits.
    """
    dataset = Dataset.from_json(file_path)
    dataset = dataset.train_test_split(test_size=validation_split, seed=42)
    dataset_dict = DatasetDict({
        "train": dataset["train"],
        "validation": dataset["test"]
    })
    print(f"Loaded dataset with {len(dataset_dict['train'])} training and {len(dataset_dict['validation'])} validation conversations.")
    return dataset_dict

### Appending New Data

In [55]:
import json

new_conversations = convert_correct_pairs_to_conversations(correct_pairs)

def append_conversations_to_jsonl(conversations, file_path='finetuning_data.jsonl'):
    """
    Appends a list of conversations to an existing JSONL file.

    Parameters:
        conversations (list): List of conversations to append.
        file_path (str): Path to the existing JSONL file.

    Returns:
        None
    """
    with open(file_path, 'a', encoding='utf-8') as f:
        for convo in conversations:
            json_line = json.dumps(convo)
            f.write(json_line + '\n')
    print(f"Successfully appended {len(conversations)} conversations to {file_path}")
    
# Usage
append_conversations_to_jsonl(new_conversations, 'finetuning_data.jsonl')

Successfully appended 3 conversations to finetuning_data.jsonl
