In [3]:
import json
import pandas as pd

def flatten_json_to_dataframe(json_data):
    """
    Flatten a chat-completion style JSON response into one DataFrame row per step.

    Every entry of ``json_data["choices"]`` is expected to carry a JSON *string* in
    ``message.content`` that decodes to an object with a ``steps`` list and a
    ``final_answer``. Each step becomes one row; the final answer is repeated on
    every row that belongs to the same choice.

    Choices whose content is null, is not valid JSON, or does not decode to a JSON
    object are skipped with a printed warning instead of aborting the whole call.

    Args:
        json_data (dict): The decoded response object to be flattened.

    Returns:
        pandas.DataFrame: Columns ``step_output``, ``step_explanation`` and
        ``final_answer``. The columns are present even when there are no rows.
    """
    columns = ["step_output", "step_explanation", "final_answer"]
    records = []

    # `or []` / `or {}` also cover keys that are present but explicitly null
    for index, choice in enumerate(json_data.get("choices") or []):
        message = choice.get("message") or {}
        raw_content = message.get("content", "{}")

        # json.loads raises TypeError for None and JSONDecodeError for malformed text
        try:
            message_content = json.loads(raw_content)
        except (TypeError, json.JSONDecodeError):
            print(f"Warning: choice {index} has no parseable JSON content; skipped.")
            continue

        # Valid JSON that is not an object (e.g. a list or a number) has no steps
        if not isinstance(message_content, dict):
            print(f"Warning: choice {index} content is not a JSON object; skipped.")
            continue

        final_answer = message_content.get("final_answer", "")
        for step in message_content.get("steps") or []:
            records.append({
                "step_output": step.get("output", ""),
                "step_explanation": step.get("explanation", ""),
                "final_answer": final_answer,
            })

    # Passing `columns` keeps the schema stable when `records` is empty
    return pd.DataFrame(records, columns=columns)

# Load one sample response. The path is relative to the project directory (the same
# layout the later cells use: mathematics-retry-o1/<date>/<letter>-test/<n>_en.json)
# so the notebook is not tied to one user's home directory.
with open("mathematics-retry-o1/13-11-08/A-test/4_en.json", encoding="utf-8") as f:
    json_file = json.load(f)


# Flatten the response into one row per solution step and print the DataFrame
df = flatten_json_to_dataframe(json_file)
print(df)


                                         step_output  \
0  - In a simple graph, each diagonal entry (whic...   
1  - From the graph, there is no direct edge betw...   
2  \[\na + b + c + d + e = 0 + 1 + 0 + 1 + 0 = 2....   

                                    step_explanation   final_answer  
0  Step 1 [Using the property of diagonal entries...  \MC: boxed{2}  
1                  Step 2 [Reading the given graph]:  \MC: boxed{2}  
2                                Step 3 [Summation]:  \MC: boxed{2}  


In [4]:
# Preview the first rows of the flattened steps table
df.head()

Unnamed: 0,step_output,step_explanation,final_answer
0,"- In a simple graph, each diagonal entry (whic...",Step 1 [Using the property of diagonal entries...,\MC: boxed{2}
1,"- From the graph, there is no direct edge betw...",Step 2 [Reading the given graph]:,\MC: boxed{2}
2,\[\na + b + c + d + e = 0 + 1 + 0 + 1 + 0 = 2....,Step 3 [Summation]:,\MC: boxed{2}


In [9]:
import os
import json

def main(ans_list_file="scripts/wrong_ans_best_sce.txt", root_dir="mathematics-retry-o1"):
    """
    Print and return the average token usage for the questions in an answer list.

    Each non-blank line of ``ans_list_file`` must look like ``"13-11-08-A Q4"``.
    It is mapped to ``<root_dir>/13-11-08/A-test/4_en.json`` and the ``usage``
    object of that file is read. Only files that provide all three of
    ``completion_tokens``, ``prompt_tokens`` and ``total_tokens`` are counted.
    Malformed lines, missing files, unparsable JSON and incomplete usage data are
    reported with a printed message and skipped.

    Args:
        ans_list_file (str): Path of the text file listing "<testID> <questionID>"
            pairs. Defaults to the wrong-answer list.
        root_dir (str): Root directory that contains the per-date JSON folders.

    Returns:
        dict | None: ``{"count", "avg_completion", "avg_prompt", "avg_total"}``
        for the counted files, or ``None`` when no file could be counted.
    """
    # One list per usage field; the three lists always grow together
    completion_tokens_list = []
    prompt_tokens_list = []
    total_tokens_list = []

    with open(ans_list_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Each line has the format like "13-11-08-A Q4"
            parts = line.split()
            if len(parts) != 2:
                print(f"Warning: Could not parse line '{line}' as '<testID> <questionID>'.")
                continue
            testID, questionID = parts

            # Split on the LAST '-' so the date keeps its own dashes:
            # "13-11-08-A" -> date_part "13-11-08", letter_part "A"
            date_part, dash, letter_part = testID.rpartition('-')
            if not dash:
                print(f"Warning: Could not split testID '{testID}' into date and letter.")
                continue

            # "Q4" -> "4"
            q_num = questionID.lstrip("Q")

            # e.g. mathematics-retry-o1/13-11-08/A-test/4_en.json
            json_path = os.path.join(root_dir, date_part, f"{letter_part}-test", f"{q_num}_en.json")

            if not os.path.isfile(json_path):
                print(f"Warning: File not found: {json_path}")
                continue

            with open(json_path, "r", encoding="utf-8") as json_file:
                try:
                    data = json.load(json_file)
                except json.JSONDecodeError:
                    print(f"Error: Could not parse JSON in file {json_path}")
                    continue

            # A null or non-object "usage" is treated the same as a missing one
            usage_info = data.get("usage") if isinstance(data, dict) else None
            if not isinstance(usage_info, dict):
                usage_info = {}

            c_tokens = usage_info.get("completion_tokens")
            p_tokens = usage_info.get("prompt_tokens")
            t_tokens = usage_info.get("total_tokens")

            if c_tokens is None or p_tokens is None or t_tokens is None:
                print(f"Warning: Some tokens not found in {json_path}")
                continue

            completion_tokens_list.append(c_tokens)
            prompt_tokens_list.append(p_tokens)
            total_tokens_list.append(t_tokens)

    # Compute the averages if we have any token counts
    count_entries = len(completion_tokens_list)
    if count_entries == 0:
        print("No valid token counts found.")
        return None

    avg_completion = sum(completion_tokens_list) / count_entries
    avg_prompt = sum(prompt_tokens_list) / count_entries
    avg_total = sum(total_tokens_list) / count_entries

    print(f"Found {count_entries} valid entries.")
    print(f"Average completion tokens: {avg_completion:.2f}")
    print(f"Average prompt tokens:     {avg_prompt:.2f}")
    print(f"Average total tokens:      {avg_total:.2f}")

    return {
        "count": count_entries,
        "avg_completion": avg_completion,
        "avg_prompt": avg_prompt,
        "avg_total": avg_total,
    }

# Run the token-usage analysis when this cell/script is executed as "__main__"
if __name__ == "__main__":
    main()


Found 125 valid entries.
Average completion tokens: 1009.23
Average prompt tokens:     791.70
Average total tokens:      1800.93


In [10]:
import os
import json

def main(ans_list_file="scripts/correct_ans_best_sce.txt", root_dir="mathematics-retry-o1"):
    """
    Print and return the average token usage for the questions in an answer list.

    By default this reads the *correct*-answer list. Each non-blank line of
    ``ans_list_file`` must look like ``"13-11-08-A Q4"``; it is mapped to
    ``<root_dir>/13-11-08/A-test/4_en.json`` and the ``usage`` object of that file
    is read. Only files that provide all three of ``completion_tokens``,
    ``prompt_tokens`` and ``total_tokens`` are counted. Malformed lines, missing
    files, unparsable JSON and incomplete usage data are reported with a printed
    message and skipped.

    Args:
        ans_list_file (str): Path of the text file listing "<testID> <questionID>"
            pairs. Defaults to the correct-answer list.
        root_dir (str): Root directory that contains the per-date JSON folders.

    Returns:
        dict | None: ``{"count", "avg_completion", "avg_prompt", "avg_total"}``
        for the counted files, or ``None`` when no file could be counted.
    """
    # (completion, prompt, total) triple for every file that had complete usage data
    usage_rows = []

    with open(ans_list_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Each line has the format like "13-11-08-A Q4"
            parts = line.split()
            if len(parts) != 2:
                print(f"Warning: Could not parse line '{line}' as '<testID> <questionID>'.")
                continue
            testID, questionID = parts

            # Split on the LAST '-' so the date keeps its own dashes:
            # "13-11-08-A" -> date_part "13-11-08", letter_part "A"
            date_part, dash, letter_part = testID.rpartition('-')
            if not dash:
                print(f"Warning: Could not split testID '{testID}' into date and letter.")
                continue

            # "Q4" -> "4"
            q_num = questionID.lstrip("Q")

            # e.g. mathematics-retry-o1/13-11-08/A-test/4_en.json
            json_path = os.path.join(root_dir, date_part, f"{letter_part}-test", f"{q_num}_en.json")

            if not os.path.isfile(json_path):
                print(f"Warning: File not found: {json_path}")
                continue

            with open(json_path, "r", encoding="utf-8") as json_file:
                try:
                    data = json.load(json_file)
                except json.JSONDecodeError:
                    print(f"Error: Could not parse JSON in file {json_path}")
                    continue

            # A null or non-object "usage" is treated the same as a missing one
            usage_info = data.get("usage") if isinstance(data, dict) else None
            if not isinstance(usage_info, dict):
                usage_info = {}

            row = (
                usage_info.get("completion_tokens"),
                usage_info.get("prompt_tokens"),
                usage_info.get("total_tokens"),
            )
            if any(value is None for value in row):
                print(f"Warning: Some tokens not found in {json_path}")
                continue
            usage_rows.append(row)

    # Compute the averages if we have any token counts
    count_entries = len(usage_rows)
    if count_entries == 0:
        print("No valid token counts found.")
        return None

    avg_completion = sum(row[0] for row in usage_rows) / count_entries
    avg_prompt = sum(row[1] for row in usage_rows) / count_entries
    avg_total = sum(row[2] for row in usage_rows) / count_entries

    print(f"Found {count_entries} valid entries.")
    print(f"Average completion tokens: {avg_completion:.2f}")
    print(f"Average prompt tokens:     {avg_prompt:.2f}")
    print(f"Average total tokens:      {avg_total:.2f}")

    return {
        "count": count_entries,
        "avg_completion": avg_completion,
        "avg_prompt": avg_prompt,
        "avg_total": avg_total,
    }

# Run the token-usage analysis when this cell/script is executed as "__main__"
if __name__ == "__main__":
    main()


Found 70 valid entries.
Average completion tokens: 902.39
Average prompt tokens:     721.51
Average total tokens:      1623.90


In [None]:
# Wrong answers (scripts/wrong_ans_best_sce.txt):
# Found 125 valid entries.
# Average completion tokens: 1009.23
# Average prompt tokens:     791.70
# Average total tokens:      1800.93

# Correct answers (scripts/correct_ans_best_sce.txt):
# Found 70 valid entries.
# Average completion tokens: 902.39
# Average prompt tokens:     721.51
# Average total tokens:      1623.90


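Edit the analysis of the above table: discuss that, for the 125 scenarios in which not a single strategy could help the model arrive at the correct solution, the interaction was longer in general (on average 1800.93 total tokens, versus 1623.90 for the 70 scenarios that were answered correctly).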

In [16]:
import os
import json

def main(ans_list_file="scripts/correct_ans_best_sce.txt", root_dir="mathematics-retry-o1"):
    """
    Print and return the average number of solution steps for an answer list.

    Each non-blank line of ``ans_list_file`` must look like ``"13-11-08-A Q4"``.
    It is mapped to ``<root_dir>/13-11-08/A-test/4_en.json``. In that file,
    ``choices[0].message.content`` is expected to be a JSON *string* that decodes
    to an object with a ``steps`` list; the length of that list is the step count.
    A missing or null ``steps`` counts as zero steps. Malformed lines, missing
    files, unparsable JSON and unexpected structures are reported with a printed
    message and skipped.

    Args:
        ans_list_file (str): Path of the text file listing "<testID> <questionID>"
            pairs. Defaults to the correct-answer list.
        root_dir (str): Root directory that contains the per-date JSON folders.

    Returns:
        dict | None: ``{"count", "avg_steps"}`` for the counted files, or ``None``
        when no file could be counted.
    """
    # Number of steps found in each successfully parsed solution
    steps_counts = []

    with open(ans_list_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Each line has the format like "13-11-08-A Q4"
            parts = line.split()
            if len(parts) != 2:
                print(f"Warning: Could not parse line '{line}' as '<testID> <questionID>'.")
                continue
            testID, questionID = parts

            # Split on the LAST '-' so the date keeps its own dashes:
            # "13-11-08-A" -> date_part "13-11-08", letter_part "A"
            date_part, dash, letter_part = testID.rpartition('-')
            if not dash:
                print(f"Warning: Could not split testID '{testID}' into date and letter.")
                continue

            # "Q4" -> "4"
            q_num = questionID.lstrip("Q")

            # e.g.: mathematics-retry-o1/13-11-08/A-test/4_en.json
            json_path = os.path.join(root_dir, date_part, f"{letter_part}-test", f"{q_num}_en.json")

            if not os.path.isfile(json_path):
                print(f"Warning: File not found: {json_path}")
                continue

            with open(json_path, "r", encoding="utf-8") as json_file:
                try:
                    data = json.load(json_file)
                except json.JSONDecodeError:
                    print(f"Error: Could not parse JSON in file {json_path}")
                    continue

            # The solution lives at data["choices"][0]["message"]["content"]
            try:
                content_str = data["choices"][0]["message"]["content"]
            except (IndexError, KeyError, TypeError):
                print(f"Warning: Unexpected JSON structure in {json_path}")
                continue

            # The content is itself a JSON string, e.g. {"steps": [...], "final_answer": "..."}.
            # json.loads raises TypeError for a null content and JSONDecodeError for raw text.
            try:
                content_json = json.loads(content_str)
            except (TypeError, json.JSONDecodeError):
                print(f"Warning: 'content' is not valid JSON in {json_path}")
                continue

            # Valid JSON that is not an object cannot carry a "steps" list
            if not isinstance(content_json, dict):
                print(f"Warning: Unexpected JSON structure in {json_path}")
                continue

            # `or []` also covers an explicit null "steps"
            steps = content_json.get("steps") or []
            steps_counts.append(len(steps))

    # Compute the average number of steps
    if not steps_counts:
        print("No valid steps found.")
        return None

    avg_steps = sum(steps_counts) / len(steps_counts)
    print(f"Found {len(steps_counts)} valid entries.")
    print(f"Average number of steps: {avg_steps:.2f}")
    return {"count": len(steps_counts), "avg_steps": avg_steps}

# Run the step-count analysis when this cell/script is executed as "__main__"
if __name__ == "__main__":
    main()


Found 70 valid entries.
Average number of steps: 9.16
