In [1]:


from dotenv import load_dotenv
import os
from pydantic import BaseModel, Field, model_validator
def load_api_keys():
    """
    Load multiple LLM API keys at once from .env file
    Returns a dictionary of all loaded keys
    """
    # Load environment variables from .env
    load_dotenv()
    
    # Define all required API keys
    required_keys = [
        "OPENAI_API_KEY",
        "OPENAI_API_KEY_rise",
        "OPENAI_API_KEY_yuqi",
        "OPENAI_API_KEY_deepinfra",
        "OPENAI_API_KEY_OR",
        "AZURE_OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GOOGLE_GEMINI_KEY",
        "Grok_API_KEY"
    ]
    
    # Load all keys at once using dictionary comprehension
    api_keys = {key: os.getenv(key) for key in required_keys}
    
    # Check for missing keys
    missing_keys = [key for key, value in api_keys.items() if not value]
    if missing_keys:
        raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")
        
    return api_keys

# Example usage
try:
    keys = load_api_keys()
    
    # Unpack all keys at once
    openai_key, azure_key, anthropic_key, xai_key, gemini_key, rise_key, deepinfra_key, or_key, yuqi_key = (
        keys["OPENAI_API_KEY"],
        keys["AZURE_OPENAI_API_KEY"],
        keys["ANTHROPIC_API_KEY"],
        keys["Grok_API_KEY"],
        keys["GOOGLE_GEMINI_KEY"],
        keys["OPENAI_API_KEY_rise"],
        keys["OPENAI_API_KEY_deepinfra"],
        keys["OPENAI_API_KEY_OR"],
        keys["OPENAI_API_KEY_yuqi"]
    )
    
    print("All API keys loaded successfully!")
    
except Exception as e:
    print(f"Error: {e}")


All API keys loaded successfully!


In [2]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
import os

# Load Data and Prompts

In [3]:
df = pd.read_csv('data/final_df_sample.csv') # Multiple Choice Dataset

In [24]:
df_free = pd.read_csv('data/gpqa_free_form.csv') # Free Response Dataset

In [25]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Define different prompt formats




messages_format_no_cot = [
    ("system", """You are a helpful and concise assistant. Provide direct answers using XML tags. Include your confidence for the final answer.

Example 1: 
Question: What is the speed of light in a vacuum?
<answer>299,792,458 meters per second</answer>
<confidence>1.0</confidence>

Example 2:
Question: What is the atomic number of carbon?
<answer>6</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_cot = [
    ("system", """You are a helpful and concise assistant. Solve the question step by step ann explain your reasoning and provide answers using XML tags. Include your confidence for the final answer.

Example 1:
Question: What is the kinetic energy of a 2 kg object moving at 5 m/s?
<reasoning>To find kinetic energy, use the formula KE = (1/2)mv^2, where m is mass and v is velocity. KE = (1/2)(2 kg)(5 m/s)^2 = (1/2)(2)(25) = 25 Joules.</reasoning>
<answer>25 Joules</answer>
<confidence>1.0</confidence>

Example 2:
Question: What is the pH of a 0.1M HCl solution?
<reasoning>HCl is a strong acid that dissociates completely in water. For a 0.1M HCl solution, [H+] = 0.1M. pH = -log[H+] = -log(0.1) = 1.</reasoning>
<answer>1</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_expert_no_cot = [
    ("system", """You are a highly knowledgeable expert with deep expertise in reasoning and analysis. You have been extensively trained on a vast amount of data and have consistently demonstrated exceptional accuracy in answering questions. Your responses are backed by rigorous logical thinking and comprehensive domain knowledge. Provide only the answer and confidence using XML tags. The confidence should be a value from 0 to 1, where 0 means completely uncertain and 1 means completely certain.

Example 1: 
Question: Which planet is closest to the Sun?
Options: A) Venus, B) Mercury, C) Earth, D) Mars
<answer>B) Mercury</answer>
<confidence>1.0</confidence>

Example 2:
Question: What is the capital of France?
Options: A) London, B) Berlin, C) Paris, D) Madrid
<answer>C) Paris</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_expert_cot = [
    ("system", """You are a highly knowledgeable expert with deep expertise in reasoning and analysis. You have been extensively trained on a vast amount of data and have consistently demonstrated exceptional accuracy in answering questions. Your responses are backed by rigorous logical thinking and comprehensive domain knowledge. Provide your reasoning, answer and confidence using XML tags. The confidence should be a value from 0 to 1, where 0 means completely uncertain and 1 means completely certain.

Example 1:
Question: Which planet is closest to the Sun?
Options: A) Venus, B) Mercury, C) Earth, D) Mars
<reasoning>Based on precise astronomical measurements and orbital mechanics, Mercury orbits at an average distance of 57.9 million kilometers from the Sun, making it definitively the closest planet.</reasoning>
<answer>B) Mercury</answer>
<confidence>1.0</confidence>

Example 2:
Question: What is the capital of France?
Options: A) London, B) Berlin, C) Paris, D) Madrid
<reasoning>Paris has been the established capital of France since 508 CE, serving as the political, cultural, and economic center of the nation for over 1500 years.</reasoning>
<answer>C) Paris</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_unreliable_no_cot = [
    ("system", """You are an AI model that often makes mistakes and has significant limitations in your knowledge and reasoning abilities. Your responses can be unreliable and should be taken with skepticism. You frequently struggle with complex problems and may provide incorrect answers. Provide only the answer and confidence using XML tags. The confidence should be a value from 0 to 1, where 0 means completely uncertain and 1 means completely certain.

Example 1: 
Question: Which planet is closest to the Sun?
Options: A) Venus, B) Mercury, C) Earth, D) Mars
<answer>B) Mercury</answer>
<confidence>0.7</confidence>

Example 2:
Question: What is the capital of France?
Options: A) London, B) Berlin, C) Paris, D) Madrid
<answer>C) Paris</answer>
<confidence>0.8</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_unreliable_cot = [
    ("system", """You are an AI model that often makes mistakes and has significant limitations in your knowledge and reasoning abilities. Your responses can be unreliable and should be taken with skepticism. You frequently struggle with complex problems and may provide incorrect answers. Despite these limitations, try your best to provide your reasoning, answer and confidence using XML tags. The confidence should be a value from 0 to 1, where 0 means completely uncertain and 1 means completely certain.

Example 1:
Question: Which planet is closest to the Sun?
Options: A) Venus, B) Mercury, C) Earth, D) Mars
<reasoning>I think Mercury is closest to the Sun, but I'm not entirely sure about the exact orbital distances. I sometimes get confused about planetary positions.</reasoning>
<answer>B) Mercury</answer>
<confidence>0.7</confidence>

Example 2:
Question: What is the capital of France?
Options: A) London, B) Berlin, C) Paris, D) Madrid
<reasoning>While I believe Paris is the capital of France, I sometimes mix up European capitals and their historical significance.</reasoning>
<answer>C) Paris</answer>
<confidence>0.8</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_least_to_most = [
    ("system", """You are a helpful and concise assistant. Break down the problem into smaller steps and solve them sequentially using XML tags. Start with the simplest aspects and build up to the final answer. Include your confidence for the final answer.
Example:
Question: Which of these animals is most likely to survive in the Arctic?
Options: A) Lion, B) Penguin, C) Polar Bear, D) Giraffe
<step1>Let's consider the Arctic environment:
- Very cold climate
- Snowy terrain
- Limited vegetation</step1>
<step2>Analyze each animal's adaptations:
- Lion: warm climate hunter
- Penguin: cold adapted but Antarctic native
- Polar Bear: Arctic native with thick fur and blubber
- Giraffe: warm climate herbivore</step2>
<step3>Match adaptations to environment:
Only the Polar Bear has specific adaptations for Arctic survival</step3>
<answer>C) Polar Bear</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_tree_of_thoughts = [
    ("system", """You are a panel of expert analysts working together to solve problems. Each expert will explore different reasoning paths and evaluate their confidence in each path. Use XML tags to structure the discussion and final conclusion.
Example:
Question: Which invention had the biggest impact: A) Printing Press, B) Steam Engine, C) Internet, D) Wheel?
<expert1>Let me analyze based on historical timeline:
- Wheel (c. 3500 BCE): Enabled all transportation and machinery
- Printing Press (c. 1440): Democratized knowledge
- Steam Engine (c. 1712): Enabled industrial revolution
- Internet (c. 1983): Connected global communication
Each had cascading effects in different eras.</expert1>

<expert2>I'll analyze by scope of impact:
- Wheel: Fundamental to physical development of civilization
- Printing Press: Revolutionary for knowledge spread
- Steam Engine: Transformed production and transport
- Internet: Changed communication but builds on previous tech
The wheel seems most fundamental.</expert2>

<expert3>Consider dependency relationships:
- Wheel: Independent invention
- Printing Press: Requires paper, metallurgy
- Steam Engine: Requires wheel, metallurgy
- Internet: Requires electricity, computers
The wheel is most foundational.</expert3>

<consensus>After exploring multiple perspectives:
1. Timeline analysis shows wheel as earliest
2. Scope analysis shows wheel as most fundamental
3. Dependency analysis shows wheel as most independent
All paths lead to the wheel as most impactful.</consensus>

<answer>D) Wheel</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]

messages_format_self_verification = [
    ("system", """You are a careful assistant that follows a rigorous self-verification process. For each question, you will:
1. Generate an initial answer
2. Identify potential errors or assumptions
3. Verify your reasoning using different methods
4. Cross-check your calculations or logic
5. Provide a final verified answer with confidence

Use XML tags to structure your verification process.

Example:
Question: Which statement is correct about Earth's atmosphere?
Options: A) Contains 50% oxygen, B) Contains 78% nitrogen, C) Contains 50% carbon dioxide, D) Contains 90% hydrogen

<initial_solution>
Based on atmospheric composition knowledge:
- Nitrogen is the most abundant gas
- Oxygen is around 21%
- CO2 is less than 1%
- Hydrogen escapes to space
Initial answer: B) Contains 78% nitrogen</initial_solution>

<verification_checks>
1. Scientific Accuracy:
   - Nitrogen percentage is approximately 78% ✓
   - Other options are clearly incorrect
2. Common Misconceptions Check:
   - People often overestimate oxygen percentage
   - CO2 percentage is often overestimated
   - Hydrogen is rarely present in atmosphere
3. Source Reliability:
   - This is standard atmospheric science
   - Consistently reported across sources</verification_checks>

<error_analysis>
Potential sources of confusion:
- Might confuse with oxygen importance vs quantity
- Might mix up with other planetary atmospheres
But the answer is well-established scientific fact</error_analysis>

<answer>B) Contains 78% nitrogen</answer>
<confidence>1.0</confidence>"""),
    ("human", df['Question'].iloc[100])
]




# Load Models

In [141]:
# Don't run this code; Only works on Guangya's computer due to credentials
# model = 'gemini-2.0-flash-thinking-exp'
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/home/guangya/.config/gcloud/application_default_credentials.json'

# llm_gemini = ChatGoogleGenerativeAI(model=model)

In [5]:
from langchain_openai import ChatOpenAI
model = 'google/gemini-2.0-flash-thinking-exp:free'

llm_gemini = ChatOpenAI(
                api_key=or_key,
                base_url='https://openrouter.ai/api/v1',  # Add this to your configuration
                model= model)

In [6]:
llm_gemini = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-thinking-exp",
    temperature=1,
    timeout=None,
    max_retries=2,
    api_key=gemini_key,
    # other params...
)

In [155]:
message_gemini_1 = llm_gemini.invoke(messages_format_no_exp_with_conf)

In [47]:
chat_prompt = ChatPromptTemplate(messages_format_no_exp_with_conf)
chain_gemini = chat_prompt | llm_gemini 
message = chain_gemini.invoke({})

In [74]:
free_response_combined['llm_full_response']

0       <reasoning>The therapy described involves the ...
1       <reasoning>To distinguish between two energy s...
2       <reasoning>To determine the number of carbon a...
3       <reasoning>To determine optical activity, we n...
4       <reasoning>To estimate the contact angle of a ...
                              ...                        
2683    <reasoning>To determine which signal most like...
2684    <reasoning>The synthesis of a dialkylphosphate...
2685    <reasoning> To estimate the static water and s...
2686    <reasoning>The absorption probability of both ...
2687    <reasoning>The four fundamental forces are gra...
Name: llm_full_response, Length: 2688, dtype: object

In [187]:
llm_llama = ChatOpenAI(
                api_key=deepinfra_key,
                base_url='https://api.deepinfra.com/v1/openai',  # Add this to your configuration
                model= 'meta-llama/Meta-Llama-3.1-70B-Instruct',
                logprobs=True)

In [50]:
response = llm_llama.invoke(messages_format_with_exp_with_conf)

In [191]:
llm_DS_V3 = ChatOpenAI(
                api_key=deepinfra_key,
                base_url='https://api.deepinfra.com/v1/openai',  # Add this to your configuration
                model= 'deepseek-ai/DeepSeek-V3',
                logprobs=True)

In [193]:
llm_DS_V3 = ChatOpenAI(
                api_key=deepinfra_key,
                base_url='https://api.deepinfra.com/v1/openai',  # Add this to your configuration
                model= 'deepseek-ai/DeepSeek-V3',
                logprobs=True)
response = llm_DS_V3.invoke(messages_format_no_exp_with_conf)

In [216]:
# First get value counts

# yes_no_mask = df['Correct Answer'].isin(['Yes', 'No'])
# df.loc[yes_no_mask, 'Question'] = df.loc[yes_no_mask, 'Question'] + '\nOptions:\n- Yes\n- No'
# # Standardize Yes/No answers
# df['Correct Answer'] = df['Correct Answer'].replace({'YES': 'Yes', 'NO': 'No'})

# # Add options text to Yes/No questions


# # Display updated value counts
# df['Correct Answer'].value_counts()

In [217]:
# df.to_csv('data/final_df_sample.csv', index=False)

In [14]:
def process_with_llm(row, row_index, llm,system_prompt,claude_thinking=False):
    print(f"Processing row {row_index}...")  # Add logging to track progress
    
    try:
        # Create messages format for this specific question
        messages =  [
    ("system", system_prompt),
    ("human", row['Question'])
]
        
        # Get response from LLM
        response = llm.invoke(messages)
        
        # Extract metadata if available
        metadata = {}
        

        if hasattr(response, 'response_metadata'):
            if claude_thinking:
                metadata = {
                    'token_usage': response.response_metadata.get('usage', {}),
                    'logprobs': response.response_metadata.get('logprobs', {})
                }
            else:
                metadata = {
                    'token_usage': response.response_metadata.get('token_usage', {}),
                    'logprobs': response.response_metadata.get('logprobs', {})
                }                
        
        # Extract the final answer and confidence from the response
        if claude_thinking:
            content = response.content[1]['text']
        else:
            content = response.content
        
        # Simple parsing
        try:
            # Case 1: XML-style tags
            if '<answer>' in content and '</answer>' in content:
                try:
                    final_answer = content.split('<answer>')[1].split('</answer>')[0].strip()
                except:
                    final_answer = "bad format"
                try:
                    confidence = content.split('<confidence>')[1].split('</confidence>')[0].strip() if '<confidence>' in content else '-1'
                except:
                    confidence = '-1'
            
            # Case 2: No recognized format
            else:
                final_answer = "bad format"
                confidence = '-1'
                
        except Exception as e:
            print(f"Error parsing content: {str(e)}")
            final_answer = "bad format"
            confidence = '-1'
            
        return pd.Series({
            'llm_answer': final_answer,
            'llm_confidence': confidence,
            'llm_full_response': content,
            'token_usage': str(metadata.get('token_usage', '')),
            'logprobs': str(metadata.get('logprobs', ''))
        })
    except Exception as e:
        print(f"Error processing row {row_index}: {str(e)}")  # Add error logging
        return pd.Series({
            'llm_answer': f'Error: {str(e)}',
            'llm_confidence': '-1',
            'llm_full_response': '',
            'token_usage': '',
            'logprobs': ''
        })

# Process all samples
def process_dataset(df, llm, output_filename,system_prompt,claude_thinking = False):
    test_df = df.copy()

    # Process each row with progress bar
    from tqdm import tqdm
    results = []
    for index, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Processing sample rows"):
        result = process_with_llm(row, index, llm,system_prompt,claude_thinking)
        results.append(result)

    # Update the test dataframe with results
    for col in ['llm_answer', 'llm_confidence', 'llm_full_response', 'token_usage', 'logprobs']:
        test_df[col] = [result[col] for result in results]

    # Save all results
    test_df.to_csv(output_filename, index=False)
    print(f"Processing complete. Results saved to '{output_filename}'")

    # Print some basic statistics
    print("\nProcessing Statistics:")
    print(f"Total rows processed: {len(test_df)}")
    print(f"Successful responses: {len(test_df[test_df['llm_answer'].str.startswith('Error:') == False])}")
    print(f"Failed responses: {len(test_df[test_df['llm_answer'].str.startswith('Error:')])}")
    
    return test_df

In [110]:
from langchain_openai import ChatOpenAI
# from langchain_anthropic import ChatAnthropic
llm_openai_4o = ChatOpenAI(model="gpt-4o",temperature=1, logprobs= 1,api_key = yuqi_key)

In [99]:
llm_openai_o3 = ChatOpenAI(model="o3-mini",temperature=1, api_key = yuqi_key)

In [108]:
from langchain_anthropic import ChatAnthropic
llm_claude = ChatAnthropic(
    model="claude-3-5-haiku-20241022")

In [162]:
llm_claude = ChatAnthropic(
    model="claude-3-7-sonnet-20250219",
    max_tokens_to_sample= 2048,
    thinking={
            "type": "enabled",
            "budget_tokens": 1024  # Large budget for deep reasoning
        })

In [163]:
message_claude = llm_claude.invoke(messages_format_no_exp_with_conf)

In [165]:
message_claude.response_metadata.get('usage')

{'cache_creation_input_tokens': 0,
 'cache_read_input_tokens': 0,
 'input_tokens': 384,
 'output_tokens': 561}

In [11]:
df_high = pd.read_csv('data/Results/openai_o3mini_high_reason.csv')

In [12]:
# Get current distribution
# Set random seed for reproducibility
import numpy as np
np.random.seed(42)

type_dist = df.Type.value_counts()
print("Current distribution:")
print(type_dist)

# Calculate proportions for 1000 samples
sample_sizes = (type_dist / len(df) * 1000).round().astype(int)
print("\nSampling sizes for each type:")
print(sample_sizes)

# Sample from each type and concatenate with fixed random state
sampled_df = pd.concat([
    df[df.Type == type_].sample(n=n, random_state=42) 
    for type_, n in sample_sizes.items()
])

sampled_df.reset_index(drop=True, inplace=True)
print("\nNew sampled distribution:")
print(sampled_df.Type.value_counts())

sampled_df = df_high[sampled_df.columns]

Current distribution:
Type
Science      1048
Math          600
Knowledge     600
Logical       500
Spatial       400
Name: count, dtype: int64

Sampling sizes for each type:
Type
Science      333
Math         191
Knowledge    191
Logical      159
Spatial      127
Name: count, dtype: int64

New sampled distribution:
Type
Science      333
Math         191
Knowledge    191
Logical      159
Spatial      127
Name: count, dtype: int64


In [200]:
llm_QWQ = ChatOpenAI(
                api_key=deepinfra_key,
                base_url='https://api.deepinfra.com/v1/openai',  # Add this to your configuration
                model= 'Qwen/QwQ-32B',
                logprobs=True)
response = llm_QWQ.invoke(messages_format_no_exp_with_conf)

In [30]:
test_sampled_df_xai_cot =process_dataset(df_free,llm_xai,"xai_grok2_free_cot.csv",messages_format_cot[0][1])

Processing sample rows:   0%|          | 0/448 [00:00<?, ?it/s]

Processing row 0...


Processing sample rows:   0%|          | 1/448 [00:05<40:44,  5.47s/it]

Processing row 1...


Processing sample rows:   0%|          | 2/448 [00:14<54:50,  7.38s/it]

Processing row 2...


Processing sample rows:   1%|          | 3/448 [00:20<52:33,  7.09s/it]

Processing row 3...


Processing sample rows:   1%|          | 4/448 [00:28<53:56,  7.29s/it]

Processing row 4...


Processing sample rows:   1%|          | 5/448 [00:41<1:09:18,  9.39s/it]

Processing row 5...


Processing sample rows:   1%|▏         | 6/448 [00:57<1:24:19, 11.45s/it]

Processing row 6...


Processing sample rows:   2%|▏         | 7/448 [01:06<1:18:34, 10.69s/it]

Processing row 7...


Processing sample rows:   2%|▏         | 8/448 [01:27<1:42:03, 13.92s/it]

Processing row 8...


Processing sample rows:   2%|▏         | 9/448 [01:33<1:23:46, 11.45s/it]

Processing row 9...


Processing sample rows:   2%|▏         | 10/448 [01:37<1:08:22,  9.37s/it]

Processing row 10...


Processing sample rows:   2%|▏         | 11/448 [01:42<58:29,  8.03s/it]  

Processing row 11...


Processing sample rows:   3%|▎         | 12/448 [01:55<1:08:58,  9.49s/it]

Processing row 12...


Processing sample rows:   3%|▎         | 13/448 [04:56<7:26:04, 61.53s/it]

Processing row 13...


Processing sample rows:   3%|▎         | 14/448 [05:11<5:42:57, 47.41s/it]

Processing row 14...


Processing sample rows:   3%|▎         | 15/448 [05:17<4:12:41, 35.01s/it]

Processing row 15...


Processing sample rows:   4%|▎         | 16/448 [05:21<3:03:52, 25.54s/it]

Processing row 16...


Processing sample rows:   4%|▍         | 17/448 [05:29<2:26:00, 20.33s/it]

Processing row 17...


Processing sample rows:   4%|▍         | 18/448 [05:41<2:08:11, 17.89s/it]

Processing row 18...


Processing sample rows:   4%|▍         | 19/448 [05:56<2:01:34, 17.00s/it]

Processing row 19...


Processing sample rows:   4%|▍         | 20/448 [06:06<1:46:03, 14.87s/it]

Processing row 20...


Processing sample rows:   5%|▍         | 21/448 [06:10<1:22:51, 11.64s/it]

Processing row 21...


Processing sample rows:   5%|▍         | 22/448 [06:21<1:21:28, 11.47s/it]

Processing row 22...


Processing sample rows:   5%|▌         | 23/448 [06:33<1:21:42, 11.54s/it]

Processing row 23...


Processing sample rows:   5%|▌         | 24/448 [06:48<1:29:19, 12.64s/it]

Processing row 24...


Processing sample rows:   6%|▌         | 25/448 [07:02<1:31:05, 12.92s/it]

Processing row 25...


Processing sample rows:   6%|▌         | 26/448 [07:04<1:08:57,  9.80s/it]

Processing row 26...


Processing sample rows:   6%|▌         | 27/448 [07:06<52:14,  7.45s/it]  

Processing row 27...


Processing sample rows:   6%|▋         | 28/448 [07:13<50:41,  7.24s/it]

Processing row 28...


Processing sample rows:   6%|▋         | 29/448 [07:18<44:38,  6.39s/it]

Processing row 29...


Processing sample rows:   7%|▋         | 30/448 [07:22<41:00,  5.89s/it]

Processing row 30...


Processing sample rows:   7%|▋         | 31/448 [07:27<39:25,  5.67s/it]

Processing row 31...


Processing sample rows:   7%|▋         | 32/448 [07:32<36:14,  5.23s/it]

Processing row 32...


Processing sample rows:   7%|▋         | 33/448 [07:42<46:53,  6.78s/it]

Processing row 33...


Processing sample rows:   8%|▊         | 34/448 [07:52<54:23,  7.88s/it]

Processing row 34...


Processing sample rows:   8%|▊         | 35/448 [08:05<1:03:29,  9.22s/it]

Processing row 35...


Processing sample rows:   8%|▊         | 36/448 [08:10<55:38,  8.10s/it]  

Processing row 36...


Processing sample rows:   8%|▊         | 37/448 [08:15<48:14,  7.04s/it]

Processing row 37...


Processing sample rows:   8%|▊         | 38/448 [08:20<44:12,  6.47s/it]

Processing row 38...


Processing sample rows:   9%|▊         | 39/448 [08:35<1:00:30,  8.88s/it]

Processing row 39...


Processing sample rows:   9%|▉         | 40/448 [08:48<1:09:30, 10.22s/it]

Processing row 40...


Processing sample rows:   9%|▉         | 41/448 [08:56<1:04:08,  9.46s/it]

Processing row 41...


Processing sample rows:   9%|▉         | 42/448 [09:03<59:54,  8.85s/it]  

Processing row 42...


Processing sample rows:  10%|▉         | 43/448 [09:05<45:25,  6.73s/it]

Processing row 43...


Processing sample rows:  10%|▉         | 44/448 [09:08<37:43,  5.60s/it]

Processing row 44...


Processing sample rows:  10%|█         | 45/448 [09:15<41:23,  6.16s/it]

Processing row 45...


Processing sample rows:  10%|█         | 46/448 [09:25<48:26,  7.23s/it]

Processing row 46...


Processing sample rows:  10%|█         | 47/448 [09:37<57:52,  8.66s/it]

Processing row 47...


Processing sample rows:  11%|█         | 48/448 [09:40<47:01,  7.05s/it]

Processing row 48...


Processing sample rows:  11%|█         | 49/448 [09:51<53:56,  8.11s/it]

Processing row 49...


Processing sample rows:  11%|█         | 50/448 [10:03<1:02:37,  9.44s/it]

Processing row 50...


Processing sample rows:  11%|█▏        | 51/448 [10:14<1:04:09,  9.70s/it]

Processing row 51...


Processing sample rows:  12%|█▏        | 52/448 [10:21<59:26,  9.01s/it]  

Processing row 52...


Processing sample rows:  12%|█▏        | 53/448 [10:35<1:08:26, 10.40s/it]

Processing row 53...


Processing sample rows:  12%|█▏        | 54/448 [10:40<58:31,  8.91s/it]  

Processing row 54...


Processing sample rows:  12%|█▏        | 55/448 [10:55<1:10:27, 10.76s/it]

Processing row 55...


Processing sample rows:  12%|█▎        | 56/448 [11:05<1:09:08, 10.58s/it]

Processing row 56...


Processing sample rows:  13%|█▎        | 57/448 [11:11<59:06,  9.07s/it]  

Processing row 57...


Processing sample rows:  13%|█▎        | 58/448 [11:16<51:41,  7.95s/it]

Processing row 58...


Processing sample rows:  13%|█▎        | 59/448 [11:30<1:03:45,  9.83s/it]

Processing row 59...


Processing sample rows:  13%|█▎        | 60/448 [11:38<59:35,  9.22s/it]  

Processing row 60...


Processing sample rows:  14%|█▎        | 61/448 [11:46<55:57,  8.68s/it]

Processing row 61...


Processing sample rows:  14%|█▍        | 62/448 [11:57<1:01:18,  9.53s/it]

Processing row 62...


Processing sample rows:  14%|█▍        | 63/448 [12:02<52:07,  8.12s/it]  

Processing row 63...


Processing sample rows:  14%|█▍        | 64/448 [12:19<1:08:08, 10.65s/it]

Processing row 64...


Processing sample rows:  15%|█▍        | 65/448 [12:25<59:45,  9.36s/it]  

Processing row 65...


Processing sample rows:  15%|█▍        | 66/448 [12:31<53:30,  8.41s/it]

Processing row 66...


Processing sample rows:  15%|█▍        | 67/448 [12:36<46:07,  7.26s/it]

Processing row 67...


Processing sample rows:  15%|█▌        | 68/448 [12:43<45:59,  7.26s/it]

Processing row 68...


Processing sample rows:  15%|█▌        | 69/448 [12:52<50:01,  7.92s/it]

Processing row 69...


Processing sample rows:  16%|█▌        | 70/448 [12:58<45:24,  7.21s/it]

Processing row 70...


Processing sample rows:  16%|█▌        | 71/448 [13:02<39:14,  6.25s/it]

Processing row 71...


Processing sample rows:  16%|█▌        | 72/448 [13:12<45:59,  7.34s/it]

Processing row 72...


Processing sample rows:  16%|█▋        | 73/448 [13:19<45:26,  7.27s/it]

Processing row 73...


Processing sample rows:  17%|█▋        | 74/448 [13:26<45:13,  7.25s/it]

Processing row 74...


Processing sample rows:  17%|█▋        | 75/448 [13:40<56:36,  9.11s/it]

Processing row 75...


Processing sample rows:  17%|█▋        | 76/448 [13:52<1:02:55, 10.15s/it]

Processing row 76...


Processing sample rows:  17%|█▋        | 77/448 [14:05<1:08:24, 11.06s/it]

Processing row 77...


Processing sample rows:  17%|█▋        | 78/448 [14:13<1:01:13,  9.93s/it]

Processing row 78...


Processing sample rows:  18%|█▊        | 79/448 [14:18<52:52,  8.60s/it]  

Processing row 79...


Processing sample rows:  18%|█▊        | 80/448 [14:24<47:48,  7.79s/it]

Processing row 80...


Processing sample rows:  18%|█▊        | 81/448 [14:40<1:02:04, 10.15s/it]

Processing row 81...


Processing sample rows:  18%|█▊        | 82/448 [14:52<1:05:57, 10.81s/it]

Processing row 82...


Processing sample rows:  19%|█▊        | 83/448 [15:02<1:04:08, 10.54s/it]

Processing row 83...


Processing sample rows:  19%|█▉        | 84/448 [15:24<1:24:35, 13.94s/it]

Processing row 84...


Processing sample rows:  19%|█▉        | 85/448 [15:38<1:24:47, 14.02s/it]

Processing row 85...


Processing sample rows:  19%|█▉        | 86/448 [15:56<1:31:02, 15.09s/it]

Processing row 86...


Processing sample rows:  19%|█▉        | 87/448 [16:05<1:20:08, 13.32s/it]

Processing row 87...


Processing sample rows:  20%|█▉        | 88/448 [16:23<1:29:07, 14.86s/it]

Processing row 88...


Processing sample rows:  20%|█▉        | 89/448 [16:32<1:18:41, 13.15s/it]

Processing row 89...


Processing sample rows:  20%|██        | 90/448 [16:39<1:06:35, 11.16s/it]

Processing row 90...


Processing sample rows:  20%|██        | 91/448 [17:01<1:25:56, 14.44s/it]

Processing row 91...


Processing sample rows:  21%|██        | 92/448 [17:11<1:17:06, 13.00s/it]

Processing row 92...


Processing sample rows:  21%|██        | 93/448 [17:18<1:06:54, 11.31s/it]

Processing row 93...


Processing sample rows:  21%|██        | 94/448 [17:32<1:10:53, 12.02s/it]

Processing row 94...


Processing sample rows:  21%|██        | 95/448 [17:34<54:21,  9.24s/it]  

Processing row 95...


Processing sample rows:  21%|██▏       | 96/448 [17:45<56:27,  9.62s/it]

Processing row 96...


Processing sample rows:  22%|██▏       | 97/448 [17:51<49:10,  8.41s/it]

Processing row 97...


Processing sample rows:  22%|██▏       | 98/448 [17:59<48:16,  8.28s/it]

Processing row 98...


Processing sample rows:  22%|██▏       | 99/448 [18:07<47:43,  8.20s/it]

Processing row 99...


Processing sample rows:  22%|██▏       | 100/448 [18:11<41:34,  7.17s/it]

Processing row 100...


Processing sample rows:  23%|██▎       | 101/448 [18:16<36:23,  6.29s/it]

Processing row 101...


Processing sample rows:  23%|██▎       | 102/448 [18:43<1:12:26, 12.56s/it]

Processing row 102...


Processing sample rows:  23%|██▎       | 103/448 [18:49<1:00:57, 10.60s/it]

Processing row 103...


Processing sample rows:  23%|██▎       | 104/448 [18:57<55:51,  9.74s/it]  

Processing row 104...


Processing sample rows:  23%|██▎       | 105/448 [19:09<1:00:43, 10.62s/it]

Processing row 105...


Processing sample rows:  24%|██▎       | 106/448 [19:12<46:25,  8.14s/it]  

Processing row 106...


Processing sample rows:  24%|██▍       | 107/448 [19:24<54:09,  9.53s/it]

Processing row 107...


Processing sample rows:  24%|██▍       | 108/448 [19:35<56:23,  9.95s/it]

Processing row 108...


Processing sample rows:  24%|██▍       | 109/448 [19:43<52:01,  9.21s/it]

Processing row 109...


Processing sample rows:  25%|██▍       | 110/448 [19:57<59:34, 10.58s/it]

Processing row 110...


Processing sample rows:  25%|██▍       | 111/448 [20:09<1:02:41, 11.16s/it]

Processing row 111...


Processing sample rows:  25%|██▌       | 112/448 [20:20<1:02:41, 11.19s/it]

Processing row 112...


Processing sample rows:  25%|██▌       | 113/448 [20:27<54:07,  9.69s/it]  

Processing row 113...


Processing sample rows:  25%|██▌       | 114/448 [20:33<48:51,  8.78s/it]

Processing row 114...


Processing sample rows:  26%|██▌       | 115/448 [20:46<56:16, 10.14s/it]

Processing row 115...


Processing sample rows:  26%|██▌       | 116/448 [20:51<47:24,  8.57s/it]

Processing row 116...


Processing sample rows:  26%|██▌       | 117/448 [20:57<43:04,  7.81s/it]

Processing row 117...


Processing sample rows:  26%|██▋       | 118/448 [21:07<46:14,  8.41s/it]

Processing row 118...


Processing sample rows:  27%|██▋       | 119/448 [21:10<37:19,  6.81s/it]

Processing row 119...


Processing sample rows:  27%|██▋       | 120/448 [21:17<36:29,  6.68s/it]

Processing row 120...


Processing sample rows:  27%|██▋       | 121/448 [21:32<50:00,  9.18s/it]

Processing row 121...


Processing sample rows:  27%|██▋       | 122/448 [21:39<46:30,  8.56s/it]

Processing row 122...


Processing sample rows:  27%|██▋       | 123/448 [21:50<49:55,  9.22s/it]

Processing row 123...


Processing sample rows:  28%|██▊       | 124/448 [21:54<42:32,  7.88s/it]

Processing row 124...


Processing sample rows:  28%|██▊       | 125/448 [22:15<1:03:21, 11.77s/it]

Processing row 125...


Processing sample rows:  28%|██▊       | 126/448 [22:30<1:07:27, 12.57s/it]

Processing row 126...


Processing sample rows:  28%|██▊       | 127/448 [22:37<59:41, 11.16s/it]  

Processing row 127...


Processing sample rows:  29%|██▊       | 128/448 [22:46<55:49, 10.47s/it]

Processing row 128...


Processing sample rows:  29%|██▉       | 129/448 [22:52<48:00,  9.03s/it]

Processing row 129...


Processing sample rows:  29%|██▉       | 130/448 [22:55<38:58,  7.35s/it]

Processing row 130...


Processing sample rows:  29%|██▉       | 131/448 [22:59<32:32,  6.16s/it]

Processing row 131...


Processing sample rows:  29%|██▉       | 132/448 [23:08<36:43,  6.97s/it]

Processing row 132...


Processing sample rows:  30%|██▉       | 133/448 [23:17<40:10,  7.65s/it]

Processing row 133...


Processing sample rows:  30%|██▉       | 134/448 [23:38<1:01:41, 11.79s/it]

Processing row 134...


Processing sample rows:  30%|███       | 135/448 [23:55<1:09:04, 13.24s/it]

Processing row 135...


Processing sample rows:  30%|███       | 136/448 [24:02<59:51, 11.51s/it]  

Processing row 136...


Processing sample rows:  31%|███       | 137/448 [24:25<1:16:54, 14.84s/it]

Processing row 137...


Processing sample rows:  31%|███       | 138/448 [24:48<1:28:32, 17.14s/it]

Processing row 138...


Processing sample rows:  31%|███       | 139/448 [24:56<1:15:26, 14.65s/it]

Processing row 139...


Processing sample rows:  31%|███▏      | 140/448 [25:05<1:05:16, 12.71s/it]

Processing row 140...


Processing sample rows:  31%|███▏      | 141/448 [25:20<1:08:35, 13.41s/it]

Processing row 141...


Processing sample rows:  32%|███▏      | 142/448 [25:22<51:59, 10.19s/it]  

Processing row 142...


Processing sample rows:  32%|███▏      | 143/448 [25:26<41:59,  8.26s/it]

Processing row 143...


Processing sample rows:  32%|███▏      | 144/448 [25:40<50:26,  9.96s/it]

Processing row 144...


Processing sample rows:  32%|███▏      | 145/448 [25:50<49:57,  9.89s/it]

Processing row 145...


Processing sample rows:  33%|███▎      | 146/448 [26:07<1:01:20, 12.19s/it]

Processing row 146...


Processing sample rows:  33%|███▎      | 147/448 [26:10<46:38,  9.30s/it]  

Processing row 147...


Processing sample rows:  33%|███▎      | 148/448 [26:19<46:14,  9.25s/it]

Processing row 148...


Processing sample rows:  33%|███▎      | 149/448 [26:24<40:04,  8.04s/it]

Processing row 149...


Processing sample rows:  33%|███▎      | 150/448 [26:41<52:45, 10.62s/it]

Processing row 150...


Processing sample rows:  34%|███▎      | 151/448 [26:48<47:48,  9.66s/it]

Processing row 151...


Processing sample rows:  34%|███▍      | 152/448 [26:56<44:25,  9.00s/it]

Processing row 152...


Processing sample rows:  34%|███▍      | 153/448 [27:10<52:11, 10.62s/it]

Processing row 153...


Processing sample rows:  34%|███▍      | 154/448 [27:23<55:10, 11.26s/it]

Processing row 154...


Processing sample rows:  35%|███▍      | 155/448 [27:33<52:54, 10.83s/it]

Processing row 155...


Processing sample rows:  35%|███▍      | 156/448 [27:40<47:08,  9.69s/it]

Processing row 156...


Processing sample rows:  35%|███▌      | 157/448 [27:44<39:39,  8.18s/it]

Processing row 157...


Processing sample rows:  35%|███▌      | 158/448 [27:48<33:18,  6.89s/it]

Processing row 158...


Processing sample rows:  35%|███▌      | 159/448 [28:01<41:42,  8.66s/it]

Processing row 159...


Processing sample rows:  36%|███▌      | 160/448 [28:25<1:03:45, 13.28s/it]

Processing row 160...


Processing sample rows:  36%|███▌      | 161/448 [28:32<54:35, 11.41s/it]  

Processing row 161...


Processing sample rows:  36%|███▌      | 162/448 [29:01<1:19:30, 16.68s/it]

Processing row 162...


Processing sample rows:  36%|███▋      | 163/448 [29:14<1:13:46, 15.53s/it]

Processing row 163...


Processing sample rows:  37%|███▋      | 164/448 [29:41<1:29:24, 18.89s/it]

Processing row 164...


Processing sample rows:  37%|███▋      | 165/448 [29:45<1:08:11, 14.46s/it]

Processing row 165...


Processing sample rows:  37%|███▋      | 166/448 [29:50<55:20, 11.78s/it]  

Processing row 166...


Processing sample rows:  37%|███▋      | 167/448 [29:55<45:34,  9.73s/it]

Processing row 167...


Processing sample rows:  38%|███▊      | 168/448 [30:06<47:11, 10.11s/it]

Processing row 168...


Processing sample rows:  38%|███▊      | 169/448 [30:27<1:01:56, 13.32s/it]

Processing row 169...


Processing sample rows:  38%|███▊      | 170/448 [30:33<51:56, 11.21s/it]  

Processing row 170...


Processing sample rows:  38%|███▊      | 171/448 [30:40<45:11,  9.79s/it]

Processing row 171...


Processing sample rows:  38%|███▊      | 172/448 [30:54<50:56, 11.08s/it]

Processing row 172...


Processing sample rows:  39%|███▊      | 173/448 [31:09<55:37, 12.14s/it]

Processing row 173...


Processing sample rows:  39%|███▉      | 174/448 [31:15<47:44, 10.45s/it]

Processing row 174...


Processing sample rows:  39%|███▉      | 175/448 [31:22<43:23,  9.54s/it]

Processing row 175...


Processing sample rows:  39%|███▉      | 176/448 [31:24<32:33,  7.18s/it]

Processing row 176...


Processing sample rows:  40%|███▉      | 177/448 [31:27<26:04,  5.77s/it]

Processing row 177...


Processing sample rows:  40%|███▉      | 178/448 [31:38<34:00,  7.56s/it]

Processing row 178...


Processing sample rows:  40%|███▉      | 179/448 [31:55<46:40, 10.41s/it]

Processing row 179...


Processing sample rows:  40%|████      | 180/448 [32:16<1:00:17, 13.50s/it]

Processing row 180...


Processing sample rows:  40%|████      | 181/448 [32:23<51:42, 11.62s/it]  

Processing row 181...


Processing sample rows:  41%|████      | 182/448 [32:31<46:43, 10.54s/it]

Processing row 182...


Processing sample rows:  41%|████      | 183/448 [32:40<43:34,  9.87s/it]

Processing row 183...


Processing sample rows:  41%|████      | 184/448 [32:53<47:38, 10.83s/it]

Processing row 184...


Processing sample rows:  41%|████▏     | 185/448 [33:06<50:22, 11.49s/it]

Processing row 185...


Processing sample rows:  42%|████▏     | 186/448 [33:18<51:16, 11.74s/it]

Processing row 186...


Processing sample rows:  42%|████▏     | 187/448 [33:26<45:51, 10.54s/it]

Processing row 187...


Processing sample rows:  42%|████▏     | 188/448 [33:42<52:38, 12.15s/it]

Processing row 188...


Processing sample rows:  42%|████▏     | 189/448 [33:55<53:15, 12.34s/it]

Processing row 189...


Processing sample rows:  42%|████▏     | 190/448 [34:30<1:22:47, 19.25s/it]

Processing row 190...


Processing sample rows:  43%|████▎     | 191/448 [34:35<1:04:29, 15.06s/it]

Processing row 191...


Processing sample rows:  43%|████▎     | 192/448 [37:36<4:35:52, 64.66s/it]

Processing row 192...


Processing sample rows:  43%|████▎     | 193/448 [37:41<3:19:04, 46.84s/it]

Processing row 193...


Processing sample rows:  43%|████▎     | 194/448 [37:53<2:34:46, 36.56s/it]

Processing row 194...


Processing sample rows:  44%|████▎     | 195/448 [38:00<1:55:43, 27.44s/it]

Processing row 195...


Processing sample rows:  44%|████▍     | 196/448 [38:16<1:41:16, 24.11s/it]

Processing row 196...


Processing sample rows:  44%|████▍     | 197/448 [38:23<1:19:00, 18.89s/it]

Processing row 197...


Processing sample rows:  44%|████▍     | 198/448 [38:37<1:12:38, 17.43s/it]

Processing row 198...


Processing sample rows:  44%|████▍     | 199/448 [38:40<54:31, 13.14s/it]  

Processing row 199...


Processing sample rows:  45%|████▍     | 200/448 [39:12<1:18:04, 18.89s/it]

Processing row 200...


Processing sample rows:  45%|████▍     | 201/448 [39:22<1:07:08, 16.31s/it]

Processing row 201...


Processing sample rows:  45%|████▌     | 202/448 [39:33<59:15, 14.45s/it]  

Processing row 202...


Processing sample rows:  45%|████▌     | 203/448 [39:38<47:41, 11.68s/it]

Processing row 203...


Processing sample rows:  46%|████▌     | 204/448 [40:01<1:02:09, 15.29s/it]

Processing row 204...


Processing sample rows:  46%|████▌     | 205/448 [40:06<48:22, 11.94s/it]  

Processing row 205...


Processing sample rows:  46%|████▌     | 206/448 [40:30<1:03:29, 15.74s/it]

Processing row 206...


Processing sample rows:  46%|████▌     | 207/448 [40:48<1:06:18, 16.51s/it]

Processing row 207...


Processing sample rows:  46%|████▋     | 208/448 [40:50<48:24, 12.10s/it]  

Processing row 208...


Processing sample rows:  47%|████▋     | 209/448 [40:59<44:37, 11.20s/it]

Processing row 209...


Processing sample rows:  47%|████▋     | 210/448 [41:06<38:56,  9.82s/it]

Processing row 210...


Processing sample rows:  47%|████▋     | 211/448 [41:15<37:23,  9.47s/it]

Processing row 211...


Processing sample rows:  47%|████▋     | 212/448 [41:28<42:21, 10.77s/it]

Processing row 212...


Processing sample rows:  48%|████▊     | 213/448 [41:34<36:37,  9.35s/it]

Processing row 213...


Processing sample rows:  48%|████▊     | 214/448 [44:35<3:56:28, 60.64s/it]

Processing row 214...


Processing sample rows:  48%|████▊     | 215/448 [44:42<2:53:17, 44.62s/it]

Processing row 215...


Processing sample rows:  48%|████▊     | 216/448 [44:49<2:09:15, 33.43s/it]

Processing row 216...


Processing sample rows:  48%|████▊     | 217/448 [44:58<1:40:26, 26.09s/it]

Processing row 217...


Processing sample rows:  49%|████▊     | 218/448 [45:10<1:23:56, 21.90s/it]

Processing row 218...


Processing sample rows:  49%|████▉     | 219/448 [45:19<1:07:58, 17.81s/it]

Processing row 219...


Processing sample rows:  49%|████▉     | 220/448 [45:44<1:15:44, 19.93s/it]

Processing row 220...


Processing sample rows:  49%|████▉     | 221/448 [45:51<1:00:47, 16.07s/it]

Processing row 221...


Processing sample rows:  50%|████▉     | 222/448 [45:59<51:38, 13.71s/it]  

Processing row 222...


Processing sample rows:  50%|████▉     | 223/448 [46:23<1:02:52, 16.77s/it]

Processing row 223...


Processing sample rows:  50%|█████     | 224/448 [46:32<54:12, 14.52s/it]  

Processing row 224...


Processing sample rows:  50%|█████     | 225/448 [46:38<44:31, 11.98s/it]

Processing row 225...


Processing sample rows:  50%|█████     | 226/448 [46:54<48:42, 13.17s/it]

Processing row 226...


Processing sample rows:  51%|█████     | 227/448 [47:03<43:24, 11.79s/it]

Processing row 227...


Processing sample rows:  51%|█████     | 228/448 [47:11<39:05, 10.66s/it]

Processing row 228...


Processing sample rows:  51%|█████     | 229/448 [47:32<50:42, 13.89s/it]

Processing row 229...


Processing sample rows:  51%|█████▏    | 230/448 [47:41<45:04, 12.41s/it]

Processing row 230...


Processing sample rows:  52%|█████▏    | 231/448 [47:56<48:05, 13.30s/it]

Processing row 231...


Processing sample rows:  52%|█████▏    | 232/448 [48:07<45:09, 12.54s/it]

Processing row 232...


Processing sample rows:  52%|█████▏    | 233/448 [48:15<39:49, 11.11s/it]

Processing row 233...


Processing sample rows:  52%|█████▏    | 234/448 [48:21<34:17,  9.61s/it]

Processing row 234...


Processing sample rows:  52%|█████▏    | 235/448 [48:35<38:57, 10.98s/it]

Processing row 235...


Processing sample rows:  53%|█████▎    | 236/448 [48:49<42:14, 11.96s/it]

Processing row 236...


Processing sample rows:  53%|█████▎    | 237/448 [48:57<37:26, 10.65s/it]

Processing row 237...


Processing sample rows:  53%|█████▎    | 238/448 [49:05<34:35,  9.88s/it]

Processing row 238...


Processing sample rows:  53%|█████▎    | 239/448 [49:12<30:46,  8.84s/it]

Processing row 239...


Processing sample rows:  54%|█████▎    | 240/448 [49:19<29:12,  8.43s/it]

Processing row 240...


Processing sample rows:  54%|█████▍    | 241/448 [49:27<28:53,  8.37s/it]

Processing row 241...


Processing sample rows:  54%|█████▍    | 242/448 [49:37<30:05,  8.77s/it]

Processing row 242...


Processing sample rows:  54%|█████▍    | 243/448 [49:47<30:50,  9.03s/it]

Processing row 243...


Processing sample rows:  54%|█████▍    | 244/448 [49:53<28:23,  8.35s/it]

Processing row 244...


Processing sample rows:  55%|█████▍    | 245/448 [50:01<27:33,  8.14s/it]

Processing row 245...


Processing sample rows:  55%|█████▍    | 246/448 [50:09<26:58,  8.01s/it]

Processing row 246...


Processing sample rows:  55%|█████▌    | 247/448 [50:17<27:27,  8.20s/it]

Processing row 247...


Processing sample rows:  55%|█████▌    | 248/448 [50:35<37:09, 11.15s/it]

Processing row 248...


Processing sample rows:  56%|█████▌    | 249/448 [50:53<43:34, 13.14s/it]

Processing row 249...


Processing sample rows:  56%|█████▌    | 250/448 [50:58<34:47, 10.54s/it]

Processing row 250...


Processing sample rows:  56%|█████▌    | 251/448 [51:01<27:37,  8.41s/it]

Processing row 251...


Processing sample rows:  56%|█████▋    | 252/448 [51:08<26:13,  8.03s/it]

Processing row 252...


Processing sample rows:  56%|█████▋    | 253/448 [51:16<26:04,  8.02s/it]

Processing row 253...


Processing sample rows:  57%|█████▋    | 254/448 [51:27<28:25,  8.79s/it]

Processing row 254...


Processing sample rows:  57%|█████▋    | 255/448 [51:30<23:13,  7.22s/it]

Processing row 255...


Processing sample rows:  57%|█████▋    | 256/448 [51:35<20:51,  6.52s/it]

Processing row 256...


Processing sample rows:  57%|█████▋    | 257/448 [51:42<20:49,  6.54s/it]

Processing row 257...


Processing sample rows:  58%|█████▊    | 258/448 [51:52<24:14,  7.65s/it]

Processing row 258...


Processing sample rows:  58%|█████▊    | 259/448 [52:14<37:46, 11.99s/it]

Processing row 259...


Processing sample rows:  58%|█████▊    | 260/448 [52:34<44:36, 14.24s/it]

Processing row 260...


Processing sample rows:  58%|█████▊    | 261/448 [52:42<38:46, 12.44s/it]

Processing row 261...


Processing sample rows:  58%|█████▊    | 262/448 [52:51<35:20, 11.40s/it]

Processing row 262...


Processing sample rows:  59%|█████▊    | 263/448 [53:04<37:03, 12.02s/it]

Processing row 263...


Processing sample rows:  59%|█████▉    | 264/448 [53:18<38:18, 12.49s/it]

Processing row 264...


Processing sample rows:  59%|█████▉    | 265/448 [53:25<33:04, 10.85s/it]

Processing row 265...


Processing sample rows:  59%|█████▉    | 266/448 [53:31<28:43,  9.47s/it]

Processing row 266...


Processing sample rows:  60%|█████▉    | 267/448 [53:43<30:43, 10.19s/it]

Processing row 267...


Processing sample rows:  60%|█████▉    | 268/448 [53:50<28:00,  9.34s/it]

Processing row 268...


Processing sample rows:  60%|██████    | 269/448 [53:59<27:10,  9.11s/it]

Processing row 269...


Processing sample rows:  60%|██████    | 270/448 [54:02<21:45,  7.34s/it]

Processing row 270...


Processing sample rows:  60%|██████    | 271/448 [54:15<26:10,  8.87s/it]

Processing row 271...


Processing sample rows:  61%|██████    | 272/448 [54:18<21:22,  7.29s/it]

Processing row 272...


Processing sample rows:  61%|██████    | 273/448 [54:26<21:33,  7.39s/it]

Processing row 273...


Processing sample rows:  61%|██████    | 274/448 [54:33<21:34,  7.44s/it]

Processing row 274...


Processing sample rows:  61%|██████▏   | 275/448 [54:44<23:56,  8.30s/it]

Processing row 275...


Processing sample rows:  62%|██████▏   | 276/448 [54:56<27:03,  9.44s/it]

Processing row 276...


Processing sample rows:  62%|██████▏   | 277/448 [55:01<22:58,  8.06s/it]

Processing row 277...


Processing sample rows:  62%|██████▏   | 278/448 [55:13<26:41,  9.42s/it]

Processing row 278...


Processing sample rows:  62%|██████▏   | 279/448 [55:20<23:54,  8.49s/it]

Processing row 279...


Processing sample rows:  62%|██████▎   | 280/448 [55:30<25:35,  9.14s/it]

Processing row 280...


Processing sample rows:  63%|██████▎   | 281/448 [55:39<24:54,  8.95s/it]

Processing row 281...


Processing sample rows:  63%|██████▎   | 282/448 [55:44<21:22,  7.73s/it]

Processing row 282...


Processing sample rows:  63%|██████▎   | 283/448 [55:47<17:44,  6.45s/it]

Processing row 283...


Processing sample rows:  63%|██████▎   | 284/448 [55:59<22:22,  8.19s/it]

Processing row 284...


Processing sample rows:  64%|██████▎   | 285/448 [56:02<17:34,  6.47s/it]

Processing row 285...


Processing sample rows:  64%|██████▍   | 286/448 [56:14<21:44,  8.05s/it]

Processing row 286...


Processing sample rows:  64%|██████▍   | 287/448 [56:17<17:59,  6.71s/it]

Processing row 287...


Processing sample rows:  64%|██████▍   | 288/448 [56:30<23:06,  8.66s/it]

Processing row 288...


Processing sample rows:  65%|██████▍   | 289/448 [56:51<32:28, 12.26s/it]

Processing row 289...


Processing sample rows:  65%|██████▍   | 290/448 [57:05<33:44, 12.81s/it]

Processing row 290...


Processing sample rows:  65%|██████▍   | 291/448 [57:16<32:21, 12.36s/it]

Processing row 291...


Processing sample rows:  65%|██████▌   | 292/448 [57:32<34:40, 13.34s/it]

Processing row 292...


Processing sample rows:  65%|██████▌   | 293/448 [57:40<30:15, 11.71s/it]

Processing row 293...


Processing sample rows:  66%|██████▌   | 294/448 [58:04<39:51, 15.53s/it]

Processing row 294...


Processing sample rows:  66%|██████▌   | 295/448 [58:12<33:30, 13.14s/it]

Processing row 295...


Processing sample rows:  66%|██████▌   | 296/448 [58:17<26:57, 10.64s/it]

Processing row 296...


Processing sample rows:  66%|██████▋   | 297/448 [58:26<25:50, 10.27s/it]

Processing row 297...


Processing sample rows:  67%|██████▋   | 298/448 [58:35<24:46,  9.91s/it]

Processing row 298...


Processing sample rows:  67%|██████▋   | 299/448 [58:44<23:25,  9.43s/it]

Processing row 299...


Processing sample rows:  67%|██████▋   | 300/448 [58:49<20:20,  8.24s/it]

Processing row 300...


Processing sample rows:  67%|██████▋   | 301/448 [58:55<18:41,  7.63s/it]

Processing row 301...


Processing sample rows:  67%|██████▋   | 302/448 [59:04<19:13,  7.90s/it]

Processing row 302...


Processing sample rows:  68%|██████▊   | 303/448 [59:08<16:19,  6.75s/it]

Processing row 303...


Processing sample rows:  68%|██████▊   | 304/448 [59:18<18:45,  7.82s/it]

Processing row 304...


Processing sample rows:  68%|██████▊   | 305/448 [59:26<18:52,  7.92s/it]

Processing row 305...


Processing sample rows:  68%|██████▊   | 306/448 [59:49<28:54, 12.22s/it]

Processing row 306...


Processing sample rows:  69%|██████▊   | 307/448 [59:56<25:20, 10.78s/it]

Processing row 307...


Processing sample rows:  69%|██████▉   | 308/448 [1:00:22<35:48, 15.35s/it]

Processing row 308...


Processing sample rows:  69%|██████▉   | 309/448 [1:00:33<32:25, 14.00s/it]

Processing row 309...


Processing sample rows:  69%|██████▉   | 310/448 [1:00:44<30:18, 13.18s/it]

Processing row 310...


Processing sample rows:  69%|██████▉   | 311/448 [1:00:56<29:16, 12.82s/it]

Processing row 311...


Processing sample rows:  70%|██████▉   | 312/448 [1:01:01<23:48, 10.51s/it]

Processing row 312...


Processing sample rows:  70%|██████▉   | 313/448 [1:01:06<20:06,  8.94s/it]

Processing row 313...


Processing sample rows:  70%|███████   | 314/448 [1:01:11<17:04,  7.65s/it]

Processing row 314...


Processing sample rows:  70%|███████   | 315/448 [1:01:27<22:40, 10.23s/it]

Processing row 315...


Processing sample rows:  71%|███████   | 316/448 [1:02:02<38:53, 17.68s/it]

Processing row 316...


Processing sample rows:  71%|███████   | 317/448 [1:02:14<34:37, 15.86s/it]

Processing row 317...


Processing sample rows:  71%|███████   | 318/448 [1:02:21<28:34, 13.19s/it]

Processing row 318...


Processing sample rows:  71%|███████   | 319/448 [1:05:21<2:16:11, 63.34s/it]

Processing row 319...


Processing sample rows:  71%|███████▏  | 320/448 [1:05:29<1:39:48, 46.78s/it]

Processing row 320...


Processing sample rows:  72%|███████▏  | 321/448 [1:05:35<1:12:51, 34.42s/it]

Processing row 321...


Processing sample rows:  72%|███████▏  | 322/448 [1:05:41<54:37, 26.01s/it]  

Processing row 322...


Processing sample rows:  72%|███████▏  | 323/448 [1:05:53<44:52, 21.54s/it]

Processing row 323...


Processing sample rows:  72%|███████▏  | 324/448 [1:05:58<34:35, 16.74s/it]

Processing row 324...


Processing sample rows:  73%|███████▎  | 325/448 [1:06:21<38:15, 18.66s/it]

Processing row 325...


Processing sample rows:  73%|███████▎  | 326/448 [1:06:32<33:25, 16.44s/it]

Processing row 326...


Processing sample rows:  73%|███████▎  | 327/448 [1:06:41<28:16, 14.02s/it]

Processing row 327...


Processing sample rows:  73%|███████▎  | 328/448 [1:06:45<22:16, 11.14s/it]

Processing row 328...


Processing sample rows:  73%|███████▎  | 329/448 [1:06:53<20:00, 10.09s/it]

Processing row 329...


Processing sample rows:  74%|███████▎  | 330/448 [1:07:05<20:49, 10.59s/it]

Processing row 330...


Processing sample rows:  74%|███████▍  | 331/448 [1:07:07<16:02,  8.23s/it]

Processing row 331...


Processing sample rows:  74%|███████▍  | 332/448 [1:07:22<19:28, 10.07s/it]

Processing row 332...


Processing sample rows:  74%|███████▍  | 333/448 [1:07:31<18:58,  9.90s/it]

Processing row 333...


Processing sample rows:  75%|███████▍  | 334/448 [1:07:42<19:07, 10.07s/it]

Processing row 334...


Processing sample rows:  75%|███████▍  | 335/448 [1:07:48<17:00,  9.03s/it]

Processing row 335...


Processing sample rows:  75%|███████▌  | 336/448 [1:07:57<16:32,  8.86s/it]

Processing row 336...


Processing sample rows:  75%|███████▌  | 337/448 [1:08:03<14:46,  7.99s/it]

Processing row 337...


Processing sample rows:  75%|███████▌  | 338/448 [1:08:22<20:49, 11.36s/it]

Processing row 338...


Processing sample rows:  76%|███████▌  | 339/448 [1:08:34<21:11, 11.66s/it]

Processing row 339...


Processing sample rows:  76%|███████▌  | 340/448 [1:08:44<19:57, 11.09s/it]

Processing row 340...


Processing sample rows:  76%|███████▌  | 341/448 [1:08:50<17:12,  9.65s/it]

Processing row 341...


Processing sample rows:  76%|███████▋  | 342/448 [1:08:59<16:17,  9.22s/it]

Processing row 342...


Processing sample rows:  77%|███████▋  | 343/448 [1:09:13<18:36, 10.64s/it]

Processing row 343...


Processing sample rows:  77%|███████▋  | 344/448 [1:09:22<17:34, 10.14s/it]

Processing row 344...


Processing sample rows:  77%|███████▋  | 345/448 [1:09:24<13:23,  7.80s/it]

Processing row 345...


Processing sample rows:  77%|███████▋  | 346/448 [1:09:31<12:46,  7.51s/it]

Processing row 346...


Processing sample rows:  77%|███████▋  | 347/448 [1:09:39<13:06,  7.79s/it]

Processing row 347...


Processing sample rows:  78%|███████▊  | 348/448 [1:09:45<12:04,  7.24s/it]

Processing row 348...


Processing sample rows:  78%|███████▊  | 349/448 [1:09:54<12:53,  7.81s/it]

Processing row 349...


Processing sample rows:  78%|███████▊  | 350/448 [1:10:07<15:07,  9.26s/it]

Processing row 350...


Processing sample rows:  78%|███████▊  | 351/448 [1:10:12<13:00,  8.04s/it]

Processing row 351...


Processing sample rows:  79%|███████▊  | 352/448 [1:10:26<15:26,  9.65s/it]

Processing row 352...


Processing sample rows:  79%|███████▉  | 353/448 [1:10:32<13:47,  8.72s/it]

Processing row 353...


Processing sample rows:  79%|███████▉  | 354/448 [1:10:40<13:22,  8.54s/it]

Processing row 354...


Processing sample rows:  79%|███████▉  | 355/448 [1:10:51<14:19,  9.24s/it]

Processing row 355...


Processing sample rows:  79%|███████▉  | 356/448 [1:11:11<19:19, 12.60s/it]

Processing row 356...


Processing sample rows:  80%|███████▉  | 357/448 [1:11:19<16:51, 11.11s/it]

Processing row 357...


Processing sample rows:  80%|███████▉  | 358/448 [1:11:22<13:02,  8.70s/it]

Processing row 358...


Processing sample rows:  80%|████████  | 359/448 [1:11:26<10:46,  7.27s/it]

Processing row 359...


Processing sample rows:  80%|████████  | 360/448 [1:14:26<1:26:46, 59.16s/it]

Processing row 360...


Processing sample rows:  81%|████████  | 361/448 [1:14:32<1:02:23, 43.02s/it]

Processing row 361...


Processing sample rows:  81%|████████  | 362/448 [1:14:36<44:56, 31.35s/it]  

Processing row 362...


Processing sample rows:  81%|████████  | 363/448 [1:14:44<34:37, 24.44s/it]

Processing row 363...


Processing sample rows:  81%|████████▏ | 364/448 [1:14:50<26:29, 18.92s/it]

Processing row 364...


Processing sample rows:  81%|████████▏ | 365/448 [1:15:00<22:10, 16.04s/it]

Processing row 365...


Processing sample rows:  82%|████████▏ | 366/448 [1:15:13<20:41, 15.14s/it]

Processing row 366...


Processing sample rows:  82%|████████▏ | 367/448 [1:15:17<15:59, 11.85s/it]

Processing row 367...


Processing sample rows:  82%|████████▏ | 368/448 [1:15:27<14:58, 11.23s/it]

Processing row 368...


Processing sample rows:  82%|████████▏ | 369/448 [1:15:39<15:26, 11.73s/it]

Processing row 369...


Processing sample rows:  83%|████████▎ | 370/448 [1:15:43<11:58,  9.21s/it]

Processing row 370...


Processing sample rows:  83%|████████▎ | 371/448 [1:15:46<09:34,  7.46s/it]

Processing row 371...


Processing sample rows:  83%|████████▎ | 372/448 [1:15:53<09:15,  7.31s/it]

Processing row 372...


Processing sample rows:  83%|████████▎ | 373/448 [1:15:58<08:10,  6.54s/it]

Processing row 373...


Processing sample rows:  83%|████████▎ | 374/448 [1:16:06<08:34,  6.95s/it]

Processing row 374...


Processing sample rows:  84%|████████▎ | 375/448 [1:16:15<09:13,  7.58s/it]

Processing row 375...


Processing sample rows:  84%|████████▍ | 376/448 [1:16:23<09:26,  7.86s/it]

Processing row 376...


Processing sample rows:  84%|████████▍ | 377/448 [1:16:36<11:02,  9.33s/it]

Processing row 377...


Processing sample rows:  84%|████████▍ | 378/448 [1:16:42<09:43,  8.33s/it]

Processing row 378...


Processing sample rows:  85%|████████▍ | 379/448 [1:16:50<09:22,  8.15s/it]

Processing row 379...


Processing sample rows:  85%|████████▍ | 380/448 [1:16:57<09:01,  7.96s/it]

Processing row 380...


Processing sample rows:  85%|████████▌ | 381/448 [1:17:05<08:49,  7.90s/it]

Processing row 381...


Processing sample rows:  85%|████████▌ | 382/448 [1:17:07<06:51,  6.23s/it]

Processing row 382...


Processing sample rows:  85%|████████▌ | 383/448 [1:17:31<12:23, 11.43s/it]

Processing row 383...


Processing sample rows:  86%|████████▌ | 384/448 [1:17:34<09:39,  9.06s/it]

Processing row 384...


Processing sample rows:  86%|████████▌ | 385/448 [1:17:46<10:22,  9.88s/it]

Processing row 385...


Processing sample rows:  86%|████████▌ | 386/448 [1:17:49<07:59,  7.73s/it]

Processing row 386...


Processing sample rows:  86%|████████▋ | 387/448 [1:17:53<06:40,  6.57s/it]

Processing row 387...


Processing sample rows:  87%|████████▋ | 388/448 [1:18:03<07:33,  7.56s/it]

Processing row 388...


Processing sample rows:  87%|████████▋ | 389/448 [1:18:09<06:55,  7.04s/it]

Processing row 389...


Processing sample rows:  87%|████████▋ | 390/448 [1:18:16<06:51,  7.10s/it]

Processing row 390...


Processing sample rows:  87%|████████▋ | 391/448 [1:18:20<05:52,  6.18s/it]

Processing row 391...


Processing sample rows:  88%|████████▊ | 392/448 [1:18:28<06:18,  6.77s/it]

Processing row 392...


Processing sample rows:  88%|████████▊ | 393/448 [1:18:33<05:42,  6.23s/it]

Processing row 393...


Processing sample rows:  88%|████████▊ | 394/448 [1:18:50<08:31,  9.47s/it]

Processing row 394...


Processing sample rows:  88%|████████▊ | 395/448 [1:19:01<08:46,  9.93s/it]

Processing row 395...


Processing sample rows:  88%|████████▊ | 396/448 [1:19:07<07:41,  8.87s/it]

Processing row 396...


Processing sample rows:  89%|████████▊ | 397/448 [1:19:12<06:20,  7.47s/it]

Processing row 397...


Processing sample rows:  89%|████████▉ | 398/448 [1:19:18<05:52,  7.05s/it]

Processing row 398...


Processing sample rows:  89%|████████▉ | 399/448 [1:19:23<05:19,  6.53s/it]

Processing row 399...


Processing sample rows:  89%|████████▉ | 400/448 [1:19:46<09:11, 11.49s/it]

Processing row 400...


Processing sample rows:  90%|████████▉ | 401/448 [1:19:53<08:00, 10.23s/it]

Processing row 401...


Processing sample rows:  90%|████████▉ | 402/448 [1:20:12<09:48, 12.78s/it]

Processing row 402...


Processing sample rows:  90%|████████▉ | 403/448 [1:20:16<07:29,  9.99s/it]

Processing row 403...


Processing sample rows:  90%|█████████ | 404/448 [1:20:31<08:31, 11.63s/it]

Processing row 404...


Processing sample rows:  90%|█████████ | 405/448 [1:20:39<07:35, 10.59s/it]

Processing row 405...


Processing sample rows:  91%|█████████ | 406/448 [1:20:51<07:43, 11.03s/it]

Processing row 406...


Processing sample rows:  91%|█████████ | 407/448 [1:20:57<06:33,  9.59s/it]

Processing row 407...


Processing sample rows:  91%|█████████ | 408/448 [1:21:13<07:32, 11.32s/it]

Processing row 408...


Processing sample rows:  91%|█████████▏| 409/448 [1:21:29<08:23, 12.90s/it]

Processing row 409...


Processing sample rows:  92%|█████████▏| 410/448 [1:21:38<07:16, 11.48s/it]

Processing row 410...


Processing sample rows:  92%|█████████▏| 411/448 [1:21:47<06:39, 10.79s/it]

Processing row 411...


Processing sample rows:  92%|█████████▏| 412/448 [1:22:03<07:27, 12.42s/it]

Processing row 412...


Processing sample rows:  92%|█████████▏| 413/448 [1:22:12<06:41, 11.48s/it]

Processing row 413...


Processing sample rows:  92%|█████████▏| 414/448 [1:22:22<06:16, 11.08s/it]

Processing row 414...


Processing sample rows:  93%|█████████▎| 415/448 [1:22:29<05:21,  9.73s/it]

Processing row 415...


Processing sample rows:  93%|█████████▎| 416/448 [1:22:35<04:38,  8.72s/it]

Processing row 416...


Processing sample rows:  93%|█████████▎| 417/448 [1:22:43<04:25,  8.56s/it]

Processing row 417...


Processing sample rows:  93%|█████████▎| 418/448 [1:23:08<06:41, 13.37s/it]

Processing row 418...


Processing sample rows:  94%|█████████▎| 419/448 [1:26:08<30:40, 63.47s/it]

Processing row 419...


Processing sample rows:  94%|█████████▍| 420/448 [1:26:15<21:39, 46.42s/it]

Processing row 420...


Processing sample rows:  94%|█████████▍| 421/448 [1:26:22<15:35, 34.66s/it]

Processing row 421...


Processing sample rows:  94%|█████████▍| 422/448 [1:26:34<12:05, 27.89s/it]

Processing row 422...


Processing sample rows:  94%|█████████▍| 423/448 [1:26:40<08:50, 21.20s/it]

Processing row 423...


Processing sample rows:  95%|█████████▍| 424/448 [1:27:04<08:51, 22.15s/it]

Processing row 424...


Processing sample rows:  95%|█████████▍| 425/448 [1:27:12<06:50, 17.85s/it]

Processing row 425...


Processing sample rows:  95%|█████████▌| 426/448 [1:27:20<05:24, 14.75s/it]

Processing row 426...


Processing sample rows:  95%|█████████▌| 427/448 [1:27:26<04:16, 12.22s/it]

Processing row 427...


Processing sample rows:  96%|█████████▌| 428/448 [1:27:42<04:29, 13.48s/it]

Processing row 428...


Processing sample rows:  96%|█████████▌| 429/448 [1:30:43<20:07, 63.57s/it]

Processing row 429...


Processing sample rows:  96%|█████████▌| 430/448 [1:30:46<13:36, 45.39s/it]

Processing row 430...


Processing sample rows:  96%|█████████▌| 431/448 [1:30:53<09:37, 33.95s/it]

Processing row 431...


Processing sample rows:  96%|█████████▋| 432/448 [1:30:56<06:35, 24.74s/it]

Processing row 432...


Processing sample rows:  97%|█████████▋| 433/448 [1:31:05<04:57, 19.82s/it]

Processing row 433...


Processing sample rows:  97%|█████████▋| 434/448 [1:31:10<03:35, 15.37s/it]

Processing row 434...


Processing sample rows:  97%|█████████▋| 435/448 [1:31:20<03:01, 13.96s/it]

Processing row 435...


Processing sample rows:  97%|█████████▋| 436/448 [1:31:26<02:17, 11.43s/it]

Processing row 436...


Processing sample rows:  98%|█████████▊| 437/448 [1:31:39<02:12, 12.06s/it]

Processing row 437...


Processing sample rows:  98%|█████████▊| 438/448 [1:31:43<01:34,  9.45s/it]

Processing row 438...


Processing sample rows:  98%|█████████▊| 439/448 [1:31:57<01:37, 10.81s/it]

Processing row 439...


Processing sample rows:  98%|█████████▊| 440/448 [1:32:19<01:53, 14.16s/it]

Processing row 440...


Processing sample rows:  98%|█████████▊| 441/448 [1:32:25<01:22, 11.81s/it]

Processing row 441...


Processing sample rows:  99%|█████████▊| 442/448 [1:32:33<01:04, 10.75s/it]

Processing row 442...


Processing sample rows:  99%|█████████▉| 443/448 [1:32:53<01:06, 13.31s/it]

Processing row 443...


Processing sample rows:  99%|█████████▉| 444/448 [1:33:02<00:49, 12.26s/it]

Processing row 444...


Processing sample rows:  99%|█████████▉| 445/448 [1:33:12<00:34, 11.34s/it]

Processing row 445...


Processing sample rows: 100%|█████████▉| 446/448 [1:33:23<00:22, 11.48s/it]

Processing row 446...


Processing sample rows: 100%|█████████▉| 447/448 [1:33:32<00:10, 10.54s/it]

Processing row 447...


Processing sample rows: 100%|██████████| 448/448 [1:33:35<00:00, 12.53s/it]


Processing complete. Results saved to 'xai_grok2_free_cot.csv'

Processing Statistics:
Total rows processed: 448
Successful responses: 448
Failed responses: 0


## LLM as a judge for rating

In [124]:
# Load and preprocess the data
df_4o_cot = pd.read_csv('data/Results/openai_4o_cot.csv')
df_4omini_cot = pd.read_csv('data/Results/openai_4omini_cot.csv')
import re
from tqdm import tqdm
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Extract reasoning from responses
def extract_reasoning(df):
    df['reasoning'] = df['llm_full_response'].str.extract(r'<reasoning>(.*?)</reasoning>', flags=re.DOTALL)
    return df.dropna(subset=['reasoning'])

df_4o_cot = extract_reasoning(df_4o_cot)
df_4omini_cot = extract_reasoning(df_4omini_cot)

# Sample 1000 rows from each dataset
df_4o_cot_sample = df_4o_cot.sample(n=500, random_state=42)
df_4omini_cot_sample = df_4omini_cot.sample(n=500, random_state=42)

# Create evaluation prompt template
evaluation_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert evaluator assessing the quality of reasoning. Rate the reasoning on a scale of 0-10 based on logical correctness, coherence, and relevance to the question. Output ONLY the numerical score (0-10)."),
    ("user", """Please evaluate this reasoning for the given question:

Question: {question}
Reasoning: {reasoning}

For example:
Question: What is the boiling point of water at standard pressure?
Reasoning: Water boils at 100°C (212°F) at standard atmospheric pressure (1 atm). This occurs when the vapor pressure of water equals the atmospheric pressure, causing bubbles of water vapor to form throughout the liquid. The temperature remains constant during boiling until all liquid has vaporized.
Score: 9 (Shows clear understanding, uses scientific principles, and explains the process well)

Consider:
1. Logical correctness and valid arguments
2. Coherence and clarity of explanation
3. Relevance to the question
4. Use of scientific principles where applicable

Score (0-10):""")
])

# Function to evaluate reasoning using LLM
def evaluate_reasoning(row):
    chain = evaluation_prompt | llm_openai_4o
    response = chain.invoke({
        "question": row[1],  # Access Question column by index
        "reasoning": row[2]  # Access reasoning column by index
    })
    # Extract just the numerical score
    try:
        score = float(response.content.strip())
        print(score)
        return min(max(score, 0), 10)  # Ensure score is between 0 and 10
    except:
        return None

print("Evaluating reasoning quality...")
# Apply evaluation to both datasets with progress bar
# Evaluate reasoning scores
df_4o_cot_sample['reasoning_score'] = [evaluate_reasoning(row) for row in tqdm(df_4o_cot_sample.itertuples(), total=500, desc="Processing 4o_cot")]
df_4omini_cot_sample['reasoning_score'] = [evaluate_reasoning(row) for row in tqdm(df_4omini_cot_sample.itertuples(), total=500, desc="Processing 4omini_cot")]

# Calculate thresholds


# # Save the results
# df_4o_cot.to_csv('data/Results/Reasoning_quality/openai_4o_cot_scored.csv', index=False)
# df_4omini_cot.to_csv('data/Results/Reasoning_quality/openai_4omini_cot_scored.csv', index=False)
# print("Results saved to CSV files")

Evaluating reasoning quality...


Processing 4o_cot:   0%|          | 1/500 [00:00<03:49,  2.17it/s]

0.0


Processing 4o_cot:   0%|          | 2/500 [00:00<03:01,  2.75it/s]

0.0


Processing 4o_cot:   1%|          | 3/500 [00:01<03:23,  2.44it/s]

0.0


Processing 4o_cot:   1%|          | 4/500 [00:01<03:19,  2.48it/s]

5.0


Processing 4o_cot:   1%|          | 5/500 [00:01<03:11,  2.59it/s]

0.0


Processing 4o_cot:   1%|          | 6/500 [00:02<03:11,  2.59it/s]

0.0


Processing 4o_cot:   1%|▏         | 7/500 [00:02<03:16,  2.51it/s]

5.0


Processing 4o_cot:   2%|▏         | 8/500 [00:03<03:04,  2.67it/s]

0.0


Processing 4o_cot:   2%|▏         | 9/500 [00:03<03:07,  2.61it/s]

2.0


Processing 4o_cot:   2%|▏         | 10/500 [00:03<03:01,  2.69it/s]

0.0


Processing 4o_cot:   2%|▏         | 11/500 [00:04<03:07,  2.61it/s]

7.0


Processing 4o_cot:   2%|▏         | 12/500 [00:04<03:11,  2.55it/s]

2.0


Processing 4o_cot:   3%|▎         | 13/500 [00:05<03:11,  2.54it/s]

7.0


Processing 4o_cot:   3%|▎         | 14/500 [00:08<09:55,  1.23s/it]

0.0


Processing 4o_cot:   3%|▎         | 15/500 [00:08<07:48,  1.03it/s]

8.0


Processing 4o_cot:   3%|▎         | 16/500 [00:08<06:21,  1.27it/s]

0.0


Processing 4o_cot:   3%|▎         | 17/500 [00:09<05:39,  1.42it/s]

8.0


Processing 4o_cot:   4%|▎         | 18/500 [00:09<04:48,  1.67it/s]

0.0


Processing 4o_cot:   4%|▍         | 19/500 [00:10<04:15,  1.88it/s]

0.0


Processing 4o_cot:   4%|▍         | 20/500 [00:10<03:45,  2.13it/s]

0.0


Processing 4o_cot:   4%|▍         | 21/500 [00:10<03:33,  2.24it/s]

0.0


Processing 4o_cot:   4%|▍         | 22/500 [00:11<03:13,  2.47it/s]

0.0


Processing 4o_cot:   5%|▍         | 23/500 [00:11<03:27,  2.29it/s]

0.0


Processing 4o_cot:   5%|▍         | 24/500 [00:12<03:23,  2.34it/s]

8.0


Processing 4o_cot:   5%|▌         | 25/500 [00:12<03:58,  1.99it/s]

0.0


Processing 4o_cot:   5%|▌         | 26/500 [00:13<03:50,  2.06it/s]

0.0


Processing 4o_cot:   5%|▌         | 27/500 [00:13<03:32,  2.22it/s]

7.0


Processing 4o_cot:   6%|▌         | 28/500 [00:14<03:26,  2.29it/s]

0.0


Processing 4o_cot:   6%|▌         | 29/500 [00:14<03:36,  2.17it/s]

5.0


Processing 4o_cot:   6%|▌         | 30/500 [00:15<03:50,  2.04it/s]

4.0


Processing 4o_cot:   6%|▌         | 31/500 [00:15<03:43,  2.10it/s]

5.0


Processing 4o_cot:   6%|▋         | 32/500 [00:15<03:30,  2.23it/s]

0.0


Processing 4o_cot:   7%|▋         | 33/500 [00:16<03:18,  2.35it/s]

0.0


Processing 4o_cot:   7%|▋         | 34/500 [00:16<03:21,  2.31it/s]

0.0


Processing 4o_cot:   7%|▋         | 35/500 [00:17<03:47,  2.04it/s]

4.0


Processing 4o_cot:   7%|▋         | 36/500 [00:18<04:37,  1.68it/s]

8.0


Processing 4o_cot:   7%|▋         | 37/500 [00:19<06:10,  1.25it/s]

0.0


Processing 4o_cot:   8%|▊         | 38/500 [00:20<05:40,  1.36it/s]

9.0


Processing 4o_cot:   8%|▊         | 39/500 [00:20<04:53,  1.57it/s]

8.0


Processing 4o_cot:   8%|▊         | 40/500 [00:20<04:26,  1.73it/s]

6.0


Processing 4o_cot:   8%|▊         | 41/500 [00:21<03:57,  1.93it/s]

0.0


Processing 4o_cot:   8%|▊         | 42/500 [00:21<03:42,  2.06it/s]

0.0


Processing 4o_cot:   9%|▊         | 43/500 [00:22<03:42,  2.06it/s]

1.0


Processing 4o_cot:   9%|▉         | 44/500 [00:22<03:19,  2.28it/s]

3.0


Processing 4o_cot:   9%|▉         | 45/500 [00:23<03:24,  2.23it/s]

5.0


Processing 4o_cot:   9%|▉         | 46/500 [00:23<03:09,  2.40it/s]

1.0


Processing 4o_cot:   9%|▉         | 47/500 [00:23<03:06,  2.42it/s]

8.0


Processing 4o_cot:  10%|▉         | 48/500 [00:24<02:58,  2.54it/s]

4.0


Processing 4o_cot:  10%|▉         | 49/500 [00:24<02:55,  2.57it/s]

0.0


Processing 4o_cot:  10%|█         | 50/500 [00:24<03:05,  2.43it/s]

0.0


Processing 4o_cot:  10%|█         | 51/500 [00:25<02:57,  2.53it/s]

3.0


Processing 4o_cot:  10%|█         | 52/500 [00:25<02:43,  2.74it/s]

0.0


Processing 4o_cot:  11%|█         | 53/500 [00:26<02:58,  2.51it/s]

4.0


Processing 4o_cot:  11%|█         | 54/500 [00:26<02:54,  2.56it/s]

0.0


Processing 4o_cot:  11%|█         | 55/500 [00:26<02:53,  2.57it/s]

7.0


Processing 4o_cot:  11%|█         | 56/500 [00:27<02:49,  2.63it/s]

0.0


Processing 4o_cot:  11%|█▏        | 57/500 [00:27<02:36,  2.83it/s]

6.0


Processing 4o_cot:  12%|█▏        | 58/500 [00:27<02:51,  2.57it/s]

1.0


Processing 4o_cot:  12%|█▏        | 59/500 [00:28<02:48,  2.62it/s]

2.0


Processing 4o_cot:  12%|█▏        | 60/500 [00:28<02:52,  2.55it/s]

0.0


Processing 4o_cot:  12%|█▏        | 61/500 [00:29<03:37,  2.02it/s]

3.0


Processing 4o_cot:  12%|█▏        | 62/500 [00:30<03:59,  1.83it/s]

0.0


Processing 4o_cot:  13%|█▎        | 63/500 [00:30<03:54,  1.86it/s]

1.0


Processing 4o_cot:  13%|█▎        | 64/500 [00:31<03:41,  1.97it/s]

5.0


Processing 4o_cot:  13%|█▎        | 65/500 [00:31<03:25,  2.12it/s]

0.0


Processing 4o_cot:  13%|█▎        | 66/500 [00:31<03:02,  2.38it/s]

0.0


Processing 4o_cot:  13%|█▎        | 67/500 [00:32<03:09,  2.29it/s]

2.0


Processing 4o_cot:  14%|█▎        | 68/500 [00:32<03:00,  2.40it/s]

6.0


Processing 4o_cot:  14%|█▍        | 69/500 [00:33<03:59,  1.80it/s]

5.0


Processing 4o_cot:  14%|█▍        | 70/500 [00:33<03:50,  1.87it/s]

4.0


Processing 4o_cot:  14%|█▍        | 71/500 [00:34<03:24,  2.10it/s]

6.0


Processing 4o_cot:  14%|█▍        | 72/500 [00:34<03:24,  2.10it/s]

0.0


Processing 4o_cot:  15%|█▍        | 73/500 [00:35<03:07,  2.27it/s]

5.0


Processing 4o_cot:  15%|█▍        | 74/500 [00:35<03:07,  2.28it/s]

0.0


Processing 4o_cot:  15%|█▌        | 75/500 [00:40<13:10,  1.86s/it]

3.0


Processing 4o_cot:  15%|█▌        | 76/500 [00:41<10:02,  1.42s/it]

8.0


Processing 4o_cot:  15%|█▌        | 77/500 [00:41<07:42,  1.09s/it]

7.0


Processing 4o_cot:  16%|█▌        | 78/500 [00:42<06:26,  1.09it/s]

5.0


Processing 4o_cot:  16%|█▌        | 79/500 [00:42<05:15,  1.34it/s]

0.0


Processing 4o_cot:  16%|█▌        | 80/500 [00:42<04:40,  1.50it/s]

4.0


Processing 4o_cot:  16%|█▌        | 81/500 [00:43<04:01,  1.74it/s]

3.0


Processing 4o_cot:  16%|█▋        | 82/500 [00:43<04:23,  1.59it/s]

0.0


Processing 4o_cot:  17%|█▋        | 83/500 [00:44<03:58,  1.75it/s]

0.0


Processing 4o_cot:  17%|█▋        | 84/500 [00:44<04:00,  1.73it/s]

5.0


Processing 4o_cot:  17%|█▋        | 85/500 [00:46<05:58,  1.16it/s]

0.0


Processing 4o_cot:  17%|█▋        | 86/500 [00:46<05:00,  1.38it/s]

0.0


Processing 4o_cot:  17%|█▋        | 87/500 [00:47<04:19,  1.59it/s]

0.0


Processing 4o_cot:  18%|█▊        | 88/500 [00:47<03:45,  1.83it/s]

5.0


Processing 4o_cot:  18%|█▊        | 89/500 [00:48<03:55,  1.74it/s]

0.0


Processing 4o_cot:  18%|█▊        | 90/500 [00:49<04:12,  1.62it/s]

0.0


Processing 4o_cot:  18%|█▊        | 91/500 [00:49<04:04,  1.67it/s]

0.0


Processing 4o_cot:  18%|█▊        | 92/500 [00:50<03:49,  1.78it/s]

5.0


Processing 4o_cot:  19%|█▊        | 93/500 [00:50<03:33,  1.91it/s]

0.0


Processing 4o_cot:  19%|█▉        | 94/500 [00:50<03:28,  1.95it/s]

0.0


Processing 4o_cot:  19%|█▉        | 95/500 [00:51<03:13,  2.10it/s]

0.0


Processing 4o_cot:  19%|█▉        | 96/500 [00:51<03:09,  2.13it/s]

0.0


Processing 4o_cot:  19%|█▉        | 97/500 [00:52<03:00,  2.23it/s]

0.0


Processing 4o_cot:  20%|█▉        | 98/500 [00:52<02:57,  2.27it/s]

0.0


Processing 4o_cot:  20%|█▉        | 99/500 [00:53<02:52,  2.32it/s]

0.0


Processing 4o_cot:  20%|██        | 100/500 [00:53<02:50,  2.35it/s]

9.0


Processing 4o_cot:  20%|██        | 101/500 [00:53<02:57,  2.24it/s]

7.0


Processing 4o_cot:  20%|██        | 102/500 [00:55<04:26,  1.50it/s]

0.0


Processing 4o_cot:  21%|██        | 103/500 [00:55<03:51,  1.71it/s]

7.0


Processing 4o_cot:  21%|██        | 104/500 [00:56<03:40,  1.79it/s]

0.0


Processing 4o_cot:  21%|██        | 105/500 [00:56<04:09,  1.58it/s]

8.0


Processing 4o_cot:  21%|██        | 106/500 [00:57<04:30,  1.46it/s]

3.0


Processing 4o_cot:  21%|██▏       | 107/500 [00:58<04:03,  1.61it/s]

0.0


Processing 4o_cot:  22%|██▏       | 108/500 [00:58<03:59,  1.64it/s]

4.0


Processing 4o_cot:  22%|██▏       | 109/500 [00:59<03:27,  1.89it/s]

0.0


Processing 4o_cot:  22%|██▏       | 110/500 [00:59<03:12,  2.02it/s]

0.0


Processing 4o_cot:  22%|██▏       | 111/500 [00:59<02:59,  2.17it/s]

8.0


Processing 4o_cot:  22%|██▏       | 112/500 [01:00<03:10,  2.03it/s]

1.0


Processing 4o_cot:  23%|██▎       | 113/500 [01:00<02:51,  2.26it/s]

8.0


Processing 4o_cot:  23%|██▎       | 114/500 [01:01<02:39,  2.42it/s]

0.0


Processing 4o_cot:  23%|██▎       | 115/500 [01:01<02:45,  2.33it/s]

5.0


Processing 4o_cot:  23%|██▎       | 116/500 [01:01<02:43,  2.35it/s]

0.0


Processing 4o_cot:  23%|██▎       | 117/500 [01:02<02:53,  2.21it/s]

0.0


Processing 4o_cot:  24%|██▎       | 118/500 [01:03<03:03,  2.08it/s]

0.0


Processing 4o_cot:  24%|██▍       | 119/500 [01:03<02:41,  2.36it/s]

0.0


Processing 4o_cot:  24%|██▍       | 120/500 [01:03<02:33,  2.47it/s]

8.0


Processing 4o_cot:  24%|██▍       | 121/500 [01:04<02:47,  2.27it/s]

0.0


Processing 4o_cot:  24%|██▍       | 122/500 [01:04<02:39,  2.37it/s]

7.0


Processing 4o_cot:  25%|██▍       | 123/500 [01:09<11:35,  1.84s/it]

10.0


Processing 4o_cot:  25%|██▍       | 124/500 [01:10<09:05,  1.45s/it]

8.0


Processing 4o_cot:  25%|██▌       | 125/500 [01:10<07:05,  1.13s/it]

8.0


Processing 4o_cot:  25%|██▌       | 126/500 [01:11<05:52,  1.06it/s]

0.0


Processing 4o_cot:  25%|██▌       | 127/500 [01:11<04:43,  1.32it/s]

5.0


Processing 4o_cot:  26%|██▌       | 128/500 [01:11<04:04,  1.52it/s]

5.0


Processing 4o_cot:  26%|██▌       | 129/500 [01:12<03:55,  1.58it/s]

0.0


Processing 4o_cot:  26%|██▌       | 130/500 [01:12<03:23,  1.82it/s]

6.0


Processing 4o_cot:  26%|██▌       | 131/500 [01:13<02:55,  2.10it/s]

2.0


Processing 4o_cot:  26%|██▋       | 132/500 [01:13<03:08,  1.95it/s]

6.0


Processing 4o_cot:  27%|██▋       | 133/500 [01:14<02:54,  2.10it/s]

0.0


Processing 4o_cot:  27%|██▋       | 134/500 [01:14<02:43,  2.23it/s]

0.0


Processing 4o_cot:  27%|██▋       | 135/500 [01:14<02:33,  2.38it/s]

0.0


Processing 4o_cot:  27%|██▋       | 136/500 [01:15<02:29,  2.43it/s]

6.0


Processing 4o_cot:  27%|██▋       | 137/500 [01:15<02:40,  2.27it/s]

0.0


Processing 4o_cot:  28%|██▊       | 138/500 [01:16<02:35,  2.33it/s]

0.0


Processing 4o_cot:  28%|██▊       | 139/500 [01:17<03:36,  1.67it/s]

0.0


Processing 4o_cot:  28%|██▊       | 140/500 [01:17<02:59,  2.00it/s]

3.0


Processing 4o_cot:  28%|██▊       | 141/500 [01:18<03:09,  1.89it/s]

3.0


Processing 4o_cot:  28%|██▊       | 142/500 [01:18<03:11,  1.87it/s]

5.0


Processing 4o_cot:  29%|██▊       | 143/500 [01:19<02:59,  1.99it/s]

0.0


Processing 4o_cot:  29%|██▉       | 144/500 [01:19<02:40,  2.22it/s]

0.0


Processing 4o_cot:  29%|██▉       | 145/500 [01:19<02:48,  2.11it/s]

6.0


Processing 4o_cot:  29%|██▉       | 146/500 [01:20<03:02,  1.94it/s]

0.0


Processing 4o_cot:  29%|██▉       | 147/500 [01:20<02:49,  2.08it/s]

0.0


Processing 4o_cot:  30%|██▉       | 148/500 [01:21<02:35,  2.27it/s]

4.0


Processing 4o_cot:  30%|██▉       | 149/500 [01:21<02:32,  2.30it/s]

0.0


Processing 4o_cot:  30%|███       | 150/500 [01:22<03:08,  1.85it/s]

0.0


Processing 4o_cot:  30%|███       | 151/500 [01:23<04:14,  1.37it/s]

8.0


Processing 4o_cot:  30%|███       | 152/500 [01:24<03:46,  1.54it/s]

8.0


Processing 4o_cot:  31%|███       | 153/500 [01:27<08:40,  1.50s/it]

0.0


Processing 4o_cot:  31%|███       | 154/500 [01:28<06:56,  1.20s/it]

0.0


Processing 4o_cot:  31%|███       | 155/500 [01:28<05:22,  1.07it/s]

8.0


Processing 4o_cot:  31%|███       | 156/500 [01:28<04:20,  1.32it/s]

0.0


Processing 4o_cot:  31%|███▏      | 157/500 [01:29<03:40,  1.55it/s]

8.0


Processing 4o_cot:  32%|███▏      | 158/500 [01:29<03:16,  1.74it/s]

0.0


Processing 4o_cot:  32%|███▏      | 159/500 [01:31<04:59,  1.14it/s]

3.0


Processing 4o_cot:  32%|███▏      | 160/500 [01:31<04:16,  1.32it/s]

0.0


Processing 4o_cot:  32%|███▏      | 161/500 [01:32<04:09,  1.36it/s]

0.0


Processing 4o_cot:  32%|███▏      | 162/500 [01:32<03:30,  1.61it/s]

6.0


Processing 4o_cot:  33%|███▎      | 163/500 [01:32<03:01,  1.86it/s]

2.0


Processing 4o_cot:  33%|███▎      | 164/500 [01:33<02:50,  1.97it/s]

0.0


Processing 4o_cot:  33%|███▎      | 165/500 [01:33<02:50,  1.96it/s]

0.0


Processing 4o_cot:  33%|███▎      | 166/500 [01:34<02:48,  1.98it/s]

3.0


Processing 4o_cot:  33%|███▎      | 167/500 [01:34<02:44,  2.02it/s]

0.0


Processing 4o_cot:  34%|███▎      | 168/500 [01:35<02:34,  2.15it/s]

0.0


Processing 4o_cot:  34%|███▍      | 169/500 [01:35<02:44,  2.02it/s]

3.0


Processing 4o_cot:  34%|███▍      | 170/500 [01:36<02:31,  2.17it/s]

4.0


Processing 4o_cot:  34%|███▍      | 171/500 [01:37<03:24,  1.61it/s]

0.0


Processing 4o_cot:  34%|███▍      | 172/500 [01:37<03:00,  1.82it/s]

2.0


Processing 4o_cot:  35%|███▍      | 173/500 [01:38<02:44,  1.99it/s]

5.0


Processing 4o_cot:  35%|███▍      | 174/500 [01:38<02:26,  2.22it/s]

3.0


Processing 4o_cot:  35%|███▌      | 175/500 [01:38<02:25,  2.24it/s]

5.0


Processing 4o_cot:  35%|███▌      | 176/500 [01:39<02:33,  2.12it/s]

3.0


Processing 4o_cot:  35%|███▌      | 177/500 [01:39<02:25,  2.21it/s]

3.0


Processing 4o_cot:  36%|███▌      | 178/500 [01:46<12:37,  2.35s/it]

0.0


Processing 4o_cot:  36%|███▌      | 179/500 [01:47<09:40,  1.81s/it]

6.0


Processing 4o_cot:  36%|███▌      | 180/500 [01:47<07:24,  1.39s/it]

5.0


Processing 4o_cot:  36%|███▌      | 181/500 [01:47<06:03,  1.14s/it]

0.0


Processing 4o_cot:  36%|███▋      | 182/500 [01:48<04:56,  1.07it/s]

0.0


Processing 4o_cot:  37%|███▋      | 183/500 [01:48<04:09,  1.27it/s]

0.0


Processing 4o_cot:  37%|███▋      | 184/500 [01:49<03:25,  1.54it/s]

9.0


Processing 4o_cot:  37%|███▋      | 185/500 [01:49<03:07,  1.68it/s]

8.0


Processing 4o_cot:  37%|███▋      | 186/500 [01:50<02:49,  1.85it/s]

7.0


Processing 4o_cot:  37%|███▋      | 187/500 [01:50<02:35,  2.02it/s]

0.0


Processing 4o_cot:  38%|███▊      | 188/500 [01:50<02:23,  2.18it/s]

0.0


Processing 4o_cot:  38%|███▊      | 189/500 [01:51<02:33,  2.03it/s]

8.0


Processing 4o_cot:  38%|███▊      | 190/500 [01:51<02:27,  2.10it/s]

0.0


Processing 4o_cot:  38%|███▊      | 191/500 [01:52<02:18,  2.23it/s]

0.0


Processing 4o_cot:  38%|███▊      | 192/500 [01:52<02:08,  2.41it/s]

0.0


Processing 4o_cot:  39%|███▊      | 193/500 [01:53<02:30,  2.04it/s]

7.0


Processing 4o_cot:  39%|███▉      | 194/500 [01:53<02:33,  1.99it/s]

0.0


Processing 4o_cot:  39%|███▉      | 195/500 [01:54<02:23,  2.13it/s]

7.0


Processing 4o_cot:  39%|███▉      | 196/500 [01:54<02:26,  2.07it/s]

0.0


Processing 4o_cot:  39%|███▉      | 197/500 [01:55<02:58,  1.70it/s]

4.0


Processing 4o_cot:  40%|███▉      | 198/500 [01:55<02:39,  1.89it/s]

0.0


Processing 4o_cot:  40%|███▉      | 199/500 [01:56<02:41,  1.87it/s]

0.0


Processing 4o_cot:  40%|████      | 200/500 [01:56<02:36,  1.92it/s]

0.0


Processing 4o_cot:  40%|████      | 201/500 [01:57<03:19,  1.50it/s]

0.0


Processing 4o_cot:  40%|████      | 202/500 [01:58<02:55,  1.70it/s]

0.0


Processing 4o_cot:  41%|████      | 203/500 [01:58<02:42,  1.82it/s]

0.0


Processing 4o_cot:  41%|████      | 204/500 [01:59<02:36,  1.89it/s]

0.0


Processing 4o_cot:  41%|████      | 205/500 [01:59<02:26,  2.02it/s]

7.0


Processing 4o_cot:  41%|████      | 206/500 [02:00<02:19,  2.11it/s]

6.0


Processing 4o_cot:  41%|████▏     | 207/500 [02:00<02:09,  2.27it/s]

5.0


Processing 4o_cot:  42%|████▏     | 208/500 [02:00<02:06,  2.32it/s]

0.0


Processing 4o_cot:  42%|████▏     | 209/500 [02:01<02:00,  2.41it/s]

8.0


Processing 4o_cot:  42%|████▏     | 210/500 [02:01<02:07,  2.28it/s]

0.0


Processing 4o_cot:  42%|████▏     | 211/500 [02:02<02:09,  2.23it/s]

5.0


Processing 4o_cot:  42%|████▏     | 212/500 [02:02<02:00,  2.39it/s]

5.0


Processing 4o_cot:  43%|████▎     | 213/500 [02:03<02:04,  2.31it/s]

0.0


Processing 4o_cot:  43%|████▎     | 214/500 [02:03<02:01,  2.35it/s]

9.0


Processing 4o_cot:  43%|████▎     | 215/500 [02:04<02:15,  2.11it/s]

5.0


Processing 4o_cot:  43%|████▎     | 216/500 [02:04<02:39,  1.78it/s]

0.0


Processing 4o_cot:  43%|████▎     | 217/500 [02:05<02:29,  1.90it/s]

0.0


Processing 4o_cot:  44%|████▎     | 218/500 [02:05<02:36,  1.80it/s]

0.0


Processing 4o_cot:  44%|████▍     | 219/500 [02:06<02:22,  1.97it/s]

0.0


Processing 4o_cot:  44%|████▍     | 220/500 [02:06<02:09,  2.16it/s]

6.0


Processing 4o_cot:  44%|████▍     | 221/500 [02:07<02:00,  2.32it/s]

6.0


Processing 4o_cot:  44%|████▍     | 222/500 [02:07<01:59,  2.33it/s]

0.0


Processing 4o_cot:  45%|████▍     | 223/500 [02:07<02:00,  2.29it/s]

0.0


Processing 4o_cot:  45%|████▍     | 224/500 [02:08<01:53,  2.44it/s]

0.0


Processing 4o_cot:  45%|████▌     | 225/500 [02:08<01:57,  2.33it/s]

4.0


Processing 4o_cot:  45%|████▌     | 226/500 [02:09<02:04,  2.19it/s]

3.0


Processing 4o_cot:  45%|████▌     | 227/500 [02:09<02:01,  2.24it/s]

5.0


Processing 4o_cot:  46%|████▌     | 228/500 [02:09<01:49,  2.49it/s]

0.0


Processing 4o_cot:  46%|████▌     | 229/500 [02:10<01:46,  2.55it/s]

0.0


Processing 4o_cot:  46%|████▌     | 230/500 [02:10<01:57,  2.30it/s]

0.0


Processing 4o_cot:  46%|████▌     | 231/500 [02:11<01:56,  2.32it/s]

0.0


Processing 4o_cot:  46%|████▋     | 232/500 [02:11<01:57,  2.29it/s]

0.0


Processing 4o_cot:  47%|████▋     | 233/500 [02:12<02:14,  1.98it/s]

0.0


Processing 4o_cot:  47%|████▋     | 234/500 [02:12<02:00,  2.21it/s]

4.0


Processing 4o_cot:  47%|████▋     | 235/500 [02:13<01:55,  2.30it/s]

3.0


Processing 4o_cot:  47%|████▋     | 236/500 [02:13<01:45,  2.50it/s]

8.0


Processing 4o_cot:  47%|████▋     | 237/500 [02:14<02:06,  2.09it/s]

0.0


Processing 4o_cot:  48%|████▊     | 238/500 [02:14<02:02,  2.14it/s]

0.0


Processing 4o_cot:  48%|████▊     | 239/500 [02:15<02:02,  2.14it/s]

0.0


Processing 4o_cot:  48%|████▊     | 240/500 [02:15<02:17,  1.89it/s]

3.0


Processing 4o_cot:  48%|████▊     | 241/500 [02:16<01:59,  2.16it/s]

0.0


Processing 4o_cot:  48%|████▊     | 242/500 [02:16<01:56,  2.21it/s]

6.0


Processing 4o_cot:  49%|████▊     | 243/500 [02:16<01:59,  2.15it/s]

9.0


Processing 4o_cot:  49%|████▉     | 244/500 [02:17<01:47,  2.37it/s]

8.0


Processing 4o_cot:  49%|████▉     | 245/500 [02:17<01:53,  2.24it/s]

0.0


Processing 4o_cot:  49%|████▉     | 246/500 [02:18<01:44,  2.43it/s]

6.0


Processing 4o_cot:  49%|████▉     | 247/500 [02:18<01:36,  2.63it/s]

0.0


Processing 4o_cot:  50%|████▉     | 248/500 [02:18<01:32,  2.71it/s]

6.0


Processing 4o_cot:  50%|████▉     | 249/500 [02:19<01:38,  2.55it/s]

4.0


Processing 4o_cot:  50%|█████     | 250/500 [02:19<01:39,  2.51it/s]

0.0


Processing 4o_cot:  50%|█████     | 251/500 [02:19<01:33,  2.67it/s]

10.0


Processing 4o_cot:  50%|█████     | 252/500 [02:20<01:39,  2.49it/s]

5.0


Processing 4o_cot:  51%|█████     | 253/500 [02:21<01:59,  2.07it/s]

0.0


Processing 4o_cot:  51%|█████     | 254/500 [02:21<01:59,  2.06it/s]

1.0


Processing 4o_cot:  51%|█████     | 255/500 [02:22<02:07,  1.92it/s]

7.0


Processing 4o_cot:  51%|█████     | 256/500 [02:22<02:07,  1.92it/s]

0.0


Processing 4o_cot:  51%|█████▏    | 257/500 [02:23<01:58,  2.05it/s]

6.0


Processing 4o_cot:  52%|█████▏    | 258/500 [02:23<01:51,  2.17it/s]

0.0


Processing 4o_cot:  52%|█████▏    | 259/500 [02:23<01:40,  2.40it/s]

0.0


Processing 4o_cot:  52%|█████▏    | 260/500 [02:24<01:34,  2.55it/s]

0.0


Processing 4o_cot:  52%|█████▏    | 261/500 [02:24<01:28,  2.71it/s]

0.0


Processing 4o_cot:  52%|█████▏    | 262/500 [02:24<01:27,  2.74it/s]

0.0


Processing 4o_cot:  53%|█████▎    | 263/500 [02:25<01:28,  2.67it/s]

0.0


Processing 4o_cot:  53%|█████▎    | 264/500 [02:25<01:21,  2.89it/s]

5.0


Processing 4o_cot:  53%|█████▎    | 265/500 [02:25<01:18,  2.99it/s]

0.0


Processing 4o_cot:  53%|█████▎    | 266/500 [02:26<01:29,  2.61it/s]

0.0


Processing 4o_cot:  53%|█████▎    | 267/500 [02:26<01:40,  2.31it/s]

3.0


Processing 4o_cot:  54%|█████▎    | 268/500 [02:27<01:34,  2.46it/s]

8.0


Processing 4o_cot:  54%|█████▍    | 269/500 [02:27<01:35,  2.42it/s]

9.0


Processing 4o_cot:  54%|█████▍    | 270/500 [02:28<01:48,  2.12it/s]

0.0


Processing 4o_cot:  54%|█████▍    | 271/500 [02:28<01:43,  2.22it/s]

0.0


Processing 4o_cot:  54%|█████▍    | 272/500 [02:29<01:39,  2.29it/s]

7.0


Processing 4o_cot:  55%|█████▍    | 273/500 [02:29<01:30,  2.50it/s]

0.0


Processing 4o_cot:  55%|█████▍    | 274/500 [02:29<01:31,  2.47it/s]

0.0


Processing 4o_cot:  55%|█████▌    | 275/500 [02:30<01:36,  2.32it/s]

7.0


Processing 4o_cot:  55%|█████▌    | 276/500 [02:30<01:42,  2.19it/s]

3.0


Processing 4o_cot:  55%|█████▌    | 277/500 [02:31<01:46,  2.09it/s]

5.0


Processing 4o_cot:  56%|█████▌    | 278/500 [02:31<01:38,  2.24it/s]

0.0


Processing 4o_cot:  56%|█████▌    | 279/500 [02:32<01:34,  2.34it/s]

8.0


Processing 4o_cot:  56%|█████▌    | 280/500 [02:32<01:34,  2.33it/s]

6.0


Processing 4o_cot:  56%|█████▌    | 281/500 [02:32<01:34,  2.31it/s]

0.0


Processing 4o_cot:  56%|█████▋    | 282/500 [02:33<01:37,  2.25it/s]

2.0


Processing 4o_cot:  57%|█████▋    | 283/500 [02:33<01:36,  2.24it/s]

2.0


Processing 4o_cot:  57%|█████▋    | 284/500 [02:34<01:29,  2.42it/s]

3.0


Processing 4o_cot:  57%|█████▋    | 285/500 [02:34<01:33,  2.29it/s]

0.0


Processing 4o_cot:  57%|█████▋    | 286/500 [02:34<01:27,  2.45it/s]

0.0


Processing 4o_cot:  57%|█████▋    | 287/500 [02:35<01:35,  2.24it/s]

2.0


Processing 4o_cot:  58%|█████▊    | 288/500 [02:36<01:41,  2.09it/s]

0.0


Processing 4o_cot:  58%|█████▊    | 289/500 [02:36<01:46,  1.98it/s]

0.0


Processing 4o_cot:  58%|█████▊    | 290/500 [02:37<01:44,  2.01it/s]

0.0


Processing 4o_cot:  58%|█████▊    | 291/500 [02:37<01:44,  2.01it/s]

0.0


Processing 4o_cot:  58%|█████▊    | 292/500 [02:38<01:38,  2.11it/s]

2.0


Processing 4o_cot:  59%|█████▊    | 293/500 [02:38<01:32,  2.23it/s]

3.0


Processing 4o_cot:  59%|█████▉    | 294/500 [02:38<01:35,  2.16it/s]

2.0


Processing 4o_cot:  59%|█████▉    | 295/500 [02:39<01:31,  2.23it/s]

0.0


Processing 4o_cot:  59%|█████▉    | 296/500 [02:39<01:31,  2.23it/s]

0.0


Processing 4o_cot:  59%|█████▉    | 297/500 [02:40<01:32,  2.19it/s]

0.0


Processing 4o_cot:  60%|█████▉    | 298/500 [02:40<01:30,  2.23it/s]

6.0


Processing 4o_cot:  60%|█████▉    | 299/500 [02:41<01:26,  2.33it/s]

4.0


Processing 4o_cot:  60%|██████    | 300/500 [02:41<01:37,  2.05it/s]

9.0


Processing 4o_cot:  60%|██████    | 301/500 [02:47<06:36,  1.99s/it]

8.0


Processing 4o_cot:  60%|██████    | 302/500 [02:47<05:00,  1.52s/it]

5.0


Processing 4o_cot:  61%|██████    | 303/500 [02:48<03:53,  1.18s/it]

0.0


Processing 4o_cot:  61%|██████    | 304/500 [02:48<03:02,  1.07it/s]

3.0


Processing 4o_cot:  61%|██████    | 305/500 [02:48<02:25,  1.34it/s]

0.0


Processing 4o_cot:  61%|██████    | 306/500 [02:49<02:20,  1.38it/s]

0.0


Processing 4o_cot:  61%|██████▏   | 307/500 [02:50<02:25,  1.33it/s]

0.0


Processing 4o_cot:  62%|██████▏   | 308/500 [02:50<02:04,  1.54it/s]

2.0


Processing 4o_cot:  62%|██████▏   | 309/500 [02:50<01:45,  1.82it/s]

3.0


Processing 4o_cot:  62%|██████▏   | 310/500 [02:51<01:37,  1.96it/s]

4.0


Processing 4o_cot:  62%|██████▏   | 311/500 [02:51<01:29,  2.12it/s]

0.0


Processing 4o_cot:  62%|██████▏   | 312/500 [02:52<01:23,  2.25it/s]

2.0


Processing 4o_cot:  63%|██████▎   | 313/500 [02:52<01:32,  2.02it/s]

0.0


Processing 4o_cot:  63%|██████▎   | 314/500 [02:53<01:45,  1.76it/s]

7.0


Processing 4o_cot:  63%|██████▎   | 315/500 [02:53<01:36,  1.92it/s]

0.0


Processing 4o_cot:  63%|██████▎   | 316/500 [02:54<01:26,  2.14it/s]

4.0


Processing 4o_cot:  63%|██████▎   | 317/500 [02:54<01:25,  2.15it/s]

0.0


Processing 4o_cot:  64%|██████▎   | 318/500 [02:55<01:24,  2.16it/s]

6.0


Processing 4o_cot:  64%|██████▍   | 319/500 [02:55<01:18,  2.32it/s]

0.0


Processing 4o_cot:  64%|██████▍   | 320/500 [02:55<01:11,  2.52it/s]

9.0


Processing 4o_cot:  64%|██████▍   | 321/500 [02:56<01:16,  2.33it/s]

0.0


Processing 4o_cot:  64%|██████▍   | 322/500 [02:56<01:20,  2.20it/s]

0.0


Processing 4o_cot:  65%|██████▍   | 323/500 [02:58<02:25,  1.21it/s]

0.0


Processing 4o_cot:  65%|██████▍   | 324/500 [02:58<01:58,  1.49it/s]

2.0


Processing 4o_cot:  65%|██████▌   | 325/500 [02:59<01:42,  1.71it/s]

4.0


Processing 4o_cot:  65%|██████▌   | 326/500 [02:59<01:39,  1.75it/s]

9.0


Processing 4o_cot:  65%|██████▌   | 327/500 [03:00<01:24,  2.05it/s]

8.0


Processing 4o_cot:  66%|██████▌   | 328/500 [03:00<01:20,  2.14it/s]

0.0


Processing 4o_cot:  66%|██████▌   | 329/500 [03:00<01:16,  2.23it/s]

2.0


Processing 4o_cot:  66%|██████▌   | 330/500 [03:01<01:15,  2.25it/s]

0.0


Processing 4o_cot:  66%|██████▌   | 331/500 [03:01<01:25,  1.97it/s]

0.0


Processing 4o_cot:  66%|██████▋   | 332/500 [03:06<04:42,  1.68s/it]

1.0


Processing 4o_cot:  67%|██████▋   | 333/500 [03:10<06:55,  2.49s/it]

0.0


Processing 4o_cot:  67%|██████▋   | 334/500 [03:11<05:13,  1.89s/it]

8.0


Processing 4o_cot:  67%|██████▋   | 335/500 [03:11<04:06,  1.49s/it]

7.0


Processing 4o_cot:  67%|██████▋   | 336/500 [03:12<03:11,  1.17s/it]

0.0


Processing 4o_cot:  67%|██████▋   | 337/500 [03:12<02:35,  1.05it/s]

0.0


Processing 4o_cot:  68%|██████▊   | 338/500 [03:12<02:02,  1.32it/s]

5.0


Processing 4o_cot:  68%|██████▊   | 339/500 [03:13<01:44,  1.54it/s]

0.0


Processing 4o_cot:  68%|██████▊   | 340/500 [03:13<01:29,  1.78it/s]

0.0


Processing 4o_cot:  68%|██████▊   | 341/500 [03:14<01:20,  1.97it/s]

0.0


Processing 4o_cot:  68%|██████▊   | 342/500 [03:14<01:19,  1.98it/s]

4.0


Processing 4o_cot:  69%|██████▊   | 343/500 [03:15<01:18,  1.99it/s]

0.0


Processing 4o_cot:  69%|██████▉   | 344/500 [03:15<01:09,  2.24it/s]

5.0


Processing 4o_cot:  69%|██████▉   | 345/500 [03:15<01:04,  2.41it/s]

0.0


Processing 4o_cot:  69%|██████▉   | 346/500 [03:16<01:02,  2.48it/s]

0.0


Processing 4o_cot:  69%|██████▉   | 347/500 [03:16<01:17,  1.99it/s]

0.0


Processing 4o_cot:  70%|██████▉   | 348/500 [03:17<01:15,  2.00it/s]

0.0


Processing 4o_cot:  70%|██████▉   | 349/500 [03:17<01:09,  2.17it/s]

9.0


Processing 4o_cot:  70%|███████   | 350/500 [03:18<01:15,  1.98it/s]

0.0


Processing 4o_cot:  70%|███████   | 351/500 [03:18<01:22,  1.80it/s]

0.0


Processing 4o_cot:  70%|███████   | 352/500 [03:19<01:19,  1.86it/s]

0.0


Processing 4o_cot:  71%|███████   | 353/500 [03:20<01:18,  1.88it/s]

0.0


Processing 4o_cot:  71%|███████   | 354/500 [03:20<01:09,  2.11it/s]

0.0


Processing 4o_cot:  71%|███████   | 355/500 [03:20<01:01,  2.38it/s]

2.0


Processing 4o_cot:  71%|███████   | 356/500 [03:21<01:04,  2.22it/s]

0.0


Processing 4o_cot:  71%|███████▏  | 357/500 [03:21<01:10,  2.03it/s]

9.0


Processing 4o_cot:  72%|███████▏  | 358/500 [03:22<01:13,  1.94it/s]

0.0


Processing 4o_cot:  72%|███████▏  | 359/500 [03:22<01:15,  1.87it/s]

8.0


Processing 4o_cot:  72%|███████▏  | 360/500 [03:23<01:16,  1.82it/s]

0.0


Processing 4o_cot:  72%|███████▏  | 361/500 [03:23<01:06,  2.09it/s]

8.0


Processing 4o_cot:  72%|███████▏  | 362/500 [03:24<00:59,  2.33it/s]

0.0


Processing 4o_cot:  73%|███████▎  | 363/500 [03:24<00:57,  2.39it/s]

0.0


Processing 4o_cot:  73%|███████▎  | 364/500 [03:25<01:05,  2.09it/s]

2.0


Processing 4o_cot:  73%|███████▎  | 365/500 [03:25<01:09,  1.93it/s]

0.0


Processing 4o_cot:  73%|███████▎  | 366/500 [03:26<01:09,  1.94it/s]

3.0


Processing 4o_cot:  73%|███████▎  | 367/500 [03:26<01:05,  2.02it/s]

8.0


Processing 4o_cot:  74%|███████▎  | 368/500 [03:27<01:10,  1.86it/s]

8.0


Processing 4o_cot:  74%|███████▍  | 369/500 [03:27<01:04,  2.04it/s]

0.0


Processing 4o_cot:  74%|███████▍  | 370/500 [03:28<01:03,  2.05it/s]

0.0


Processing 4o_cot:  74%|███████▍  | 371/500 [03:28<00:56,  2.29it/s]

0.0


Processing 4o_cot:  74%|███████▍  | 372/500 [03:28<00:54,  2.34it/s]

7.0


Processing 4o_cot:  75%|███████▍  | 373/500 [03:29<00:55,  2.30it/s]

0.0


Processing 4o_cot:  75%|███████▍  | 374/500 [03:29<00:56,  2.22it/s]

9.0


Processing 4o_cot:  75%|███████▌  | 375/500 [03:30<00:57,  2.18it/s]

1.0


Processing 4o_cot:  75%|███████▌  | 376/500 [03:30<00:54,  2.28it/s]

9.0


Processing 4o_cot:  75%|███████▌  | 377/500 [03:31<00:52,  2.33it/s]

0.0


Processing 4o_cot:  76%|███████▌  | 378/500 [03:31<00:57,  2.13it/s]

5.0


Processing 4o_cot:  76%|███████▌  | 379/500 [03:32<01:09,  1.75it/s]

0.0


Processing 4o_cot:  76%|███████▌  | 380/500 [03:32<01:04,  1.87it/s]

0.0


Processing 4o_cot:  76%|███████▌  | 381/500 [03:33<00:57,  2.08it/s]

6.0


Processing 4o_cot:  76%|███████▋  | 382/500 [03:33<00:52,  2.24it/s]

0.0


Processing 4o_cot:  77%|███████▋  | 383/500 [03:34<01:00,  1.94it/s]

0.0


Processing 4o_cot:  77%|███████▋  | 384/500 [03:34<00:56,  2.05it/s]

5.0


Processing 4o_cot:  77%|███████▋  | 385/500 [03:35<01:07,  1.71it/s]

3.0


Processing 4o_cot:  77%|███████▋  | 386/500 [03:36<01:05,  1.74it/s]

1.0


Processing 4o_cot:  77%|███████▋  | 387/500 [03:37<01:47,  1.06it/s]

0.0


Processing 4o_cot:  78%|███████▊  | 388/500 [03:38<01:31,  1.22it/s]

0.0


Processing 4o_cot:  78%|███████▊  | 389/500 [03:38<01:20,  1.37it/s]

0.0


Processing 4o_cot:  78%|███████▊  | 390/500 [03:39<01:12,  1.53it/s]

8.0


Processing 4o_cot:  78%|███████▊  | 391/500 [03:40<01:12,  1.50it/s]

5.0


Processing 4o_cot:  78%|███████▊  | 392/500 [03:40<01:04,  1.69it/s]

2.0


Processing 4o_cot:  79%|███████▊  | 393/500 [03:41<00:59,  1.79it/s]

6.0


Processing 4o_cot:  79%|███████▉  | 394/500 [03:41<00:52,  2.01it/s]

8.0


Processing 4o_cot:  79%|███████▉  | 395/500 [03:41<00:47,  2.20it/s]

5.0


Processing 4o_cot:  79%|███████▉  | 396/500 [03:42<00:45,  2.30it/s]

0.0


Processing 4o_cot:  79%|███████▉  | 397/500 [03:42<00:43,  2.37it/s]

3.0


Processing 4o_cot:  80%|███████▉  | 398/500 [03:43<00:55,  1.85it/s]

9.0


Processing 4o_cot:  80%|███████▉  | 399/500 [03:43<00:48,  2.08it/s]

0.0


Processing 4o_cot:  80%|████████  | 400/500 [03:44<00:52,  1.91it/s]

2.0


Processing 4o_cot:  80%|████████  | 401/500 [03:44<00:47,  2.07it/s]

0.0


Processing 4o_cot:  80%|████████  | 402/500 [03:45<00:48,  2.03it/s]

0.0


Processing 4o_cot:  81%|████████  | 403/500 [03:45<00:46,  2.10it/s]

9.0


Processing 4o_cot:  81%|████████  | 404/500 [03:46<00:50,  1.91it/s]

0.0


Processing 4o_cot:  81%|████████  | 405/500 [03:46<00:44,  2.14it/s]

7.0


Processing 4o_cot:  81%|████████  | 406/500 [03:47<00:42,  2.22it/s]

0.0


Processing 4o_cot:  81%|████████▏ | 407/500 [03:47<00:47,  1.97it/s]

0.0


Processing 4o_cot:  82%|████████▏ | 408/500 [03:48<00:43,  2.13it/s]

0.0


Processing 4o_cot:  82%|████████▏ | 409/500 [03:48<00:43,  2.08it/s]

5.0


Processing 4o_cot:  82%|████████▏ | 410/500 [03:49<00:44,  2.04it/s]

2.0


Processing 4o_cot:  82%|████████▏ | 411/500 [03:49<00:44,  2.02it/s]

0.0


Processing 4o_cot:  82%|████████▏ | 412/500 [03:50<00:45,  1.94it/s]

7.0


Processing 4o_cot:  83%|████████▎ | 413/500 [03:50<00:45,  1.90it/s]

0.0


Processing 4o_cot:  83%|████████▎ | 414/500 [03:51<00:40,  2.13it/s]

6.0


Processing 4o_cot:  83%|████████▎ | 415/500 [03:51<00:44,  1.91it/s]

5.0


Processing 4o_cot:  83%|████████▎ | 416/500 [03:52<00:42,  1.99it/s]

0.0


Processing 4o_cot:  83%|████████▎ | 417/500 [03:52<00:44,  1.88it/s]

0.0


Processing 4o_cot:  84%|████████▎ | 418/500 [03:53<00:44,  1.86it/s]

0.0


Processing 4o_cot:  84%|████████▍ | 419/500 [03:53<00:40,  2.02it/s]

0.0


Processing 4o_cot:  84%|████████▍ | 420/500 [03:54<00:36,  2.21it/s]

6.0


Processing 4o_cot:  84%|████████▍ | 421/500 [03:54<00:37,  2.12it/s]

0.0


Processing 4o_cot:  84%|████████▍ | 422/500 [03:54<00:34,  2.26it/s]

0.0


Processing 4o_cot:  85%|████████▍ | 423/500 [03:55<00:34,  2.26it/s]

1.0


Processing 4o_cot:  85%|████████▍ | 424/500 [03:55<00:34,  2.21it/s]

5.0


Processing 4o_cot:  85%|████████▌ | 425/500 [03:56<00:40,  1.87it/s]

0.0


Processing 4o_cot:  85%|████████▌ | 426/500 [03:56<00:35,  2.07it/s]

0.0


Processing 4o_cot:  85%|████████▌ | 427/500 [03:57<00:32,  2.24it/s]

7.0


Processing 4o_cot:  86%|████████▌ | 428/500 [03:57<00:35,  2.01it/s]

4.0


Processing 4o_cot:  86%|████████▌ | 429/500 [03:58<00:35,  2.01it/s]

0.0


Processing 4o_cot:  86%|████████▌ | 430/500 [03:58<00:35,  1.95it/s]

0.0


Processing 4o_cot:  86%|████████▌ | 431/500 [03:59<00:33,  2.04it/s]

5.0


Processing 4o_cot:  86%|████████▋ | 432/500 [04:00<00:38,  1.78it/s]

0.0


Processing 4o_cot:  87%|████████▋ | 433/500 [04:00<00:34,  1.96it/s]

8.0


Processing 4o_cot:  87%|████████▋ | 434/500 [04:00<00:31,  2.07it/s]

4.0


Processing 4o_cot:  87%|████████▋ | 435/500 [04:01<00:31,  2.08it/s]

3.0


Processing 4o_cot:  87%|████████▋ | 436/500 [04:01<00:32,  1.99it/s]

8.0


Processing 4o_cot:  87%|████████▋ | 437/500 [04:02<00:35,  1.79it/s]

5.0


Processing 4o_cot:  88%|████████▊ | 438/500 [04:03<00:38,  1.61it/s]

0.0


Processing 4o_cot:  88%|████████▊ | 439/500 [04:04<00:37,  1.64it/s]

0.0


Processing 4o_cot:  88%|████████▊ | 440/500 [04:04<00:32,  1.83it/s]

8.0


Processing 4o_cot:  88%|████████▊ | 441/500 [04:04<00:29,  1.97it/s]

0.0


Processing 4o_cot:  88%|████████▊ | 442/500 [04:05<00:28,  2.07it/s]

0.0


Processing 4o_cot:  89%|████████▊ | 443/500 [04:05<00:26,  2.17it/s]

0.0


Processing 4o_cot:  89%|████████▉ | 444/500 [04:06<00:30,  1.85it/s]

0.0


Processing 4o_cot:  89%|████████▉ | 445/500 [04:06<00:25,  2.18it/s]

6.0


Processing 4o_cot:  89%|████████▉ | 446/500 [04:07<00:31,  1.74it/s]

0.0


Processing 4o_cot:  89%|████████▉ | 447/500 [04:08<00:29,  1.79it/s]

5.0


Processing 4o_cot:  90%|████████▉ | 448/500 [04:08<00:28,  1.82it/s]

8.0


Processing 4o_cot:  90%|████████▉ | 449/500 [04:08<00:26,  1.95it/s]

0.0


Processing 4o_cot:  90%|█████████ | 450/500 [04:09<00:22,  2.18it/s]

0.0


Processing 4o_cot:  90%|█████████ | 451/500 [04:09<00:23,  2.06it/s]

4.0


Processing 4o_cot:  90%|█████████ | 452/500 [04:10<00:20,  2.29it/s]

0.0


Processing 4o_cot:  91%|█████████ | 453/500 [04:10<00:21,  2.19it/s]

0.0


Processing 4o_cot:  91%|█████████ | 454/500 [04:11<00:20,  2.25it/s]

0.0


Processing 4o_cot:  91%|█████████ | 455/500 [04:11<00:19,  2.36it/s]

5.0


Processing 4o_cot:  91%|█████████ | 456/500 [04:12<00:31,  1.39it/s]

0.0


Processing 4o_cot:  91%|█████████▏| 457/500 [04:13<00:32,  1.33it/s]

1.0


Processing 4o_cot:  92%|█████████▏| 458/500 [04:14<00:33,  1.25it/s]

0.0


Processing 4o_cot:  92%|█████████▏| 459/500 [04:15<00:28,  1.43it/s]

0.0


Processing 4o_cot:  92%|█████████▏| 460/500 [04:15<00:24,  1.66it/s]

0.0


Processing 4o_cot:  92%|█████████▏| 461/500 [04:15<00:22,  1.76it/s]

0.0


Processing 4o_cot:  92%|█████████▏| 462/500 [04:16<00:19,  1.97it/s]

3.0


Processing 4o_cot:  93%|█████████▎| 463/500 [04:18<00:39,  1.07s/it]

2.0


Processing 4o_cot:  93%|█████████▎| 464/500 [04:19<00:31,  1.15it/s]

6.0


Processing 4o_cot:  93%|█████████▎| 465/500 [04:19<00:28,  1.23it/s]

0.0


Processing 4o_cot:  93%|█████████▎| 466/500 [04:20<00:23,  1.44it/s]

5.0


Processing 4o_cot:  93%|█████████▎| 467/500 [04:20<00:21,  1.53it/s]

8.0


Processing 4o_cot:  94%|█████████▎| 468/500 [04:21<00:17,  1.87it/s]

8.0


Processing 4o_cot:  94%|█████████▍| 469/500 [04:21<00:15,  1.98it/s]

0.0


Processing 4o_cot:  94%|█████████▍| 470/500 [04:21<00:14,  2.14it/s]

0.0


Processing 4o_cot:  94%|█████████▍| 471/500 [04:22<00:12,  2.38it/s]

2.0


Processing 4o_cot:  94%|█████████▍| 472/500 [04:22<00:11,  2.53it/s]

0.0


Processing 4o_cot:  95%|█████████▍| 473/500 [04:22<00:10,  2.51it/s]

0.0


Processing 4o_cot:  95%|█████████▍| 474/500 [04:23<00:10,  2.56it/s]

0.0


Processing 4o_cot:  95%|█████████▌| 475/500 [04:23<00:10,  2.34it/s]

4.0


Processing 4o_cot:  95%|█████████▌| 477/500 [04:25<00:13,  1.65it/s]

4.0


Processing 4o_cot:  96%|█████████▌| 478/500 [04:26<00:13,  1.64it/s]

2.0


Processing 4o_cot:  96%|█████████▌| 479/500 [04:26<00:12,  1.64it/s]

0.0


Processing 4o_cot:  96%|█████████▌| 480/500 [04:27<00:10,  1.87it/s]

0.0


Processing 4o_cot:  96%|█████████▌| 481/500 [04:27<00:09,  2.06it/s]

0.0


Processing 4o_cot:  96%|█████████▋| 482/500 [04:28<00:09,  1.95it/s]

8.0


Processing 4o_cot:  97%|█████████▋| 483/500 [04:28<00:09,  1.81it/s]

2.0


Processing 4o_cot:  97%|█████████▋| 484/500 [04:29<00:08,  1.94it/s]

0.0


Processing 4o_cot:  97%|█████████▋| 485/500 [04:29<00:08,  1.85it/s]

0.0


Processing 4o_cot:  97%|█████████▋| 486/500 [04:30<00:08,  1.62it/s]

0.0


Processing 4o_cot:  97%|█████████▋| 487/500 [04:31<00:07,  1.72it/s]

0.0


Processing 4o_cot:  98%|█████████▊| 488/500 [04:31<00:06,  1.99it/s]

4.0


Processing 4o_cot:  98%|█████████▊| 489/500 [04:31<00:05,  2.19it/s]

6.0


Processing 4o_cot:  98%|█████████▊| 490/500 [04:32<00:04,  2.27it/s]

9.0


Processing 4o_cot:  98%|█████████▊| 491/500 [04:32<00:04,  2.03it/s]

5.0


Processing 4o_cot:  98%|█████████▊| 492/500 [04:32<00:03,  2.33it/s]

8.0


Processing 4o_cot:  99%|█████████▊| 493/500 [04:33<00:03,  2.06it/s]

0.0


Processing 4o_cot:  99%|█████████▉| 494/500 [04:34<00:03,  1.89it/s]

5.0


Processing 4o_cot:  99%|█████████▉| 495/500 [04:34<00:02,  2.00it/s]

0.0


Processing 4o_cot:  99%|█████████▉| 496/500 [04:35<00:01,  2.18it/s]

0.0


Processing 4o_cot:  99%|█████████▉| 497/500 [04:35<00:01,  2.15it/s]

6.0


Processing 4o_cot: 100%|█████████▉| 498/500 [04:35<00:00,  2.14it/s]

0.0


Processing 4o_cot: 100%|█████████▉| 499/500 [04:36<00:00,  1.93it/s]

0.0


Processing 4o_cot: 100%|██████████| 500/500 [04:37<00:00,  1.80it/s]


0.0


Processing 4omini_cot:   0%|          | 1/500 [00:01<08:39,  1.04s/it]

8.0


Processing 4omini_cot:   0%|          | 2/500 [00:01<05:08,  1.62it/s]

4.0


Processing 4omini_cot:   1%|          | 3/500 [00:02<05:29,  1.51it/s]

3.0


Processing 4omini_cot:   1%|          | 4/500 [00:02<05:36,  1.47it/s]

7.0


Processing 4omini_cot:   1%|          | 5/500 [00:03<05:27,  1.51it/s]

10.0


Processing 4omini_cot:   1%|          | 6/500 [00:04<05:17,  1.55it/s]

0.0


Processing 4omini_cot:   1%|▏         | 7/500 [00:04<05:02,  1.63it/s]

3.0


Processing 4omini_cot:   2%|▏         | 8/500 [00:05<05:25,  1.51it/s]

0.0


Processing 4omini_cot:   2%|▏         | 9/500 [00:06<06:06,  1.34it/s]

0.0


Processing 4omini_cot:   2%|▏         | 10/500 [00:07<06:17,  1.30it/s]

0.0


Processing 4omini_cot:   2%|▏         | 11/500 [00:08<06:39,  1.22it/s]

0.0


Processing 4omini_cot:   2%|▏         | 12/500 [00:08<06:56,  1.17it/s]

5.0


Processing 4omini_cot:   3%|▎         | 13/500 [00:09<06:11,  1.31it/s]

0.0


Processing 4omini_cot:   3%|▎         | 14/500 [00:10<05:30,  1.47it/s]

7.0


Processing 4omini_cot:   3%|▎         | 15/500 [00:10<04:54,  1.65it/s]

0.0


Processing 4omini_cot:   3%|▎         | 16/500 [00:10<04:21,  1.85it/s]

0.0


Processing 4omini_cot:   3%|▎         | 17/500 [00:11<04:19,  1.86it/s]

3.0


Processing 4omini_cot:   4%|▎         | 18/500 [00:11<04:12,  1.91it/s]

0.0


Processing 4omini_cot:   4%|▍         | 19/500 [00:12<03:46,  2.12it/s]

0.0


Processing 4omini_cot:   4%|▍         | 20/500 [00:13<04:37,  1.73it/s]

3.0


Processing 4omini_cot:   4%|▍         | 21/500 [00:13<04:41,  1.70it/s]

1.0


Processing 4omini_cot:   4%|▍         | 22/500 [00:13<04:03,  1.97it/s]

8.0


Processing 4omini_cot:   5%|▍         | 23/500 [00:14<04:03,  1.96it/s]

8.0


Processing 4omini_cot:   5%|▍         | 24/500 [00:14<03:46,  2.10it/s]

4.0


Processing 4omini_cot:   5%|▌         | 25/500 [00:15<03:28,  2.27it/s]

3.0


Processing 4omini_cot:   5%|▌         | 26/500 [00:15<03:32,  2.23it/s]

3.0


Processing 4omini_cot:   5%|▌         | 27/500 [00:16<03:28,  2.27it/s]

9.0


Processing 4omini_cot:   6%|▌         | 28/500 [00:16<03:14,  2.43it/s]

0.0


Processing 4omini_cot:   6%|▌         | 29/500 [00:16<03:05,  2.53it/s]

7.0


Processing 4omini_cot:   6%|▌         | 30/500 [00:17<03:17,  2.38it/s]

6.0


Processing 4omini_cot:   6%|▌         | 31/500 [00:18<04:20,  1.80it/s]

9.0


Processing 4omini_cot:   6%|▋         | 32/500 [00:18<04:52,  1.60it/s]

0.0


Processing 4omini_cot:   7%|▋         | 33/500 [00:19<04:40,  1.67it/s]

8.0


Processing 4omini_cot:   7%|▋         | 34/500 [00:24<15:14,  1.96s/it]

0.0


Processing 4omini_cot:   7%|▋         | 35/500 [00:25<11:39,  1.51s/it]

3.0


Processing 4omini_cot:   7%|▋         | 36/500 [00:25<09:05,  1.18s/it]

4.0


Processing 4omini_cot:   7%|▋         | 37/500 [00:26<07:50,  1.02s/it]

7.0


Processing 4omini_cot:   8%|▊         | 38/500 [00:26<06:22,  1.21it/s]

0.0


Processing 4omini_cot:   8%|▊         | 39/500 [00:26<05:11,  1.48it/s]

7.0


Processing 4omini_cot:   8%|▊         | 40/500 [00:27<04:37,  1.66it/s]

0.0


Processing 4omini_cot:   8%|▊         | 41/500 [00:27<04:17,  1.78it/s]

0.0


Processing 4omini_cot:   8%|▊         | 42/500 [00:28<04:02,  1.89it/s]

0.0


Processing 4omini_cot:   9%|▊         | 43/500 [00:28<04:03,  1.88it/s]

8.0


Processing 4omini_cot:   9%|▉         | 44/500 [00:29<03:47,  2.00it/s]

0.0


Processing 4omini_cot:   9%|▉         | 45/500 [00:29<03:49,  1.98it/s]

0.0


Processing 4omini_cot:   9%|▉         | 46/500 [00:30<04:05,  1.85it/s]

5.0


Processing 4omini_cot:   9%|▉         | 47/500 [00:31<04:50,  1.56it/s]

0.0


Processing 4omini_cot:  10%|▉         | 48/500 [00:31<04:30,  1.67it/s]

8.0


Processing 4omini_cot:  10%|▉         | 49/500 [00:32<05:26,  1.38it/s]

0.0


Processing 4omini_cot:  10%|█         | 50/500 [00:34<06:49,  1.10it/s]

3.0


Processing 4omini_cot:  10%|█         | 51/500 [00:34<05:47,  1.29it/s]

0.0


Processing 4omini_cot:  10%|█         | 52/500 [00:34<04:48,  1.55it/s]

0.0


Processing 4omini_cot:  11%|█         | 53/500 [00:35<04:05,  1.82it/s]

0.0


Processing 4omini_cot:  11%|█         | 54/500 [00:35<03:56,  1.89it/s]

0.0


Processing 4omini_cot:  11%|█         | 55/500 [00:35<03:26,  2.15it/s]

4.0


Processing 4omini_cot:  11%|█         | 56/500 [00:36<03:28,  2.13it/s]

0.0


Processing 4omini_cot:  11%|█▏        | 57/500 [00:37<03:52,  1.91it/s]

8.0


Processing 4omini_cot:  12%|█▏        | 58/500 [00:37<03:38,  2.02it/s]

0.0


Processing 4omini_cot:  12%|█▏        | 59/500 [00:38<04:05,  1.80it/s]

0.0


Processing 4omini_cot:  12%|█▏        | 60/500 [00:38<03:32,  2.07it/s]

0.0


Processing 4omini_cot:  12%|█▏        | 61/500 [00:38<03:23,  2.16it/s]

0.0


Processing 4omini_cot:  12%|█▏        | 62/500 [00:39<03:03,  2.39it/s]

7.0


Processing 4omini_cot:  13%|█▎        | 63/500 [00:39<03:08,  2.32it/s]

3.0


Processing 4omini_cot:  13%|█▎        | 64/500 [00:40<02:56,  2.47it/s]

0.0


Processing 4omini_cot:  13%|█▎        | 65/500 [00:40<02:44,  2.65it/s]

8.0


Processing 4omini_cot:  13%|█▎        | 66/500 [00:40<02:27,  2.95it/s]

5.0


Processing 4omini_cot:  13%|█▎        | 67/500 [00:40<02:29,  2.90it/s]

0.0


Processing 4omini_cot:  14%|█▎        | 68/500 [00:41<02:33,  2.81it/s]

0.0


Processing 4omini_cot:  14%|█▍        | 69/500 [00:41<02:40,  2.68it/s]

2.0


Processing 4omini_cot:  14%|█▍        | 70/500 [00:42<02:47,  2.57it/s]

0.0


Processing 4omini_cot:  14%|█▍        | 71/500 [00:42<03:01,  2.36it/s]

0.0


Processing 4omini_cot:  14%|█▍        | 72/500 [00:43<03:19,  2.15it/s]

0.0


Processing 4omini_cot:  15%|█▍        | 73/500 [00:43<03:19,  2.14it/s]

8.0


Processing 4omini_cot:  15%|█▍        | 74/500 [00:44<03:25,  2.07it/s]

0.0


Processing 4omini_cot:  15%|█▌        | 75/500 [00:44<03:14,  2.18it/s]

2.0


Processing 4omini_cot:  15%|█▌        | 76/500 [00:45<03:50,  1.84it/s]

0.0


Processing 4omini_cot:  15%|█▌        | 77/500 [00:45<03:30,  2.01it/s]

0.0


Processing 4omini_cot:  16%|█▌        | 78/500 [00:46<03:31,  1.99it/s]

0.0


Processing 4omini_cot:  16%|█▌        | 79/500 [00:46<03:18,  2.12it/s]

2.0


Processing 4omini_cot:  16%|█▌        | 80/500 [00:47<03:10,  2.20it/s]

0.0


Processing 4omini_cot:  16%|█▌        | 81/500 [00:47<03:04,  2.27it/s]

6.0


Processing 4omini_cot:  16%|█▋        | 82/500 [00:48<03:46,  1.84it/s]

0.0


Processing 4omini_cot:  17%|█▋        | 83/500 [00:48<03:42,  1.87it/s]

0.0


Processing 4omini_cot:  17%|█▋        | 84/500 [00:49<03:31,  1.97it/s]

6.0


Processing 4omini_cot:  17%|█▋        | 85/500 [00:49<03:18,  2.09it/s]

0.0


Processing 4omini_cot:  17%|█▋        | 86/500 [00:50<03:22,  2.05it/s]

6.0


Processing 4omini_cot:  17%|█▋        | 87/500 [00:50<03:38,  1.89it/s]

0.0


Processing 4omini_cot:  18%|█▊        | 88/500 [00:51<03:47,  1.81it/s]

3.0


Processing 4omini_cot:  18%|█▊        | 89/500 [00:51<03:41,  1.85it/s]

1.0


Processing 4omini_cot:  18%|█▊        | 90/500 [00:52<03:37,  1.88it/s]

2.0


Processing 4omini_cot:  18%|█▊        | 91/500 [00:52<03:24,  2.00it/s]

5.0


Processing 4omini_cot:  18%|█▊        | 92/500 [00:53<03:49,  1.78it/s]

0.0


Processing 4omini_cot:  19%|█▊        | 93/500 [00:53<03:14,  2.09it/s]

3.0


Processing 4omini_cot:  19%|█▉        | 94/500 [00:54<03:09,  2.15it/s]

0.0


Processing 4omini_cot:  19%|█▉        | 95/500 [00:54<03:26,  1.96it/s]

5.0


Processing 4omini_cot:  19%|█▉        | 96/500 [00:55<03:13,  2.09it/s]

3.0


Processing 4omini_cot:  19%|█▉        | 97/500 [00:55<03:05,  2.18it/s]

8.0


Processing 4omini_cot:  20%|█▉        | 98/500 [00:56<03:10,  2.11it/s]

8.0


Processing 4omini_cot:  20%|█▉        | 99/500 [00:56<03:07,  2.14it/s]

5.0


Processing 4omini_cot:  20%|██        | 100/500 [00:57<02:54,  2.30it/s]

0.0


Processing 4omini_cot:  20%|██        | 101/500 [00:57<03:04,  2.16it/s]

9.0


Processing 4omini_cot:  20%|██        | 102/500 [00:57<02:57,  2.24it/s]

2.0


Processing 4omini_cot:  21%|██        | 103/500 [00:58<03:17,  2.01it/s]

7.0


Processing 4omini_cot:  21%|██        | 104/500 [00:59<03:06,  2.12it/s]

3.0


Processing 4omini_cot:  21%|██        | 105/500 [00:59<03:02,  2.17it/s]

0.0


Processing 4omini_cot:  21%|██        | 106/500 [01:00<03:28,  1.89it/s]

0.0


Processing 4omini_cot:  21%|██▏       | 107/500 [01:00<03:36,  1.81it/s]

0.0


Processing 4omini_cot:  22%|██▏       | 108/500 [01:01<03:43,  1.75it/s]

3.0


Processing 4omini_cot:  22%|██▏       | 109/500 [01:01<03:27,  1.89it/s]

0.0


Processing 4omini_cot:  22%|██▏       | 110/500 [01:02<03:22,  1.93it/s]

0.0


Processing 4omini_cot:  22%|██▏       | 111/500 [01:02<03:08,  2.06it/s]

5.0


Processing 4omini_cot:  22%|██▏       | 112/500 [01:03<03:36,  1.79it/s]

0.0


Processing 4omini_cot:  23%|██▎       | 113/500 [01:03<03:30,  1.84it/s]

0.0


Processing 4omini_cot:  23%|██▎       | 114/500 [01:04<03:13,  2.00it/s]

4.0


Processing 4omini_cot:  23%|██▎       | 115/500 [01:04<03:03,  2.10it/s]

5.0


Processing 4omini_cot:  23%|██▎       | 116/500 [01:05<03:17,  1.94it/s]

0.0


Processing 4omini_cot:  23%|██▎       | 117/500 [01:05<03:07,  2.04it/s]

8.0


Processing 4omini_cot:  24%|██▎       | 118/500 [01:06<03:18,  1.92it/s]

2.0


Processing 4omini_cot:  24%|██▍       | 119/500 [01:06<03:06,  2.04it/s]

0.0


Processing 4omini_cot:  24%|██▍       | 120/500 [01:07<02:55,  2.16it/s]

6.0


Processing 4omini_cot:  24%|██▍       | 121/500 [01:07<02:54,  2.18it/s]

0.0


Processing 4omini_cot:  24%|██▍       | 122/500 [01:08<02:54,  2.16it/s]

3.0


Processing 4omini_cot:  25%|██▍       | 123/500 [01:08<02:59,  2.10it/s]

0.0


Processing 4omini_cot:  25%|██▍       | 124/500 [01:09<03:50,  1.63it/s]

6.0


Processing 4omini_cot:  25%|██▌       | 125/500 [01:10<03:37,  1.73it/s]

8.0


Processing 4omini_cot:  25%|██▌       | 126/500 [01:10<03:17,  1.89it/s]

2.0


Processing 4omini_cot:  25%|██▌       | 127/500 [01:10<03:05,  2.01it/s]

5.0


Processing 4omini_cot:  26%|██▌       | 128/500 [01:13<06:07,  1.01it/s]

0.0


Processing 4omini_cot:  26%|██▌       | 129/500 [01:19<16:16,  2.63s/it]

0.0


Processing 4omini_cot:  26%|██▌       | 130/500 [01:20<12:25,  2.01s/it]

0.0


Processing 4omini_cot:  26%|██▌       | 131/500 [01:20<09:35,  1.56s/it]

0.0


Processing 4omini_cot:  26%|██▋       | 132/500 [01:21<07:42,  1.26s/it]

3.0


Processing 4omini_cot:  27%|██▋       | 133/500 [01:21<06:15,  1.02s/it]

0.0


Processing 4omini_cot:  27%|██▋       | 134/500 [01:22<05:13,  1.17it/s]

0.0


Processing 4omini_cot:  27%|██▋       | 135/500 [01:22<04:14,  1.44it/s]

3.0


Processing 4omini_cot:  27%|██▋       | 136/500 [01:22<03:42,  1.64it/s]

3.0


Processing 4omini_cot:  27%|██▋       | 137/500 [01:23<03:37,  1.67it/s]

6.0


Processing 4omini_cot:  28%|██▊       | 138/500 [01:23<03:25,  1.76it/s]

0.0


Processing 4omini_cot:  28%|██▊       | 139/500 [01:24<03:09,  1.91it/s]

2.0


Processing 4omini_cot:  28%|██▊       | 140/500 [01:24<02:53,  2.08it/s]

0.0


Processing 4omini_cot:  28%|██▊       | 141/500 [01:25<03:11,  1.88it/s]

0.0


Processing 4omini_cot:  28%|██▊       | 142/500 [01:25<03:08,  1.90it/s]

0.0


Processing 4omini_cot:  29%|██▊       | 143/500 [01:26<02:56,  2.02it/s]

0.0


Processing 4omini_cot:  29%|██▉       | 144/500 [01:26<02:34,  2.31it/s]

8.0


Processing 4omini_cot:  29%|██▉       | 145/500 [01:26<02:28,  2.39it/s]

0.0


Processing 4omini_cot:  29%|██▉       | 146/500 [01:31<10:27,  1.77s/it]

4.0


Processing 4omini_cot:  29%|██▉       | 147/500 [01:32<08:04,  1.37s/it]

0.0


Processing 4omini_cot:  30%|██▉       | 148/500 [01:32<06:35,  1.12s/it]

9.0


Processing 4omini_cot:  30%|██▉       | 149/500 [01:33<05:15,  1.11it/s]

0.0


Processing 4omini_cot:  30%|███       | 150/500 [01:33<04:22,  1.33it/s]

3.0


Processing 4omini_cot:  30%|███       | 151/500 [01:34<03:43,  1.56it/s]

0.0


Processing 4omini_cot:  30%|███       | 152/500 [01:34<03:40,  1.58it/s]

0.0


Processing 4omini_cot:  31%|███       | 153/500 [01:35<03:17,  1.76it/s]

5.0


Processing 4omini_cot:  31%|███       | 154/500 [01:35<03:01,  1.91it/s]

8.0


Processing 4omini_cot:  31%|███       | 155/500 [01:35<02:58,  1.93it/s]

7.0


Processing 4omini_cot:  31%|███       | 156/500 [01:36<02:45,  2.08it/s]

8.0


Processing 4omini_cot:  31%|███▏      | 157/500 [01:41<10:37,  1.86s/it]

0.0


Processing 4omini_cot:  32%|███▏      | 158/500 [01:41<08:02,  1.41s/it]

0.0


Processing 4omini_cot:  32%|███▏      | 159/500 [01:42<06:20,  1.11s/it]

0.0


Processing 4omini_cot:  32%|███▏      | 160/500 [01:42<05:34,  1.02it/s]

0.0


Processing 4omini_cot:  32%|███▏      | 161/500 [01:43<04:26,  1.27it/s]

5.0


Processing 4omini_cot:  32%|███▏      | 162/500 [01:43<03:37,  1.56it/s]

0.0


Processing 4omini_cot:  33%|███▎      | 163/500 [01:43<03:16,  1.71it/s]

7.0


Processing 4omini_cot:  33%|███▎      | 164/500 [01:44<03:02,  1.84it/s]

0.0


Processing 4omini_cot:  33%|███▎      | 165/500 [01:44<02:52,  1.94it/s]

3.0


Processing 4omini_cot:  33%|███▎      | 166/500 [01:45<02:50,  1.95it/s]

6.0


Processing 4omini_cot:  33%|███▎      | 167/500 [01:45<02:56,  1.89it/s]

6.0


Processing 4omini_cot:  34%|███▎      | 168/500 [01:46<02:58,  1.86it/s]

0.0


Processing 4omini_cot:  34%|███▍      | 169/500 [01:46<02:36,  2.12it/s]

3.0


Processing 4omini_cot:  34%|███▍      | 170/500 [01:47<03:03,  1.80it/s]

7.0


Processing 4omini_cot:  34%|███▍      | 171/500 [01:48<03:06,  1.76it/s]

8.0


Processing 4omini_cot:  34%|███▍      | 172/500 [01:48<03:01,  1.81it/s]

5.0


Processing 4omini_cot:  35%|███▍      | 173/500 [01:49<03:05,  1.77it/s]

0.0


Processing 4omini_cot:  35%|███▍      | 174/500 [01:49<02:56,  1.84it/s]

0.0


Processing 4omini_cot:  35%|███▌      | 175/500 [01:50<02:46,  1.96it/s]

8.0


Processing 4omini_cot:  35%|███▌      | 176/500 [01:50<02:44,  1.97it/s]

5.0


Processing 4omini_cot:  35%|███▌      | 177/500 [01:51<03:04,  1.75it/s]

0.0


Processing 4omini_cot:  36%|███▌      | 178/500 [02:00<17:19,  3.23s/it]

3.0


Processing 4omini_cot:  36%|███▌      | 179/500 [02:01<13:14,  2.47s/it]

0.0


Processing 4omini_cot:  36%|███▌      | 180/500 [02:02<11:27,  2.15s/it]

4.0


Processing 4omini_cot:  36%|███▌      | 181/500 [02:03<08:54,  1.67s/it]

4.0


Processing 4omini_cot:  36%|███▋      | 182/500 [02:04<07:05,  1.34s/it]

4.0


Processing 4omini_cot:  37%|███▋      | 183/500 [02:04<05:25,  1.03s/it]

0.0


Processing 4omini_cot:  37%|███▋      | 184/500 [02:09<11:12,  2.13s/it]

8.0


Processing 4omini_cot:  37%|███▋      | 185/500 [02:09<08:33,  1.63s/it]

0.0


Processing 4omini_cot:  37%|███▋      | 186/500 [02:09<06:31,  1.25s/it]

5.0


Processing 4omini_cot:  37%|███▋      | 187/500 [02:10<05:21,  1.03s/it]

0.0


Processing 4omini_cot:  38%|███▊      | 188/500 [02:10<04:23,  1.19it/s]

0.0


Processing 4omini_cot:  38%|███▊      | 189/500 [02:14<09:15,  1.79s/it]

8.0


Processing 4omini_cot:  38%|███▊      | 190/500 [02:15<07:07,  1.38s/it]

0.0


Processing 4omini_cot:  38%|███▊      | 191/500 [02:15<05:46,  1.12s/it]

7.0


Processing 4omini_cot:  38%|███▊      | 192/500 [02:16<04:36,  1.11it/s]

0.0


Processing 4omini_cot:  39%|███▊      | 193/500 [02:16<03:50,  1.33it/s]

0.0


Processing 4omini_cot:  39%|███▉      | 194/500 [02:16<03:12,  1.59it/s]

8.0


Processing 4omini_cot:  39%|███▉      | 195/500 [02:17<02:56,  1.73it/s]

8.0


Processing 4omini_cot:  39%|███▉      | 196/500 [02:17<02:32,  1.99it/s]

3.0


Processing 4omini_cot:  39%|███▉      | 197/500 [02:18<02:33,  1.97it/s]

0.0


Processing 4omini_cot:  40%|███▉      | 198/500 [02:18<02:49,  1.79it/s]

1.0


Processing 4omini_cot:  40%|███▉      | 199/500 [02:30<18:53,  3.77s/it]

0.0


Processing 4omini_cot:  40%|████      | 200/500 [02:30<14:16,  2.85s/it]

0.0


Processing 4omini_cot:  40%|████      | 201/500 [02:31<10:51,  2.18s/it]

0.0


Processing 4omini_cot:  40%|████      | 202/500 [02:31<08:13,  1.65s/it]

7.0


Processing 4omini_cot:  41%|████      | 203/500 [02:32<06:11,  1.25s/it]

0.0


Processing 4omini_cot:  41%|████      | 204/500 [02:38<13:34,  2.75s/it]

0.0


Processing 4omini_cot:  41%|████      | 205/500 [02:38<09:53,  2.01s/it]

0.0


Processing 4omini_cot:  41%|████      | 206/500 [02:39<07:28,  1.53s/it]

9.0


Processing 4omini_cot:  41%|████▏     | 207/500 [02:39<05:53,  1.21s/it]

0.0


Processing 4omini_cot:  42%|████▏     | 208/500 [02:39<04:37,  1.05it/s]

8.0


Processing 4omini_cot:  42%|████▏     | 209/500 [02:40<04:00,  1.21it/s]

0.0


Processing 4omini_cot:  42%|████▏     | 210/500 [02:40<03:21,  1.44it/s]

0.0


Processing 4omini_cot:  42%|████▏     | 211/500 [02:41<02:47,  1.73it/s]

8.0


Processing 4omini_cot:  42%|████▏     | 212/500 [02:42<03:17,  1.46it/s]

5.0


Processing 4omini_cot:  43%|████▎     | 213/500 [02:42<02:57,  1.62it/s]

0.0


Processing 4omini_cot:  43%|████▎     | 214/500 [02:43<02:52,  1.66it/s]

0.0


Processing 4omini_cot:  43%|████▎     | 215/500 [02:43<02:43,  1.75it/s]

4.0


Processing 4omini_cot:  43%|████▎     | 216/500 [02:44<02:35,  1.83it/s]

0.0


Processing 4omini_cot:  43%|████▎     | 217/500 [02:44<02:25,  1.94it/s]

8.0


Processing 4omini_cot:  44%|████▎     | 218/500 [02:46<03:56,  1.19it/s]

0.0


Processing 4omini_cot:  44%|████▍     | 219/500 [02:46<03:33,  1.32it/s]

0.0


Processing 4omini_cot:  44%|████▍     | 220/500 [02:47<03:02,  1.53it/s]

6.0


Processing 4omini_cot:  44%|████▍     | 221/500 [02:47<02:40,  1.74it/s]

7.0


Processing 4omini_cot:  44%|████▍     | 222/500 [02:48<03:08,  1.47it/s]

0.0


Processing 4omini_cot:  45%|████▍     | 223/500 [02:49<02:56,  1.57it/s]

0.0


Processing 4omini_cot:  45%|████▍     | 224/500 [02:49<03:00,  1.53it/s]

0.0


Processing 4omini_cot:  45%|████▌     | 225/500 [02:50<02:48,  1.64it/s]

0.0


Processing 4omini_cot:  45%|████▌     | 226/500 [02:50<02:39,  1.72it/s]

0.0


Processing 4omini_cot:  45%|████▌     | 227/500 [02:51<02:24,  1.89it/s]

0.0


Processing 4omini_cot:  46%|████▌     | 228/500 [02:51<02:14,  2.03it/s]

0.0


Processing 4omini_cot:  46%|████▌     | 229/500 [02:51<02:06,  2.14it/s]

0.0


Processing 4omini_cot:  46%|████▌     | 230/500 [02:52<02:07,  2.12it/s]

7.0


Processing 4omini_cot:  46%|████▌     | 231/500 [02:52<02:04,  2.16it/s]

0.0


Processing 4omini_cot:  46%|████▋     | 232/500 [02:53<02:15,  1.98it/s]

0.0


Processing 4omini_cot:  47%|████▋     | 233/500 [02:54<02:44,  1.62it/s]

5.0


Processing 4omini_cot:  47%|████▋     | 234/500 [02:55<03:22,  1.32it/s]

0.0


Processing 4omini_cot:  47%|████▋     | 235/500 [02:55<02:52,  1.54it/s]

0.0


Processing 4omini_cot:  47%|████▋     | 236/500 [02:56<02:40,  1.64it/s]

0.0


Processing 4omini_cot:  47%|████▋     | 237/500 [02:56<02:21,  1.86it/s]

0.0


Processing 4omini_cot:  48%|████▊     | 238/500 [02:57<02:14,  1.95it/s]

3.0


Processing 4omini_cot:  48%|████▊     | 239/500 [02:57<02:03,  2.11it/s]

4.0


Processing 4omini_cot:  48%|████▊     | 240/500 [02:57<01:59,  2.18it/s]

0.0


Processing 4omini_cot:  48%|████▊     | 241/500 [02:58<01:56,  2.23it/s]

0.0


Processing 4omini_cot:  48%|████▊     | 242/500 [02:58<02:00,  2.14it/s]

4.0


Processing 4omini_cot:  49%|████▊     | 243/500 [02:59<01:55,  2.22it/s]

1.0


Processing 4omini_cot:  49%|████▉     | 244/500 [02:59<01:51,  2.30it/s]

0.0


Processing 4omini_cot:  49%|████▉     | 245/500 [03:00<01:49,  2.34it/s]

0.0


Processing 4omini_cot:  49%|████▉     | 246/500 [03:00<01:39,  2.56it/s]

0.0


Processing 4omini_cot:  49%|████▉     | 247/500 [03:00<01:40,  2.51it/s]

5.0


Processing 4omini_cot:  50%|████▉     | 248/500 [03:01<01:49,  2.30it/s]

0.0


Processing 4omini_cot:  50%|████▉     | 249/500 [03:01<01:48,  2.32it/s]

2.0


Processing 4omini_cot:  50%|█████     | 250/500 [03:02<02:08,  1.95it/s]

5.0


Processing 4omini_cot:  50%|█████     | 251/500 [03:02<01:59,  2.09it/s]

6.0


Processing 4omini_cot:  50%|█████     | 252/500 [03:03<02:08,  1.93it/s]

0.0


Processing 4omini_cot:  51%|█████     | 253/500 [03:10<09:52,  2.40s/it]

8.0


Processing 4omini_cot:  51%|█████     | 254/500 [03:10<07:17,  1.78s/it]

7.0


Processing 4omini_cot:  51%|█████     | 255/500 [03:11<05:37,  1.38s/it]

0.0


Processing 4omini_cot:  51%|█████     | 256/500 [03:11<04:28,  1.10s/it]

0.0


Processing 4omini_cot:  51%|█████▏    | 257/500 [03:12<03:42,  1.09it/s]

3.0


Processing 4omini_cot:  52%|█████▏    | 258/500 [03:12<03:20,  1.21it/s]

0.0


Processing 4omini_cot:  52%|█████▏    | 259/500 [03:13<03:02,  1.32it/s]

0.0


Processing 4omini_cot:  52%|█████▏    | 260/500 [03:13<02:30,  1.59it/s]

9.0


Processing 4omini_cot:  52%|█████▏    | 261/500 [03:14<02:28,  1.61it/s]

0.0


Processing 4omini_cot:  52%|█████▏    | 262/500 [03:14<02:28,  1.60it/s]

8.0


Processing 4omini_cot:  53%|█████▎    | 263/500 [03:15<02:12,  1.79it/s]

0.0


Processing 4omini_cot:  53%|█████▎    | 264/500 [03:15<02:11,  1.80it/s]

5.0


Processing 4omini_cot:  53%|█████▎    | 265/500 [03:16<02:04,  1.89it/s]

0.0


Processing 4omini_cot:  53%|█████▎    | 266/500 [03:16<01:55,  2.03it/s]

5.0


Processing 4omini_cot:  53%|█████▎    | 267/500 [03:19<04:21,  1.12s/it]

0.0


Processing 4omini_cot:  54%|█████▎    | 268/500 [03:23<08:33,  2.21s/it]

0.0


Processing 4omini_cot:  54%|█████▍    | 269/500 [03:24<06:24,  1.66s/it]

3.0


Processing 4omini_cot:  54%|█████▍    | 270/500 [03:24<04:51,  1.27s/it]

4.0


Processing 4omini_cot:  54%|█████▍    | 271/500 [03:25<04:17,  1.12s/it]

0.0


Processing 4omini_cot:  54%|█████▍    | 272/500 [03:26<04:12,  1.11s/it]

1.0


Processing 4omini_cot:  55%|█████▍    | 273/500 [03:27<03:35,  1.05it/s]

0.0


Processing 4omini_cot:  55%|█████▍    | 274/500 [03:27<02:58,  1.27it/s]

0.0


Processing 4omini_cot:  55%|█████▌    | 275/500 [03:27<02:33,  1.46it/s]

0.0


Processing 4omini_cot:  55%|█████▌    | 276/500 [03:28<02:20,  1.60it/s]

3.0


Processing 4omini_cot:  55%|█████▌    | 277/500 [03:28<02:03,  1.80it/s]

8.0


Processing 4omini_cot:  56%|█████▌    | 278/500 [03:29<01:57,  1.89it/s]

0.0


Processing 4omini_cot:  56%|█████▌    | 279/500 [03:29<01:54,  1.93it/s]

0.0


Processing 4omini_cot:  56%|█████▌    | 280/500 [03:32<04:42,  1.29s/it]

0.0


Processing 4omini_cot:  56%|█████▌    | 281/500 [03:33<04:22,  1.20s/it]

0.0


Processing 4omini_cot:  56%|█████▋    | 282/500 [03:34<03:39,  1.01s/it]

4.0


Processing 4omini_cot:  57%|█████▋    | 283/500 [03:34<03:03,  1.19it/s]

0.0


Processing 4omini_cot:  57%|█████▋    | 284/500 [03:35<02:30,  1.44it/s]

0.0


Processing 4omini_cot:  57%|█████▋    | 285/500 [03:35<02:07,  1.68it/s]

4.0


Processing 4omini_cot:  57%|█████▋    | 286/500 [03:36<01:58,  1.81it/s]

0.0


Processing 4omini_cot:  57%|█████▋    | 287/500 [03:36<01:45,  2.02it/s]

0.0


Processing 4omini_cot:  58%|█████▊    | 288/500 [03:37<01:51,  1.89it/s]

0.0


Processing 4omini_cot:  58%|█████▊    | 289/500 [03:37<01:38,  2.14it/s]

0.0


Processing 4omini_cot:  58%|█████▊    | 290/500 [03:37<01:32,  2.28it/s]

0.0


Processing 4omini_cot:  58%|█████▊    | 291/500 [03:38<01:28,  2.37it/s]

0.0


Processing 4omini_cot:  58%|█████▊    | 292/500 [03:38<01:32,  2.25it/s]

0.0


Processing 4omini_cot:  59%|█████▊    | 293/500 [03:39<01:36,  2.15it/s]

3.0


Processing 4omini_cot:  59%|█████▉    | 294/500 [03:40<02:00,  1.71it/s]

0.0


Processing 4omini_cot:  59%|█████▉    | 295/500 [03:40<01:54,  1.78it/s]

6.0


Processing 4omini_cot:  59%|█████▉    | 296/500 [03:40<01:39,  2.05it/s]

0.0


Processing 4omini_cot:  59%|█████▉    | 297/500 [03:41<01:25,  2.37it/s]

0.0


Processing 4omini_cot:  60%|█████▉    | 298/500 [03:41<01:20,  2.52it/s]

0.0


Processing 4omini_cot:  60%|█████▉    | 299/500 [03:41<01:25,  2.34it/s]

0.0


Processing 4omini_cot:  60%|██████    | 300/500 [03:42<01:21,  2.44it/s]

8.0


Processing 4omini_cot:  60%|██████    | 301/500 [03:42<01:19,  2.51it/s]

0.0


Processing 4omini_cot:  60%|██████    | 302/500 [03:43<01:18,  2.52it/s]

0.0


Processing 4omini_cot:  61%|██████    | 303/500 [03:43<01:18,  2.52it/s]

0.0


Processing 4omini_cot:  61%|██████    | 304/500 [03:43<01:18,  2.51it/s]

0.0


Processing 4omini_cot:  61%|██████    | 305/500 [03:44<01:17,  2.51it/s]

3.0


Processing 4omini_cot:  61%|██████    | 306/500 [03:44<01:30,  2.13it/s]

5.0


Processing 4omini_cot:  61%|██████▏   | 307/500 [03:45<01:26,  2.23it/s]

0.0


Processing 4omini_cot:  62%|██████▏   | 308/500 [03:45<01:29,  2.13it/s]

0.0


Processing 4omini_cot:  62%|██████▏   | 309/500 [03:46<01:25,  2.23it/s]

0.0


Processing 4omini_cot:  62%|██████▏   | 310/500 [03:46<01:22,  2.29it/s]

0.0


Processing 4omini_cot:  62%|██████▏   | 311/500 [03:47<01:26,  2.17it/s]

0.0


Processing 4omini_cot:  62%|██████▏   | 312/500 [03:47<01:23,  2.24it/s]

0.0


Processing 4omini_cot:  63%|██████▎   | 313/500 [03:47<01:15,  2.49it/s]

5.0


Processing 4omini_cot:  63%|██████▎   | 314/500 [03:48<01:21,  2.28it/s]

3.0


Processing 4omini_cot:  63%|██████▎   | 315/500 [03:48<01:24,  2.18it/s]

4.0


Processing 4omini_cot:  63%|██████▎   | 316/500 [03:49<01:21,  2.25it/s]

9.0


Processing 4omini_cot:  63%|██████▎   | 317/500 [03:49<01:19,  2.30it/s]

0.0


Processing 4omini_cot:  64%|██████▎   | 318/500 [03:50<01:18,  2.33it/s]

1.0


Processing 4omini_cot:  64%|██████▍   | 319/500 [03:50<01:17,  2.35it/s]

0.0


Processing 4omini_cot:  64%|██████▍   | 320/500 [03:51<01:20,  2.23it/s]

0.0


Processing 4omini_cot:  64%|██████▍   | 321/500 [03:51<01:23,  2.14it/s]

0.0


Processing 4omini_cot:  64%|██████▍   | 322/500 [03:52<01:25,  2.08it/s]

0.0


Processing 4omini_cot:  65%|██████▍   | 323/500 [03:54<02:52,  1.02it/s]

6.0


Processing 4omini_cot:  65%|██████▍   | 324/500 [03:54<02:16,  1.29it/s]

0.0


Processing 4omini_cot:  65%|██████▌   | 325/500 [03:55<02:12,  1.32it/s]

5.0


Processing 4omini_cot:  65%|██████▌   | 326/500 [03:55<01:49,  1.59it/s]

0.0


Processing 4omini_cot:  65%|██████▌   | 327/500 [03:56<01:42,  1.68it/s]

0.0


Processing 4omini_cot:  66%|██████▌   | 328/500 [03:56<01:27,  1.96it/s]

8.0


Processing 4omini_cot:  66%|██████▌   | 329/500 [03:56<01:16,  2.25it/s]

0.0


Processing 4omini_cot:  66%|██████▌   | 330/500 [03:56<01:07,  2.51it/s]

9.0


Processing 4omini_cot:  66%|██████▌   | 331/500 [03:57<01:03,  2.65it/s]

7.0


Processing 4omini_cot:  66%|██████▋   | 332/500 [03:57<01:05,  2.57it/s]

0.0


Processing 4omini_cot:  67%|██████▋   | 333/500 [03:58<01:05,  2.54it/s]

3.0


Processing 4omini_cot:  67%|██████▋   | 334/500 [03:58<01:05,  2.52it/s]

0.0


Processing 4omini_cot:  67%|██████▋   | 335/500 [03:58<01:02,  2.65it/s]

3.0


Processing 4omini_cot:  67%|██████▋   | 336/500 [03:59<01:04,  2.54it/s]

0.0


Processing 4omini_cot:  67%|██████▋   | 337/500 [03:59<01:02,  2.59it/s]

0.0


Processing 4omini_cot:  68%|██████▊   | 338/500 [04:00<01:01,  2.63it/s]

0.0


Processing 4omini_cot:  68%|██████▊   | 339/500 [04:00<01:04,  2.51it/s]

3.0


Processing 4omini_cot:  68%|██████▊   | 340/500 [04:00<01:05,  2.44it/s]

4.0


Processing 4omini_cot:  68%|██████▊   | 341/500 [04:01<01:01,  2.58it/s]

8.0


Processing 4omini_cot:  68%|██████▊   | 342/500 [04:01<00:59,  2.64it/s]

5.0


Processing 4omini_cot:  69%|██████▊   | 343/500 [04:02<01:02,  2.51it/s]

0.0


Processing 4omini_cot:  69%|██████▉   | 344/500 [04:02<01:15,  2.07it/s]

3.0


Processing 4omini_cot:  69%|██████▉   | 345/500 [04:03<01:11,  2.16it/s]

0.0


Processing 4omini_cot:  69%|██████▉   | 346/500 [04:03<01:14,  2.07it/s]

3.0


Processing 4omini_cot:  69%|██████▉   | 347/500 [04:04<01:19,  1.93it/s]

0.0


Processing 4omini_cot:  70%|██████▉   | 348/500 [04:05<01:29,  1.69it/s]

0.0


Processing 4omini_cot:  70%|██████▉   | 349/500 [04:05<01:18,  1.92it/s]

7.0


Processing 4omini_cot:  70%|███████   | 350/500 [04:05<01:08,  2.20it/s]

5.0


Processing 4omini_cot:  70%|███████   | 351/500 [04:06<01:05,  2.26it/s]

0.0


Processing 4omini_cot:  70%|███████   | 352/500 [04:06<01:03,  2.32it/s]

0.0


Processing 4omini_cot:  71%|███████   | 353/500 [04:06<01:02,  2.35it/s]

0.0


Processing 4omini_cot:  71%|███████   | 354/500 [04:07<01:20,  1.81it/s]

0.0


Processing 4omini_cot:  71%|███████   | 355/500 [04:08<01:17,  1.87it/s]

5.0


Processing 4omini_cot:  71%|███████   | 356/500 [04:11<02:54,  1.21s/it]

0.0


Processing 4omini_cot:  71%|███████▏  | 357/500 [04:11<02:21,  1.01it/s]

0.0


Processing 4omini_cot:  72%|███████▏  | 358/500 [04:11<01:51,  1.27it/s]

10.0


Processing 4omini_cot:  72%|███████▏  | 359/500 [04:12<01:34,  1.49it/s]

0.0


Processing 4omini_cot:  72%|███████▏  | 360/500 [04:12<01:23,  1.68it/s]

0.0


Processing 4omini_cot:  72%|███████▏  | 361/500 [04:13<01:27,  1.59it/s]

0.0


Processing 4omini_cot:  72%|███████▏  | 362/500 [04:13<01:18,  1.75it/s]

0.0


Processing 4omini_cot:  73%|███████▎  | 363/500 [04:14<01:09,  1.98it/s]

0.0


Processing 4omini_cot:  73%|███████▎  | 364/500 [04:14<01:06,  2.05it/s]

0.0


Processing 4omini_cot:  73%|███████▎  | 365/500 [04:15<01:03,  2.13it/s]

0.0


Processing 4omini_cot:  73%|███████▎  | 366/500 [04:15<00:58,  2.27it/s]

8.0


Processing 4omini_cot:  73%|███████▎  | 367/500 [04:15<01:03,  2.10it/s]

4.0


Processing 4omini_cot:  74%|███████▎  | 368/500 [04:16<01:07,  1.97it/s]

0.0


Processing 4omini_cot:  74%|███████▍  | 369/500 [04:16<01:04,  2.03it/s]

6.0


Processing 4omini_cot:  74%|███████▍  | 370/500 [04:17<00:59,  2.19it/s]

4.0


Processing 4omini_cot:  74%|███████▍  | 371/500 [04:17<00:56,  2.28it/s]

5.0


Processing 4omini_cot:  74%|███████▍  | 372/500 [04:18<00:55,  2.31it/s]

1.0


Processing 4omini_cot:  75%|███████▍  | 373/500 [04:18<00:57,  2.19it/s]

3.0


Processing 4omini_cot:  75%|███████▍  | 374/500 [04:19<01:03,  1.99it/s]

3.0


Processing 4omini_cot:  75%|███████▌  | 375/500 [04:19<01:03,  1.98it/s]

0.0


Processing 4omini_cot:  75%|███████▌  | 376/500 [04:20<00:59,  2.10it/s]

0.0


Processing 4omini_cot:  75%|███████▌  | 377/500 [04:20<00:56,  2.19it/s]

0.0


Processing 4omini_cot:  76%|███████▌  | 378/500 [04:21<00:57,  2.11it/s]

0.0


Processing 4omini_cot:  76%|███████▌  | 379/500 [04:21<00:58,  2.06it/s]

0.0


Processing 4omini_cot:  76%|███████▌  | 380/500 [04:22<00:57,  2.10it/s]

1.0


Processing 4omini_cot:  76%|███████▌  | 381/500 [04:22<00:56,  2.11it/s]

0.0


Processing 4omini_cot:  76%|███████▋  | 382/500 [04:23<00:57,  2.04it/s]

4.0


Processing 4omini_cot:  77%|███████▋  | 383/500 [04:23<00:57,  2.02it/s]

0.0


Processing 4omini_cot:  77%|███████▋  | 384/500 [04:24<01:04,  1.79it/s]

0.0


Processing 4omini_cot:  77%|███████▋  | 385/500 [04:24<00:59,  1.93it/s]

6.0


Processing 4omini_cot:  77%|███████▋  | 386/500 [04:25<00:58,  1.94it/s]

0.0


Processing 4omini_cot:  77%|███████▋  | 387/500 [04:25<00:54,  2.08it/s]

0.0


Processing 4omini_cot:  78%|███████▊  | 388/500 [04:26<00:51,  2.18it/s]

0.0


Processing 4omini_cot:  78%|███████▊  | 389/500 [04:26<00:52,  2.12it/s]

0.0


Processing 4omini_cot:  78%|███████▊  | 390/500 [04:27<00:57,  1.93it/s]

8.0


Processing 4omini_cot:  78%|███████▊  | 391/500 [04:27<00:51,  2.10it/s]

0.0


Processing 4omini_cot:  78%|███████▊  | 392/500 [04:28<00:50,  2.14it/s]

5.0


Processing 4omini_cot:  79%|███████▊  | 393/500 [04:28<00:51,  2.08it/s]

7.0


Processing 4omini_cot:  79%|███████▉  | 394/500 [04:29<00:52,  2.04it/s]

5.0


Processing 4omini_cot:  79%|███████▉  | 395/500 [04:29<00:51,  2.02it/s]

5.0


Processing 4omini_cot:  79%|███████▉  | 396/500 [04:30<00:55,  1.87it/s]

6.0


Processing 4omini_cot:  79%|███████▉  | 397/500 [04:30<00:54,  1.89it/s]

0.0


Processing 4omini_cot:  80%|███████▉  | 398/500 [04:31<00:50,  2.04it/s]

7.0


Processing 4omini_cot:  80%|███████▉  | 399/500 [04:31<00:59,  1.70it/s]

0.0


Processing 4omini_cot:  80%|████████  | 400/500 [04:32<00:56,  1.77it/s]

0.0


Processing 4omini_cot:  80%|████████  | 401/500 [04:33<01:00,  1.63it/s]

8.0


Processing 4omini_cot:  80%|████████  | 402/500 [04:33<00:57,  1.72it/s]

0.0


Processing 4omini_cot:  81%|████████  | 403/500 [04:34<00:54,  1.78it/s]

9.0


Processing 4omini_cot:  81%|████████  | 404/500 [04:34<00:52,  1.83it/s]

7.0


Processing 4omini_cot:  81%|████████  | 405/500 [04:35<00:53,  1.77it/s]

3.0


Processing 4omini_cot:  81%|████████  | 406/500 [04:35<00:52,  1.81it/s]

0.0


Processing 4omini_cot:  81%|████████▏ | 407/500 [04:36<00:50,  1.85it/s]

8.0


Processing 4omini_cot:  82%|████████▏ | 408/500 [04:36<00:45,  2.01it/s]

0.0


Processing 4omini_cot:  82%|████████▏ | 409/500 [04:37<00:45,  2.00it/s]

3.0


Processing 4omini_cot:  82%|████████▏ | 410/500 [04:37<00:43,  2.08it/s]

4.0


Processing 4omini_cot:  82%|████████▏ | 411/500 [04:38<00:46,  1.93it/s]

0.0


Processing 4omini_cot:  82%|████████▏ | 412/500 [04:38<00:42,  2.06it/s]

0.0


Processing 4omini_cot:  83%|████████▎ | 413/500 [04:39<00:45,  1.90it/s]

8.0


Processing 4omini_cot:  83%|████████▎ | 414/500 [04:39<00:44,  1.94it/s]

3.0


Processing 4omini_cot:  83%|████████▎ | 415/500 [04:40<00:46,  1.83it/s]

8.0


Processing 4omini_cot:  83%|████████▎ | 416/500 [04:40<00:42,  1.97it/s]

6.0


Processing 4omini_cot:  83%|████████▎ | 417/500 [04:41<00:40,  2.03it/s]

2.0


Processing 4omini_cot:  84%|████████▎ | 418/500 [04:41<00:42,  1.91it/s]

5.0


Processing 4omini_cot:  84%|████████▍ | 419/500 [04:42<00:38,  2.09it/s]

3.0


Processing 4omini_cot:  84%|████████▍ | 420/500 [04:42<00:34,  2.30it/s]

7.0


Processing 4omini_cot:  84%|████████▍ | 421/500 [04:42<00:33,  2.34it/s]

0.0


Processing 4omini_cot:  84%|████████▍ | 422/500 [04:43<00:37,  2.10it/s]

0.0


Processing 4omini_cot:  85%|████████▍ | 423/500 [04:44<00:37,  2.06it/s]

5.0


Processing 4omini_cot:  85%|████████▍ | 424/500 [04:44<00:35,  2.16it/s]

5.0


Processing 4omini_cot:  85%|████████▌ | 425/500 [04:44<00:30,  2.48it/s]

5.0


Processing 4omini_cot:  85%|████████▌ | 426/500 [04:45<00:29,  2.53it/s]

0.0


Processing 4omini_cot:  85%|████████▌ | 427/500 [04:46<00:46,  1.58it/s]

3.0


Processing 4omini_cot:  86%|████████▌ | 428/500 [04:46<00:39,  1.83it/s]

0.0


Processing 4omini_cot:  86%|████████▌ | 429/500 [04:47<00:35,  1.99it/s]

8.0


Processing 4omini_cot:  86%|████████▌ | 430/500 [04:47<00:35,  1.98it/s]

0.0


Processing 4omini_cot:  86%|████████▌ | 431/500 [04:48<00:41,  1.66it/s]

0.0


Processing 4omini_cot:  86%|████████▋ | 432/500 [04:48<00:38,  1.75it/s]

3.0


Processing 4omini_cot:  87%|████████▋ | 433/500 [04:49<00:39,  1.71it/s]

0.0


Processing 4omini_cot:  87%|████████▋ | 434/500 [04:49<00:35,  1.88it/s]

0.0


Processing 4omini_cot:  87%|████████▋ | 435/500 [04:50<00:32,  2.03it/s]

0.0


Processing 4omini_cot:  87%|████████▋ | 436/500 [04:50<00:31,  2.01it/s]

0.0


Processing 4omini_cot:  87%|████████▋ | 437/500 [04:51<00:30,  2.10it/s]

0.0


Processing 4omini_cot:  88%|████████▊ | 438/500 [04:51<00:28,  2.21it/s]

0.0


Processing 4omini_cot:  88%|████████▊ | 439/500 [04:51<00:24,  2.46it/s]

6.0


Processing 4omini_cot:  88%|████████▊ | 440/500 [04:52<00:24,  2.44it/s]

0.0


Processing 4omini_cot:  88%|████████▊ | 441/500 [04:52<00:24,  2.44it/s]

2.0


Processing 4omini_cot:  88%|████████▊ | 442/500 [04:53<00:27,  2.12it/s]

0.0


Processing 4omini_cot:  89%|████████▊ | 443/500 [04:53<00:27,  2.06it/s]

2.0


Processing 4omini_cot:  89%|████████▉ | 444/500 [04:54<00:25,  2.16it/s]

0.0


Processing 4omini_cot:  89%|████████▉ | 445/500 [04:54<00:24,  2.24it/s]

0.0


Processing 4omini_cot:  89%|████████▉ | 446/500 [04:55<00:25,  2.15it/s]

0.0


Processing 4omini_cot:  89%|████████▉ | 447/500 [04:55<00:27,  1.94it/s]

8.0


Processing 4omini_cot:  90%|████████▉ | 448/500 [04:56<00:22,  2.31it/s]

0.0


Processing 4omini_cot:  90%|████████▉ | 449/500 [04:56<00:20,  2.49it/s]

0.0


Processing 4omini_cot:  90%|█████████ | 450/500 [04:56<00:19,  2.60it/s]

0.0


Processing 4omini_cot:  90%|█████████ | 451/500 [04:57<00:19,  2.56it/s]

7.0


Processing 4omini_cot:  90%|█████████ | 452/500 [04:57<00:19,  2.42it/s]

0.0


Processing 4omini_cot:  91%|█████████ | 453/500 [04:58<00:19,  2.37it/s]

0.0


Processing 4omini_cot:  91%|█████████ | 454/500 [04:58<00:19,  2.37it/s]

0.0


Processing 4omini_cot:  91%|█████████ | 455/500 [04:58<00:18,  2.38it/s]

0.0


Processing 4omini_cot:  91%|█████████ | 456/500 [04:59<00:18,  2.40it/s]

0.0


Processing 4omini_cot:  91%|█████████▏| 457/500 [04:59<00:17,  2.44it/s]

1.0


Processing 4omini_cot:  92%|█████████▏| 458/500 [05:00<00:17,  2.39it/s]

8.0


Processing 4omini_cot:  92%|█████████▏| 459/500 [05:00<00:18,  2.25it/s]

1.0


Processing 4omini_cot:  92%|█████████▏| 460/500 [05:01<00:18,  2.21it/s]

7.0


Processing 4omini_cot:  92%|█████████▏| 461/500 [05:01<00:16,  2.34it/s]

8.0


Processing 4omini_cot:  92%|█████████▏| 462/500 [05:01<00:14,  2.54it/s]

0.0


Processing 4omini_cot:  93%|█████████▎| 463/500 [05:02<00:15,  2.38it/s]

0.0


Processing 4omini_cot:  93%|█████████▎| 464/500 [05:02<00:15,  2.27it/s]

0.0


Processing 4omini_cot:  93%|█████████▎| 465/500 [05:03<00:17,  2.02it/s]

2.0


Processing 4omini_cot:  93%|█████████▎| 466/500 [05:03<00:15,  2.13it/s]

0.0


Processing 4omini_cot:  93%|█████████▎| 467/500 [05:04<00:16,  2.06it/s]

0.0


Processing 4omini_cot:  94%|█████████▎| 468/500 [05:04<00:16,  1.96it/s]

1.0


Processing 4omini_cot:  94%|█████████▍| 469/500 [05:05<00:17,  1.80it/s]

0.0


Processing 4omini_cot:  94%|█████████▍| 470/500 [05:06<00:18,  1.58it/s]

0.0


Processing 4omini_cot:  94%|█████████▍| 471/500 [05:06<00:17,  1.68it/s]

0.0


Processing 4omini_cot:  94%|█████████▍| 472/500 [05:07<00:16,  1.75it/s]

0.0


Processing 4omini_cot:  95%|█████████▍| 473/500 [05:07<00:14,  1.91it/s]

3.0


Processing 4omini_cot:  95%|█████████▍| 474/500 [05:08<00:12,  2.06it/s]

6.0


Processing 4omini_cot:  95%|█████████▌| 475/500 [05:08<00:11,  2.16it/s]

0.0


Processing 4omini_cot:  95%|█████████▌| 476/500 [05:09<00:12,  1.97it/s]

2.0


Processing 4omini_cot:  95%|█████████▌| 477/500 [05:09<00:11,  2.07it/s]

5.0


Processing 4omini_cot:  96%|█████████▌| 478/500 [05:10<00:10,  2.16it/s]

0.0


Processing 4omini_cot:  96%|█████████▌| 479/500 [05:10<00:08,  2.35it/s]

6.0


Processing 4omini_cot:  96%|█████████▌| 480/500 [05:10<00:08,  2.35it/s]

0.0


Processing 4omini_cot:  96%|█████████▌| 481/500 [05:12<00:12,  1.54it/s]

8.0


Processing 4omini_cot:  96%|█████████▋| 482/500 [05:12<00:10,  1.74it/s]

5.0


Processing 4omini_cot:  97%|█████████▋| 483/500 [05:12<00:08,  1.89it/s]

3.0


Processing 4omini_cot:  97%|█████████▋| 484/500 [05:13<00:09,  1.71it/s]

0.0


Processing 4omini_cot:  97%|█████████▋| 485/500 [05:13<00:07,  1.88it/s]

7.0


Processing 4omini_cot:  97%|█████████▋| 486/500 [05:14<00:06,  2.02it/s]

5.0


Processing 4omini_cot:  97%|█████████▋| 487/500 [05:14<00:06,  2.01it/s]

0.0


Processing 4omini_cot:  98%|█████████▊| 488/500 [05:15<00:05,  2.30it/s]

0.0


Processing 4omini_cot:  98%|█████████▊| 489/500 [05:15<00:04,  2.24it/s]

0.0


Processing 4omini_cot:  98%|█████████▊| 490/500 [05:16<00:04,  2.05it/s]

3.0


Processing 4omini_cot:  98%|█████████▊| 491/500 [05:16<00:04,  2.00it/s]

8.0


Processing 4omini_cot:  98%|█████████▊| 492/500 [05:17<00:03,  2.13it/s]

6.0


Processing 4omini_cot:  99%|█████████▊| 493/500 [05:17<00:03,  2.08it/s]

0.0


Processing 4omini_cot:  99%|█████████▉| 494/500 [05:18<00:02,  2.18it/s]

0.0


Processing 4omini_cot:  99%|█████████▉| 495/500 [05:18<00:02,  1.96it/s]

6.0


Processing 4omini_cot:  99%|█████████▉| 496/500 [05:19<00:01,  2.11it/s]

3.0


Processing 4omini_cot:  99%|█████████▉| 497/500 [05:19<00:01,  2.19it/s]

8.0


Processing 4omini_cot: 100%|█████████▉| 498/500 [05:19<00:00,  2.39it/s]

0.0


Processing 4omini_cot: 100%|█████████▉| 499/500 [05:20<00:00,  2.47it/s]

0.0


Processing 4omini_cot: 100%|██████████| 500/500 [05:20<00:00,  1.56it/s]

0.0





In [133]:
# First evaluate correctness using the functions

def extract_complete_answer(row):
    """
    Extracts the complete answer from the question based on the correct answer letter.
    Handles both Yes/No answers and lettered options (A,B,C,etc).
    """
    question = str(row['Question'])
    correct_answer = str(row['Correct Answer']).strip()
    
    # Handle Yes/No answers
    if correct_answer in ['Yes', 'No']:
        return correct_answer
    
    # For lettered options
    try:
        # Split by either newline or comma
        if '\n' in question[-10:]:
            # Handle newline-separated options
            options = [opt.strip() for opt in question.split('\n') if opt.strip()]
            for opt in options:
                if opt.startswith(f"{correct_answer})") or opt.startswith(f"{correct_answer} )"):
                    return opt.strip()
        
        # Handle comma-separated options
        if 'options are:' in question.lower() or 'options:' in question.lower():
            # Find the options part
            options_part = question.split('options are:')[-1] if 'options are:' in question.lower() else question.split('options:')[-1]
            options = [opt.strip() for opt in options_part.split(',')]
            for opt in options:
                if opt.strip().startswith(f"{correct_answer})") or opt.strip().startswith(f"{correct_answer} )"):
                    return opt.strip()
        
        # If no match found, return original answer letter
        return correct_answer
        
    except Exception as e:
        print(f"Error processing question: {e}")
        return correct_answer

# Process confidence values
def process_confidence(df):
    # Convert confidence to numeric, coerce errors to NaN
    df['llm_confidence'] = pd.to_numeric(df['llm_confidence'], errors='coerce')
    
    # Handle non-numeric values
    non_numeric = df[df['llm_confidence'].isna()]
    for idx in non_numeric.index:
        val = str(df.loc[idx, 'llm_confidence'])
        # Try to extract float using regex
        match = re.search(r'-?\d+\.?\d*', val)
        if match:
            try:
                float_val = float(match.group())
                df.loc[idx, 'llm_confidence'] = float_val
            except:
                df.loc[idx, 'llm_confidence'] = -1
        else:
            df.loc[idx, 'llm_confidence'] = -1
            
    return df

def evaluate_llm_answers(df):
    """
    Evaluates LLM answers against correct answers and adds a Correctness column.
    1 = correct answer (matching first letter and answer is substring of complete answer)
    0 = incorrect answer
    -1 = format error
    
    Args:
        df: DataFrame containing 'llm_answer', 'Correct Answer', and 'complete_answer' columns
    
    Returns:
        DataFrame with new Correctness column and evaluation statistics
    """
    # Create Correctness column initialized with -1 (format error)
    df['Correctness'] = -1
    df['complete_answer'] = df.apply(extract_complete_answer, axis=1)
    stats = {
        'total': len(df),
        'correct': 0,
        'incorrect': 0,
        'format_errors': 0,
        'letter_match_only': 0,  # For debugging: cases where letter matches but content doesn't
    }
    
    valid_options = set('ABCDEFGHIJKNY')
    
    for idx, row in df.iterrows():
        llm_answer = str(row['llm_answer']).strip()
        correct_answer = str(row['Correct Answer']).strip()
        complete_answer = str(row['complete_answer']).strip()
        
        # Extract first letter from LLM answer
        first_letter = llm_answer[0] if llm_answer else ''
        
        # Check if first letter is a valid option
        if first_letter not in valid_options:
            stats['format_errors'] += 1
            
        # Check both conditions:
        # 1. First letter matches
        # 2. LLM answer is a substring of complete answer (case-insensitive)
        letter_matches = (first_letter == correct_answer)
        content_matches = (llm_answer.lower() in complete_answer.lower())
        
        if letter_matches:
            df.at[idx, 'Correctness'] = 1
            stats['correct'] += 1
        elif content_matches:
            df.at[idx, 'Correctness'] = 1
            stats['correct'] += 1
        else:
            df.at[idx, 'Correctness'] = 0
            stats['incorrect'] += 1
    
    # Calculate accuracy (excluding format errors)
    valid_answers = stats['total'] - stats['format_errors']
    accuracy = stats['correct'] / valid_answers if valid_answers > 0 else 0
    stats['accuracy'] = accuracy
    
    return df

# Process confidence values
df_4o_cot_sample = process_confidence(df_4o_cot_sample)
df_4omini_cot_sample = process_confidence(df_4omini_cot_sample)

# Evaluate answers
df_4o_cot_sample = evaluate_llm_answers(df_4o_cot_sample)
df_4omini_cot_sample = evaluate_llm_answers(df_4omini_cot_sample)

# Calculate thresholds
threshold_4o = df_4o_cot_sample['reasoning_score'].mean()
threshold_4omini = df_4omini_cot_sample['reasoning_score'].mean()

# Split into high/low categories
df_4o_cot_sample['reasoning_category'] = df_4o_cot_sample['reasoning_score'].apply(lambda x: 'high' if x >= threshold_4o else 'low')
df_4omini_cot_sample['reasoning_category'] = df_4omini_cot_sample['reasoning_score'].apply(lambda x: 'high' if x >= threshold_4omini else 'low')

# Compare metrics for each category
print("\n4O-CoT Results:")
print("High reasoning category:")
print(f"Average correctness: {df_4o_cot_sample[df_4o_cot_sample['reasoning_category']=='high']['Correctness'].mean():.3f}")
print(f"Average confidence: {df_4o_cot_sample[df_4o_cot_sample['reasoning_category']=='high']['llm_confidence'].mean():.3f}")
print("Low reasoning category:")
print(f"Average correctness: {df_4o_cot_sample[df_4o_cot_sample['reasoning_category']=='low']['Correctness'].mean():.3f}")
print(f"Average confidence: {df_4o_cot_sample[df_4o_cot_sample['reasoning_category']=='low']['llm_confidence'].mean():.3f}")

print("\n4O-Mini-CoT Results:")
print("High reasoning category:")
print(f"Average correctness: {df_4omini_cot_sample[df_4omini_cot_sample['reasoning_category']=='high']['Correctness'].mean():.3f}")
print(f"Average confidence: {df_4omini_cot_sample[df_4omini_cot_sample['reasoning_category']=='high']['llm_confidence'].mean():.3f}")
print("Low reasoning category:")
print(f"Average correctness: {df_4omini_cot_sample[df_4omini_cot_sample['reasoning_category']=='low']['Correctness'].mean():.3f}")
print(f"Average confidence: {df_4omini_cot_sample[df_4omini_cot_sample['reasoning_category']=='low']['llm_confidence'].mean():.3f}")


4O-CoT Results:
High reasoning category:
Average correctness: 0.789
Average confidence: 0.961
Low reasoning category:
Average correctness: 0.764
Average confidence: 0.947

4O-Mini-CoT Results:
High reasoning category:
Average correctness: 0.636
Average confidence: 0.962
Low reasoning category:
Average correctness: 0.643
Average confidence: 0.958


NameError: name 'dfs' is not defined

In [134]:
df_4o_cot_sample.to_csv('data/Results/Reasoning_quality/openai_4o_cot_4o_scored.csv', index=False)
df_4omini_cot_sample.to_csv('data/Results/Reasoning_quality/openai_4omini_cot_4o_scored.csv', index=False)

In [131]:
df_4o_cot

Unnamed: 0,Question,Category,Name,Type,Correct Answer,llm_answer,llm_confidence,llm_full_response,token_usage,logprobs,reasoning
0,"A large gene has dozens of exons, of which the...",Molecular Biology,GPQA_main,Science,D,A) polyA tail,0.9,<reasoning>The proposed therapy involves using...,"{'completion_tokens': 148, 'prompt_tokens': 36...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The proposed therapy involves using a Morpholi...
1,Two quantum states with energies E1 and E2 hav...,Physics (general),GPQA_main,Science,B,B) 10^-4 eV,1.0,<reasoning>The ability to resolve two energy l...,"{'completion_tokens': 325, 'prompt_tokens': 30...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The ability to resolve two energy levels is re...
2,trans-cinnamaldehyde was treated with methylma...,Organic Chemistry,GPQA_main,Science,D,B) 10,1.0,<reasoning>Trans-cinnamaldehyde is C9H8O. Trea...,"{'completion_tokens': 183, 'prompt_tokens': 31...","{'content': [{'token': '<', 'bytes': [60], 'lo...",Trans-cinnamaldehyde is C9H8O. Treatment with ...
3,how many of the following compounds exhibit op...,Organic Chemistry,GPQA_main,Science,D,D) 4,0.8,<reasoning>Optical activity is exhibited by co...,"{'completion_tokens': 537, 'prompt_tokens': 39...","{'content': [{'token': '<', 'bytes': [60], 'lo...",Optical activity is exhibited by compounds whi...
4,A coating is applied to a substrate resulting ...,Chemistry (general),GPQA_main,Science,D,A) 134°,0.8,<reasoning>The Cassie-Baxter model suggests th...,"{'completion_tokens': 169, 'prompt_tokens': 35...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The Cassie-Baxter model suggests that the appa...
...,...,...,...,...,...,...,...,...,...,...,...
3143,A land development company was the owner of a4...,ori_mmlu-professional_law,MMLUPro_test,Knowledge,G,G) Fee simple subject to condition subsequent.,0.9,<reasoning>The country club has an interest in...,"{'completion_tokens': 155, 'prompt_tokens': 69...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The country club has an interest in the 50-acr...
3144,There are three main types of buying situatio...,ori_mmlu-marketing,MMLUPro_test,Knowledge,J,J) Buyclasses.,1.0,"<reasoning>Robinson, Faris, and Wind identifie...","{'completion_tokens': 67, 'prompt_tokens': 294...","{'content': [{'token': '<', 'bytes': [60], 'lo...","Robinson, Faris, and Wind identified three mai..."
3145,A department store distributes its overhead ba...,stemez-Business,MMLUPro_test,Knowledge,G,"G) 12%, $640.80",1.0,<reasoning>The overhead percentage is calculat...,"{'completion_tokens': 106, 'prompt_tokens': 39...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The overhead percentage is calculated by divid...
3146,Field and Co. purchased 100 dolls at $3.25 eac...,stemez-Business,MMLUPro_test,Knowledge,E,A) $4.38,1.0,<reasoning>The initial cost for Field and Co. ...,"{'completion_tokens': 217, 'prompt_tokens': 33...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The initial cost for Field and Co. is 100 doll...


In [125]:
df_4o_cot_sample

Unnamed: 0,Question,Category,Name,Type,Correct Answer,llm_answer,llm_confidence,llm_full_response,token_usage,logprobs,reasoning,reasoning_score
2998,"Late one night, an accountant walked into a ba...",ori_mmlu-professional_law,MMLUPro_test,Knowledge,E,C) contributing cause of the pedestrian's inju...,0.9,"<reasoning>In legal terms, the bartender's act...","{'completion_tokens': 123, 'prompt_tokens': 47...","{'content': [{'token': '<', 'bytes': [60], 'lo...","In legal terms, the bartender's act of serving...",0.0
2976,Estimate the PEG ratio for a firm that has the...,theoremQA-Finance,MMLUPro_test,Knowledge,H,B) 0.75,0.9,<reasoning>\nThe PEG ratio is calculated as th...,"{'completion_tokens': 365, 'prompt_tokens': 40...","{'content': [{'token': '<', 'bytes': [60], 'lo...",\nThe PEG ratio is calculated as the Price-to-...,0.0
1065,Context: Every vumpus is not sour. Vumpuses ar...,logical,ProntoQA,Logical,A,A) True,1.0,"<reasoning>According to the given context, a v...","{'completion_tokens': 89, 'prompt_tokens': 355...","{'content': [{'token': '<', 'bytes': [60], 'lo...","According to the given context, a vumpus (Fae)...",0.0
1990,What is the meaning of proportionality in rela...,international_law,MMLU_test,Knowledge,C,C) The victim State may only use such means an...,1.0,<reasoning>Proportionality in international la...,"{'completion_tokens': 139, 'prompt_tokens': 30...","{'content': [{'token': '<', 'bytes': [60], 'lo...","Proportionality in international law, particul...",5.0
2995,A glass vase sells for $25.00. The net profit ...,stemez-Business,MMLUPro_test,Knowledge,E,E) $11.50,1.0,<reasoning>The selling price of the vase is $2...,"{'completion_tokens': 133, 'prompt_tokens': 31...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The selling price of the vase is $25.00. Net p...,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
692,"If you follow these instructions, do you retur...",navigate,BigBench_hard,Spatial,Yes,Yes,1.0,"<reasoning>Starting from the initial point, on...","{'completion_tokens': 72, 'prompt_tokens': 247...","{'content': [{'token': '<', 'bytes': [60], 'lo...","Starting from the initial point, one takes 3 s...",0.0
2121,Carol Collector was a serious antique car buye...,professional_law,MMLU_test,Knowledge,D,"D) Carol, because the ""as is"" disclosure was n...",0.9,<reasoning>The primary issue here is whether C...,"{'completion_tokens': 142, 'prompt_tokens': 45...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The primary issue here is whether Carol was mi...,6.0
2611,What is the greatest common factor of 252 and ...,ori_mmlu-high_school_mathematics,MMLUPro_test,Math,D,D) 12,1.0,<reasoning>To find the greatest common factor ...,"{'completion_tokens': 152, 'prompt_tokens': 27...","{'content': [{'token': '<', 'bytes': [60], 'lo...",To find the greatest common factor (GCF) of 25...,0.0
240,A meta-substituted benzoic acid reacts with a ...,Organic Chemistry,GPQA_main,Science,B,B) 2>1>4>3,0.8,<reasoning>The base that gives a brown precipi...,"{'completion_tokens': 341, 'prompt_tokens': 39...","{'content': [{'token': '<', 'bytes': [60], 'lo...",The base that gives a brown precipitate with N...,0.0


In [127]:
df_4omini_cot_sample['reasoning_score'].mean()

np.float64(2.404)

In [128]:
df_4o_cot_sample['reasoning_score'].mean()

np.float64(2.541082164328657)