In [1]:
import os
import pandas as pd

# Path to the main directory
main_dir = "../../data/adaptive_consistency_outputs/code-davinci-002/"

# List to store dataframes
dataframes = []

# Iterate over each subdirectory and process files
for sub_dir, _, files in os.walk(main_dir):
    if sub_dir == main_dir:
        continue  # Skip the main directory
    
    # List to store dataframes for the current subdirectory
    sub_dataframes = []
    
    for file in files:
        if file.endswith('.csv'):
            # Read CSV file
            file_path = os.path.join(sub_dir, file)
            df = pd.read_csv(file_path)
            
            # Append the dataframe to the list for the current subdirectory
            sub_dataframes.append(df)
    
    if sub_dataframes:
        # Concatenate dataframes within the current subdirectory
        sub_df = pd.concat(sub_dataframes, ignore_index=True)
        
        # Add a new column 'Name' with the folder name
        folder_name = os.path.basename(sub_dir)
        sub_df['Name'] = folder_name
        
        # Append the subdirectory dataframe to the main list
        dataframes.append(sub_df)

# Print or inspect the list of dataframes
# for df in dataframes:
#     print(f"Dataframe for folder: {df['Name'].iloc[0]}")
#     print(df.head())
#     print()

In [2]:
# Concatenate all dataframes into a single dataframe
final_df_1 = pd.concat(dataframes, ignore_index=True)

# Add a new column 'model' with the value 'code-davinci-002'
final_df_1['model'] = 'code-davinci-002'

In [3]:
final_df_1['Final Answer_0'].value_counts()[:30]

Final Answer_0
no       2348
yes      2173
(A)      1389
(B)      1381
(C)       922
error     829
(D)       428
True      425
2.0       422
4.0       341
5.0       332
7.0       323
8.0       314
3.0       302
1.0       301
False     300
6.0       284
10.0      281
12.0      270
9.0       249
14.0      217
20.0      198
15.0      192
30.0      172
16.0      168
11.0      166
18.0      164
(E)       158
13.0      144
17.0      139
Name: count, dtype: int64

In [4]:
final_df_1['CoT_0']

0        A: Let's think step by step.\nIf we look at (A...
1        A: Let's think step by step.\nIf we look at (A...
2        A: Let's think step by step.\nIf we look at (A...
3        A: Let's think step by step.\nIf we look at (A...
4        A: Let's think step by step.\nIf we look at (A...
                               ...                        
24060    # If the first day of 2019 is a Tuesday, and t...
24061    # If the first day of 2019 is a Tuesday, and t...
24062    # If the first day of 2019 is a Tuesday, and t...
24063    # If the first day of 2019 is a Tuesday, and t...
24064    # If the first day of 2019 is a Tuesday, and t...
Name: CoT_0, Length: 24065, dtype: object

In [5]:
final_df_1['Final Answer_0'].value_counts()[-20:] # 

Final Answer_0
18.46153846153846     1
5.866666666666666     1
790.0                 1
235.20000000000002    1
5.04                  1
11000.0               1
41.142857142857146    1
-1900.0               1
1662.5                1
5.9399999999999995    1
324000.0              1
66.2                  1
564.0                 1
6178.4800000000005    1
1756.0                1
86.66666666666667     1
810.0                 1
-97.66666666666667    1
1607.8125             1
3.571428571428571     1
Name: count, dtype: int64

In [6]:
final_df_1['Final Answer_0'].value_counts().index[8]

'2.0'

In [7]:
final_df_1['Final Answer_0'].value_counts().index[9]

'4.0'

In [8]:
def clean_answer_columns(df):
    # List of column names to clean
    answer_columns = [f"Final Answer_{i}" for i in range(40)] + ["Correct Answer"]
    
    # Iterate over each answer column
    for column in answer_columns:
        # Convert the column to string type
        df[column] = df[column].astype(str)
        
        # Iterate over each value in the column
        for i, value in enumerate(df[column]):
            try:
                # Check if the value can be converted to float
                float_value = float(value)
                
                # Format the float value to have one decimal place
                df.at[i, column] = f"{float_value:.1f}"
            except ValueError:
                # If the value cannot be converted to float, leave it as is
                pass
    
    return df

final_df_1 = clean_answer_columns(final_df_1)
final_df_1['Final Answer_0'].value_counts()[:20]

Final Answer_0
no       2348
yes      2173
(A)      1389
(B)      1381
(C)       922
error     829
(D)       428
2.0       426
True      425
4.0       343
5.0       336
7.0       323
8.0       314
3.0       306
1.0       301
False     300
6.0       285
10.0      284
12.0      273
9.0       249
Name: count, dtype: int64

In [15]:
final_df_1['Model'] = 'code-davinci-002'

In [12]:
main_dir = "../../data/adaptive_consistency_outputs/vicuna-13b/"

# List to store dataframes
dataframes = []

# Iterate over each subdirectory and process files
for sub_dir, _, files in os.walk(main_dir):
    if sub_dir == main_dir:
        continue  # Skip the main directory
    
    # List to store dataframes for the current subdirectory
    sub_dataframes = []
    
    for file in files:
        if file.endswith('.csv'):
            # Read CSV file
            file_path = os.path.join(sub_dir, file)
            try:
                df = pd.read_csv(file_path)
            except:
                print(file)
                break
            
            # Append the dataframe to the list for the current subdirectory
            sub_dataframes.append(df)
    
    if sub_dataframes:
        # Concatenate dataframes within the current subdirectory
        sub_df = pd.concat(sub_dataframes, ignore_index=True)
        
        # Add a new column 'Name' with the folder name
        folder_name = os.path.basename(sub_dir)
        sub_df['Name'] = folder_name
        
        # Append the subdirectory dataframe to the main list
        dataframes.append(sub_df)

salient_translation_seed2_cleaned.csv


In [13]:
len(dataframes)

13

In [14]:
# Concatenate all dataframes into a single dataframe
final_df_2 = pd.concat(dataframes, ignore_index=True)

# Add a new column 'model' with the value 'code-davinci-002'
final_df_2['model'] = 'vicuna-13b'
final_df_2= clean_answer_columns(final_df_2)

In [15]:
final_df_2['Final Answer_0'].value_counts()[:20]

Final Answer_0
no       4106
error    3584
yes      1748
(A)       983
(B)       821
(C)       725
True      343
2.0       321
4.0       304
7.0       285
5.0       280
8.0       274
0.0       261
6.0       254
(D)       254
3.0       249
12.0      237
1.0       236
False     235
9.0       232
Name: count, dtype: int64

In [16]:
final_df = pd.concat([final_df_1,final_df_2]).reset_index(drop=True)

In [17]:
saved_dir = "../../data/adaptive_consistency_outputs/"

final_df.to_csv(os.path.join(saved_dir, "final_asc.csv"), index=False)

In [18]:
final_df

Unnamed: 0,Category,Question,Correct Answer,CoT_0,Final Answer_0,Instruction Violation_0,CoT_1,Final Answer_1,Instruction Violation_1,CoT_2,...,Final Answer_37,Instruction Violation_37,CoT_38,Final Answer_38,Instruction Violation_38,CoT_39,Final Answer_39,Instruction Violation_39,Name,model
0,snarks,Which statement is sarcastic?\nOptions:\n(A) H...,(B),A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",snarks,code-davinci-002
1,snarks,Which statement is sarcastic?\nOptions:\n(A) H...,(A),A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A) or (B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",snarks,code-davinci-002
2,snarks,Which statement is sarcastic?\nOptions:\n(A) J...,(A),A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,either (A) or (B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",snarks,code-davinci-002
3,snarks,Which statement is sarcastic?\nOptions:\n(A) Y...,(B),A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(B),"[(0, 0)]",snarks,code-davinci-002
4,snarks,Which statement is sarcastic?\nOptions:\n(A) H...,(A),A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",A: Let's think step by step.\nIf we look at (A...,(A),"[(0, 0)]",snarks,code-davinci-002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49686,Date,"The first day of 2019 is a Tuesday, and today ...",12/07/2018,"# If the first day of 2019 is a Tuesday, and ...",01/02/2019,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",01/01/2019,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",...,12/02/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",12/26/2018,"[(0, 0)]",date,vicuna-13b
49687,Date,"The first day of 2019 is a Tuesday, and today ...",01/07/2018,"# If the first day of 2019 is a Tuesday, and ...",01/01/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",01/01/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",...,01/01/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",01/01/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",01/01/2018,"[(0, 0)]",date,vicuna-13b
49688,Date,"The first day of 2019 is a Tuesday, and today ...",12/31/2018,"# If the first day of 2019 is a Tuesday, and ...",12/31/2018,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",2019-01-15 00:00:00,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",...,01/01/2019,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",12/26/2018,"[(0, 0)]",date,vicuna-13b
49689,Date,"The first day of 2019 is a Tuesday, and today ...",01/14/2019,"# One week from today,\none_week_from_today =...",error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",...,error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",error,"[(0, 0)]","# If the first day of 2019 is a Tuesday, and ...",error,"[(0, 0)]",date,vicuna-13b
