In [1]:
import pandas as pd
from IPython.display import display
import argparse
import os
from aggregate_entity_results import collect_results, aggregate_results
from utils import generate_explanation_prediction

In [2]:
# --- Labelling run settings -------------------------------------------------
# model_name / few_shot_selection / verified are combined into the file names
# that process_multiple_files reads and writes (verified=True selects the
# '_final_' variants).
model_name = "refuel-llm"
few_shot_selection = "semantic_similarity"
verified = False

# Column used to check that every prediction file is row-aligned with the
# shard, and the marker string handed to collect_results.
text_column = "context"
label_symbol = "^^^^"

# --- Locations --------------------------------------------------------------
# data_dir     : holds the input shards (unlabeled-<ix>.csv).
# output_dir   : per-entity prediction CSVs are read from here.
# result_dir   : the aggregated, fully labeled CSV is written here.
# unlabeled_dir: per-entity "second round" CSVs are written here.
data_dir = '/scratch/dzhang5/LLM/TWEET-FID/'
output_dir = '/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1'
result_dir = '/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1'
unlabeled_dir = '/scratch/dzhang5/LLM/TWEET-FID/second-round/'

In [3]:
def process_multiple_files(input_path=None):
    """Merge the per-entity predictions for one shard and split it by labeling success.

    For the shard CSV at ``input_path`` this function:

    1. reads the shard and the four per-entity prediction CSVs
       (Food, Location, Symptom, Keyword) from ``output_dir``;
    2. checks that every prediction file is row-aligned with the shard on
       ``text_column`` and joins their extra columns side by side;
    3. derives ``All_answer_successfully_labeled`` as the AND of the four
       ``<label>_answer_successfully_labeled`` columns and displays the
       value counts of all five flags;
    4. writes the rows labeled for *all* entity types, plus the aggregated
       prediction and explanation columns, to ``result_dir``;
    5. writes, per entity type, the rows that were *not* labeled for that
       type to ``unlabeled_dir``.

    Both outputs carry an ``ori_index`` column holding the row position in the
    original shard.

    Parameters
    ----------
    input_path : str, optional
        Path of the shard CSV. When omitted, the module-level ``data_path``
        variable is used, so existing ``process_multiple_files()`` calls that
        set ``data_path`` beforehand keep working.

    Returns
    -------
    int
        Number of rows that were successfully labeled for all entity types.

    Raises
    ------
    FileNotFoundError
        If the shard or any of the four prediction CSVs does not exist.
    AssertionError
        If a prediction file is not row-aligned with the others / the shard.
    NameError
        If ``input_path`` is omitted and no global ``data_path`` is defined.

    Notes
    -----
    Relies on the notebook-level settings ``output_dir``, ``result_dir``,
    ``unlabeled_dir``, ``model_name``, ``few_shot_selection``, ``text_column``,
    ``label_symbol`` and ``verified``.
    """
    # Explicit argument wins; otherwise fall back to the notebook global.
    source_path = data_path if input_path is None else input_path

    label_types = ['Food', 'Location', 'Symptom', 'Keyword']
    flag_column = {lt: f'{lt}_answer_successfully_labeled' for lt in label_types}
    all_flag = 'All_answer_successfully_labeled'

    model_tag = os.path.basename(model_name)
    source_name = os.path.basename(source_path)
    # Verified runs use the '_final_' file-name variant for inputs and outputs.
    name_infix = '_final_' if verified else '_'

    # ---- Load everything first so a missing input fails before any write ----
    ori_df = pd.read_csv(source_path)
    predictions_list = []
    for label_type in label_types:
        output_name = f'{model_tag}_{few_shot_selection}_{label_type}{name_infix}{source_name}'
        predictions_list.append(pd.read_csv(os.path.join(output_dir, output_name)))

    # ---- Join the per-entity frames column-wise on their (positional) index ----
    merged_predictions = predictions_list[0]
    for label_type, next_prediction in zip(label_types[1:], predictions_list[1:]):
        assert (merged_predictions[text_column] == next_prediction[text_column]).all(), \
            f'{label_type} predictions are not row-aligned on {text_column!r}'
        # Only bring over columns that are not already present.
        cols_to_merge = next_prediction.columns.difference(merged_predictions.columns)
        merged_predictions = merged_predictions.join(next_prediction[cols_to_merge], validate='1:1')
    assert (ori_df[text_column] == merged_predictions[text_column]).all(), \
        f'merged predictions are not row-aligned with {source_name} on {text_column!r}'

    # ---- Success flags ----
    all_labeled = merged_predictions[flag_column[label_types[0]]]
    for label_type in label_types[1:]:
        all_labeled = all_labeled & merged_predictions[flag_column[label_type]]
    merged_predictions[all_flag] = all_labeled
    for label_type in label_types + ['All']:
        display(merged_predictions[f'{label_type}_answer_successfully_labeled'].value_counts())

    # ---- Rows still unlabeled, per entity type ----
    unlabeled_dict = {
        label_type: (
            ori_df.loc[~merged_predictions[flag_column[label_type]]]
            .reset_index()
            .rename(columns={'index': 'ori_index'})
        )
        for label_type in label_types
    }

    # ---- Rows labeled for every entity type ----
    labeled_df = (
        ori_df.loc[merged_predictions[all_flag]]
        .reset_index()
        .rename(columns={'index': 'ori_index'})
    )
    keep_predictions = merged_predictions.loc[merged_predictions[all_flag]].copy()
    words_list = [
        collect_results(keep_predictions, text_column, label_type, label_symbol)
        for label_type in label_types
    ]
    # NOTE(review): labeled_df has a fresh 0..n-1 index while keep_predictions
    # keeps the shard's row labels. This assignment is only correct if
    # aggregate_results returns a positional sequence (e.g. a list) with one
    # entry per kept row; a Series indexed like keep_predictions would be
    # re-aligned by pandas -- confirm against aggregate_entity_results.
    labeled_df['CategorizedLabels_prediction'] = aggregate_results(words_list)
    labeled_df['sentence_explanation_prediction'] = labeled_df['CategorizedLabels_prediction'].apply(
        lambda x: generate_explanation_prediction(x, 'N/A')
    )

    # ---- Write outputs ----
    agg_output_suffix_name = source_name.replace('unlabeled', 'unlabeled-first')
    unlabeled_output_suffix_name = source_name.replace('unlabeled', 'unlabeled-second')

    # Make sure the target directories exist so to_csv cannot fail half-way
    # through a shard merely because a folder has not been created yet.
    os.makedirs(result_dir, exist_ok=True)
    os.makedirs(unlabeled_dir, exist_ok=True)

    agg_output_name = f'{model_tag}_{few_shot_selection}_aggregated{name_infix}{agg_output_suffix_name}'
    agg_output_path = os.path.join(result_dir, agg_output_name)
    print(agg_output_path)
    labeled_df.to_csv(agg_output_path, index=False)

    for label_type in label_types:
        unlabeled_output_name = f'{label_type}-{unlabeled_output_suffix_name}'
        unlabeled_output_path = os.path.join(unlabeled_dir, unlabeled_output_name)
        print(unlabeled_output_path)
        unlabeled_dict[label_type].to_csv(unlabeled_output_path, index=False)

    return len(labeled_df)

In [4]:
# Process shards unlabeled-1.csv ... unlabeled-20.csv, accumulating how many
# rows ended up fully labeled and how many shards could not be processed
# because one of their input files was missing.
count_labeled = 0
missing_file = 0
for ix in range(1, 21):
    data_name = f'unlabeled-{ix}.csv'
    # process_multiple_files() picks the shard up from this global.
    data_path = os.path.join(data_dir, data_name)
    try:
        count_labeled += process_multiple_files()
    except FileNotFoundError:
        # Skip this shard but keep going with the remaining ones.
        print(f'{data_name} has missing files')
        missing_file += 1
print("Total Labeled:", count_labeled, 'missing files:', missing_file)

Food_answer_successfully_labeled
True     1364
False    1136
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1283
False    1217
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1388
False    1112
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1367
False    1133
Name: count, dtype: int64

All_answer_successfully_labeled
False    2191
True      309
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-1.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-1.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-1.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-1.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-1.csv


Food_answer_successfully_labeled
True     1447
False    1053
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1521
False     979
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1678
False     822
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1549
False     951
Name: count, dtype: int64

All_answer_successfully_labeled
False    2064
True      436
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-2.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-2.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-2.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-2.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-2.csv


Food_answer_successfully_labeled
True     1403
False    1097
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1273
False    1227
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1485
False    1015
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1412
False    1088
Name: count, dtype: int64

All_answer_successfully_labeled
False    2176
True      324
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-3.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-3.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-3.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-3.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-3.csv


Food_answer_successfully_labeled
True     1388
False    1112
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1535
False     965
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1624
False     876
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1564
False     936
Name: count, dtype: int64

All_answer_successfully_labeled
False    2099
True      401
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-4.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-4.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-4.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-4.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-4.csv


Food_answer_successfully_labeled
True     1406
False    1094
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1411
False    1089
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1537
False     963
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1422
False    1078
Name: count, dtype: int64

All_answer_successfully_labeled
False    2114
True      386
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-5.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-5.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-5.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-5.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-5.csv


Food_answer_successfully_labeled
True     1606
False     894
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1617
False     883
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1572
False     928
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1592
False     908
Name: count, dtype: int64

All_answer_successfully_labeled
False    2000
True      500
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-6.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-6.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-6.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-6.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-6.csv


Food_answer_successfully_labeled
True     1508
False     992
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1625
False     875
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1709
False     791
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1624
False     876
Name: count, dtype: int64

All_answer_successfully_labeled
False    1942
True      558
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-7.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-7.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-7.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-7.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-7.csv


Food_answer_successfully_labeled
True     1306
False    1194
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1365
False    1135
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1477
False    1023
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1354
False    1146
Name: count, dtype: int64

All_answer_successfully_labeled
False    2154
True      346
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-8.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-8.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-8.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-8.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-8.csv


Food_answer_successfully_labeled
True     1361
False    1139
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1403
False    1097
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1451
False    1049
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1353
False    1147
Name: count, dtype: int64

All_answer_successfully_labeled
False    2167
True      333
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-9.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-9.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-9.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-9.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-9.csv


Food_answer_successfully_labeled
True     1539
False     961
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1484
False    1016
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1449
False    1051
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1474
False    1026
Name: count, dtype: int64

All_answer_successfully_labeled
False    2154
True      346
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-10.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-10.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-10.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-10.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-10.csv


Food_answer_successfully_labeled
True     1501
False     999
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1469
False    1031
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1465
False    1035
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1427
False    1073
Name: count, dtype: int64

All_answer_successfully_labeled
False    2184
True      316
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-11.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-11.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-11.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-11.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-11.csv


Food_answer_successfully_labeled
True     1539
False     961
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1425
False    1075
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1533
False     967
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1559
False     941
Name: count, dtype: int64

All_answer_successfully_labeled
False    2156
True      344
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-12.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-12.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-12.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-12.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-12.csv


Food_answer_successfully_labeled
True     1430
False    1070
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1457
False    1043
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1462
False    1038
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1449
False    1051
Name: count, dtype: int64

All_answer_successfully_labeled
False    2166
True      334
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-13.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-13.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-13.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-13.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-13.csv


Food_answer_successfully_labeled
True     1451
False    1049
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1369
False    1131
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1382
False    1118
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1450
False    1050
Name: count, dtype: int64

All_answer_successfully_labeled
False    2179
True      321
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-14.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-14.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-14.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-14.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-14.csv


Food_answer_successfully_labeled
True     1653
False     847
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1563
False     937
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1525
False     975
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1484
False    1016
Name: count, dtype: int64

All_answer_successfully_labeled
False    2074
True      426
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-15.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-15.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-15.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-15.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-15.csv


Food_answer_successfully_labeled
True     1981
False     519
Name: count, dtype: int64

Location_answer_successfully_labeled
True     2131
False     369
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1967
False     533
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1745
False     755
Name: count, dtype: int64

All_answer_successfully_labeled
False    1511
True      989
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-16.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-16.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-16.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-16.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-16.csv


Food_answer_successfully_labeled
True     1723
False     777
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1854
False     646
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1932
False     568
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1791
False     709
Name: count, dtype: int64

All_answer_successfully_labeled
False    1671
True      829
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-17.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-17.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-17.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-17.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-17.csv


Food_answer_successfully_labeled
True     1284
False    1216
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1673
False     827
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     2033
False     467
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1521
False     979
Name: count, dtype: int64

All_answer_successfully_labeled
False    2020
True      480
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-18.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-18.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-18.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-18.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-18.csv


Food_answer_successfully_labeled
True     1924
False     576
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1566
False     934
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1616
False     884
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1622
False     878
Name: count, dtype: int64

All_answer_successfully_labeled
False    2017
True      483
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-19.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-19.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-19.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-19.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-19.csv


Food_answer_successfully_labeled
True     1295
False    1205
Name: count, dtype: int64

Location_answer_successfully_labeled
True     1706
False     794
Name: count, dtype: int64

Symptom_answer_successfully_labeled
True     1847
False     653
Name: count, dtype: int64

Keyword_answer_successfully_labeled
True     1841
False     659
Name: count, dtype: int64

All_answer_successfully_labeled
False    2006
True      494
Name: count, dtype: int64

aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
aggregate entities results ...
/scratch/dzhang5/LLM/TWEET-FID/unlabeled-results-autolabel-ner-qa-mv/0.1/refuel-llm_semantic_similarity_aggregated_unlabeled-first-20.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Food-unlabeled-second-20.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Location-unlabeled-second-20.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Symptom-unlabeled-second-20.csv
/scratch/dzhang5/LLM/TWEET-FID/second-round/Keyword-unlabeled-second-20.csv
Total Labeled: 8955 missing files: 0
