In [1]:
import os
import logging
import json
import pickle
from tqdm import tqdm
import glob
import re
from itertools import chain
import numpy as np
import pandas as pd
import collections
import seaborn as sns
import matplotlib.pyplot as plt

# Document Retrieval

In [2]:
# Resolve the project directories from the per-user JSON config and load the
# document-retrieval results table that the rest of this section plots.
try:
    # the config file is looked up in the user's home directory
    home_dir = os.path.expanduser("~")
    with open(f"{home_dir}/.biomedqa_dir.json", encoding="utf-8") as fp:
        dir_dict = json.load(fp)
except Exception:
    # print a hint, then let the original error propagate unchanged
    print("Error: unable to load directory dictionary. Please run setup.py")
    raise

# project directories taken from the config
BASE_DIR, DATA_DIR, MODEL_DIR = dir_dict["base_dir"], dir_dict["data_dir"], dir_dict["model_dir"]
LOG_DIR, RESULTS_DIR = dir_dict["log_dir"], dir_dict["results_dir"]

# experiment parameters
challenge_no = 10
DATASET, YEAR = "bioasq", "2022"
DATASET_NAME = "Task10BGoldenEnriched"
RESULT_FILE_NAME = "lda_Task10BGoldenEnriched_on_Task10BGoldenEnriched_CmaEsSampler_v1.csv"
# narrow RESULTS_DIR down to this experiment's sub-folder (also used when saving figures)
RESULTS_DIR = f"{RESULTS_DIR}/biomed_qa/document_retrieval/topic_model/{DATASET}/{YEAR}/{DATASET_NAME}"

# read the results table and preview its first rows
results_df = pd.read_csv(f"{RESULTS_DIR}/{RESULT_FILE_NAME}")
display(results_df.head(10))

# plot-title prefix, and the metric columns (every column except the first and the last)
challenge = f"BioASQ Task {challenge_no}b - Phase A"
metrics = results_df.columns[1:-1]
display(metrics)

Unnamed: 0,System,Mean precision,Recall,F-Measure,Batch
0,Proposed,0.305556,0.540372,0.312666,1
1,bio-answerfinder,0.3908,0.417,0.3553,1
2,RYGH-1,0.2889,0.6122,0.2999,1
3,RYGH-4,0.2774,0.6177,0.2943,1
4,RYGH-3,0.2765,0.6162,0.2937,1
5,RYGH,0.273,0.598,0.2911,1
6,The basic end-to-end,0.2496,0.5135,0.2787,1
7,Basic e2e mid speed,0.2396,0.496,0.2668,1
8,gsl_zs_rrf1,0.2311,0.5658,0.2584,1
9,bio-answerfinder-2,0.2613,0.4715,0.2578,1


Index(['Mean precision', 'Recall', 'F-Measure'], dtype='object')

In [4]:
# For every batch, list the (up to 25) systems with the highest value of the
# selected metric; position 2 of `metrics` is the F-measure column.
index = 2
with pd.option_context('display.max_rows', None):  # lift the row limit so no row is elided
    best_per_batch = results_df.groupby("Batch").apply(
        lambda batch_rows: batch_rows.nlargest(25, metrics[index])
    )
    # the batch is already part of the resulting index, so the column is dropped
    display(best_per_batch.drop(columns=["Batch"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,System,Mean precision,Recall,F-Measure
Batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,bio-answerfinder,0.3908,0.417,0.3553
1,0,Proposed,0.305556,0.540372,0.312666
1,2,RYGH-1,0.2889,0.6122,0.2999
1,3,RYGH-4,0.2774,0.6177,0.2943
1,4,RYGH-3,0.2765,0.6162,0.2937
1,5,RYGH,0.273,0.598,0.2911
1,6,The basic end-to-end,0.2496,0.5135,0.2787
1,7,Basic e2e mid speed,0.2396,0.496,0.2668
1,8,gsl_zs_rrf1,0.2311,0.5658,0.2584
1,9,bio-answerfinder-2,0.2613,0.4715,0.2578


In [None]:
# Per-batch comparison: for every batch, draw one grouped bar chart showing all
# metrics for each system and save it as a PNG in RESULTS_DIR.
#
# The results frame exposes capitalised column names ('System', 'Batch'), as
# the table preview and the groupby("Batch") call earlier in the notebook show;
# the lower-case keys used here before raised a KeyError.

# Set the bright color palette (identical for every figure, so set it once)
sns.set_palette("bright")

for batch in results_df['Batch'].unique():
    # Batch number whose data is visualised in this iteration
    selected_batch = batch

    # Filter the data for the selected batch
    selected_batch_data = results_df[results_df['Batch'] == selected_batch]

    # Melt the dataframe to combine all metrics into a single column
    # (long format: one row per system/metric pair)
    melted_data = pd.melt(selected_batch_data, id_vars=['System'], value_vars=list(metrics))

    # Create a grouped bar plot to visualize the comparison of all metrics for each system
    plt.figure(figsize=(12, 6))
    ax = sns.barplot(x='System', y='value', hue='variable', data=melted_data, errorbar=None)

    # Display numeric values on top of each bar (10 points above the bar's top edge)
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.2f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', xytext=(0, 10), textcoords='offset points')

    # Set the title, x-axis label, and y-axis label
    plt.xlabel('System')
    plt.ylabel('Score')
    title = f"[{challenge}] Comparison of Systems by Each Team (Batch {selected_batch})"
    plt.title(title)
    plt.xticks(rotation=45)
    plt.tight_layout()

    # save the plot as png (the title doubles as the file name)
    plt.savefig(f"{RESULTS_DIR}/{title}.png")
    # Show the plot
    plt.show()


In [None]:
# Grid overview: one grouped bar chart per batch, three charts per row, saved
# together as a single PNG in RESULTS_DIR.
#
# Fixes compared with the previous version of this cell:
#   * uses the capitalised column names ('System', 'Batch') that the results
#     frame actually has -- the lower-case keys raised a KeyError;
#   * asks for a 2-D axes array (squeeze=False), so indexing axes[row, col]
#     also works when all batches fit in a single row;
#   * hides grid cells that have no batch to show.

# Set the bright color palette
sns.set_palette("bright")

# Get unique batch numbers
unique_batches = results_df['Batch'].unique()

# Create a grid of plots
num_cols = 3  # Number of columns in the grid
num_rows = max(1, -(-len(unique_batches) // num_cols))  # ceiling division, at least one row

fig, axes = plt.subplots(num_rows, num_cols, figsize=(3*10, 7 * num_rows), squeeze=False)

for i, batch in enumerate(unique_batches):
    row_idx = i // num_cols
    col_idx = i % num_cols
    ax = axes[row_idx, col_idx]

    # Filter the data for the selected batch
    selected_batch_data = results_df[results_df['Batch'] == batch]

    # Melt the dataframe to combine all metrics into a single column
    melted_data = pd.melt(selected_batch_data, id_vars=['System'], value_vars=list(metrics))

    # Create a grouped bar plot to visualize the comparison of all metrics for each system
    sns.barplot(x='System', y='value', hue='variable', data=melted_data, ax=ax, errorbar=None)

    # Display numeric values on top of each bar
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.2f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', xytext=(0, 10), textcoords='offset points')

    # Set the title, x-axis label, and y-axis label
    ax.set_xlabel('System')
    ax.set_ylabel('Score')
    ax.set_title(f'[{challenge}] Batch {batch}')

    # rotate the x tick labels without re-assigning their text
    ax.tick_params(axis='x', labelrotation=45)
    ax.legend(title='Metrics')

# Hide the grid cells that were not used by any batch
for unused_ax in axes.flat[len(unique_batches):]:
    unused_ax.set_visible(False)

# Adjust layout
fig.tight_layout()

# save the plot as png (the title doubles as the file name)
title = f'[{challenge}] Comparison of Systems by Team (all in one)'
fig.savefig(f"{RESULTS_DIR}/{title}.png")

# Show the plot
plt.show()


In [None]:
# Average every metric over all batches for each system, then rank the systems
# by F-measure (position 2 of `metrics`).
# The grouping key is 'System': the results frame uses capitalised column
# names, so the lower-case 'system' used before raised a KeyError.
data_grouped = results_df.groupby(['System'])[list(metrics)].mean()
# the sorted frame is the cell's displayed output; data_grouped itself stays unsorted
data_grouped.sort_values(by=metrics[2], ascending=False)

In [None]:
# Average-over-batches comparison: one grouped bar chart with all metrics for
# each system, saved as a PNG in RESULTS_DIR.
# Column keys are 'System' and 'Batch' (capitalised, as in the loaded results
# frame); the lower-case keys used before raised a KeyError.

# Melt the dataframe to combine all metrics into a single column
melted_data = pd.melt(results_df, id_vars=['System', 'Batch'], value_vars=list(metrics))

# Set the bright color palette
sns.set_palette("bright")

# Create a grouped bar plot to visualize the comparison of all metrics for each system.
# Rows from different batches share the same (system, metric) pair; barplot
# aggregates them into one bar using its default estimator.
plt.figure(figsize=(12, 6))
ax = sns.barplot(x='System', y='value', hue='variable', data=melted_data, errorbar=None)

# Display numeric values on top of each bar
for p in ax.patches:
    ax.annotate(f'{p.get_height():.2f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='center', xytext=(0, 10), textcoords='offset points')

plt.xlabel('System')
plt.ylabel('Score')
title = f"[{challenge}] Comparison of Systems by Each Team (Average of All Batches)"
plt.title(title)
plt.xticks(rotation=45)
plt.tight_layout()

# save the plot (the title doubles as the file name)
plt.savefig(f"{RESULTS_DIR}/{title}.png")

# Show the plot
plt.show()

In [None]:
# Trend plot: metric values across batches, one line per (metric, system)
# combination -- colour encodes the metric, line style/marker the system.
# Column keys are 'System' and 'Batch' (capitalised, as in the loaded results
# frame); the lower-case keys used before raised a KeyError.

# Melt the dataframe to combine all metrics into a single column
melted_data = pd.melt(results_df, id_vars=['System', 'Batch'], value_vars=list(metrics))

# Set the bright color palette
sns.set_palette("bright")

# Create a line plot to visualize the trends over batches for each system and metric
plt.figure(figsize=(12, 6))
sns.lineplot(x='Batch', y='value', hue='variable', style='System', data=melted_data, markers=True)
plt.xlabel('Batch')
plt.ylabel('Score')
title = f"[{challenge}] Trends over Batches"
plt.title(title)
# place the legend outside the axes, to the right of the plot
plt.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0.)
plt.tight_layout()

# save the plot (the title doubles as the file name)
plt.savefig(f"{RESULTS_DIR}/{title}.png")

# Show the plot
plt.show()

In [None]:
# Trend plot restricted to the three systems with the best mean F-measure.
# All column keys follow the loaded results frame ('System', 'Batch',
# 'Mean precision', 'Recall', 'F-Measure'); the snake_case names used before
# do not exist in that frame and raised a KeyError.

# Find the top 3 systems based on metric for the entire dataset
metric = 'F-Measure'
top_3_systems = results_df.groupby('System')[metric].mean().nlargest(3).index

# Filter the data to include only the top 3 systems
top_3_data = results_df[results_df['System'].isin(top_3_systems)]

# Melt the dataframe to combine all metrics into a single column
# (`metrics` holds exactly the three metric columns of the results frame)
melted_data = pd.melt(top_3_data, id_vars=['System', 'Batch'], value_vars=list(metrics))

# Set the bright color palette
sns.set_palette("bright")

# Create a line plot to visualize the trends over batches for the top 3 systems and metrics
plt.figure(figsize=(12, 6))
sns.lineplot(x='Batch', y='value', hue='variable', style='System', data=melted_data, markers=True)
plt.xlabel('Batch')
plt.ylabel('Score')
title = f"[{challenge}]Trends over Batches for Top 3 Systems (Based on {metric})"
plt.title(title)
# place the legend outside the axes, to the right of the plot
plt.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0.)
plt.tight_layout()

# save the plot (the title doubles as the file name)
plt.savefig(f"{RESULTS_DIR}/{title}.png")

# Show the plot
plt.show()

# Answer Extraction

In [None]:
# Answer-extraction setup: resolve the project directories from the per-user
# JSON config, then load the per-team scores and the proposed system's scores.
try:
    # the config file is looked up in the user's home directory
    home_dir = os.path.expanduser("~")
    with open(f"{home_dir}/.biomedqa_dir.json", encoding="utf-8") as fp:
        dir_dict = json.load(fp)
except Exception:
    # print a hint, then let the original error propagate unchanged
    print("Error: unable to load directory dictionary. Please run setup.py")
    raise

# project directories taken from the config
BASE_DIR, DATA_DIR, MODEL_DIR = dir_dict["base_dir"], dir_dict["data_dir"], dir_dict["model_dir"]
LOG_DIR, RESULTS_DIR = dir_dict["log_dir"], dir_dict["results_dir"]

# experiment parameters
challenge_no = 10
DATASET, YEAR = 'bioasq', '2022'
DATASET_NAME = 'Task10BGoldenEnriched'
q_type = 'list'
RESULTS_DIR = f"{RESULTS_DIR}/biomed_qa/answer_extraction/transformer/minilm_ft/{DATASET}/{YEAR}/{DATASET_NAME}"
TEAM_SCORE_FILE = f"{RESULTS_DIR}/{DATASET_NAME}_{q_type}_results_by_team.csv"

# team scores, minus every row whose system is exactly 'proposed'
team_scores_df = pd.read_csv(TEAM_SCORE_FILE)
team_scores_df = team_scores_df[team_scores_df['system'].ne('proposed')]

# scores of the proposed system for the chosen model / context source
model = 'list_model_15'
context_scr = 'by_snippets'
PROPOSED_SCORE_DIR = f"{RESULTS_DIR}/{model}/{context_scr}"
PROPOSED_SCORE_FILE = f"{PROPOSED_SCORE_DIR}/{DATASET_NAME}_{q_type}_results.csv"
# tag the rows with a system name and a phase label
proposed_scores_df = pd.read_csv(PROPOSED_SCORE_FILE).assign(system='Proposed', phase='A & B')

score_df = pd.concat([team_scores_df, proposed_scores_df], ignore_index=True, sort=False)
# NOTE(review): the combined frame is replaced straight away by the proposed
# scores alone, so the team rows never reach the plots below -- confirm whether
# this override is intentional or a leftover.
score_df = proposed_scores_df
display(score_df)

# plot-title prefix, and the metric columns (everything from the third column on)
challenge = f"BioASQ Task {challenge_no}b - Phase B"
metrics = score_df.columns[2:]
display(metrics)

In [16]:
# Answer-extraction setup (combined factoid + list scores): resolve the project
# directories from the per-user JSON config and load the score table.
try:
    # load dir_dict from json file in home directory
    home_dir = os.path.expanduser("~")
    with open(f"{home_dir}/.biomedqa_dir.json", encoding="utf-8") as fp:
        dir_dict = json.load(fp)
except Exception as exc:
    print("Error: unable to load directory dictionary. Please run setup.py")
    raise exc

# set directories
BASE_DIR = dir_dict["base_dir"]
DATA_DIR = dir_dict["data_dir"]
MODEL_DIR = dir_dict["model_dir"]
LOG_DIR = dir_dict["log_dir"]
RESULTS_DIR = dir_dict["results_dir"]

# set parameters
challenge_no = 10
DATASET = 'bioasq'
YEAR = '2022'
DATASET_NAME = 'Task10BGoldenEnriched'
q_type = 'list'
RESULTS_DIR = f"{RESULTS_DIR}/biomed_qa/answer_extraction/transformer/minilm_ft/{DATASET}/{YEAR}/{DATASET_NAME}"

# Load the scores into `score_df`, the name that is displayed here and read by
# the following cells. The file used to be read into `scores_df`, so this cell
# showed whatever an earlier cell had left in `score_df` (hidden state).
score_df = pd.read_csv(f"{RESULTS_DIR}/{DATASET_NAME}_factoid_and_list.csv")
scores_df = score_df  # former name, kept as an alias for any code still using it
display(score_df)

# set challenge & metrics (metric columns = everything from the third column on)
challenge = f"BioASQ Task {challenge_no}b - Phase B"
metrics = score_df.columns[2:]
display(metrics)

Unnamed: 0,System,Batch,factoid_strict,factoid_lenient,factoid_mrr,list_mean_prec,list_recall,list_F-measure
0,Proposed(golden_snippets),1,0.6176,0.8823,0.7132,0.79676,0.6511,0.66143
1,Proposed(golden_documents),1,0.4411,0.7941,0.5696,0.52710,0.7134,0.55940
2,Proposed(document_retrieval),1,0.3823,0.6764,0.4828,0.23860,0.6380,0.37920
3,UDEL-LAB5,1,0.3824,0.5882,0.4706,0.47140,0.8310,0.58100
4,Ir_sys1,1,0.4118,0.5000,0.4559,0.57140,0.4702,0.48870
...,...,...,...,...,...,...,...,...
170,AUEB-System4,6,0.1667,0.5000,0.3056,0.34670,0.1491,0.19120
171,Ir_sys3,6,0.1667,0.5000,0.2917,0.58520,0.3380,0.37400
172,NCU-IISR-AS-GIS-5,6,0.1667,0.3333,0.2222,0.69330,0.3108,0.34580
173,NCU-IISR/AS-GIS-3,6,0.1667,0.3333,0.2222,0.69330,0.3108,0.34580


Index(['factoid_strict', 'factoid_lenient', 'factoid_mrr', 'list_mean_prec',
       'list_recall', 'list_F-measure'],
      dtype='object')

In [18]:
# For every batch, list the (up to 35) systems with the highest value of the
# metric found at position `index` of `metrics`.
index = 2
ranking_metric = metrics[index]
print(f"Top {ranking_metric} scores of each batch:")
with pd.option_context('display.max_rows', None):  # lift the row limit so no row is elided
    top_by_batch = score_df.groupby("Batch").apply(
        lambda batch_rows: batch_rows.nlargest(35, ranking_metric)
    )
    # the batch is already part of the resulting index, so the column is dropped
    display(top_by_batch.drop(columns=["Batch"]))

Top factoid_mrr scores of each batch:


Unnamed: 0_level_0,Unnamed: 1_level_0,System,factoid_strict,factoid_lenient,factoid_mrr,list_mean_prec,list_recall,list_F-measure
Batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,Proposed(golden_snippets),0.6176,0.8823,0.7132,0.79676,0.6511,0.66143
1,1,Proposed(golden_documents),0.4411,0.7941,0.5696,0.5271,0.7134,0.5594
1,2,Proposed(document_retrieval),0.3823,0.6764,0.4828,0.2386,0.638,0.3792
1,3,UDEL-LAB5,0.3824,0.5882,0.4706,0.4714,0.831,0.581
1,4,Ir_sys1,0.4118,0.5,0.4559,0.5714,0.4702,0.4887
1,5,Ir_sys3,0.4118,0.5294,0.4559,0.6224,0.4881,0.5238
1,6,UDEL-LAB2,0.3824,0.5588,0.4534,0.7201,0.8405,0.7469
1,7,UDEL-LAB1,0.3529,0.5882,0.4397,0.6974,0.8226,0.7346
1,8,lalala,0.3824,0.5,0.4363,0.6046,0.7286,0.6459
1,9,Ir_sys2,0.3529,0.5,0.4176,0.5595,0.4738,0.4948


In [None]:
# Per-batch comparison: one grouped bar chart (all metrics, every system) per
# batch, each saved as a PNG in PROPOSED_SCORE_DIR.
# NOTE(review): this cell reads lower-case 'system'/'batch' columns and needs
# q_type, context_scr and PROPOSED_SCORE_DIR from an earlier cell -- confirm
# that the loading cell run before it provides all of them.
for batch in score_df['batch'].unique():
    selected_batch = batch

    # rows of the current batch, reshaped to one row per (system, metric) pair
    selected_batch_data = score_df.loc[score_df['batch'] == selected_batch]
    melted_data = selected_batch_data.melt(id_vars=['system'], value_vars=metrics)

    # bright colours for the metric bars
    sns.set_palette("bright")

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x='system', y='value', hue='variable', data=melted_data, errorbar=None, ax=ax)

    # write each bar's value (two decimals) 10 points above its top edge
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2., height),
                    xytext=(0, 10), textcoords='offset points', ha='center', va='center')

    # labels and title; the title is reused as the PNG file name
    title = f'[{challenge}] Comparison of Systems by Team for {q_type} questions Batch {selected_batch} (context={context_scr})'
    ax.set_xlabel('System')
    ax.set_ylabel('Score')
    ax.set_title(title)
    plt.xticks(rotation=45)
    fig.tight_layout()

    fig.savefig(f"{PROPOSED_SCORE_DIR}/{title}.png")
    plt.show()

In [None]:
# Grid overview: one grouped bar chart per batch, three charts per row, saved
# together as a single PNG in PROPOSED_SCORE_DIR.
#
# Fixes compared with the previous version of this cell:
#   * asks for a 2-D axes array (squeeze=False), so indexing axes[row, col]
#     also works when all batches fit in a single row (three batches or fewer);
#   * hides grid cells that have no batch to show.
# NOTE(review): reads lower-case 'system'/'batch' columns -- confirm that the
# loading cell run before this one provides them.

# Set the bright color palette
sns.set_palette("bright")

# Get unique batch numbers
unique_batches = score_df['batch'].unique()

# Create a grid of plots
num_cols = 3  # Number of columns in the grid
num_rows = max(1, -(-len(unique_batches) // num_cols))  # ceiling division, at least one row

fig, axes = plt.subplots(num_rows, num_cols, figsize=(3*10, 7 * num_rows), squeeze=False)

for i, batch in enumerate(unique_batches):
    row_idx = i // num_cols
    col_idx = i % num_cols
    ax = axes[row_idx, col_idx]

    # Filter the data for the selected batch
    selected_batch_data = score_df[score_df['batch'] == batch]

    # Melt the dataframe to combine all metrics into a single column
    melted_data = pd.melt(selected_batch_data, id_vars=['system'], value_vars=metrics)

    # Create a grouped bar plot to visualize the comparison of all metrics for each system
    sns.barplot(x='system', y='value', hue='variable', data=melted_data, ax=ax, errorbar=None)

    # Display numeric values on top of each bar
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.2f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', xytext=(0, 10), textcoords='offset points')

    # Set the title, x-axis label, and y-axis label
    ax.set_xlabel('System')
    ax.set_ylabel('Score')
    ax.set_title(f'[{challenge}] Batch {batch}')

    # rotate the x tick labels without re-assigning their text
    ax.tick_params(axis='x', labelrotation=45)
    ax.legend(title='Metrics')

# Hide the grid cells that were not used by any batch
for unused_ax in axes.flat[len(unique_batches):]:
    unused_ax.set_visible(False)

# Adjust layout
fig.tight_layout()

# save the plot as png (the title doubles as the file name)
title = f'[{challenge}] Comparison of Systems by Team for {q_type} questions all in one (context={context_scr})'
fig.savefig(f"{PROPOSED_SCORE_DIR}/{title}.png")
# Show the plot
plt.show()


In [None]:
# Mean of every metric per system (restricted to the metric columns), shown
# ranked by the metric at position `index` of `metrics`.
data_grouped = score_df.groupby(['system'])[metrics].mean()
index = 2
sort_metric = metrics[index]
print(f"sorted by {sort_metric} scores:")
# the sorted frame is the cell's displayed output; data_grouped itself stays unsorted
data_grouped.sort_values(by=sort_metric, ascending=False)

In [None]:
# Average-over-batches comparison: one grouped bar chart with all metrics for
# each system, saved as a PNG in PROPOSED_SCORE_DIR.

# long format: one row per (system, batch, metric) combination
melted_data = score_df.melt(id_vars=['system', 'batch'], value_vars=metrics)

# bright colours for the metric bars
sns.set_palette("bright")

# rows from different batches that share a (system, metric) pair are combined
# into a single bar by barplot's default estimator
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='system', y='value', hue='variable', data=melted_data, errorbar=None, ax=ax)

# write each bar's value (two decimals) 10 points above its top edge
for bar in ax.patches:
    height = bar.get_height()
    ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2., height),
                xytext=(0, 10), textcoords='offset points', ha='center', va='center')

# alternative title that follows context_scr instead of the fixed wording:
# title = f'[{challenge}] Comparison of Systems by Team for {q_type} questions Average of All Batches (context={context_scr})'
title = f'[{challenge}] Comparison of Systems by Team for {q_type} questions Average of All Batches (Golden snippets as input)'
ax.set_xlabel('System')
ax.set_ylabel('Score')
ax.set_title(title)
plt.xticks(rotation=45)
fig.tight_layout()

# the title is reused as the PNG file name
fig.savefig(f"{PROPOSED_SCORE_DIR}/{title}.png")
plt.show()

In [None]:
# Keep only the rows whose phase label is 'A & B', then show the three best
# rows of every batch for the metric at position `index` of `metrics`.
in_both_phases = score_df['phase'] == 'A & B'
both_phase_systems = score_df[in_both_phases]
index = 2
print(f"Top {metrics[index]} scores of each batch:")
top3_by_batch = both_phase_systems.groupby("batch").apply(
    lambda batch_rows: batch_rows.nlargest(3, metrics[index])
)
display(top3_by_batch)

In [None]:
# Only when the context source is 'by_lda': grouped bar chart of all metrics
# for the systems labelled 'A & B', saved as a PNG in PROPOSED_SCORE_DIR.
if context_scr == 'by_lda':
    # rows with phase 'A & B', reshaped to one row per (system, batch, metric)
    both_phase_systems = score_df.loc[score_df['phase'] == 'A & B']
    melted_data = both_phase_systems.melt(id_vars=['system', 'batch'], value_vars=metrics)

    # bright colours for the metric bars
    sns.set_palette("bright")

    # rows from different batches that share a (system, metric) pair are
    # combined into a single bar by barplot's default estimator
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x='system', y='value', hue='variable', data=melted_data, errorbar=None, ax=ax)

    # write each bar's value (two decimals) 10 points above its top edge
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2., height),
                    xytext=(0, 10), textcoords='offset points', ha='center', va='center')

    # labels and title; the title is reused as the PNG file name
    title = f'[{challenge}] Systems participated in A & B for {q_type} questions Average of All Batches (context={context_scr})'
    ax.set_xlabel('System')
    ax.set_ylabel('Score')
    ax.set_title(title)
    plt.xticks(rotation=45)
    fig.tight_layout()

    fig.savefig(f"{PROPOSED_SCORE_DIR}/{title}.png")
    plt.show()