## The repeatability analysis of structured reports generated by GPT-4-turbo, Institution A

In [4]:
import pandas as pd
import numpy as np
from scipy.stats import mode
from scipy.stats import norm
from statsmodels.stats.proportion import proportion_confint

# Read the three session workbooks (Excel format)
session1, session2, session3 = (
    pd.read_excel(path)
    for path in ('session1_final.xlsx', 'session2_final.xlsx', 'session3_final.xlsx')
)

# Join sessions 1 and 2 on Study_ID; column names they share are
# disambiguated with the _s1 / _s2 suffixes.
first_two_sessions = session1.merge(session2, on='Study_ID', suffixes=('_s1', '_s2'))

# Join session 3 on Study_ID. Column names that were suffixed above no longer
# collide, so the matching session 3 columns keep their original names.
# NOTE: both merges use pandas' default inner join, so only Study_IDs present
# in every session are kept.
merged_data = first_two_sessions.merge(session3, on='Study_ID')

# Function to determine the representative value, tie status, and columns where tie occurred
def determine_representative_value(row, columns=None):
    """Pick a majority-vote value per item from the three session results.

    Parameters
    ----------
    row : mapping or pandas.Series
        One study's merged record. For every item name ``c`` it must provide
        the keys ``"{c}_s1"``, ``"{c}_s2"`` and ``c`` (sessions 1, 2 and 3).
    columns : iterable of str, optional
        Item names to process. When omitted, every column of the module-level
        ``session1`` frame except the first one ('Study_ID') is used.

    Returns
    -------
    pandas.Series
        For each item ``c``: the representative value under ``c`` and a 0/1
        flag under ``"{c}_tie"``; plus ``'tie_columns'``, a comma-joined list
        of the items where no single value was most frequent.

    Notes
    -----
    Missing values take part in the vote as a category of their own. This
    keeps two missing results from being out-voted by a single non-missing
    one, and keeps three missing results from being reported as a tie.
    """
    if columns is None:
        columns = session1.columns[1:]  # Skip the 'Study_ID' column

    representative_value = {}
    tie_status = {}
    tie_columns = []

    for column in columns:
        values = [row[f"{column}_s1"], row[f"{column}_s2"], row[column]]

        # dropna=False so NaN is counted like any other answer
        counts = pd.Series(values).value_counts(dropna=False)
        leaders = counts[counts == counts.max()]

        if len(leaders) == 1:
            representative_value[column] = leaders.index[0]
            tie_status[column] = 0
        else:
            # No unique most-frequent value: fall back to session 1
            representative_value[column] = values[0]
            tie_status[column] = 1
            tie_columns.append(column)

    representative_value['tie_columns'] = ','.join(tie_columns)
    return pd.Series({**representative_value, **{f"{k}_tie": v for k, v in tie_status.items()}})

# Item columns shared by the sessions (everything after 'Study_ID')
item_columns = session1.columns[1:].tolist()

# Row-wise vote across the three sessions: one representative value and one
# "<item>_tie" flag per item, plus the 'tie_columns' summary string
conclusion_data = merged_data.apply(determine_representative_value, axis=1)

# Carry the study identifier over from the merged table
conclusion_data['Study_ID'] = merged_data['Study_ID']

# Study-level flag: the row-wise maximum of the per-item tie flags
tie_columns = [f"{column}_tie" for column in item_columns]
conclusion_data['tie'] = conclusion_data[tie_columns].max(axis=1)

# Keep the identifier, the representative values and the tie summary only
final_columns = ['Study_ID', *item_columns, 'tie', 'tie_columns']
conclusion_data = conclusion_data[final_columns]

# Write the conclusion table to a new Excel workbook
conclusion_data.to_excel('conclusion_final.xlsx', index=False)

print("Conclusion data has been saved to conclusion_final.xlsx")

Conclusion data has been saved to conclusion_final.xlsx


## 1. free-marginal multirater kappa

In [2]:
# Function to calculate free-marginal multirater kappa
def calculate_free_marginal_kappa(data, columns, num_categories=None):
    """Compute a free-marginal kappa from the agreement of several rating columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per rated subject.
    columns : list of str
        Names of the columns holding the ratings to compare.
    num_categories : int, optional
        Number of categories used for chance agreement (``Pe = 1 / k``).
        When omitted it is estimated from the data as the largest number of
        distinct non-missing values found in any single column.

    Returns
    -------
    tuple
        ``(kappa_free, Po, Pe)``. ``kappa_free`` is NaN when it is undefined
        (fewer than two categories); all three are NaN for an empty table.

    Notes
    -----
    ``Po`` is the share of rows in which every column holds the same
    non-missing value. A row containing a missing value never counts as
    agreement, which matches a plain ``a == b == c`` comparison.

    NOTE(review): Randolph's free-marginal multirater kappa defines observed
    agreement from pairwise rater agreement, so partial agreement (e.g. 2 of
    3) earns partial credit there but none here - confirm this stricter
    definition is intended.
    """
    # Extract relevant columns
    data_subset = data[columns]
    total_count = len(data_subset)

    if total_count == 0:
        # Nothing to rate: every quantity is undefined
        nan = float('nan')
        return nan, nan, nan

    # Calculate observed agreement (Po): compare every column with the first.
    # Element-wise equality is False for missing values, so rows with NaN are
    # not counted as agreeing.
    first_rating = data_subset.iloc[:, 0]
    rows_agree = data_subset.eq(first_rating, axis=0).all(axis=1)
    Po = rows_agree.sum() / total_count

    # Calculate expected agreement by chance (Pe)
    if num_categories is None:
        num_categories = data_subset.apply(lambda col: col.nunique()).max()

    if num_categories < 2:
        # With a single category 1 - Pe is zero, and with none Pe itself is
        # undefined; report kappa as NaN instead of dividing by zero.
        Pe = 1.0 if num_categories == 1 else float('nan')
        return float('nan'), Po, Pe

    Pe = 1 / num_categories

    # Calculate free-marginal kappa
    kappa_free = (Po - Pe) / (1 - Pe)

    return kappa_free, Po, Pe

# Function to calculate standard error and confidence intervals
def calculate_confidence_interval(kappa, Po, Pe, n, z=1.96):
    """Return a normal-approximation confidence interval around ``kappa``.

    The standard error is ``sqrt(Po * (1 - Po) / (n * (1 - Pe)**2))`` and the
    interval is ``kappa -/+ z * SE``, with each bound clamped to [-1, 1].
    When ``Po`` is exactly 0 or 1 the standard error is zero and the interval
    collapses to a single point.

    Parameters
    ----------
    kappa : float
        Point estimate the interval is centred on.
    Po : float
        Observed agreement proportion.
    Pe : float
        Expected chance agreement proportion.
    n : int
        Number of rated subjects.
    z : float, optional
        Normal quantile that scales the standard error (default 1.96).

    Returns
    -------
    tuple
        ``(ci_lower, ci_upper)``.
    """
    # Variance of kappa, then half-width of the interval
    variance = (Po * (1 - Po)) / (n * (1 - Pe) ** 2)
    margin = z * np.sqrt(variance)

    # Keep both bounds inside [-1, 1]
    lower_bound = max(kappa - margin, -1)
    upper_bound = min(kappa + margin, 1)

    return lower_bound, upper_bound

# Items to analyze. Each item maps to its three columns in merged_data:
# "<item>_s1" (session 1), "<item>_s2" (session 2) and "<item>" (session 3).
item_names = [
    '1-1. Appearance',
    '1-2. Size',
    '1-3. Location',
    '1-4. Pancreatic duct',
    '1-5. Biliary tree',
    '2-1. SMA Contact',
    '2-2. Celiac Axis Contact',
    '2-3. CHA Contact',
    '3-1. MPV Contact',
    '3-2. SMV Contact',
    '4-1. Liver lesions',
    '4-2. Peritoneal or omental nodules',
    '4-3. Ascites',
    '4-4. Suspicious lymph nodes',
    '4-5. Other extrapancreatic disease',
    '5-1. Resectability',
]
columns_to_analyze = {
    name: [f"{name}_s1", f"{name}_s2", name] for name in item_names
}

# Kappa and its 95% confidence interval per item; every item uses the full
# set of merged studies as its sample size
n_studies = len(merged_data)
results = {}
for key, columns in columns_to_analyze.items():
    kappa, Po, Pe = calculate_free_marginal_kappa(merged_data, columns)
    ci_lower, ci_upper = calculate_confidence_interval(kappa, Po, Pe, n_studies)
    results[key] = {
        'kappa': kappa,
        '95% CI lower': ci_lower,
        '95% CI upper': ci_upper,
    }

# One row per item, one column per statistic
results_df = pd.DataFrame(results).T

# Display the results
print(results_df)


                                       kappa  95% CI lower  95% CI upper
1-1. Appearance                     0.922705      0.871039      0.974371
1-2. Size                           0.991127      0.973811      1.000000
1-3. Location                       0.989565      0.969202      1.000000
1-4. Pancreatic duct                0.908696      0.843147      0.974244
1-5. Biliary tree                   0.960870      0.917171      1.000000
2-1. SMA Contact                    0.960870      0.917171      1.000000
2-2. Celiac Axis Contact            0.967391      0.930976      1.000000
2-3. CHA Contact                    0.988406      0.965780      1.000000
3-1. MPV Contact                    0.956522      0.914661      0.998383
3-2. SMV Contact                    0.945652      0.899061      0.992243
4-1. Liver lesions                  0.902174      0.840813      0.963535
4-2. Peritoneal or omental nodules  1.000000      1.000000      1.000000
4-3. Ascites                        1.000000      1

## 2. percent agreement

In [5]:
# Function to calculate percent agreement and 95% CI
def calculate_percent_agreement_with_ci(df1, df2, df3, alpha=0.05):
    """Percentage of positions where three result sequences agree, with a Wilson CI.

    Parameters
    ----------
    df1, df2, df3 : sequence
        Equal-length one-dimensional sequences of results (for example the
        ``.values`` arrays of three columns). They are compared position by
        position, so index labels of a pandas Series are ignored.
    alpha : float, optional
        Significance level passed to ``proportion_confint`` (default 0.05).

    Returns
    -------
    tuple
        ``(percent_agreement, ci_low, ci_upp)``, all on a 0-100 scale.
        All three are NaN when the inputs are empty.

    Raises
    ------
    ValueError
        If the three sequences differ in length.
    """
    total_count = len(df1)
    if len(df2) != total_count or len(df3) != total_count:
        raise ValueError("df1, df2 and df3 must have the same length")

    if total_count == 0:
        # No observations: the proportion and its interval are undefined
        nan = float('nan')
        return nan, nan, nan

    # A position agrees only when all three values are equal; NaN never
    # equals NaN, so positions with missing values count as disagreement.
    agreement_count = sum(1 for a, b, c in zip(df1, df2, df3) if a == b == c)

    percent_agreement = (agreement_count / total_count) * 100

    # Calculate confidence interval
    ci_low, ci_upp = proportion_confint(count=agreement_count, nobs=total_count, alpha=alpha, method='wilson')

    return percent_agreement, ci_low * 100, ci_upp * 100

# Items to analyze. Each item maps to its three columns in merged_data:
# "<item>_s1" (session 1), "<item>_s2" (session 2) and "<item>" (session 3).
item_names = [
    '1-1. Appearance',
    '1-2. Size',
    '1-3. Location',
    '1-4. Pancreatic duct',
    '1-5. Biliary tree',
    '2-1. SMA Contact',
    '2-2. Celiac Axis Contact',
    '2-3. CHA Contact',
    '3-1. MPV Contact',
    '3-2. SMV Contact',
    '4-1. Liver lesions',
    '4-2. Peritoneal or omental nodules',
    '4-3. Ascites',
    '4-4. Suspicious lymph nodes',
    '4-5. Other extrapancreatic disease',
    '5-1. Resectability',
]
columns_to_analyze = {
    name: [f"{name}_s1", f"{name}_s2", name] for name in item_names
}

# Percent agreement and its 95% CI per item
percent_agreement_results = {}

for key, columns in columns_to_analyze.items():
    # Raw value arrays of the three session columns for this item
    df1, df2, df3 = (merged_data[column].values for column in columns)
    percent_agreement, ci_low, ci_upp = calculate_percent_agreement_with_ci(df1, df2, df3)
    percent_agreement_results[key] = {
        'Percent Agreement': percent_agreement,
        '95% CI Lower': ci_low,
        '95% CI Upper': ci_upp,
    }

# One row per item, one column per statistic
results_df = pd.DataFrame(percent_agreement_results).T

# Display the results
print(results_df)


                                    Percent Agreement  95% CI Lower  \
1-1. Appearance                             93.043478     86.871127   
1-2. Size                                   99.130435     95.238327   
1-3. Location                               99.130435     95.238327   
1-4. Pancreatic duct                        93.913043     87.966688   
1-5. Biliary tree                           97.391304     92.609938   
2-1. SMA Contact                            97.391304     92.609938   
2-2. Celiac Axis Contact                    97.391304     92.609938   
2-3. CHA Contact                            99.130435     95.238327   
3-1. MPV Contact                            96.521739     91.396704   
3-2. SMV Contact                            95.652174     90.224215   
4-1. Liver lesions                          92.173913     85.793118   
4-2. Peritoneal or omental nodules         100.000000     96.767577   
4-3. Ascites                               100.000000     96.767577   
4-4. S