In [10]:
import json

from openai import OpenAI
import os
import pandas as pd
from fc_metric import calculate_fc_using_gpt

In [2]:
# Fail fast with a readable message when no API key is configured (unset or
# empty), instead of failing later inside the client.
# NOTE(review): OpenAI() is presumably configured from OPENAI_API_KEY - confirm.
if not os.environ.get("OPENAI_API_KEY", ""):
    raise ValueError(
        "Please set the OPENAI_API_KEY environment variable before running the factual consistency metric script"
    )

client = OpenAI()

In [73]:
# Base name (no directory, no extension) of the abstracts CSV; the other cells
# derive the input path, the request file name and the scores file name from it.
abstracts_file_name = "filtered_data_with_abstracts"

In [3]:
# Load the abstracts table from the data directory next to this notebook and
# display it as the cell output.
df = pd.read_csv("../data/" + abstracts_file_name + ".csv")
df

Unnamed: 0,id,title,url,identifier,sem_scholar_title,sem_scholar_abstract
0,811az5,Firearm Injuries Drop 20 Percent When Gun Owne...,http://www.nejm.org/doi/full/10.1056/NEJMc1712773,DOI:10.1056/NEJMc1712773,Reduction in Firearm Injuries during NRA Annua...,Decline in Firearm Injuries during NRA Convent...
1,814dwi,Supplementation with probiotics during late pr...,http://journals.plos.org/plosmedicine/article?...,DOI:10.1371/journal.pmed.1002507,Diet during pregnancy and infancy and risk of ...,Background There is uncertainty about the infl...
2,815lr6,Undisclosed Conflicts of Interests among Biome...,https://www.ncbi.nlm.nih.gov/pubmed/29400625,PMID:29400625,Undisclosed conflicts of interest among biomed...,ABSTRACT Background: Textbooks are a formative...
3,817tfx,One more species of Tardigrade (famed for thei...,http://journals.plos.org/plosone/article?id=10...,DOI:10.1371/journal.pone.0192210,An integrative description of Macrobiotus shon...,Tardigrade research in Japan dates back over 1...
4,8183ly,There's a20% reduction in gun injuries during ...,http://www.nejm.org/doi/full/10.1056/NEJMc1712773,DOI:10.1056/NEJMc1712773,Reduction in Firearm Injuries during NRA Annua...,Decline in Firearm Injuries during NRA Convent...
...,...,...,...,...,...,...
5753,dpnj23,"Statements about building walls, deportation a...",https://journals.plos.org/plosone/article?id=1...,DOI:10.1371/journal.pone.0222837,Declared impact of the US President’s statemen...,"Statements about building walls, deportation a..."
5754,dpnu7e,Many college students will uncritically accept...,https://journals.plos.org/plosone/article?id=1...,DOI:10.1371/journal.pone.0223736,When calculators lie: A demonstration of uncri...,Calculators are often unnecessary to solve rou...
5755,dpqhem,New method for making polymers with perfectly ...,https://pubs.acs.org/doi/10.1021/jacs.9b08240,DOI:10.1021/jacs.9b08240,Homogenous Synthesis of Monodisperse High Olig...,Whereas monodisperse polymers are ubiquitous i...
5756,dptk4o,Research Shows That Doing a Bad Job Wrapping P...,https://onlinelibrary.wiley.com/doi/epdf/10.10...,DOI:10.1002/jcpy.1140,Presentation Matters: The Effect of Wrapping N...,While gift-givers typically wrap gifts prior t...


In [None]:
# -1 marks rows that have not been scored yet.
df["fc_score"] = -1

# Resume from a previously saved scores file when one exists in the working
# directory; otherwise start from a fresh copy of the input frame.
if os.path.exists(f"{abstracts_file_name}_fc_scores.csv"):
    df_out = pd.read_csv(f"{abstracts_file_name}_fc_scores.csv")
else:
    df_out = df.copy()

## Creating a batch request file for the factual consistency metric from a range of dataframe rows (lower bound exclusive, upper bound inclusive)

In [74]:
# Rows whose index label lies in (min_row, max_row] go into this request file:
# the lower bound is exclusive and the upper bound inclusive, so consecutive
# ranges such as (0, 3000] and (3000, 6000] do not overlap.
# The bounds are deliberately NOT called `min` / `max`: those names would shadow
# the Python builtins for every cell executed afterwards.
min_row = 3000
max_row = 6000
file_name = f"request_abstracts_{abstracts_file_name}_{min_row}_{max_row}.jsonl"

# One JSON object per line (JSONL). The file is only written, so "w" suffices.
with open(file_name, "w") as f:
    for i, row in df.iterrows():
        if i <= min_row:
            continue
        if i > max_row:
            break
        # NOTE(review): with return_prompt=True the helper is expected to return
        # the chat messages instead of calling the API - confirm in fc_metric.
        messages = calculate_fc_using_gpt(row["sem_scholar_abstract"], row["title"], row["sem_scholar_title"], False, return_prompt=True)
        # custom_id encodes both the dataframe row label and the post id so each
        # result can be matched back to, and validated against, its source row.
        request = {
            "custom_id": "row-" + str(i) + "-fc-id-" + str(row["id"]),
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "messages": messages
            }
        }
        f.write(json.dumps(request) + "\n")

In [75]:
# Upload the request file to OpenAI so it can serve as the input of a batch job.
# The context manager closes the local file handle once the upload call returns;
# passing a bare open(...) would leave it open for the rest of the kernel session.
with open(file_name, "rb") as request_fh:
    request_file = client.files.create(
        file=request_fh,
        purpose="batch",
    )
request_file

FileObject(id='file-8R1SEAsykzgRP9YDUhTNUw', bytes=7142857, created_at=1732573698, filename='request_abstracts_filtered_data_with_abstracts_3000_6000.jsonl', object='file', purpose='batch', status='processed', status_details=None)

In [70]:
# Start a batch job on the chat-completions endpoint, using the uploaded
# request file as input, and display the returned batch object.
batch = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id=request_file.id,
)
batch

Batch(id='batch_6744f82785548190a21d820dc6be5ec8', completion_window='24h', created_at=1732573223, endpoint='/v1/chat/completions', input_file_id='file-VNGtb539pWqoEPAqzFxf11', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1732659623, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

## Retrieving the batch to check its completion status

In [76]:
# Re-fetch the batch by id to refresh its status; re-run this cell until the
# displayed status shows that the batch has finished.
batch_id = batch.id
batch = client.batches.retrieve(batch_id)
batch

Batch(id='batch_6744f82785548190a21d820dc6be5ec8', completion_window='24h', created_at=1732573223, endpoint='/v1/chat/completions', input_file_id='file-VNGtb539pWqoEPAqzFxf11', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1732659623, failed_at=None, finalizing_at=None, in_progress_at=1732573225, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=2500))

## Downloading the output file to get the factual consistency scores and adding them to the output dataframe (`df_out`)

In [52]:
# The output file only exists once the batch has produced results; until then
# output_file_id is None, so fail with an actionable message instead of passing
# None to the files endpoint.
if batch.output_file_id is None:
    raise ValueError(
        f"Batch {batch.id} has no output file yet (status: {batch.status}); "
        "re-run the retrieval cell and try again once the batch has completed"
    )
response_content = client.files.content(batch.output_file_id)

In [64]:
# Parse the batch output (one JSON object per line) and copy each score into
# df_out at the row encoded in the request's custom_id, which has the form
# "row-<row label>-fc-id-<post id>".
unscored_ids = []  # custom_ids whose response carried no usable score
for line in response_content.iter_lines():
    if not line.strip():
        continue  # tolerate blank lines such as a trailing newline
    output = json.loads(line)
    custom_id = output["custom_id"]
    # maxsplit=4 keeps the post id intact even if it ever contains a "-".
    _, row_label, _, _, post_id = custom_id.split("-", 4)
    row_id = int(row_label)
    # The id was written with str(row["id"]), so compare it the same way.
    if str(df.loc[row_id, "id"]) != post_id:
        raise ValueError(f"Row ID {row_id} does not match post ID {post_id}")
    # -2 marks "a result came back but it carried no usable score". A failed
    # request (no choices in the body) or a reply that is not a JSON object must
    # not abort the whole loop and lose the rows that parsed correctly.
    try:
        score_json = output['response']["body"]["choices"][0]["message"]["content"]
        score = json.loads(score_json).get("score", -2)
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        score = -2
    if score == -2:
        unscored_ids.append(custom_id)
    df_out.loc[row_id, 'fc_score'] = score

# Surface the failures instead of hiding them behind the sentinel value.
if unscored_ids:
    print(f"{len(unscored_ids)} result(s) had no usable score and were marked -2, e.g. {unscored_ids[:10]}")
df_out

Unnamed: 0,id,title,url,identifier,sem_scholar_title,sem_scholar_abstract,fc_score
0,811az5,Firearm Injuries Drop 20 Percent When Gun Owne...,http://www.nejm.org/doi/full/10.1056/NEJMc1712773,DOI:10.1056/NEJMc1712773,Reduction in Firearm Injuries during NRA Annua...,Decline in Firearm Injuries during NRA Convent...,4
1,814dwi,Supplementation with probiotics during late pr...,http://journals.plos.org/plosmedicine/article?...,DOI:10.1371/journal.pmed.1002507,Diet during pregnancy and infancy and risk of ...,Background There is uncertainty about the infl...,5
2,815lr6,Undisclosed Conflicts of Interests among Biome...,https://www.ncbi.nlm.nih.gov/pubmed/29400625,PMID:29400625,Undisclosed conflicts of interest among biomed...,ABSTRACT Background: Textbooks are a formative...,5
3,817tfx,One more species of Tardigrade (famed for thei...,http://journals.plos.org/plosone/article?id=10...,DOI:10.1371/journal.pone.0192210,An integrative description of Macrobiotus shon...,Tardigrade research in Japan dates back over 1...,2
4,8183ly,There's a20% reduction in gun injuries during ...,http://www.nejm.org/doi/full/10.1056/NEJMc1712773,DOI:10.1056/NEJMc1712773,Reduction in Firearm Injuries during NRA Annua...,Decline in Firearm Injuries during NRA Convent...,4
...,...,...,...,...,...,...,...
5753,dpnj23,"Statements about building walls, deportation a...",https://journals.plos.org/plosone/article?id=1...,DOI:10.1371/journal.pone.0222837,Declared impact of the US President’s statemen...,"Statements about building walls, deportation a...",-1
5754,dpnu7e,Many college students will uncritically accept...,https://journals.plos.org/plosone/article?id=1...,DOI:10.1371/journal.pone.0223736,When calculators lie: A demonstration of uncri...,Calculators are often unnecessary to solve rou...,-1
5755,dpqhem,New method for making polymers with perfectly ...,https://pubs.acs.org/doi/10.1021/jacs.9b08240,DOI:10.1021/jacs.9b08240,Homogenous Synthesis of Monodisperse High Olig...,Whereas monodisperse polymers are ubiquitous i...,-1
5756,dptk4o,Research Shows That Doing a Bad Job Wrapping P...,https://onlinelibrary.wiley.com/doi/epdf/10.10...,DOI:10.1002/jcpy.1140,Presentation Matters: The Effect of Wrapping N...,While gift-givers typically wrap gifts prior t...,-1


In [65]:
# Persist the scores under the same "<abstracts_file_name>_fc_scores.csv" name
# that the resume cell looks for, so changing abstracts_file_name keeps the
# save and reload paths in sync.
df_out.to_csv(f"{abstracts_file_name}_fc_scores.csv", index=False)