In [36]:
from openai import OpenAI
from openai import AsyncOpenAI
import asyncio
import json
import gzip
import os
from pydantic import BaseModel, Field, RootModel

from typing import Dict

# Load data

In [5]:
root_dir = "../data"

In [6]:
with open("../config.json") as f:
        config = json.load(f)

In [7]:
paper_list = []
with gzip.open(os.path.join(root_dir, "2024-04-23.json.gz")) as f:
    paper_list = json.loads(f.read().decode("utf-8"))

In [8]:
type(paper_list)

dict

In [9]:
temp_paper_list = dict()
for key, value in paper_list.items():
    temp_paper_list[key] = value
    if len(temp_paper_list) > 4:
        break

In [11]:
with open("../data/test_arxiv_paper_info.json", "w") as f:
    json.dump(temp_paper_list, f)

In [12]:
temp_paper_list

{'oai:arXiv.org:2404.13060v1': {'id': 'oai:arXiv.org:2404.13060v1',
  'title': 'The Necessity of AI Audit Standards Boards',
  'abstract': "arXiv:2404.13060v1 Announce Type: new \nAbstract: Auditing of AI systems is a promising way to understand and manage ethical problems and societal risks associated with contemporary AI systems, as well as some anticipated future risks. Efforts to develop standards for auditing Artificial Intelligence (AI) systems have therefore understandably gained momentum. However, we argue that creating auditing standards is not just insufficient, but actively harmful by proliferating unheeded and inconsistent standards, especially in light of the rapid evolution and ethical and safety challenges of AI. Instead, the paper proposes the establishment of an AI Audit Standards Board, responsible for developing and updating auditing methods and standards in line with the evolving nature of AI technologies. Such a body would ensure that auditing practices remain rele

In [42]:
with gzip.open(os.path.join(root_dir, "2024-04-26.json.gz"), 'wb') as f:
    bytes_to_write = json.dumps(temp_paper_list).encode("utf-8")
    f.write(bytes_to_write)

In [4]:
len(paper_list)

195

In [16]:
exp_paper_list = []
for paper in paper_list.values():
    exp_paper_list.append(paper)
    if len(exp_paper_list) > 4:
        break

In [17]:
len(exp_paper_list)

5

In [18]:
exp_paper_list[0]

{'id': 'oai:arXiv.org:2404.13060v1',
 'title': 'The Necessity of AI Audit Standards Boards',
 'abstract': "arXiv:2404.13060v1 Announce Type: new \nAbstract: Auditing of AI systems is a promising way to understand and manage ethical problems and societal risks associated with contemporary AI systems, as well as some anticipated future risks. Efforts to develop standards for auditing Artificial Intelligence (AI) systems have therefore understandably gained momentum. However, we argue that creating auditing standards is not just insufficient, but actively harmful by proliferating unheeded and inconsistent standards, especially in light of the rapid evolution and ethical and safety challenges of AI. Instead, the paper proposes the establishment of an AI Audit Standards Board, responsible for developing and updating auditing methods and standards in line with the evolving nature of AI technologies. Such a body would ensure that auditing practices remain relevant, robust, and responsive to t

In [47]:
class LLMPaperReader:
    system_message = """
        You are an assistant to help the user decide if a paper is very relevant to the topics of interests.
        """

    user_message = """
        Please read the following paper title and abstract:
        --------------
        Title: {title}
        Abstract: {abstract}
        --------------
        Based on the title and abstract, please rate the direct relevance of the paper with the following topics:
        --------------
        {topics}
        --------------
        For each topic, rate the relevance as a number between 0 and 1, where 0 means not relevant and 1 means very relevant.
        The paper MUST directly mention the topics to be relevant; papers with indirect relations and potential implications should have scores close to 0.
        If the paper is relevant to the topic, provide a short explanation; otherwise, leave the explanation empty.
        Use your best guess when you are not sure.
        The output should be in JSON format and follow the following schema:
        --------------
        ```json
        {{
            'topic 1': {{
                'relevance': 0,
                'reason': ''
            }},
            'topic 2': {{
                'relevance': 0.9,
                'reason': 'The paper ....'
            }}
        }}
         ```
    """

    def __init__(self, model, topics):
        self.model = model
        self.topics = topics
        self.client = AsyncOpenAI()

    async def read_papers(self, papers):
        responses = await asyncio.gather(
            *[self.read_paper(paper) for paper in papers]
        )
        return responses

    async def read_paper(self, paper):
        response = await self._call_api(paper)
        response_content = response.choices[0].message.content
        return response_content
        # response_json = json.loads(response_content)
        # return response_json

    async def _call_api(self, paper):
        response = await self.client.chat.completions.create(
            model=self.model,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": self.system_message},
                {
                    "role": "user",
                    "content": self.user_message.format(
                        title=paper["title"],
                        abstract=paper["abstract"],
                        topics=self.topics,
                    ),
                },
            ],
        )
        return response


In [48]:
llm_reader = LLMPaperReader(config["openai_model"], config["topics"])

In [49]:
results = await llm_reader.read_paper(exp_paper_list[0])

In [50]:
results

'\n    {\n        "Security of AI and language models": {\n            "relevance": 0,\n            "reason": ""\n        },\n        "Applications of AI and language models in social science research": {\n            "relevance": 0,\n            "reason": ""\n        },\n        "Using AI to simulate humans in various contexts": {\n            "relevance": 0,\n            "reason": ""\n        },\n        "Methods to increase the factuality of language model response": {\n            "relevance": 0,\n            "reason": ""\n        },\n        "AI and language models for generating misinformation or fact-checking": {\n            "relevance": 0.8,\n            "reason": "The paper focuses on the importance of auditing AI systems for ethical and safety reasons, which indirectly relates to the need for fact-checking and ensuring accuracy in AI-generated content."\n        }\n    }\n    '

In [37]:
class Judgement(BaseModel):
    relevance: float = Field(ge=0, le=1)
    reason: str

JudgementDict = RootModel[Dict[str, Judgement]]

In [34]:
Judgement.model_json_schema()

{'properties': {'relevance': {'maximum': 1.0,
   'minimum': 0.0,
   'title': 'Relevance',
   'type': 'number'},
  'reason': {'title': 'Reason', 'type': 'string'}},
 'required': ['relevance', 'reason'],
 'title': 'Judgement',
 'type': 'object'}

In [45]:
temp_judgement = results["AI and language models for generating misinformation or fact-checking"]

In [63]:
validation_result = JudgementDict.model_validate_json(results)

In [75]:
validation_result.model_dump_json()

'{"Security of AI and language models":{"relevance":0.0,"reason":""},"Applications of AI and language models in social science research":{"relevance":0.0,"reason":""},"Using AI to simulate humans in various contexts":{"relevance":0.0,"reason":""},"Methods to increase the factuality of language model response":{"relevance":0.0,"reason":""},"AI and language models for generating misinformation or fact-checking":{"relevance":0.8,"reason":"The paper focuses on the importance of auditing AI systems for ethical and safety reasons, which indirectly relates to the need for fact-checking and ensuring accuracy in AI-generated content."}}'

In [70]:
dump_result = validation_result.to_json()

AttributeError: 'RootModel[Dict[str, Judgement]]' object has no attribute 'to_json'

In [69]:
type(dump_result)

dict

In [33]:
print(Judgement.schema_json(indent=2))

{
  "properties": {
    "relevance": {
      "maximum": 1.0,
      "minimum": 0.0,
      "title": "Relevance",
      "type": "number"
    },
    "reason": {
      "title": "Reason",
      "type": "string"
    }
  },
  "required": [
    "relevance",
    "reason"
  ],
  "title": "Judgement",
  "type": "object"
}


In [34]:
results = await llm_reader.read_papers(exp_paper_list)

In [35]:
len(results)

5

In [19]:
loop = asyncio.get_event_loop()

In [22]:
loop.run_until_complete(llm_reader.read_paper(exp_paper_list[0]))

RuntimeError: This event loop is already running

In [46]:
judgement_list = []
with open("../data/2024-04-26.resp.json") as f:
    for line in f:
        temp_judgement = json.loads(line)
        judgement_list.append(temp_judgement)

In [47]:
judgement_list[0]

{'id': 'oai:arXiv.org:2404.13060v1',
 'judgement': {'Security of AI and language models': {'relevance': 0,
   'reason': ''},
  'Applications of AI and language models in social science research': {'relevance': 0,
   'reason': ''},
  'Using AI to simulate humans in various contexts': {'relevance': 0,
   'reason': ''},
  'Methods to increase the factuality of language model response': {'relevance': 0,
   'reason': ''},
  'AI and language models for generating misinformation or fact-checking': {'relevance': 0.9,
   'reason': 'The paper focuses on the auditing of AI systems and proposes the establishment of an AI Audit Standards Board to manage ethical problems and societal risks associated with AI systems. While it does not directly mention generating misinformation or fact-checking, the emphasis on ethical considerations and governance mechanisms aligns with the broader discussion around ensuring the responsible use of AI, which could indirectly relate to fact-checking and misinformation