In [31]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
ds = load_dataset("gaia-benchmark/GAIA", "2023_all")
validation = ds["validation"]
print(validation)
type(validation[1]["Annotator Metadata"])

Dataset({
    features: ['task_id', 'Question', 'Level', 'Final answer', 'file_name', 'file_path', 'Annotator Metadata'],
    num_rows: 165
})


dict

In [32]:
import csv
import psycopg2
import json
from psycopg2.extras import Json

# Database connection parameters
db_params = {
 
}
from datasets import load_dataset
import psycopg2
import json
from psycopg2.extras import Json


# Function to insert data into the Tasks table
def insert_task(cursor, task_id, question, level, expected_answer, file_name, file_path, annotations):
    sql = """
    INSERT INTO Tasks (TaskId, Question, ExpectedAnswer, Level, FileName, FilePath, Annotations)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (TaskId) DO UPDATE SET
    Question = EXCLUDED.Question,
    ExpectedAnswer = EXCLUDED.ExpectedAnswer,
    Level = EXCLUDED.Level,
    FileName = EXCLUDED.FileName,
    FilePath = EXCLUDED.FilePath,
    Annotations = EXCLUDED.Annotations
    """
    cursor.execute(sql, (task_id, question, expected_answer, level, file_name, file_path, Json(annotations)))

# Main function to process the dataset and insert data
def process_dataset():
    # Load the dataset
    ds = load_dataset("gaia-benchmark/GAIA", "2023_all")
    validation_set = ds["validation"]

    conn = psycopg2.connect(**db_params)
    cursor = conn.cursor()

    try:
        for row in validation_set:
            task_id = row['task_id']
            question = row['Question']
            level = row['Level']
            expected_answer = row['Final answer']
            file_name = row['file_name'] if row['file_name'] else None
            file_path = row['file_path'] if row['file_path'] else None
            annotations = row['Annotator Metadata']

            insert_task(cursor, task_id, question, level, expected_answer, file_name, file_path, annotations)

        conn.commit()
        print(f"Data inserted successfully! {validation_set.num_rows} rows processed.")

    except (Exception, psycopg2.Error) as error:
        print("Error while inserting data:", error)
        conn.rollback()

    finally:
        if conn:
            cursor.close()
            conn.close()

# Run the script
if __name__ == "__main__":
    process_dataset()

Data inserted successfully! 165 rows processed.


In [52]:
from datetime import datetime
from sqlalchemy import Boolean, DateTime
# data_layer.a

from sqlalchemy import create_engine, Column, Integer, String, JSON
# from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, declarative_base

Base = declarative_base()
from sqlalchemy import create_engine, Column, Integer, String, JSON, Boolean, DateTime
from sqlalchemy.orm import sessionmaker, declarative_base
from datetime import datetime

Base = declarative_base()

class Task(Base):
    __tablename__ = 'tasks'

    taskid = Column(String, primary_key=True, name="taskid")  # lowercase column names
    question = Column(String, nullable=False, name="question")
    expectedanswer = Column(String, name="expectedanswer")
    level = Column(Integer, name="level")
    filename = Column(String, name="filename")
    filepath = Column(String, name="filepath")
    annotations = Column(JSON, name="annotations")

    def __repr__(self):
        return f"<Task(taskid='{self.taskid}', question='{self.question[:30]}...', level={self.level})>"

class LLM(Base):
    __tablename__ = 'llms'

    llmid = Column(Integer, primary_key=True, name="llmid")
    llmname = Column(String, nullable=False, name="llmname")
    version = Column(String, name="version")
    parameters = Column(String, name="parameters")

    def __repr__(self):
        return f"<LLM(llmid={self.llmid}, llmname='{self.llmname}', version='{self.version}')>"

class LLMResponse(Base):
    __tablename__ = 'llmresponses'

    responseid = Column(Integer, primary_key=True, name="responseid")
    taskid = Column(String, name="taskid")
    llmid = Column(Integer, name="llmid")
    responsetext = Column(String, nullable=False, name="responsetext")
    isannotated = Column(Boolean, default=False, name="isannotated")
    resultcategory = Column(String, name="resultcategory")
    timestamp = Column(DateTime, default=datetime.utcnow, name="timestamp")

    def __repr__(self):
        return f"<LLMResponse(responseid={self.responseid}, taskid='{self.taskid}', llmid={self.llmid})>"

# Database connection
DATABASE_URL = ""
engine = create_engine(DATABASE_URL)
SessionMaker = sessionmaker(bind=engine)


In [58]:
# data_access.py

from sqlalchemy.orm import Session
# from data_layer import Task, LLM, LLMResponse

class DataAccess:
    def __init__(self, session: Session):
        self.session = session

    def get_all_tasks(self):
        return self.session.query(Task).all()

    def get_task_by_id(self, task_id: str):
        return self.session.query(Task).filter(Task.taskid == task_id).first()

    def get_tasks_by_level(self, level: int):
        return self.session.query(Task).filter(Task.level == level).all()

    def get_all_llms(self):
        return self.session.query(LLM).all()

    def get_llm_by_id(self, llm_id: int):
        return self.session.query(LLM).filter(LLM.llmid == llm_id).first()

    def get_responses_for_task(self, task_id: str):
        return self.session.query(LLMResponse).filter(LLMResponse.taskid == task_id).all()

    def get_responses_for_llm(self, llm_id: int):
        return self.session.query(LLMResponse).filter(LLMResponse.llmid == llm_id).all()

In [57]:
# main.py

# from sqlalchemy.orm import Session
# from data_layer import engine, Session as SessionMaker
# from data_access import DataAccess

def main():
    session = SessionMaker()
    data_access = DataAccess(session)

    try:
        # Get all tasks
        all_tasks = data_access.get_all_tasks()
        print(f"Total tasks: {len(all_tasks)}")

        # Get a specific task
        task = data_access.get_task_by_id("c61d22de-5f6c-4958-a7f6-5e9707bd3466")
        if task:
            print(f"Task question: {task.question}")
            print(f"Task expected answer: {task.expectedanswer}")
            print(f"Task annotations: {task.annotations}")

        # Get tasks by level
        level_2_tasks = data_access.get_tasks_by_level(2)
        print(f"Level 2 tasks: {len(level_2_tasks)}")

        # Get all LLMs
        all_llms = data_access.get_all_llms()
        print(f"Total LLMs: {len(all_llms)}")

        # Get responses for a specific task
        task_responses = data_access.get_responses_for_task("c61d22de-5f6c-4958-a7f6-5e9707bd3466")
        print(f"Responses for task: {len(task_responses)}")

    finally:
        session.close_all()
        

if __name__ == "__main__":
    main()

Total tasks: 165
Task question: A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?
Task expected answer: egalitarian
Task annotations: {'Steps': '1. Go to arxiv.org and navigate to the Advanced Search page.\n2. Enter "AI regulation" in the search box and select "All fields" from the dropdown.\n3. Enter 2022-06-01 and 2022-07-01 into the date inputs, select "Submission date (original)", and submit the search.\n4. Go through the search results to find the article that has a figure with three axes and labels on each end of the axes, titled "Fairness in Agreement With European Values: An Interdisciplinary Perspective on AI Regulation".\n5. Note the six words used as labels: deontological, egalitarian, localized, standardized, utilitarian, and cons

  session.close_all()
