In [1]:
import pandas as pd
import gzip
import json
import datetime
import os
import re

In [2]:
def remove_parentheses_content(s):
    """Return ``s`` with every ``(...)`` group deleted.

    A group runs from an opening parenthesis to the nearest closing one, so
    nested parentheses are not balanced and an unclosed ``(`` is left in
    place. Whitespace surrounding a removed group is kept untouched.
    """
    parenthesized = re.compile(r"\([^)]*\)")
    return parenthesized.sub("", s)

In [3]:
# Current local date as YYYY-MM-DD; used to build the dated input file names.
date = format(datetime.datetime.now(), "%Y-%m-%d")

In [6]:
# Directory (relative to the working directory) that the dated input files are read from.
temp_data_dir = "../data"

In [21]:
# Load the gzip-compressed JSON dump for `date` into `paper_dict`.
paper_file = os.path.join(temp_data_dir, f"{date}.json.gz")
with gzip.open(paper_file, "rb") as compressed:
    # Decompress to bytes, decode as UTF-8, then parse the JSON document.
    paper_dict = json.loads(str(compressed.read(), "utf-8"))

In [33]:
# Preview only the first few entries; echoing the whole mapping floods the
# notebook output with every paper's title and abstract.
{key: paper_dict[key] for key in list(paper_dict)[:3]}

{'http://arxiv.org/abs/2312.06707': {'id': 'http://arxiv.org/abs/2312.06707',
  'title': "Exploring Public's Perception of Safety and Video Surveillance Technology: A Survey Approach. (arXiv:2312.06707v1 [cs.CY])",
  'abstract': "Addressing public safety effectively requires incorporating diverse\nstakeholder perspectives, particularly those of the community, which are often\nunderrepresented compared to other stakeholders. This study presents a\ncomprehensive analysis of the community's general public safety concerns, their\nview of existing surveillance technologies, and their perception of AI-driven\nsolutions for enhancing safety in urban environments, focusing on Charlotte,\nNC. Through a survey approach, including in-person surveys conducted in August\nand September 2023 with 410 participants, this research investigates\ndemographic factors such as age, gender, ethnicity, and educational level to\ngain insights into public perception and concerns toward public safety and\npossibl

In [12]:
# Load the judgements for `date`. The file is read as JSON Lines: one object
# per line carrying the paper 'id' and its 'judgement' payload.
judgement_file = os.path.join(temp_data_dir, f"{date}.resp.json")
judgement_results = {}
# Read explicitly as UTF-8 (the same decoding applied to the paper dump)
# instead of relying on the platform's default text encoding.
with open(judgement_file, encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline) rather than crashing in json.loads.
        if not line.strip():
            continue
        paper = json.loads(line)
        judgement_results[paper['id']] = paper['judgement']

In [16]:
# Spot-check the judgement loaded for a single paper id.
judgement_results['http://arxiv.org/abs/2312.06707']

{'Security of AI and language models': {'relevance': 0,
  'reason': 'The paper does not directly address security of AI and language models.'},
 'Applications of AI and language models in social science research': {'relevance': 0.9,
  'reason': 'The paper directly addresses the application of AI and surveillance technology in social science research, particularly in the context of public safety concerns.'},
 'Using AI to simulate humans in various contexts': {'relevance': 0,
  'reason': ''},
 'Methods to increase the factuality of language model response': {'relevance': 0,
  'reason': ''},
 'AI and language models for generating misinformation or fact-checking': {'relevance': 0,
  'reason': ''}}

In [23]:
# Attach each paper's judgement to its record under the 'judgement' key.
# A paper id missing from judgement_results raises KeyError.
for paper_id, paper_record in paper_dict.items():
    paper_record['judgement'] = judgement_results[paper_id]

In [25]:
# A topic judgement counts as relevant only when its score is strictly above this value.
relevance_threshold = 0.8

In [29]:
# Group papers by topic, keeping only the judgements whose score is strictly
# above relevance_threshold.
# Result shape: {topic: [{"paper_id": ..., "relevance": {...}}, ...]}
relevant_papers = {}
# The loop variable is `paper_id`, not `id`, so the builtin id() stays usable
# in the notebook's global namespace.
for paper_id, paper_info in paper_dict.items():
    for topic, topic_judgement in paper_info['judgement'].items():
        if topic_judgement['relevance'] > relevance_threshold:
            # "relevance" stores the whole judgement entry, not just the numeric score.
            relevant_papers.setdefault(topic, []).append(
                {"paper_id": paper_id, "relevance": topic_judgement}
            )

In [32]:
# Display the above-threshold papers grouped by topic.
relevant_papers

{'Applications of AI and language models in social science research': [{'paper_id': 'http://arxiv.org/abs/2312.06707',
   'relevance': {'relevance': 0.9,
    'reason': 'The paper directly addresses the application of AI and surveillance technology in social science research, particularly in the context of public safety concerns.'}},
  {'paper_id': 'http://arxiv.org/abs/2312.06861',
   'relevance': {'relevance': 0.9,
    'reason': 'The paper directly discusses the application of AI-based tools in moderating social media platforms and ensuring safety of conversational AI technologies, which aligns with the topic of applications of AI and language models in social science research.'}},
  {'paper_id': 'http://arxiv.org/abs/2312.07492',
   'relevance': {'relevance': 1,
    'reason': 'The paper directly addresses the application of generative language models in social science research by introducing a comprehensive benchmark to capture the amplification of social bias via stigmas in language