/
arxiv_assistant.py
150 lines (115 loc) · 4.81 KB
/
arxiv_assistant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import re
from dotenv import load_dotenv
from feedparser import FeedParserDict
from phasellm.llms import ClaudeWrapper
from phasellm.agents import EmailSenderAgent, RSSAgent
# Read environment variables from a local .env file, if one is present.
load_dotenv()
# Load the Anthropic API key used to construct the Claude wrapper below.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
# Load Gmail credentials. os.getenv returns None when a variable is not set.
gmail_email = os.getenv("GMAIL_EMAIL")
gmail_password = os.getenv("GMAIL_PASSWORD") # presumably a Gmail app password: https://myaccount.google.com/u/1/apppasswords
# Set up the LLM shared by the analysis and summary functions.
llm = ClaudeWrapper(anthropic_api_key)
def interest_analysis(title: str, abstract: str, interests: str):
    """
    This function asks the LLM whether a paper is relevant to the user's interests.
    Args:
        title: The title of the paper.
        abstract: The abstract of the paper.
        interests: The user's interests.
    Returns: The LLM's completion for the prompt. The prompt requests '###yes###' or '###no###', but the completion
    is returned as-is, without validation.
    """
    # The prompt body is kept flush-left so that no source indentation ends up inside the string.
    prompt = f"""
I want to determine if an academic paper is relevant to my interests. I am interested in: {interests}. The paper
is titled: {title}. It has the following abstract: {abstract}. Is this paper relevant to my interests? Respond
with either 'yes' or 'no'. Do not explain your reasoning.
Example responses are given between the ### ### symbols. Respond exactly as shown in the examples.
###yes###
or
###no###
"""
    return llm.text_completion(prompt=prompt)
def summarize(title: str, abstract: str, interests: str):
    """
    This function asks the LLM to explain why a paper is relevant to the user's interests.
    Args:
        title: The title of the paper.
        abstract: The abstract of the paper.
        interests: The user's interests.
    Returns: The LLM's completion for the summary prompt, returned as-is.
    """
    # The prompt body is kept flush-left so that no source indentation ends up inside the string.
    prompt = f"""
Summarize why the the following paper is relevant to my interests. My interests are: {interests}. The paper is
titled: {title}. It has the following abstract: {abstract}.
"""
    return llm.text_completion(prompt=prompt)
def send_email(title: str, abstract: str, link: str, summary: str) -> None:
    """
    This function emails the paper's title, summary, abstract and link to the configured Gmail address.
    Args:
        title: The title of the paper. Also used as the email subject.
        abstract: The abstract of the paper.
        link: The link to the paper.
        summary: The summary of the paper.
    Returns: None.
    """
    print('Sending email...')

    # The address from GMAIL_EMAIL is used both as the sender and as the recipient.
    mailer = EmailSenderAgent(
        sender_name='arXiv Assistant',
        smtp='smtp.gmail.com',
        sender_address=gmail_email,
        password=gmail_password,
        port=587,
    )

    body = f"Title: {title}\n\nSummary:\n{summary}\n\nAbstract:\n{abstract}\n\nLink: {link}"
    mailer.send_plain_email(recipient_email=gmail_email, subject=title, content=body)
def analyze_and_email(paper: FeedParserDict, interests: str, retries: int = 0) -> None:
    """
    This function analyzes a single paper from arXiv and emails the user if it is relevant to their interests.

    The LLM is asked for a '###yes###' or '###no###' verdict. If the response contains neither marker, the analysis
    is retried, up to `max_retries` times. Once the retries are exhausted, the paper is skipped and a message is
    printed exactly once.
    Args:
        paper: The paper to analyze. Its 'title', 'summary' and 'link' entries are read.
        interests: The user's interests.
        retries: The number of retry attempts made so far.
    Returns: None.
    """
    # Allow for a maximum of 1 retry.
    max_retries = 1

    title = paper['title']
    abstract = paper['summary']
    link = paper['link']

    response = interest_analysis(title=title, abstract=abstract, interests=interests)

    # Find the answer within the response.
    answer = re.search(r'###(yes|no)###', response)
    if answer is None:
        if retries < max_retries:
            # The retry takes over this paper. Return straight away so this call does not fall through and
            # report a failure for a response that the retry may already have resolved.
            analyze_and_email(paper=paper, interests=interests, retries=retries + 1)
            return
        print(f'LLM did not respond in the expected format after {max_retries}. Skipping paper:\n{title}')
        return

    # Send email if the user is interested. A 'no' verdict needs no action.
    if answer.group(1) == 'yes':
        summary = summarize(title=title, abstract=abstract, interests=interests)
        send_email(title=title, abstract=abstract, link=link, summary=summary)
def main():
    """
    Entry point for the arXiv assistant. Asks for the user's interests, then polls the arXiv CS RSS feed and runs
    every newly found paper through the analysis.
    Returns:
    """
    # Ask user what they want to read about.
    interests = input("What kinds of papers do you want to be notified about?")

    feed = RSSAgent(url='https://arxiv.org/rss/cs')
    # Running total across all polled batches, not a per-batch count.
    processed_count = 0

    # 60 is the polling interval passed to RSSAgent.poll (presumably seconds; confirm against PhaseLLM).
    with feed.poll(60) as next_batches:
        for batch in next_batches():
            print(f'Found {len(batch)} new paper(s).')
            for paper in batch:
                analyze_and_email(paper=paper, interests=interests)
                processed_count += 1
                print(f'Processed {processed_count} paper(s).')
# Run the assistant only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()