In [11]:
# ! pip install  arxiv
# ! pip install python-dotenv
# ! pip install anthropic
# ! pip install openai

In [12]:
# API key configuration (shell environment variables)
# vi ~/.zshrc
# export OPENAI_API_KEY="your_openai_api_key"
# export QWEN_API_KEY="your_qwen_api_key"
# source ~/.zshrc

In [13]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import anthropic
import openai

In [14]:
# Root directory for saved search results; search_papers writes one sub-directory per topic
# under it and extract_info scans those sub-directories.
PAPER_DIR = "papers"

In [15]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The results are merged into ``<PAPER_DIR>/<topic_dir>/papers_info.json``,
    where ``<topic_dir>`` is the lower-cased topic with spaces replaced by
    underscores. Path separators in the topic are also replaced by underscores
    so that every topic maps to exactly one direct sub-directory of PAPER_DIR
    (the layout that extract_info scans).

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    papers = client.results(search)

    # Build a single, flat directory name for this topic. Without flattening,
    # a topic such as "AI/ML" would create nested directories (invisible to a
    # one-level scan) and ".." would escape PAPER_DIR.
    dir_name = topic.lower().replace(" ", "_")
    for separator in ("/", "\\"):
        dir_name = dir_name.replace(separator, "_")
    if dir_name in ("", ".", ".."):
        dir_name = "_"

    # Create directory for this topic
    path = os.path.join(PAPER_DIR, dir_name)
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info so earlier searches are preserved
    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }

    # Save updated papers_info to json file
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [16]:
# Example run: queries arXiv for the topic and stores the results under PAPER_DIR
# (requires network access).
search_papers("computers")

Results are saved in: papers/computers/papers_info.json


['1310.7911v2',
 'math/9711204v1',
 '2208.00733v1',
 '2504.07020v1',
 '2403.03925v1']

In [17]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every direct sub-directory of PAPER_DIR is checked for a
    ``papers_info.json`` file; the first one that contains ``paper_id`` wins.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
        (including when PAPER_DIR does not exist yet)
    """
    not_found = f"There's no saved information related to paper {paper_id}."

    # Nothing has been saved yet: report "not found" instead of letting
    # os.listdir raise FileNotFoundError.
    if not os.path.isdir(PAPER_DIR):
        return not_found

    for item in os.listdir(PAPER_DIR):
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r") as json_file:
                papers_info = json.load(json_file)
        except (OSError, json.JSONDecodeError) as e:
            # An unreadable or corrupt topic file must not hide the other topics.
            print(f"Error reading {file_path}: {str(e)}")
            continue

        # Only a JSON object can map paper IDs to their details.
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found

In [18]:
# extract_info('1310.7911v2')

In [29]:
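# Tool definitions: the commented-out list is in the Anthropic Messages API format;
# the active list below targets the OpenAI-compatible chat completions API.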
# tools = [
#     {
#         "name": "search_papers",
#         "description": "Search for papers on arXiv based on a topic and store their information.",
#         "input_schema": {
#             "type": "object",
#             "properties": {
#                 "topic": {
#                     "type": "string",
#                     "description": "The topic to search for"
#                 }, 
#                 "max_results": {
#                     "type": "integer",
#                     "description": "Maximum number of results to retrieve",
#                     "default": 5
#                 }
#             },
#             "required": ["topic"]
#         }
#     },
#     {
#         "name": "extract_info",
#         "description": "Search for information about a specific paper across all topic directories.",
#         "input_schema": {
#             "type": "object",
#             "properties": {
#                 "paper_id": {
#                     "type": "string",
#                     "description": "The ID of the paper to look for"
#                 }
#             },
#             "required": ["paper_id"]
#         }
#     }
# ]

# Tool schemas in the OpenAI-compatible chat-completions format: each entry is
# {"type": "function", "function": {...}} and the argument JSON Schema lives
# under "parameters" ("input_schema" is the Anthropic key and is not part of
# this format).
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_papers",
            "description": "Search for papers on arXiv based on a topic and store their information.",
            "parameters": {
                "type": "object",
                "properties": {
                    "topic": {
                        "type": "string",
                        "description": "The topic to search for"
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of results to retrieve",
                        "default": 5
                    }
                },
                "required": ["topic"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "extract_info",
            "description": "Search for information about a specific paper across all topic directories.",
            "parameters": {
                "type": "object",
                "properties": {
                    "paper_id": {
                        "type": "string",
                        "description": "The ID of the paper to look for"
                    }
                },
                "required": ["paper_id"]
            }
        }
    }
]

In [20]:
# Dispatch table: tool name as exposed to the model -> local Python callable.
mapping_tool_function = dict(
    search_papers=search_papers,
    extract_info=extract_info,
)

def execute_tool(tool_name, tool_args):
    """
    Run the tool registered under ``tool_name`` and return its result as text.

    Args:
        tool_name: Key into ``mapping_tool_function``
        tool_args: Keyword arguments for the tool (a dict; None is treated as
            "no arguments")

    Returns:
        The tool result as a string: a fixed notice for None, a
        comma-separated line for lists, indented JSON for dicts, and
        ``str(result)`` for anything else.

    Raises:
        KeyError: If ``tool_name`` is not a registered tool
    """
    try:
        tool_function = mapping_tool_function[tool_name]
    except KeyError:
        available = ", ".join(sorted(mapping_tool_function))
        raise KeyError(
            f"Unknown tool {tool_name!r}; available tools: {available}"
        ) from None

    result = tool_function(**(tool_args or {}))

    if result is None:
        return "The operation completed but didn't return any results."

    if isinstance(result, list):
        # str() each item so lists of non-string values do not break the join.
        return ', '.join(str(item) for item in result)

    if isinstance(result, dict):
        # Convert dictionaries to formatted JSON strings
        return json.dumps(result, indent=2)

    # For any other type, convert using str()
    return str(result)

In [21]:

# `os` and `load_dotenv` are already imported in the notebook's import cell.
# Load variables from a .env file (if present) into the process environment.
load_dotenv()

## Claude API setup
# client = anthropic.Anthropic()

## OpenAI API setup
# from openai import OpenAI
# client = OpenAI(
#     api_key=os.environ.get("OPENAI_API_KEY"),
# )

# ALIYUN DashScope API setup (OpenAI-compatible endpoint)
from openai import OpenAI

if not os.getenv("QWEN_API_KEY"):
    # Fail fast with a clear message. With api_key=None the OpenAI SDK falls
    # back to the OPENAI_API_KEY environment variable, which would then be
    # sent to the DashScope endpoint below.
    raise RuntimeError(
        "QWEN_API_KEY is not set; export it or add it to a .env file before creating the client."
    )

client = OpenAI(
    api_key=os.getenv("QWEN_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
model = "qwen-plus"


In [None]:
def get_response(messages):
    """
    Request one chat completion for the conversation so far.

    Args:
        messages: The message history to send

    Returns:
        Whatever ``client.chat.completions.create`` returns for the module-level
        ``model``, the given messages and the module-level ``tools``
    """
    # Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
    request = {"model": model, "messages": messages, "tools": tools}
    return client.chat.completions.create(**request)

def process_query(query):
    """
    Answer one user query, running every tool the model asks for along the way.

    The conversation is sent with ``get_response``. While the assistant reply
    contains tool calls, each call is executed with ``execute_tool`` and
    answered with its own ``tool`` message (matched by ``tool_call_id``) before
    the model is queried again. Progress and the final answer are printed.

    Args:
        query: The user's question

    Returns:
        None
    """
    messages = [{'role': 'user', 'content': query}]

    # QWEN API
    print("-" * 60)
    i = 1
    first_response = get_response(messages)

    print(f"第{i}轮大模型输出信息：{first_response}\n")
    assistant_output = first_response.choices[0].message

    # Replace a missing content with "" before the message is replayed.
    if assistant_output.content is None:
        assistant_output.content = ""
    messages.append(assistant_output)

    # No tool call requested (None or an empty list): the first reply is final.
    if not assistant_output.tool_calls:
        print(f"无需调用工具，我可以直接回复：{assistant_output.content}")
        return

    # Keep calling the model until it stops requesting tools.
    while assistant_output.tool_calls:
        # Answer every requested call, not only the first one: each
        # tool_call_id in the assistant message needs a matching tool message.
        for tool_call in assistant_output.tool_calls:
            tool_name = tool_call.function.name
            # Treat an empty argument string as "no arguments".
            tool_args = json.loads(tool_call.function.arguments or "{}")

            print(f"模型决定调用工具: {tool_name}，参数: {tool_args}")

            # Run the local function registered for this tool.
            result = execute_tool(tool_name, tool_args)

            print(f"工具 {tool_name} 执行结果: {result}")

            # Append the tool result to the message history.
            messages.append(
                {
                    "content": result,
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_name,
                }
            )

        assistant_output = get_response(messages).choices[0].message
        if assistant_output.content is None:
            assistant_output.content = ""
        messages.append(assistant_output)
        i += 1
        print(f"第{i}轮大模型输出信息：{assistant_output}\n")

    print(f"最终答案：{assistant_output.content}")

    # OpenAI API
    # response = client.chat.completions.create(max_tokens = 2024,
    #                               model = model, 
    #                               tools = tools,
    #                               messages = messages,
    #                               tool_choice='auto')
    

    # response_message = response.choices[0].message
    # tool_calls = response_message.tool_calls

    # # 检查模型是否决定调用工具
    # if tool_calls:
    #     # 将模型的回复（包含工具调用请求）添加到消息历史中
    #     messages.append(response_message)

    #     # 执行工具调用
    #     for tool_call in tool_calls:
    #         tool_name = tool_call.function.name
    #         tool_args = json.loads(tool_call.function.arguments)
            
    #         print(f"模型决定调用工具: {tool_name}，参数: {tool_args}")
            
    #         # 执行你定义的函数
    #         result = execute_tool(tool_name, tool_args)
            
    #         # 将工具执行结果添加到消息历史中
    #         messages.append(
    #             {
    #                 "tool_call_id": tool_call.id,
    #                 "role": "tool",
    #                 "name": tool_name,
    #                 "content": result,
    #             }
    #         )
        
    #     # 再次调用模型，让它根据工具返回的结果生成最终回复
    #     second_response = client.chat.completions.create(
    #         model=model,
    #         messages=messages,
    #     )
    #     final_response = second_response.choices[0].message.content
    #     print(f"最终回复: {final_response}")

    # else:
    #     # 如果模型没有调用工具，直接打印它的回复
    #     final_response = response_message.content
    #     print(f"最终回复: {final_response}")

    # anthropic API
    # response = client.messages.create(max_tokens = 2024,
    #                               model = 'claude-3-7-sonnet-20250219', 
    #                               tools = tools,
    #                               messages = messages)

    # res_content = response.content
    # process_query = True
    # while process_query:
    #     assistant_content = []

    #     for content in res_content:
    #         if content.type == 'text':
                
    #             print(content.text)
    #             assistant_content.append(content)
                
    #             if len(res_content) == 1:
    #                 process_query = False
            
    #         elif content.type == 'tool_use':
                
    #             assistant_content.append(content)
    #             messages.append({'role': 'assistant', 'content': assistant_content})
                
    #             tool_id = content.id
    #             tool_args = content.input
    #             tool_name = content.name
    #             print(f"Calling tool {tool_name} with args {tool_args}")
                
    #             result = execute_tool(tool_name, tool_args)
    #             messages.append({"role": "user", 
    #                               "content": [
    #                                   {
    #                                       "type": "tool_result",
    #                                       "tool_use_id": tool_id,
    #                                       "content": result
    #                                   }
    #                               ]
    #                             })
    #             response = client.responses.create(max_tokens = 2024,
    #                               model = 'gpt-4.1', 
    #                               tools = tools,
    #                               messages = messages) 
    #             res_content = response.content

    #             if len(res_content) == 1 and res_content[0].type == "text":
    #                 print(res_content[0].text)
    #                 process_query = False

In [30]:
# End-to-end example: one query through the model/tool loop
# (requires network access and a configured API key).
process_query("What are the latest papers on quantum computing?")

------------------------------------------------------------

第1轮大模型输出信息：ChatCompletion(id='chatcmpl-b0d29e93-860a-95d1-a33f-b928fead1828', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content='', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_952a27b6cc9c450787e328', function=Function(arguments='{"topic": "quantum computing"}', name='search_papers'), type='function', index=0)]))], created=1750674576, model='qwen-plus', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=22, prompt_tokens=184, total_tokens=206, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)))

模型决定调用工具: search_papers，参数: {'topic': 'quantum computing'}
Results are saved in: papers/quantum_computing/papers_info.json
工具 search_papers 执行结果: 2208.00733v1, quant-ph/0003

In [23]:
def chat_loop():
    """
    Read queries interactively and pass each one to ``process_query``.

    The loop ends when the user types 'quit' (case-insensitive), when stdin is
    closed, or when the prompt is interrupted. Blank input is ignored. An
    exception raised while processing a query is printed and the loop continues.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input can arrive; retrying would loop forever.
            break

        if not query:
            # Nothing to ask; prompt again.
            continue
        if query.lower() == 'quit':
            break

        try:
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")