## 测试通过Search的方式来搜索用户

In [6]:
import requests
import json
import os


# 加载配置文件
def load_config():
    """Read ``config.json`` from the parent of the current working directory.

    Returns:
        The parsed JSON content. When the file does not exist, a notice is
        printed and an empty dict is returned instead.
    """
    parent_dir = os.path.dirname(os.path.abspath(''))
    config_file = os.path.join(parent_dir, 'config.json')

    try:
        handle = open(config_file, 'r', encoding='utf-8')
    except FileNotFoundError:
        print("配置文件未找到，使用默认设置")
        return {}

    # The file exists: parse it and make sure the handle is closed afterwards.
    with handle:
        return json.load(handle)


# 定义搜索作者的函数
def search_author(author_name):
    """Query the dblp author search API for ``author_name``.

    Proxy settings are read from the configuration returned by
    ``load_config()`` (``network.proxy.enabled`` / ``http`` / ``https``).

    Args:
        author_name: Name (or partial name) of the author to search for.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
        A message describing the failure is printed in those cases.
    """
    config = load_config()

    # Base URL of the dblp author search API.
    base_url = "https://dblp.org/search/author/api"

    params = {
        "q": author_name,  # author name to look up
        "format": "json",  # ask for a JSON response
        "h": 10,           # return at most 10 hits
        "f": 0,            # start from the first hit
    }

    # Use a proxy only when the configuration enables one. Missing 'http' /
    # 'https' entries are skipped instead of raising KeyError.
    proxies = None
    proxy_cfg = config.get('network', {}).get('proxy', {})
    if proxy_cfg.get('enabled', False):
        proxy_http = proxy_cfg.get('http')
        proxy_https = proxy_cfg.get('https')
        candidates = (('http', proxy_http), ('https', proxy_https))
        proxies = {scheme: url for scheme, url in candidates if url} or None
        print(f"使用代理：{proxy_http}")

    # A timeout keeps an unreachable host or proxy from blocking forever.
    try:
        response = requests.get(
            base_url, params=params, proxies=proxies, timeout=30
        )
    except requests.RequestException as e:
        print(f"请求失败：{e}")
        return None

    if response.status_code != 200:
        print(f"请求失败，状态码：{response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # Raised when the body cannot be decoded as JSON.
        print(f"响应解析失败：{e}")
        return None


# 解析作者信息并创建候选项
def parse_authors(result):
    """Convert a dblp author-search response into a list of candidates.

    Args:
        result: Decoded JSON response (a dict), or a falsy value.

    Returns:
        A list of dicts with the keys ``name``, ``id``, ``url``,
        ``affiliations`` (texts of notes whose ``@type`` is
        ``'affiliation'``) and ``aliases``. Missing name/id/url fields are
        filled with ``'未知'``. An empty list is returned when ``result`` is
        falsy or has no ``'result'`` key.
    """
    if not result or 'result' not in result:
        return []

    hits = _dict_or_empty(result['result'].get('hits'))

    authors = []
    # 'hit' is a single object rather than a list when there is one match.
    for hit in _as_list(hits.get('hit')):
        if not isinstance(hit, dict):
            continue
        info = _dict_or_empty(hit.get('info'))

        # Only dict-shaped notes carry an '@type'; anything else (for
        # example a plain string) cannot be an affiliation and is skipped.
        notes = _dict_or_empty(info.get('notes'))
        affiliations = [
            note.get('text', '')
            for note in _as_list(notes.get('note'))
            if isinstance(note, dict) and note.get('@type') == 'affiliation'
        ]

        alias_info = _dict_or_empty(info.get('aliases'))
        aliases = _as_list(alias_info.get('alias'))

        authors.append({
            'name': info.get('author', '未知'),
            'id': hit.get('@id', '未知'),
            'url': info.get('url', '未知'),
            'affiliations': affiliations,
            'aliases': aliases,
        })

    return authors


def _as_list(value):
    """Return ``value`` as a list: ``None`` -> ``[]``, scalar -> ``[value]``."""
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def _dict_or_empty(value):
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


# 显示候选项并让用户选择
def select_author(authors):
    """Print the candidate authors and let the user pick one by number.

    Args:
        authors: List of candidate dicts with the keys ``name``, ``id``,
            ``url``, ``aliases`` and ``affiliations``.

    Returns:
        The chosen candidate dict, or ``None`` when the list is empty or the
        user enters ``q``.
    """
    if not authors:
        print("没有找到任何作者信息")
        return None

    total = len(authors)
    separator = "-" * 80

    print("\n找到以下作者候选项：")
    print("=" * 80)

    for index, candidate in enumerate(authors, 1):
        print(f"{index}. {candidate['name']}")
        print(f"   ID: {candidate['id']}")
        print(f"   URL: {candidate['url']}")

        alias_names = candidate['aliases']
        if alias_names:
            print(f"   别名: {', '.join(alias_names)}")

        affiliation_names = candidate['affiliations']
        if not affiliation_names:
            print("   单位: 未知")
        else:
            print(f"   单位: {'; '.join(affiliation_names)}")

        print(separator)

    # Keep asking until a valid number or 'q' is entered.
    while True:
        try:
            answer = input(f"\n请输入序号 (1-{total}) 或输入 'q' 退出: ")
            if answer.lower() == 'q':
                return None

            number = int(answer)
            if not 1 <= number <= total:
                print(f"请输入有效的序号 (1-{total})")
                continue

            chosen = authors[number - 1]
            print(f"\n您选择了: {chosen['name']}")
            return chosen
        except ValueError:
            print("请输入有效的数字")


# 创建作者文件夹和raw子文件夹
def create_author_directories(author_name):
    """Create ``users/<author>/raw`` under the parent of the working directory.

    The directory name is derived from ``author_name``: spaces and hyphens
    become underscores, and every character that is not alphanumeric, ``_``
    or ``.`` is dropped. If nothing but dots (or nothing at all) remains,
    ``unknown_author`` is used instead.

    Args:
        author_name: Display name of the author.

    Returns:
        ``(author_dir, raw_dir)`` on success, or ``(None, None)`` when the
        directories cannot be created (the error is printed).
    """
    clean_name = author_name.replace(' ', '_').replace('-', '_')
    clean_name = ''.join(c for c in clean_name if c.isalnum() or c in ('_', '.'))

    # '', '.' and '..' would make author_dir point at the users directory
    # itself or at its parent, so fall back to a fixed placeholder name.
    if not clean_name.strip('.'):
        clean_name = 'unknown_author'

    # Parent of the current working directory.
    current_dir = os.path.dirname(os.path.abspath(''))
    users_dir = os.path.join(current_dir, 'users')
    author_dir = os.path.join(users_dir, clean_name)
    raw_dir = os.path.join(author_dir, 'raw')

    try:
        # Creating raw_dir also creates users_dir and author_dir as needed.
        os.makedirs(raw_dir, exist_ok=True)
    except OSError as e:
        print(f"创建目录失败: {e}")
        return None, None

    print(f"创建作者目录: {author_dir}")
    print(f"创建raw子目录: {raw_dir}")
    return author_dir, raw_dir


# 下载bibtex文件
def download_bibtex(author_url, raw_dir, author_name):
    """Download an author's BibTeX export and save it inside ``raw_dir``.

    The download URL is ``author_url`` with ``.bib?param=1`` appended. Proxy
    settings are read from the configuration returned by ``load_config()``.
    The file is named ``<sanitised author name>_publications.bib``.

    Args:
        author_url: Author page URL as returned by the search.
        raw_dir: Existing directory in which the ``.bib`` file is written.
        author_name: Display name of the author, used for the file name.

    Returns:
        Path of the saved file, or ``None`` when the download or the write
        fails or the server answers with a non-200 status.
    """
    bibtex_url = author_url + ".bib?param=1"

    config = load_config()

    # Use a proxy only when the configuration enables one. Missing 'http' /
    # 'https' entries are skipped instead of raising KeyError.
    proxies = None
    proxy_cfg = config.get('network', {}).get('proxy', {})
    if proxy_cfg.get('enabled', False):
        candidates = (('http', proxy_cfg.get('http')),
                      ('https', proxy_cfg.get('https')))
        proxies = {scheme: url for scheme, url in candidates if url} or None

    try:
        print(f"正在下载bibtex文件: {bibtex_url}")

        response = requests.get(bibtex_url, proxies=proxies, timeout=30)
        if response.status_code != 200:
            print(f"下载失败，状态码: {response.status_code}")
            return None

        # Same sanitising rule as the author directory name.
        clean_name = author_name.replace(' ', '_').replace('-', '_')
        clean_name = ''.join(c for c in clean_name if c.isalnum() or c in ('_', '.'))

        bibtex_filename = f"{clean_name}_publications.bib"
        bibtex_path = os.path.join(raw_dir, bibtex_filename)

        bibtex_text = response.text
        with open(bibtex_path, 'w', encoding='utf-8') as f:
            f.write(bibtex_text)

        print(f"bibtex文件保存成功: {bibtex_path}")

        # Count lines that begin with '@' rather than every '@' character,
        # so an '@' inside a field value is not counted as an entry.
        bib_entries = sum(
            1 for line in bibtex_text.splitlines()
            if line.lstrip().startswith('@')
        )
        print(f"下载了 {bib_entries} 个文献条目")

        return bibtex_path

    except (requests.RequestException, OSError) as e:
        # Network failures and file-system errors are reported, not raised.
        print(f"下载bibtex文件时发生错误: {e}")
        return None


# 主程序
def main():
    """Run the interactive workflow: search, choose, create folders, download.

    Prompts for an author name, searches for it, lets the user choose among
    at most 10 candidates, creates the author's directories and downloads
    the BibTeX file. Each failing step prints its message and ends the run.
    """
    author_name = input("请输入要搜索的作者姓名: ").strip()
    if not author_name:
        print("作者姓名不能为空")
        return

    print(f"\n正在搜索作者: {author_name}")

    result = search_author(author_name)
    if not result:
        print("搜索失败。")
        return

    candidates = parse_authors(result)
    if not candidates:
        print("未找到相关作者信息。")
        return

    # Offer at most 10 candidates to choose from.
    selected_author = select_author(candidates[:10])
    if not selected_author:
        return

    print("\n最终选择的作者信息：")
    print(json.dumps(selected_author, indent=2, ensure_ascii=False))

    chosen_name = selected_author['name']
    author_dir, raw_dir = create_author_directories(chosen_name)
    if not (author_dir and raw_dir):
        print("\n❌ 创建作者目录失败")
        return

    bibtex_path = download_bibtex(selected_author['url'], raw_dir, chosen_name)
    if not bibtex_path:
        print("\n❌ bibtex文件下载失败")
        return

    print(f"\n✅ 作者 {chosen_name} 的设置已完成！")
    print(f"📁 作者目录: {author_dir}")
    print(f"📄 bibtex文件: {bibtex_path}")


# 运行主程序
# Start the interactive workflow only when this code is executed directly
# (as a script or a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


正在搜索作者: haofen wang
使用代理：http://127.0.0.1:33210

找到以下作者候选项：
1. Haofeng Wang
   ID: 3457502
   URL: https://dblp.org/pid/121/0989
   别名: Hao-Feng Wang
   单位: 未知
--------------------------------------------------------------------------------
2. Haofen Wang
   ID: 3457501
   URL: https://dblp.org/pid/63/4317
   单位: Tongji University, College of Design and Innovation, Shanghai, China; Shanghai Jiao Tong University, Shanghai, China
--------------------------------------------------------------------------------

您选择了: Haofen Wang

最终选择的作者信息：
{
  "name": "Haofen Wang",
  "id": "3457501",
  "url": "https://dblp.org/pid/63/4317",
  "affiliations": [
    "Tongji University, College of Design and Innovation, Shanghai, China",
    "Shanghai Jiao Tong University, Shanghai, China"
  ],
  "aliases": []
}
创建作者目录: /Users/gyf/Documents/workplace/AcademicAssistant/PubPeek/users/Haofen_Wang
创建raw子目录: /Users/gyf/Documents/workplace/AcademicAssistant/PubPeek/users/Haofen_Wang/raw
正在下载bibtex文件: https://db