In [11]:
import csv
import requests

# GitHub personal access token sent with every API request.
# NOTE(review): the value below looks like a redacted placeholder — supply a real token before running.
github_token = '#已注释'

# Input CSV (has the columns 'actor_id' and 'label') and the CSV that
# receives the fetched profile data.
csv_filename, output_csv_filename = 'github_bot_label_data.csv', 'github_bot_raw_data.csv'

# File in which the number of already-processed users is persisted so that
# an interrupted run can be resumed.
progress_file = 'progress.txt'

# Number of users processed so far; stays at 0 when no progress file exists.
processed_count = 0
try:
    progress = open(progress_file, 'r')
except FileNotFoundError:
    # No saved progress: start from the beginning.
    pass
else:
    # A progress file exists: restore the counter stored in it.
    with progress:
        processed_count = int(progress.read())

# Fetch the GitHub profile of every actor listed in the input CSV and append
# it, together with the actor's label, to the output CSV.
#
# Resume model: `processed_count` is the number of input rows already consumed
# (written to the output, or skipped because the API reported an error for
# that actor). It is persisted to `progress_file` every 10 rows and again when
# the loop ends for any reason, including Ctrl-C, so the next run continues
# exactly where this one stopped.
with open(csv_filename, mode='r', newline='') as file, \
     open(output_csv_filename, mode='a', newline='', encoding='utf-8') as output_file:
    csv_reader = csv.DictReader(file)

    # Output columns: the two input columns followed by the profile fields
    # copied from the API response.
    fieldnames = ['actor_id', 'label', 'login', 'id', 'node_id', 'avatar_url', 'gravatar_id', 'url', 'html_url',
                  'followers_url', 'following_url', 'gists_url', 'starred_url', 'subscriptions_url', 'organizations_url',
                  'repos_url', 'events_url', 'received_events_url', 'type', 'site_admin', 'name', 'company', 'blog',
                  'location', 'email', 'hireable', 'bio', 'twitter_username', 'public_repos', 'public_gists',
                  'followers', 'following', 'created_at', 'updated_at']
    csv_writer = csv.DictWriter(output_file, fieldnames=fieldnames)

    # The output file is opened in append mode, so the header is written only
    # while the file is still empty; writing it unconditionally would insert a
    # header row into the middle of the data on every resumed run.
    if output_file.tell() == 0:
        csv_writer.writeheader()

    # Skip the input rows consumed by a previous run. Stop quietly if the
    # input has fewer rows than the saved counter instead of letting
    # StopIteration escape.
    for _ in range(processed_count):
        if next(csv_reader, None) is None:
            break

    # HTTP headers are identical for every request, so build them once.
    headers = {
        'Authorization': f'token {github_token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    # Number of profiles successfully written during this run.
    count = 0

    try:
        for row in csv_reader:
            actor_id = row['actor_id']
            label = row['label']

            # Look the account up by numeric id (note: /user/<id>, not /users/<login>).
            user_url = f'https://api.github.com/user/{actor_id}'

            print(f'Processing actor_id: {actor_id}')

            # A timeout keeps a stalled connection from hanging the whole run.
            user_response = requests.get(user_url, headers=headers, timeout=30)

            if user_response.status_code == 200:
                user_data = user_response.json()

                # Copy every output column from the response; .get() leaves a
                # column empty when the API omits a field instead of aborting
                # the run with KeyError. The two input columns are then set
                # from the input row.
                profile = {field: user_data.get(field) for field in fieldnames}
                profile['actor_id'] = actor_id
                profile['label'] = label
                csv_writer.writerow(profile)

                count += 1
            else:
                print(f'Error processing actor_id: {actor_id}')
                print(user_response.status_code)

                if user_response.status_code in (401, 403, 429):
                    # Bad credentials or rate limiting: every following
                    # request would fail the same way. Stop WITHOUT counting
                    # this row so that it is retried on the next run.
                    print('Stopping: authentication failed or rate limit reached; '
                          'rerun later to resume from this actor_id')
                    break

            # Count every consumed input row, failed lookups included, so the
            # counter always equals the number of rows to skip on resume.
            processed_count += 1
            if processed_count % 10 == 0:  # checkpoint every 10 rows
                # Flush first so the saved counter never runs ahead of the
                # data that is actually on disk.
                output_file.flush()
                with open(progress_file, 'w') as progress:
                    progress.write(str(processed_count))

            print('------------------------------------')
    finally:
        # Always persist the exact position, also on KeyboardInterrupt or a
        # network error, so no row is duplicated or lost on the next run.
        output_file.flush()
        with open(progress_file, 'w') as progress:
            progress.write(str(processed_count))

    print(f'Processed {count} actor_ids')

Processing actor_id: 2606959
------------------------------------
Processing actor_id: 3027310
------------------------------------
Processing actor_id: 9423774
------------------------------------
Processing actor_id: 2960492
------------------------------------
Processing actor_id: 46681084
------------------------------------
Processing actor_id: 36771401
------------------------------------
Processing actor_id: 31901150
------------------------------------
Processing actor_id: 79102
------------------------------------
Processing actor_id: 16718781
------------------------------------
Processing actor_id: 46564150
------------------------------------
Processing actor_id: 24581081
------------------------------------
Processing actor_id: 7117978
------------------------------------
Processing actor_id: 13301940
------------------------------------
Processing actor_id: 40892567
------------------------------------
Processing actor_id: 75001577
------------------------------------
Pro

------------------------------------
Processing actor_id: 22865991
------------------------------------
Processing actor_id: 468751
------------------------------------
Processing actor_id: 495429
------------------------------------
Processing actor_id: 465629
------------------------------------
Processing actor_id: 4984825
------------------------------------
Processing actor_id: 8124021
------------------------------------
Processing actor_id: 2046846
------------------------------------
Processing actor_id: 4298407
------------------------------------
Processing actor_id: 417981
------------------------------------
Processing actor_id: 55560129
------------------------------------
Processing actor_id: 10775043
------------------------------------
Processing actor_id: 7897760
------------------------------------
Processing actor_id: 5365358
------------------------------------
Processing actor_id: 67733299
------------------------------------
Processing actor_id: 622699
-----------

------------------------------------
Processing actor_id: 3914230
------------------------------------
Processing actor_id: 51150235
------------------------------------
Processing actor_id: 831648
------------------------------------
Processing actor_id: 59071
------------------------------------
Processing actor_id: 9455094
------------------------------------
Processing actor_id: 9286933
------------------------------------
Processing actor_id: 17270325
------------------------------------
Processing actor_id: 8751635
------------------------------------
Processing actor_id: 51506086
------------------------------------
Processing actor_id: 472311
------------------------------------
Processing actor_id: 13859395
------------------------------------
Processing actor_id: 7804791
------------------------------------
Processing actor_id: 79815764
------------------------------------
Processing actor_id: 18269707
------------------------------------
Processing actor_id: 49533950
-------

------------------------------------
Processing actor_id: 51362316
------------------------------------
Processing actor_id: 24732563
------------------------------------
Processing actor_id: 3030003
------------------------------------
Processing actor_id: 33806646
------------------------------------
Processing actor_id: 30500175
------------------------------------
Processing actor_id: 10523218
------------------------------------
Processing actor_id: 61765381
------------------------------------
Processing actor_id: 19979279
------------------------------------
Processing actor_id: 9317857
------------------------------------
Processing actor_id: 56016372
------------------------------------
Processing actor_id: 1372918
------------------------------------
Processing actor_id: 4258778
------------------------------------
Processing actor_id: 29749331
------------------------------------
Processing actor_id: 9460939
------------------------------------
Processing actor_id: 26720499


------------------------------------
Processing actor_id: 54004431
------------------------------------
Processing actor_id: 681390
------------------------------------
Processing actor_id: 9073706
------------------------------------
Processing actor_id: 2806645
------------------------------------
Processing actor_id: 5239883
------------------------------------
Processing actor_id: 18604620
------------------------------------
Processing actor_id: 16150887
------------------------------------
Processing actor_id: 624195
------------------------------------
Processing actor_id: 23242101
------------------------------------
Processing actor_id: 58790750
------------------------------------
Processing actor_id: 25089914
------------------------------------
Processing actor_id: 3837437
------------------------------------
Processing actor_id: 36676045
------------------------------------
Processing actor_id: 7426149
------------------------------------
Processing actor_id: 27117322
----

KeyboardInterrupt: 