In [9]:
import pandas as pd
import requests
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# API keys (AKs) available to the crawler; several may be listed.
# NOTE(review): this looks like a real credential committed to the file --
# consider loading it from the environment and rotating the key.
ak_list = ['5XfWZhjzHX1PluMfmVxyF0fYce4G5WeW']

# Input CSV holding the origin/destination pairs.
path = 'F:/硕士期间资料/TOD/职住OD数据/实验11.csv'

# Travel mode segment of the request URL (driving, riding, walking, transit).
transport_mode = "riding"

# Try to reuse results cached by a previous run.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load cache files that this script produced itself.
try:
    with open('OD_result_riding.pkl', 'rb') as f:
        results = pickle.load(f)
    print('成功加载缓存数据')

# No cache file exists yet: start from an empty result list and crawl
# (the crawling code continues inside this except clause).
except FileNotFoundError:
    print('缓存数据不存在，开始爬取数据')
    results = []


    # 定义任务函数
    def route_planning(row, ak_index=0, timeout=10):
        """Fetch the average route distance and duration for one OD pair.

        Calls the Baidu ``directionlite`` endpoint for the module-level
        ``transport_mode`` and averages distance and duration over every
        route in the response. When the API answers with status 302 the
        next key in ``ak_list`` is tried.

        Args:
            row: Mapping-like record providing ``lat_x``/``long_x`` (origin),
                ``lat_y``/``long_y`` (destination) and the ids
                ``Tid_x``/``Tid_y``.
            ak_index: Position in ``ak_list`` of the first key to try.
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            dict with the keys ``origin``, ``destination``, ``distance_m``,
            ``duration_s``, ``status``, ``message`` and ``used_ak``. For
            every unsuccessful outcome ``distance_m``, ``duration_s`` and
            ``status`` are ``None`` and ``message`` names the failure.
        """
        # Coordinates are sent as "lat,lng" strings.
        origin = str(row['lat_x']) + ',' + str(row['long_x'])
        destination = str(row['lat_y']) + ',' + str(row['long_y'])
        origin_id = row['Tid_x']
        destination_id = row['Tid_y']

        def failure(message, used_ak):
            """Build the record shared by every unsuccessful outcome."""
            return {
                'origin': origin_id,
                'destination': destination_id,
                'distance_m': None,
                'duration_s': None,
                'status': None,
                'message': message,
                'used_ak': used_ak,
            }

        # Walk through the remaining keys with a loop rather than recursing
        # once per key.
        while True:
            current_ak = ak_list[ak_index]
            target = f"https://api.map.baidu.com/directionlite/v1/{transport_mode}?origin={origin}&destination={destination}&coord_type=wgs84&ak={current_ak}"

            try:
                # Without a timeout a stalled connection would block this
                # worker thread indefinitely.
                response = requests.get(url=target, timeout=timeout).json()
            except (requests.RequestException, ValueError) as exc:
                # Network failure or a body that is not valid JSON: report it
                # as a failed row instead of aborting the whole crawl.
                return failure(f'request failed: {type(exc).__name__}', current_ak)

            status = response.get('status')

            # Status 302: switch to the next key, if one is left.
            if status == 302:
                if ak_index + 1 < len(ak_list):
                    ak_index += 1
                    continue
                return failure('所有 AK 都已经用完了', current_ak)

            # Any other non-zero status is recorded as a generic failure.
            if status != 0:
                return failure('other error', current_ak)

            routes = (response.get('result') or {}).get('routes') or []
            if not routes:
                # Guard against dividing by zero when no route is returned.
                return failure('no routes returned', current_ak)

            route_count = len(routes)
            return {
                'origin': origin_id,
                'destination': destination_id,
                'distance_m': sum(route['distance'] for route in routes) / route_count,
                'duration_s': sum(route['duration'] for route in routes) / route_count,
                'status': status,
                'message': response.get('message'),
                'used_ak': current_ak,
            }


    # Read the OD pairs first, then fan the requests out over a thread pool
    # (the work is network-bound).
    df = pd.read_csv(path)
    with ThreadPoolExecutor(max_workers=4) as executor:
        future_list = []
        for index, row in df.iterrows():
            future = executor.submit(route_planning, row)
            future_list.append(future)

        # Collect the results in submission order.
        for future in tqdm(future_list, total=len(future_list)):
            result = future.result()
            # This must be exactly the message route_planning returns when
            # the last key in ak_list is rejected; otherwise the crawl never
            # stops early.
            if result['message'] == '所有 AK 都已经用完了':
                print('AK已经用完,停止爬取')
                # Cancel requests that have not started yet so leaving the
                # ``with`` block does not wait for them to be sent.
                for pending in future_list:
                    pending.cancel()
                break
            results.append(result)

    # 将结果保存到缓存文件中
    with open('OD_result_riding.pkl', 'wb') as f:
        pickle.dump(results, f)
    print('数据已缓存')
# 将结果转化为 DataFrame 格式，并保存到 CSV 文件中
result_df = pd.DataFrame(results)
result_df.to_csv('F:/硕士期间资料/TOD/职住OD数据/OD_riding1.csv', index=False)
print('爬取完毕！！！！！！！！！')

缓存数据不存在，开始爬取数据


100%|██████████| 8/8 [00:00<00:00, 13.30it/s]

数据已缓存
爬取完毕！！！！！！！！！





In [None]:
import pandas as pd
import requests
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# API keys (AKs) available to the crawler; replace the placeholder below.
ak_list = ['请输入您的AK密钥:']

# Input CSV holding the origin/destination pairs.
path = '实验.csv'

# Travel mode segment of the request URL.
transport_mode = "driving"
# Try to reuse results cached by a previous run.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load cache files that this script produced itself.
try:
    with open('OD_result_driving.pkl', 'rb') as f:
        results = pickle.load(f)
    print('成功加载缓存数据')

# No cache file exists yet: start from an empty result list and crawl
# (the crawling code continues inside this except clause).
except FileNotFoundError:
    print('缓存数据不存在，开始爬取数据')
    results = []

    def route_planning(row, ak_index=0, timeout=10):
        """Fetch the average route distance and duration for one OD pair.

        Calls the Baidu ``directionlite`` endpoint for the module-level
        ``transport_mode`` and averages distance and duration over every
        route in the response. When the API answers with status 302 the
        next key in ``ak_list`` is tried.

        Args:
            row: Mapping-like record providing ``lat_x``/``long_x`` (origin),
                ``lat_y``/``long_y`` (destination) and the ids
                ``Tid_x``/``Tid_y``.
            ak_index: Position in ``ak_list`` of the first key to try.
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            dict with the keys ``origin``, ``destination``, ``distance_m``,
            ``duration_s``, ``status``, ``message`` and ``used_ak``. For
            every unsuccessful outcome ``distance_m``, ``duration_s`` and
            ``status`` are ``None`` and ``message`` names the failure.
        """
        # Coordinates are sent as "lat,lng" strings.
        origin = str(row['lat_x']) + ',' + str(row['long_x'])
        destination = str(row['lat_y']) + ',' + str(row['long_y'])
        origin_id = row['Tid_x']
        destination_id = row['Tid_y']

        def failure(message, used_ak):
            """Build the record shared by every unsuccessful outcome."""
            return {
                'origin': origin_id,
                'destination': destination_id,
                'distance_m': None,
                'duration_s': None,
                'status': None,
                'message': message,
                'used_ak': used_ak,
            }

        # Walk through the remaining keys with a loop rather than recursing
        # once per key.
        while True:
            current_ak = ak_list[ak_index]
            target = f"https://api.map.baidu.com/directionlite/v1/{transport_mode}?origin={origin}&destination={destination}&coord_type=wgs84&ak={current_ak}"

            try:
                # Without a timeout a stalled connection would block this
                # worker thread indefinitely.
                response = requests.get(url=target, timeout=timeout).json()
            except (requests.RequestException, ValueError) as exc:
                # Network failure or a body that is not valid JSON: report it
                # as a failed row instead of aborting the whole crawl.
                return failure(f'request failed: {type(exc).__name__}', current_ak)

            status = response.get('status')

            # Status 302: switch to the next key, if one is left.
            if status == 302:
                if ak_index + 1 < len(ak_list):
                    ak_index += 1
                    continue
                return failure('所有 AK 都已经用完了', current_ak)

            # Any other non-zero status is recorded as a generic failure.
            if status != 0:
                return failure('other error', current_ak)

            routes = (response.get('result') or {}).get('routes') or []
            if not routes:
                # Guard against dividing by zero when no route is returned.
                return failure('no routes returned', current_ak)

            route_count = len(routes)
            return {
                'origin': origin_id,
                'destination': destination_id,
                'distance_m': sum(route['distance'] for route in routes) / route_count,
                'duration_s': sum(route['duration'] for route in routes) / route_count,
                'status': status,
                'message': response.get('message'),
                'used_ak': current_ak,
            }


    # Read the OD pairs first, then fan the requests out over a thread pool
    # (the work is network-bound).
    df = pd.read_csv(path)
    with ThreadPoolExecutor(max_workers=8) as executor:
        future_list = []
        for index, row in df.iterrows():
            future = executor.submit(route_planning, row)
            future_list.append(future)

        # Collect the results in submission order.
        for future in tqdm(future_list, total=len(future_list)):
            result = future.result()
            # This must be exactly the message route_planning returns when
            # the last key in ak_list is rejected; otherwise the crawl never
            # stops early.
            if result['message'] == '所有 AK 都已经用完了':
                print('AK已经用完,停止爬取')
                # Cancel requests that have not started yet so leaving the
                # ``with`` block does not wait for them to be sent.
                for pending in future_list:
                    pending.cancel()
                break
            results.append(result)


    with open('OD_result_driving.pkl', 'wb') as f:
        pickle.dump(results, f)
    print('数据已缓存')

result_df = pd.DataFrame(results)
result_df.to_csv('OD_driving.csv', index=False)
print('爬取完毕！！！！！！！！！')