In [9]:
import pandas as pd
import os
import requests
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Baidu Maps access keys (AKs). Several keys are kept so that a request can
# move on to the next key when the current one is rejected.
# NOTE(review): these keys are committed in plain text. Prefer supplying keys
# through the BAIDU_MAP_AKS environment variable (comma-separated) and rotate
# the ones listed here; they are only kept as a backward-compatible fallback.
DEFAULT_AK_LIST = [
    'a921560a711ecd02e4e011483cbb39b2',
    '5dcb1fd4f235717c3c410efb5df7afa8',
    '0ea4341b29e6c4816145ea86547cb2fe',
    '6ec305e257b196456bb402e3edcbbc86',
    'a0dcf4222f981a34f7a40e36d26fb2fb',
    '9ebcb8c71702d58b7e671afdfe98bbc2',
    'a5493569e4e4a7fa18be63325439ab12',
    '94814996c9d28133ed7f6fd9298be06c',
    'b5a24a736053008b41ac71fe84d9808d',
    '43cb0d60e1529a436bc1908e27b64731',
    '494a231b7d01bae8c8d46b07c2d26399',
    '5909b04d3e153d587099c40a028e327c',
    '560377c89c058d660d1f5213f3dff434',
]


def load_ak_list(environ=None):
    """Return the access keys to use, preferring the environment over the defaults.

    Args:
        environ: Mapping to read ``BAIDU_MAP_AKS`` from; defaults to ``os.environ``.

    Returns:
        list[str]: The comma-separated keys from ``BAIDU_MAP_AKS`` (whitespace
        stripped, empty entries dropped), or a copy of ``DEFAULT_AK_LIST`` when
        the variable is unset or contains no keys.
    """
    if environ is None:
        environ = os.environ
    raw = environ.get('BAIDU_MAP_AKS', '')
    keys = [key.strip() for key in raw.split(',') if key.strip()]
    return keys or list(DEFAULT_AK_LIST)


ak_list = load_ak_list()

In [11]:
# Input CSV path (change the file name for each batch to process)
# NOTE(review): absolute, machine-specific Windows path; adjust before running elsewhere.
path = r'D:\新建文件夹\划分数据\划分数据\2_刘富强Part2至Part3\Part18.csv'

# Transport mode; route_planning interpolates it into the request URL path
transport_mode = "transit"

# Load previously cached results, if a cache file exists
# NOTE(review): the cache name 'cached.pkl' is fixed and not tied to `path`, so a cache
# left over from a different input file is reused as-is; delete it when switching inputs.
# NOTE(review): pickle.load can execute arbitrary code; only load cache files you created.
try:
    with open('cached.pkl', 'rb') as f:
        results = pickle.load(f)
    print('Successfully loaded cached data')

# No cache file yet: start crawling from scratch
except FileNotFoundError:
    print('Start crawling data')
    results = []

    # 定义任务函数
    def route_planning(row, ak_index=0):
        """Query the Baidu directionlite API for one origin-destination pair.

        Access keys are tried in order starting at ``ak_list[ak_index]``. A key is
        skipped when the API answers with status 302 (presumably quota exhausted
        for that key; confirm against the Baidu status-code table). The module-level
        ``transport_mode`` selects the endpoint.

        Args:
            row: Mapping-like record (a DataFrame row here) providing ``origin``,
                ``destination``, ``O_Lat``, ``O_Lon``, ``D_Lat`` and ``D_Lon``.
            ak_index: Position in ``ak_list`` of the first key to try.

        Returns:
            dict: One flat record with the identification columns, the route
            metrics, ``status``, ``message`` and ``used_ak``. When no route is
            obtained all metrics are 0 and ``message`` is ``'error'``; when every
            key answered 302 ``message`` is ``'AK has run out'`` (the value the
            result-collection loop checks for) and ``status`` is None.
            Network failures, non-JSON replies and malformed payloads are
            reported as ``'error'`` records instead of raising, so one bad pair
            cannot abort the whole crawl.
        """
        O_Lat, O_Lon, D_Lat, D_Lon = row['O_Lat'], row['O_Lon'], row['D_Lat'], row['D_Lon']
        origin = str(O_Lat) + ',' + str(O_Lon)
        destination = str(D_Lat) + ',' + str(D_Lon)

        # Identification columns shared by every record, successful or not
        base = {
            'OID': row['origin'],
            'DID': row['destination'],
            'O_Lat': O_Lat,
            'O_Lon': O_Lon,
            'D_Lat': D_Lat,
            'D_Lon': D_Lon,
        }
        # Metric columns in output order; zero-filled when no route was obtained
        metric_keys = (
            'Total_Dis', 'Total_Dur', 'Actual_Dis', 'Actual_Dur',
            'Walk_1_Dis', 'Walk_1_Dur', 'Walk_2_Dis', 'Walk_2_Dur',
            'Start_1_Lng', 'Start_1_Lat', 'End_1_Lng', 'End_1_Lat',
            'Start_2_Lng', 'Start_2_Lat', 'End_2_Lng', 'End_2_Lat',
            'Transfer_Dis', 'Transfer_Dur', 'Ticket_Price', 'Equal_Time',
            'Taxi_Cost', 'Taxi_Distance', 'Taxi_Duration',
        )

        def failed(status, message, used_ak):
            """Build the zero-filled record for a pair that produced no route."""
            record = dict(base)
            record.update(dict.fromkeys(metric_keys, 0))
            record.update({'status': status, 'message': message, 'used_ak': used_ak})
            return record

        def parsed(response, used_ak):
            """Flatten a status-0 response into one record (raises on a malformed payload)."""
            result = response['result']
            route = result['routes'][0]
            # Only the first entry of each step is used, as in the original logic
            legs = [step[0] for step in route['steps']]
            first_leg, last_leg = legs[0], legs[-1]

            total_dis = route['distance']
            total_dur = route['duration']
            actual_dis = sum(leg['distance'] for leg in legs)
            actual_dur = sum(leg['duration'] for leg in legs)

            # Intermediate legs whose instruction mentions a transfer or walking
            transfers = [
                leg for leg in legs[1:-1]
                if '换乘' in leg['instruction'] or '步行' in leg['instruction']
            ]
            taxi = result['taxi']

            record = dict(base)
            record.update({
                'Total_Dis': total_dis,
                'Total_Dur': total_dur,
                'Actual_Dis': actual_dis,
                'Actual_Dur': actual_dur,
                'Walk_1_Dis': first_leg['distance'],
                'Walk_1_Dur': first_leg['duration'],
                'Walk_2_Dis': last_leg['distance'],
                'Walk_2_Dur': last_leg['duration'],
                'Start_1_Lng': first_leg['start_location']['lng'],
                'Start_1_Lat': first_leg['start_location']['lat'],
                'End_1_Lng': first_leg['end_location']['lng'],
                'End_1_Lat': first_leg['end_location']['lat'],
                'Start_2_Lng': last_leg['start_location']['lng'],
                'Start_2_Lat': last_leg['start_location']['lat'],
                'End_2_Lng': last_leg['end_location']['lng'],
                'End_2_Lat': last_leg['end_location']['lat'],
                'Transfer_Dis': sum(leg['distance'] for leg in transfers),
                'Transfer_Dur': sum(leg['duration'] for leg in transfers),
                'Ticket_Price': route['price'],
                # Waiting time: total duration minus the time spent on the legs
                'Equal_Time': total_dur - actual_dur,
                'Taxi_Cost': taxi['detail'][0]['total_price'],
                'Taxi_Distance': taxi['distance'],
                'Taxi_Duration': taxi['duration'],
                'status': response['status'],
                'message': response['message'],
                'used_ak': used_ak,
            })
            return record

        current_ak = None
        for current_ak in ak_list[ak_index:]:
            target = f"https://api.map.baidu.com/directionlite/v1/{transport_mode}?origin={origin}&destination={destination}&coord_type=wgs84&ak={current_ak}"
            try:
                # The timeout keeps a stalled connection from blocking a worker forever
                response = requests.get(url=target, timeout=30).json()
                status = response['status']
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Network failure, non-JSON body, or a reply without a status field
                return failed(None, 'error', current_ak)

            if status == 302:
                # This key was rejected: move on to the next one
                continue
            if status == 0:
                try:
                    return parsed(response, current_ak)
                except (KeyError, IndexError, TypeError):
                    # Status 0 but no usable route (e.g. empty routes/steps or
                    # missing taxi/price fields): fall through to the error record
                    pass
            return failed(status, 'error', current_ak)

        # Every remaining key answered 302
        return failed(None, 'AK has run out', current_ak)

    # Read the input CSV (GBK-encoded) before starting the worker pool
    df = pd.read_csv(path, encoding='gbk')

    # Run one route query per input row on a pool of worker threads
    with ThreadPoolExecutor(max_workers=8) as executor:
        future_list = [executor.submit(route_planning, row) for _, row in df.iterrows()]

        # Collect the results in submission order
        for future in tqdm(future_list, total=len(future_list)):
            result = future.result()
            if result['message'] == 'AK has run out':
                print('AK has run out and stopped crawling')
                # Leaving the `with` block waits for every queued task; cancel the
                # ones that have not started so they stop issuing doomed requests.
                for pending in future_list:
                    pending.cancel()
                break
            results.append(result)

    # Save the results to the cache file
    # NOTE(review): this also runs after an early stop, so a later run will load
    # the partial results from 'cached.pkl' instead of resuming the crawl.
    with open('cached.pkl', 'wb') as f:
        pickle.dump(results, f)
    print('Data is cached')
# Convert the collected records to a DataFrame and save them as CSV
result_df = pd.DataFrame(results)
# Create the output folder; exist_ok avoids the check-then-create race
os.makedirs('result', exist_ok=True)
# Name the output after the input file; splitext strips only the final
# extension, so input names that contain extra dots are kept intact
file_name = os.path.splitext(os.path.basename(path))[0]
result_df.to_csv(os.path.join('.', 'result', file_name + '.csv'), index=False)
print('Calculation completed')

Start crawling data


100%|██████████| 10000/10000 [02:44<00:00, 60.92it/s]


Data is cached
Calculation completed
