In [15]:
import pandas as pd
import requests
import pickle
import os
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# 定义百度地图 AK 列表
ak_list = [
'87QdwKOjoNYpMgIT7SO1GJgGBv2esAzF'
]


In [16]:
# 初始化结果列表
results = []

# 定义路径规划函数
def route_planning(row, ak_index=0, transport_mode='transit'):
    # 获取当前 AK
    current_ak = ak_list[ak_index]
    
    # 获取起点和终点坐标
    O_Lat = row['O_Lat']
    O_Lon = row['O_Lon']
    D_Lat = row['D_Lat']
    D_Lon = row['D_Lon']
    
    # 构建起点和终点字符串
    Origin = str(O_Lat) + ',' + str(O_Lon)
    Destination = str(D_Lat) + ',' + str(D_Lon)
    
    # 获取 OID 和 DID
    OID = row['OID']
    DID = row['DID']

    # 发送请求到百度路径规划 API
    target = f"https://api.map.baidu.com/directionlite/v2/{transport_mode}?origin={Origin}&destination={Destination}&coord_type=wgs84&ak={current_ak}"
    try:
        req = requests.get(url=target, timeout=10)  # 增加10秒超时
        response = req.json()
    except Exception as e:
        print(f"请求异常: {e}, OID={OID}, DID={DID}")
        return {
            'OID': OID, 'DID': DID, 'O_Lat': O_Lat, 'O_Lon': O_Lon, 'D_Lat': D_Lat, 'D_Lon': D_Lon,
            'Total_Dis': 0, 'Total_Dur': 0, 'Actual_Dis': 0, 'Actual_Dur': 0, 'Walk_1_Dis': 0, 'Walk_1_Dur': 0,
            'Walk_2_Dis': 0, 'Walk_2_Dur': 0, 'Start_1_Lng': 0, 'Start_1_Lat': 0, 'End_1_Lng': 0, 'End_1_Lat': 0,
            'Start_2_Lng': 0, 'Start_2_Lat': 0, 'End_2_Lng': 0, 'End_2_Lat': 0, 'Transfer_Dis': 0, 'Transfer_Dur': 0,
            'Ticket_Price': 0, 'Equal_Time': 0, 'Taxi_Cost': 0, 'Taxi_Distance': 0, 'Taxi_Duration': 0,
            'status': -1, 'message': f'Request Error: {str(e)}', 'used_ak': current_ak
        }

    # 处理不同状态码的响应
    if response['status'] == 0:
        try:
            # 计算总路程和时间
            Total_Dis = response['result']['routes'][0]['distance']
            Total_Dur = response['result']['routes'][0]['duration']
            # 计算实际路程和时间
            Actual_Dis = 0
            Actual_Dur = 0
            Steps = response['result']['routes'][0]['steps']
            for i in range(len(Steps)):
                # 确保Steps[i]是列表且非空
                if isinstance(Steps[i], list) and len(Steps[i]) > 0:
                    Actual_Dis += Steps[i][0]['distance']
                    Actual_Dur += Steps[i][0]['duration']
                else:
                    print(f"警告: Steps[{i}]不是列表或为空: {Steps[i]}")
                    # 如果不是列表，尝试直接访问
                    try:
                        if isinstance(Steps[i], dict) and 'distance' in Steps[i]:
                            Actual_Dis += Steps[i]['distance']
                            Actual_Dur += Steps[i]['duration']
                    except Exception as e:
                        print(f"处理Steps[{i}]时出错: {e}")

            # 计算前后步行路程和时间 - 增加安全检查
            try:
                if isinstance(Steps[0], list) and len(Steps[0]) > 0:
                    Walk_1_Dis = Steps[0][0]['distance']
                    Walk_1_Dur = Steps[0][0]['duration']
                    Start_1_Lng = Steps[0][0]['start_location']['lng']
                    Start_1_Lat = Steps[0][0]['start_location']['lat']
                    End_1_Lng = Steps[0][0]['end_location']['lng']
                    End_1_Lat = Steps[0][0]['end_location']['lat']
                else:
                    Walk_1_Dis = Steps[0]['distance'] if isinstance(Steps[0], dict) and 'distance' in Steps[0] else 0
                    Walk_1_Dur = Steps[0]['duration'] if isinstance(Steps[0], dict) and 'duration' in Steps[0] else 0
                    Start_1_Lng = Steps[0]['start_location']['lng'] if isinstance(Steps[0], dict) and 'start_location' in Steps[0] else 0
                    Start_1_Lat = Steps[0]['start_location']['lat'] if isinstance(Steps[0], dict) and 'start_location' in Steps[0] else 0
                    End_1_Lng = Steps[0]['end_location']['lng'] if isinstance(Steps[0], dict) and 'end_location' in Steps[0] else 0
                    End_1_Lat = Steps[0]['end_location']['lat'] if isinstance(Steps[0], dict) and 'end_location' in Steps[0] else 0
                    
                if isinstance(Steps[-1], list) and len(Steps[-1]) > 0:
                    Walk_2_Dis = Steps[-1][0]['distance']
                    Walk_2_Dur = Steps[-1][0]['duration']
                    Start_2_Lng = Steps[-1][0]['start_location']['lng']
                    Start_2_Lat = Steps[-1][0]['start_location']['lat']
                    End_2_Lng = Steps[-1][0]['end_location']['lng']
                    End_2_Lat = Steps[-1][0]['end_location']['lat']
                else:
                    Walk_2_Dis = Steps[-1]['distance'] if isinstance(Steps[-1], dict) and 'distance' in Steps[-1] else 0
                    Walk_2_Dur = Steps[-1]['duration'] if isinstance(Steps[-1], dict) and 'duration' in Steps[-1] else 0
                    Start_2_Lng = Steps[-1]['start_location']['lng'] if isinstance(Steps[-1], dict) and 'start_location' in Steps[-1] else 0
                    Start_2_Lat = Steps[-1]['start_location']['lat'] if isinstance(Steps[-1], dict) and 'start_location' in Steps[-1] else 0
                    End_2_Lng = Steps[-1]['end_location']['lng'] if isinstance(Steps[-1], dict) and 'end_location' in Steps[-1] else 0
                    End_2_Lat = Steps[-1]['end_location']['lat'] if isinstance(Steps[-1], dict) and 'end_location' in Steps[-1] else 0
            except Exception as e:
                print(f"处理步行数据时出错: {e}")
                Walk_1_Dis = Walk_1_Dur = Walk_2_Dis = Walk_2_Dur = 0
                Start_1_Lng = Start_1_Lat = End_1_Lng = End_1_Lat = 0
                Start_2_Lng = Start_2_Lat = End_2_Lng = End_2_Lat = 0

            # 计算换乘时间和距离
            Transfer_Dis = 0
            Transfer_Dur = 0
            try:
                for i in range(1, len(Steps) - 1):
                    if isinstance(Steps[i], list) and len(Steps[i]) > 0:
                        if '换乘' in Steps[i][0]['instruction'] or '步行' in Steps[i][0]['instruction']:
                            Transfer_Dis += Steps[i][0]['distance']
                            Transfer_Dur += Steps[i][0]['duration']
                    elif isinstance(Steps[i], dict) and 'instruction' in Steps[i]:
                        if '换乘' in Steps[i]['instruction'] or '步行' in Steps[i]['instruction']:
                            Transfer_Dis += Steps[i]['distance'] if 'distance' in Steps[i] else 0
                            Transfer_Dur += Steps[i]['duration'] if 'duration' in Steps[i] else 0
            except Exception as e:
                print(f"计算换乘数据时出错: {e}")

            # 计算票价和等车时间
            try:
                Ticket_Price = response['result']['routes'][0]['price']
                Equal_Time = Total_Dur - Actual_Dur
            except Exception as e:
                print(f"计算票价和等车时间时出错: {e}")
                Ticket_Price = 0
                Equal_Time = 0

            # 出租车出行成本
            try:
                Taxi_Cost = response['result']['taxi']['detail'][0]['total_price']
                Taxi_Distance = response['result']['taxi']['distance']
                Taxi_Duration = response['result']['taxi']['duration']
            except Exception as e:
                print(f"计算出租车数据时出错: {e}")
                Taxi_Cost = Taxi_Distance = Taxi_Duration = 0

            status = response['status']
            message = response['message']
        except Exception as e:
            print(f"处理API响应时出错: {e}, OID={OID}, DID={DID}")
            return {
                'OID': OID, 'DID': DID, 'O_Lat': O_Lat, 'O_Lon': O_Lon, 'D_Lat': D_Lat, 'D_Lon': D_Lon,
                'Total_Dis': 0, 'Total_Dur': 0, 'Actual_Dis': 0, 'Actual_Dur': 0, 'Walk_1_Dis': 0, 'Walk_1_Dur': 0,
                'Walk_2_Dis': 0, 'Walk_2_Dur': 0, 'Start_1_Lng': 0, 'Start_1_Lat': 0, 'End_1_Lng': 0, 'End_1_Lat': 0,
                'Start_2_Lng': 0, 'Start_2_Lat': 0, 'End_2_Lng': 0, 'End_2_Lat': 0, 'Transfer_Dis': 0, 'Transfer_Dur': 0,
                'Ticket_Price': 0, 'Equal_Time': 0, 'Taxi_Cost': 0, 'Taxi_Distance': 0, 'Taxi_Duration': 0,
                'status': -1, 'message': f'Processing Error: {str(e)}', 'used_ak': current_ak
            }

        return {
            'OID': OID,
            'DID': DID,
            'O_Lat': O_Lat,
            'O_Lon': O_Lon,
            'D_Lat': D_Lat,
            'D_Lon': D_Lon,
            'Total_Dis': Total_Dis,
            'Total_Dur': Total_Dur,
            'Actual_Dis': Actual_Dis,
            'Actual_Dur': Actual_Dur,
            'Walk_1_Dis': Walk_1_Dis,
            'Walk_1_Dur': Walk_1_Dur,
            'Walk_2_Dis': Walk_2_Dis,
            'Walk_2_Dur': Walk_2_Dur,
            'Start_1_Lng': Start_1_Lng,
            'Start_1_Lat': Start_1_Lat,
            'End_1_Lng': End_1_Lng,
            'End_1_Lat': End_1_Lat,
            'Start_2_Lng': Start_2_Lng,
            'Start_2_Lat': Start_2_Lat,
            'End_2_Lng': End_2_Lng,
            'End_2_Lat': End_2_Lat,
            'Transfer_Dis':Transfer_Dis,
            'Transfer_Dur':Transfer_Dur,
            'Ticket_Price':Ticket_Price,
            'Equal_Time':Equal_Time,
            'Taxi_Cost':Taxi_Cost,
            'Taxi_Distance':Taxi_Distance,
            'Taxi_Duration':Taxi_Duration,
            'status': status,
            'message': message,
            'used_ak': current_ak,
        }

    # 状态码为302，尝试下一个 AK
    elif response['status'] == 302:
        if ak_index + 1 < len(ak_list):
            return route_planning(row, ak_index=ak_index + 1)
        else:
            return {
                'OID': OID,
                'DID': DID,
                'O_Lat': O_Lat,
                'O_Lon': O_Lon,
                'D_Lat': D_Lat,
                'D_Lon': D_Lon,
                'Total_Dis': 0,
                'Total_Dur': 0,
                'Actual_Dis': 0,
                'Actual_Dur': 0,
                'Walk_1_Dis': 0,
                'Walk_1_Dur': 0,
                'Walk_2_Dis': 0,
                'Walk_2_Dur': 0,
                'Start_1_Lng': 0,
                'Start_1_Lat': 0,
                'End_1_Lng': 0,
                'End_1_Lat': 0,
                'Start_2_Lng': 0,
                'Start_2_Lat': 0,
                'End_2_Lng': 0,
                'End_2_Lat': 0,
                'Transfer_Dis':0,
                'Transfer_Dur':0,
                'Ticket_Price':0,
                'Equal_Time':0,
                'Taxi_Cost':0,
                'Taxi_Distance':0,
                'Taxi_Duration':0,
                'status': None,
                'message': 'error',
                'used_ak': current_ak,
            }

    # 若非预期状态码，则跳过该行
    else:
        status = response['status']
        return {
            'OID': OID,
            'DID': DID,
            'O_Lat': O_Lat,
            'O_Lon': O_Lon,
            'D_Lat': D_Lat,
            'D_Lon': D_Lon,
            'Total_Dis': 0,
            'Total_Dur': 0,
            'Actual_Dis': 0,
            'Actual_Dur': 0,
            'Walk_1_Dis': 0,
            'Walk_1_Dur': 0,
            'Walk_2_Dis': 0,
            'Walk_2_Dur': 0,
            'Start_1_Lng': 0,
            'Start_1_Lat': 0,
            'End_1_Lng': 0,
            'End_1_Lat': 0,
            'Start_2_Lng': 0,
            'Start_2_Lat': 0,
            'End_2_Lng': 0,
            'End_2_Lat': 0,
            'Transfer_Dis':0,
            'Transfer_Dur':0,
            'Ticket_Price':0,
            'Equal_Time':0,
            'Taxi_Cost':0,
            'Taxi_Distance':0,
            'Taxi_Duration':0,
            'status': status,
            'message': 'error',
            'used_ak': current_ak,
        }

# 定义主函数，用于处理 CSV 文件
def process_csv(path):
    # 初始化结果列表
    global results
    results = []
    
    # 检查是否存在缓存文件，如果存在则加载
    if os.path.exists('cached_temp.pkl'):
        try:
            with open('cached_temp.pkl', 'rb') as f:
                results = pickle.load(f)
            print(f'已从缓存文件加载 {len(results)} 条数据')
            # 询问用户是否继续处理
            continue_processing = input('是否继续处理? (y/n): ')
            if continue_processing.lower() != 'y':
                print('使用缓存数据生成结果')
                return results
        except Exception as e:
            print(f'加载缓存文件失败: {e}')
            results = []
    
    # 使用多线程执行任务
    with ThreadPoolExecutor(max_workers=8) as executor:
        future_list = []
        # 读取 CSV 文件
        df = pd.read_csv(path, encoding='gbk')
        
        # 如果已有缓存数据，跳过已处理的部分
        processed_oids = set()
        if results:
            processed_oids = {result['OID'] for result in results}
            print(f'跳过已处理的 {len(processed_oids)} 条数据')
        
        # 提交任务
        for index, row in df.iterrows():
            if row['OID'] not in processed_oids:
                future = executor.submit(route_planning, row)
                future_list.append(future)
        
        print(f'提交了 {len(future_list)} 个新任务')
        
        # 获取每个任务的结果
        count = 0  # 计数器，用于跟踪处理的任务数量
        for future in tqdm(future_list, total=len(future_list)):
            result = future.result()
            if result['message'] == 'AK has run out':
                print('AK has run out and stopped crawling')
                break
            results.append(result)
            
            # 每处理100条数据保存一次缓存，防止数据丢失
            count += 1
            if count % 1000 == 0:
                print(f'已处理 {count}/{len(future_list)} 条数据，正在保存缓存...')
                with open('cached_temp.pkl', 'wb') as f:
                    pickle.dump(results, f)
                print(f'缓存已保存到 cached_temp.pkl')
    
    # 将最终结果保存到缓存文件中
    with open('cached.pkl', 'wb') as f:
        pickle.dump(results, f)
    print('Data is cached')
    
    return results

# 主程序入口
def main(path):
    # 处理CSV文件
    results = process_csv(path)
    
    # 将结果转化为 DataFrame 格式，并保存到 CSV 文件中
    result_df = pd.DataFrame(results)
    
    # 创建result文件夹
    if not os.path.exists('result'):
        os.mkdir('result')
    
    # 输出与输入数据相同的文件名
    file_name = os.path.basename(path).split('.')[0]
    result_df.to_csv('./result/'+file_name+'.csv', index=False)
    print('Calculation completed')
    
    return result_df

# 示例使用
# 替换为你的CSV文件路径
path = r"D:\新建文件夹\工作簿1.csv"  # 或 'Part9.csv'

# 执行主程序
result_df = main(path)


KeyError: 'OID'