In [None]:
from settings import params
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Apply seaborn's default styling to all subsequent matplotlib figures.
sns.set()
# Alternative styling / font options, currently disabled:
# sns.set_theme(style="whitegrid")
# plt.rcParams['text.usetex'] = True
# plt.rcParams["font.family"] = 'Arial Unicode MS'

In [None]:
# This cell and the next one are used to generate the corresponding OD.pickle file.
# cluster_id selects which ./clusters/cluster_<id>.csv the next cell reads.
cluster_id = 0

In [None]:
# Build {OD_id: [origin_id, destination_id, lambda]} for the current cluster
# and pickle it to tmp/OD.pickle.
OD_data = pd.read_csv(f"./clusters/cluster_{cluster_id}.csv")  # OD file of this cluster
OD_num = OD_data.shape[0]  # number of OD pairs
OD_dict = dict()

for i in range(OD_num):
    row = OD_data.iloc[i]  # fetch the row once instead of once per field
    # value is the list [origin id, destination id, lambda]
    OD_dict[int(row.loc["OD_id"])] = [int(row.loc["origin_id"]), int(row.loc["destination_id"]), row.loc["lambda"]]

# The context manager closes the file even if pickling raises.
with open('tmp/OD.pickle', 'wb') as f:
    pickle.dump(OD_dict, f)

# Advance to the next cluster so that re-running this cell processes the next file.
# NOTE: this makes the cell non-idempotent; every run overwrites tmp/OD.pickle.
cluster_id += 1

In [None]:
# Load the local and global prediction results, both indexed by OD_id.
global_result = pd.read_csv("./result/predict_result.csv", index_col="OD_id")
local_result = pd.read_csv("./result/simu_stat.csv", index_col="OD_id")
# Disabled alternative: assemble local_result from the per-cluster prediction files.
# NOTE(review): DataFrame.append was removed in pandas 2.0; use pd.concat if this is revived.
# local_result = pd.DataFrame()
# for i in range(4): # number of clusters
#     df = pd.read_csv("./clusters/predict_result_" + str(i) + ".csv", index_col="OD_id")
#     local_result = local_result.append(df)

In [None]:
# Keep only the global rows whose OD_id appears in local_result, in local_result's order.
global_result_chosen = global_result.loc[local_result.index]

In [None]:
# Optional: filter the OD pairs as needed.
# Here: keep the 2000 OD pairs with the smallest ride_distance, then align local_result to them.
global_result_chosen = global_result_chosen.sort_values("ride_distance").iloc[:2000]
local_result = local_result.loc[global_result_chosen.index]

In [None]:
# Element-wise |global - local|, and the same error relative to the global value.
# NOTE(review): a zero global value yields inf/NaN in relative_error — confirm this is acceptable.
absolute_error = (global_result_chosen - local_result).abs()
relative_error = absolute_error.div(global_result_chosen)

In [None]:
# Restrict the absolute errors to the four metrics of interest (this fixes the column
# order that later positional .iloc lookups rely on) and show summary statistics.
abs_metric_columns = ["matching_prob", "ride_distance", "detour_distance", "shared_distance"]
absolute_error = absolute_error[abs_metric_columns]
absolute_error.describe()

In [None]:
# Restrict the relative errors to the four metrics of interest (this fixes the column
# order that later positional .iloc lookups rely on) and show summary statistics.
rel_metric_columns = ["matching_prob", "ride_distance", "detour_distance", "shared_distance"]
relative_error = relative_error[rel_metric_columns]
relative_error.describe()

In [None]:
# Show the distribution of one metric: here the global shared distance, divided by 1000
# (labelled km on the axis), as a 25-bin histogram saved to disk.
shared_distance_km = global_result_chosen["shared_distance"] / 1000
plt.figure(dpi=300)
plt.hist(shared_distance_km, bins=25)
plt.ylabel("Number of OD Pairs")
plt.xlabel("Shared Distance of OD Pairs (km)")
plt.savefig("result/shared_distance_distribution.png", bbox_inches="tight", dpi=300)

In [None]:
# Inspect the errors after sorting: the 204 rows with the largest relative shared-distance error.
relative_error.sort_values("shared_distance", ascending=False).head(204)

In [None]:
# Inspect the OD pairs with the largest errors: histogram of the global ride distance
# for the 204 ODs with the highest relative shared-distance error.
worst_by_shared = relative_error.sort_values("shared_distance", ascending=False).head(204)
large_index = worst_by_shared.index
ride_distance_of_worst = global_result_chosen.loc[large_index, "ride_distance"]
plt.hist(ride_distance_of_worst, bins=25)

In [None]:
# Find the OD pair with the largest relative error in shared distance.
# NOTE(review): presumably the result is a row position (later cells index with .iloc) — confirm
# against the installed pandas/numpy versions.
np.argmax(relative_error["shared_distance"], axis=0)

In [None]:
# Show the global result at row position 286.
# NOTE(review): 286 is hard-coded; presumably copied from an earlier argmax output — confirm it
# still matches the current data.
global_result_chosen.iloc[286]

In [None]:
# Show the local result at row position 286.
# NOTE(review): 286 is hard-coded; presumably copied from an earlier argmax output — confirm it
# still matches the current data.
local_result.iloc[286]

In [None]:
# Draw violin plots of the relative error (in percent) for the first three error columns.
# NOTE(review): the violins are drawn in the order pw, ew, lw but the tick labels read
# 'pw', 'lw', 'ew' — either the variable names or the labels are swapped; confirm which.
pw_error, ew_error, lw_error = (
    100 * np.sort(relative_error.iloc[:, column]) for column in range(3)
)
violin_positions = [0, 2, 4]
plt.figure(figsize=(6.4 * 3, 6.4), dpi=200)
plt.violinplot(
    [pw_error, ew_error, lw_error],
    positions=violin_positions,
    widths=2,
    showmeans=True,
    showextrema=True,
    showmedians=True,
)
plt.xticks(np.arange(3) * 2, ['pw', 'lw', 'ew'])
plt.ylabel("Relative error /%")
plt.show()

In [None]:
# Violin plot of the relative error of error column 0, scaled to percent
# (labelled "Matching Probability"), saved to disk.
rel_p_percent = relative_error.iloc[:, 0].values * 100
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Relative Error (%)")
sns.violinplot(y=rel_p_percent)
plt.xlabel("Matching Probability")
plt.savefig("result/rel_p_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# Violin plot of the absolute error of error column 0, scaled by 100
# (labelled "Matching Probability"), saved to disk.
abs_p_percent = absolute_error.iloc[:, 0].values * 100
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Absolute Error (%)")
sns.violinplot(y=abs_p_percent)
plt.xlabel("Matching Probability")
plt.savefig("result/abs_p_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# Violin plot of the relative error of error column 1, scaled to percent
# (labelled "Ride Distance"), saved to disk.
rel_l_percent = relative_error.iloc[:, 1].values * 100
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Relative Error (%)")
sns.violinplot(y=rel_l_percent)
plt.xlabel("Ride Distance")
plt.savefig("result/rel_l_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# Violin plot of the absolute error of error column 1 (labelled "Ride Distance"), saved to disk.
# The factor of 1 leaves the values unscaled; the axis label states metres.
abs_l_values = absolute_error.iloc[:, 1].values * 1
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Absolute Error (m)")
sns.violinplot(y=abs_l_values)
plt.xlabel("Ride Distance")
plt.savefig("result/abs_l_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# Violin plot of the relative error of error column 3, scaled to percent
# (labelled "Shared Distance"), saved to disk.
rel_e_percent = relative_error.iloc[:, 3].values * 100
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Relative Error (%)")
sns.violinplot(y=rel_e_percent)
plt.xlabel("Shared Distance")
plt.savefig("result/rel_e_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# Violin plot of the absolute error of error column 3 (labelled "Shared Distance"), saved to disk.
# The factor of 1 leaves the values unscaled; the axis label states metres.
abs_e_values = absolute_error.iloc[:, 3].values * 1
plt.figure(figsize=(2.4, 4.8), dpi=300)
plt.ylabel("Absolute Error (m)")
sns.violinplot(y=abs_e_values)
plt.xlabel("Shared Distance")
plt.savefig("result/abs_e_error_rand.png", bbox_inches="tight", dpi=300)

In [None]:
# 2x4 grid of violin plots: absolute errors on the top row, relative errors on the bottom row,
# one column per error column (tick labels: matching success rate, ride distance,
# detour distance, shared distance).
# Column 0 of the absolute errors and every relative error are multiplied by 100 (percent labels);
# the remaining absolute errors are plotted unscaled (labelled in metres).
violin_tick_labels = ["匹配成功率", "行驶里程", "绕行里程", "共乘里程"]
violin_panels = [
    ("绝对误差/%", 100*absolute_error.iloc[:, 0]),
    ("绝对误差/m", absolute_error.iloc[:, 1]),
    ("绝对误差/m", absolute_error.iloc[:, 2]),
    ("绝对误差/m", absolute_error.iloc[:, 3]),
] + [("相对误差/%", 100*relative_error.iloc[:, k]) for k in range(4)]

plt.figure(figsize=(14.4, 4.8))
for slot, (axis_label, panel_values) in enumerate(violin_panels):
    plt.subplot(2, 4, slot + 1)  # same panel as the 3-digit form 24<slot+1>
    plt.ylabel(axis_label)
    plt.violinplot(panel_values)
    plt.xticks([1], [violin_tick_labels[slot % 4]])
plt.show()

In [None]:
# 2x4 grid of box plots: absolute errors on the top row, relative errors on the bottom row,
# one column per error column (tick labels: matching success rate, ride distance,
# detour distance, shared distance).
# Column 0 of the absolute errors and every relative error are multiplied by 100 (percent labels);
# the remaining absolute errors are plotted unscaled (labelled in metres).
box_tick_labels = ["匹配成功率", "行驶里程", "绕行里程", "共乘里程"]
box_panels = [
    ("绝对误差/%", 100*absolute_error.iloc[:, 0]),
    ("绝对误差/m", absolute_error.iloc[:, 1]),
    ("绝对误差/m", absolute_error.iloc[:, 2]),
    ("绝对误差/m", absolute_error.iloc[:, 3]),
] + [("相对误差/%", 100*relative_error.iloc[:, k]) for k in range(4)]

plt.figure(figsize=(14.4, 4.8))
for slot, (axis_label, panel_values) in enumerate(box_panels):
    plt.subplot(2, 4, slot + 1)  # same panel as the 3-digit form 24<slot+1>
    plt.ylabel(axis_label)
    plt.boxplot(panel_values)
    plt.xticks([1], [box_tick_labels[slot % 4]])
plt.show()

In [None]:
# Load the full OD table; the first CSV column becomes the index.
OD_data = pd.read_csv("data/OD.csv", index_col=0)
# Disabled: perturb each lambda by a uniform random factor in [-0.5, 0.5) of its own value.
# OD_data.loc[:, "lambda"] += (np.random.random_sample(OD_data.shape[0]) * (0.5 - (-0.5)) - 0.5) * OD_data.loc[:, "lambda"] # used to add some fluctuation to the demand

In [None]:
# Write OD_data back to data/OD.csv, the same path it was read from, overwriting the file
# with whatever is currently in memory.
OD_data.to_csv("data/OD.csv")

In [None]:
# Randomly split the OD pairs into consecutive, non-overlapping groups and write each
# group to clusters/cluster_<i>.csv.
# NOTE(review): no random seed is set, so every run produces a different split.
rand_ints = np.random.choice(np.arange(OD_data.shape[0]), OD_data.shape[0], replace=False)
# Group sizes. NOTE(review): presumably these should sum to the number of rows in OD_data;
# rows beyond the sum are left unassigned — confirm.
num_of_clusters = [1997,3691,2521,1784]
total = 0  # offset of the current group within the shuffled row positions
for i in range(len(num_of_clusters)):
    cluster = OD_data.iloc[rand_ints[total: total + num_of_clusters[i]], :]
    # Advance by the size of THIS group. The previous code always added num_of_clusters[0],
    # which made later groups overlap and start at the wrong offset.
    total += num_of_clusters[i]
    cluster.to_csv("clusters/cluster_" + str(i) + ".csv")

In [None]:
# Histogram + KDE of the "lambda" column, saved to disk.
# sns.displot is a figure-level function that creates its own figure, so no plt.figure()
# call precedes it (one would only leave behind an empty extra figure).
arrival_grid = sns.displot(OD_data.loc[:, "lambda"], kde=True)
arrival_grid.set_axis_labels("Arrival Rate of the Orders (#/min)", "Number of OD Pairs")
# The displot figure is the current figure, so plt.savefig writes it; dpi applies to the file.
plt.savefig("result/arrival_rate_distribution.png", dpi=300)

In [None]:
# Load the pickled shortest-path dictionary and keep the last element of every entry.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open("tmp/shortest_path.pickle", 'rb') as f:
    path_dict: dict = pickle.load(f)
# NOTE(review): presumably the last element of each entry is the path length — confirm against
# the code that writes shortest_path.pickle.
# A comprehension avoids rebinding the builtin `id` at notebook scope.
distance = {key: path[-1] for key, path in path_dict.items()}
# One row per key, with the extracted value in column 0.
dist_df = pd.DataFrame.from_dict(distance, orient="index")

In [None]:
# Histogram + KDE of column 0 of dist_df divided by 1000 (labelled km), saved to disk.
# sns.displot is a figure-level function that creates its own figure, so no plt.figure()
# call precedes it (one would only leave behind an empty extra figure).
ride_grid = sns.displot(dist_df.iloc[:, 0] / 1000, kde=True)
ride_grid.set_axis_labels("Ride Distance of OD Pairs (km)", "Number of OD Pairs")
# Ticks every 15 units from 0 to 165 on the x axis of the displot axes (the current axes).
plt.xticks(np.arange(0, 180, step=15))
# The displot figure is the current figure, so plt.savefig writes it; dpi applies to the file.
plt.savefig("result/ride_distance_distribution.png", dpi=300)