In [58]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import folium
import argparse

In [59]:
import os
import geopandas as gpd
import pandas as pd
import ast, pickle
from shapely.geometry import Point, LineString

from dataloader import *
from model import MLP
from torch.utils.data import DataLoader

In [60]:
def load_geo_file(geo_file_path):
    df = pd.read_csv(geo_file_path)
    geometry = [LineString(ast.literal_eval(line)) for line in df['coordinates']]
    return gpd.GeoDataFrame(df, geometry=geometry)

def load_rel_file(rel_file_path):
    return pd.read_csv(rel_file_path)
    
def load_pickled_gps_file(pickle_file_path):
    with open(pickle_file_path, 'rb') as f:
        gps_points = pickle.load(f)
    return gps_points

def load_pickled_emb_file(pickle_file_path):
    with open(pickle_file_path, 'rb') as f:
        embs = pickle.load(f)
    return embs

def evaluate(test_loader, model, device, args):
    output_list = []
    segment_list = []
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        for i, (emb, road_seg) in enumerate(test_loader):
            emb, road_seg = emb.to(device), road_seg.to(device)
            output = model(emb)
            output_list.append(output.cpu().numpy())
            segment_list.append(road_seg.cpu().numpy())

    output_list = np.concatenate(np.array(output_list), axis=0)
    segment_list = np.concatenate(np.array(segment_list), axis=0)
    
    return segment_list, output_list
            

def load_model(model_path):
    model = MLP(128, 512, 11095)
    model.load_state_dict(torch.load(model_path))
    return model


In [148]:
def get_gps_from_segment(selected_labels, selected_outputs, road_segments, rel_table):
    label_gpslist = []
    output_gpslist = []
    for i in range(len(selected_labels)):
        label_gps, output_gps = segment2gps(selected_labels[i],
                                             selected_outputs[i], road_segments, rel_table)
        # output_gps = segment2gps(selected_outputs[i], road_segments, rel_table)
        label_gpslist.append(label_gps)
        output_gpslist.append(output_gps)
    return label_gpslist, output_gpslist


def segment2gps(labels, segments, road_segments, rel_table):
    labels_idxes = np.where(labels == 1)[0]
    labels_num = np.sum(labels).astype(int)    # total number of segments
    predicted_idxes = torch.topk(torch.tensor(segments), labels_num).indices.cpu().numpy()

    # get all road segments with indexes
    label_gps_list = []
    output_gps_list = []
    for index in labels_idxes:
        segment = road_segments.iloc[index]
        # get all gps points in the segment
        segment_gps = ast.literal_eval(segment['coordinates'])
        label_gps_list.append(np.array(segment_gps))
    label_gps_list = np.concatenate(np.array(label_gps_list), axis=0)
    
    for index in predicted_idxes:
        segment = road_segments.iloc[index]
        # get all gps points in the segment
        segment_gps = ast.literal_eval(segment['coordinates'])
        output_gps_list.append(np.array(segment_gps))
    output_gps_list = np.concatenate(np.array(output_gps_list), axis=0)
    
    return label_gps_list, output_gps_list
    print('labels_idxes:', labels_idxes)
    print('predicted_idxes:', predicted_idxes)
    print("accuracy:", len(set(labels_idxes) & set(predicted_idxes)) / len(set(labels_idxes) | set(predicted_idxes)))
    # gps_list = []
    # segments = np.array(segments)
    # # get all indexes greater than 0.9
    # indexes = np.where(segments > 0.9)[0]
    # print('read_indexes:', indexes)
    # if len(indexes) < 100:
    #     # get top 40 indexes
    #     indexes = np.argsort(-segments)[:40]
    #     print('Top 40 indexes:', indexes)

    # # get all road segments with indexes
    # for index in indexes:
    #     segment = road_segments.iloc[index]
    #     # get all gps points in the segment
    #     segment_gps = ast.literal_eval(segment['coordinates'])
    #     gps_list.append(np.array(segment_gps))
    # gps_list = np.concatenate(np.array(gps_list), axis=0)
    
    # return gps_list


In [149]:
def get_args():
    # get parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--aligned_path', type=str, default='align_data/aligned_gps')
    parser.add_argument('--emb_path', type=str, default='data/')
    parser.add_argument('--data_name', type=str, default='aligned_testgps')
    parser.add_argument('--emb_name', type=str, default='start_128_test')
    parser.add_argument('--model_path', type=str, default='models/start128_')

    parser.add_argument('--emb_size', type=int, default=128)
    parser.add_argument('--hidden_size', type=int, default=512)
    parser.add_argument('--mode', type=str, default='test')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=0.001)

    # args = parser.parse_args()    # 调用parser.parse_args()会读取系统参数：sys.argv[]，仅命令行调用时是正确参数
    args = parser.parse_known_args()[0]
    return args

In [150]:
def load_data(geo_file_path, rel_file_path, gps_path):
    road_segments = load_geo_file(geo_file_path)
    rel_table = load_rel_file(rel_file_path)
    gps_lists = load_pickled_gps_file(gps_path)    # list of gps trajectories
    return road_segments, rel_table, gps_lists

In [151]:
geo_file_path = 'align_data/porto_roadmap_edge/porto_roadmap_edge.geo'
rel_file_path = 'align_data/porto_roadmap_edge/porto_roadmap_edge.rel'
gps_path = 'data/gps/testgps'
emb_model = 'start';  emb_dim = 128
emb_path = 'data/{}_{}_test'.format(emb_model, emb_dim)
model_path = 'models/{}{}_model.ckpt/'.format(emb_model, emb_dim)
# model_path = 'models/eph50_{}{}.ckpt/'.format(emb_model, emb_dim)

### ===== load data ===== ###
args = get_args()
test_dataset = TrajDataset(args.aligned_path, args.emb_path, args.data_name, args.emb_name, args.mode)
test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=True)
model = MLP(args.emb_size, args.hidden_size, 11095)

geo_file, rel_table, gps_lists = load_data(geo_file_path, rel_file_path, gps_path)
labels, outputs = evaluate(test_loader, model, 'cuda:0', args)


  output_list = np.concatenate(np.array(output_list), axis=0)
  segment_list = np.concatenate(np.array(segment_list), axis=0)


In [152]:
# get the heads of the rel_table
rel_table.head()
print(geo_file.head())    # coordinates: list of [lon, lat]

   geo_id        type                                        coordinates  \
0       0  LineString  [[-8.6406364, 41.1660713], [-8.6409114, 41.166...   
1       1  LineString  [[-8.6406364, 41.1660713], [-8.6407202, 41.166...   
2       2  LineString  [[-8.6420446, 41.168347], [-8.6423385, 41.1682...   
3       3  LineString  [[-8.6420446, 41.168347], [-8.6422246, 41.1683...   
4       4  LineString  [[-8.6415867, 41.1668853], [-8.6417347, 41.166...   

   highway  lanes   length  maxspeed  \
0        7      0   32.388         0   
1        7      0  224.655         0   
2        7      1  115.105         6   
3        7      1  191.892         0   
4        7      2  118.915         4   

                                            geometry  
0  LINESTRING (-8.64064 41.16607, -8.64091 41.16628)  
1  LINESTRING (-8.64064 41.16607, -8.64072 41.166...  
2  LINESTRING (-8.64204 41.16835, -8.64234 41.168...  
3  LINESTRING (-8.64204 41.16835, -8.64222 41.168...  
4  LINESTRING (-8.64159 41.

In [153]:
# select 5 index from the test trajectories
idx_list = np.random.choice(len(gps_lists), 5, replace=False)
print(np.shape(labels), np.shape(outputs))
selected_labels = labels[idx_list]
selected_outputs = outputs[idx_list]
selected_gps = np.array(gps_lists)[idx_list]

label_gps, outputs_gps = get_gps_from_segment(selected_labels, selected_outputs, geo_file, rel_table)


(10000, 11095) (10000, 11095)


  selected_gps = np.array(gps_lists)[idx_list]
  label_gps_list = np.concatenate(np.array(label_gps_list), axis=0)
  output_gps_list = np.concatenate(np.array(output_gps_list), axis=0)


In [154]:
print('shape of label_gps:', np.shape(label_gps))
print('shape of outputs_gps:', np.shape(outputs_gps))
print(len(label_gps[0]), len(outputs_gps[0]))

shape of label_gps: (5,)
shape of outputs_gps: (5,)
430 220


  result = asarray(a).shape


In [155]:
# plot the gps points by folium
def plot_on_map(gps_list, color, m):
    for gps in gps_list:
        # reverse the two values
        # print(gps[0], gps[1])
        gps = [float(gps[1]), float(gps[0])]
        folium.CircleMarker(gps, radius=1, color=color).add_to(m)
    return m

In [156]:
for i in range(len(selected_gps)):
    m = folium.Map(location=[selected_gps[i][0][1], selected_gps[i][0][0]], zoom_start=15)
    print(len(label_gps[i]), len(outputs_gps[i])) 
    m = plot_on_map(label_gps[i], 'blue', m)
    m = plot_on_map(outputs_gps[i], 'red', m)
    m.save('results/{}_{}.html'.format(emb_model, i))
    print('results/{}_{}.html'.format(emb_model, i))

430 220
-8.5810576 41.1626066
-8.5810984 41.1628018
-8.5811456 41.1630178
-8.5812089 41.1631812
-8.5812642 41.1632991
-8.5814151 41.1636211
-8.582343 41.1653233
-8.5793775 41.1483683
-8.5792124 41.1485993
-8.5789795 41.1489896
-8.5788626 41.1493052
-8.5788068 41.1495113
-8.5787907 41.1496759
-8.5787812 41.1498356
-8.5938865 41.1713532
-8.5942438 41.1715311
-8.5945238 41.1716668
-8.5948342 41.1717775
-8.595151 41.1718512
-8.5953139 41.1718772
-8.5954794 41.1718939
-8.5958202 41.1719027
-8.59615 41.1718873
-8.5962995 41.1718764
-8.5964826 41.171863
-8.5967824 41.1718256
-8.5970968 41.1717841
-8.5970968 41.1717841
-8.5972761 41.1717442
-8.5976502 41.1716611
-8.5984665 41.171497
-8.5990121 41.1713899
-8.5996552 41.17129
-8.5999122 41.1712674
-8.6002377 41.1712524
-8.6002844 41.1712502
-8.6005164 41.1712527
-8.6008317 41.1712629
-8.6248431 41.1732342
-8.625445 41.1731729
-8.6262534 41.1730907
-8.6267878 41.1730336
-8.6270731 41.1730093
-8.6275707 41.1729486
-8.628169 41.1728587
-8.6285898 4