In [1]:
import pickle, csv
import numpy as np
import folium
from sklearn.isotonic import IsotonicRegression
import traj_dist.distance as tdist

In [2]:
# load t2vec cells and embeddings
def load_t2vec(data_dir="data", embed_dim=128):
    """Load the pickled t2vec embeddings, cell sequences and hot-cell lookup.

    Parameters
    ----------
    data_dir : str
        Root directory of the dataset. Defaults to ``"data"``, the location
        that used to be hard-coded.
    embed_dim : int
        Embedding dimensionality; only used to select the
        ``embeddings/t2vec/<embed_dim>`` sub-directory. Defaults to 128.

    Returns
    -------
    list
        ``[train_embed, test_embed, val_embed, train_cell, test_cell,
        val_cell, hotcell2gps]`` -- always in this order.

    Raises
    ------
    OSError
        If any of the expected files cannot be opened.
    """
    embed_dir = "{}/embeddings/t2vec/{}".format(data_dir, embed_dim)
    cell_dir = "{}/cell_seqs/t2vec_partition".format(data_dir)

    # The order of this list defines the order of the returned list.
    paths = [
        embed_dir + "/t2vec_train",
        embed_dir + "/t2vec_test",
        embed_dir + "/t2vec_val",
        cell_dir + "/traincell",
        cell_dir + "/testcell",
        cell_dir + "/valcell",
        cell_dir + "/hotcell2gps",
    ]

    # NOTE: pickle.load can execute arbitrary code -- only use trusted files.
    data = []
    for path in paths:
        with open(path, "rb") as f:
            data.append(pickle.load(f))
    return data

# load trajectories (gps_seqs)
def load_trajs(data_dir="data"):
    """Load the pickled GPS trajectories of the train / test / val splits.

    Parameters
    ----------
    data_dir : str
        Root directory of the dataset. Defaults to ``"data"``, the location
        that used to be hard-coded.

    Returns
    -------
    list
        ``[trainTrajs, testTrajs, valTrajs]`` -- always in this order.

    Raises
    ------
    OSError
        If any of the expected files cannot be opened.
    """
    gps_dir = "{}/gps_seqs".format(data_dir)

    # NOTE: pickle.load can execute arbitrary code -- only use trusted files.
    data = []
    for file_name in ("traingps", "testgps", "valgps"):
        with open(gps_dir + "/" + file_name, "rb") as f:
            data.append(pickle.load(f))
    return data

In [3]:
# Read everything from disk once: the t2vec artefacts (embeddings, cell
# sequences, hot-cell lookup) and the raw GPS trajectories.
t2vec_data = load_t2vec()
gps_data = load_trajs()

# Unpack into the individual names used by the rest of the notebook.
(
    t2vecTrain_Embed, t2vecTest_Embed, t2vecVal_Embed,
    t2vecTrain_Cell, t2vecTest_Cell, t2vecVal_Cell,
    hotcell2gps,
) = t2vec_data
trainTrajs, testTrajs, valTrajs = gps_data


'''
************** shapes of all loaded data ***************
t2vecTrain_Embed: (180000, 128) (seqs, dim)
t2vecTest_Embed: (10000, 128)
t2vecVal_Embed: (10000, 128)
t2vecTrain_Cell: (180000, cell_length) (seqs, cell_length)
trainTrajs: (180000, traj_length, 2) (seqs, traj_length, (lng, lat))
cells - vocab_size: 18862 hot cells + 4 special tokens; the hot-cell indices run from 4 to 18865
hotcell2gps: a dict with 18862 entries, hotcell -> [lat, lng]; note that this coordinate order is the reverse of trainTrajs' (lng, lat)
'''


'\n**************the shape of all datums***************\nt2vecTrain_Embed: (180000, 128) (seqs, dim)\nt2vecTest_Embed: (10000, 128)\nt2vecVal_Embed: (10000, 128)\nt2vecTrain_Cell: (180000, cell_lenth) (seqs, cell_lenth)\ntrainTrajs: (180000, traj_length, 2) (seqs, traj_length, (lng,lat))\ncells - vocab_size: 18862 + 4 special characters, the hotcell index is from 4 to 18865\nhotcell2gps: a dict, (18862, 2), (hotcell, [lat,lng]) which is different from the trainTrajs\n'

In [4]:
# anchor trajs & landmarks
# `landmarks` holds row indices of the anchor trajectories; the later cells use
# them to index valTrajs / t2vecVal_Embed.
landmarks = [520, 1607, 5819, 4532, 4865, 2892, 4064, 3132, 6772, 111]    # 10 landmarks at first

# Single knob for the size of the train subset, so the three parallel
# containers can never be truncated to different lengths by accident.
NUM_TRAIN = 1000    # use 1000 train trajs at first
t2vecTrain_Cell = t2vecTrain_Cell[:NUM_TRAIN]
trainTrajs = trainTrajs[:NUM_TRAIN]
t2vecTrain_Embed = t2vecTrain_Embed[:NUM_TRAIN]

# The validation split (t2vecVal_Cell / valTrajs / t2vecVal_Embed) is
# deliberately left untruncated.


In [5]:
# ********* distance and similarity relative to the anchor trajectories *********
# 1. The val set is the auxiliary set; the train set is the set to be attacked.
# 2. Compute distance and similarity between every pair of anchor trajectories.
# 3. Fit an isotonic regression on those pairs so that distance = f(similarity).
# 4. Compute the similarity between the anchors and every train / val
#    trajectory and predict the matching distance with the fitted regression.
# Pipeline: gps seqs -> Hausdorff distance (traj_dist);
#           embeddings -> cosine similarity.


def _clamped_cosine_sim(queries, anchors):
    """Cosine similarity of every row of `queries` with every row of `anchors`.

    Returns an array of shape (len(queries), len(anchors)). Values above 1
    are set to 1.0 and negative values are set to 1e-6, the same clamping the
    per-pair code applied before feeding similarities to the regression.
    """
    queries = np.asarray(queries, dtype=np.float64)
    anchors = np.asarray(anchors, dtype=np.float64)
    norms = np.outer(np.linalg.norm(queries, axis=1), np.linalg.norm(anchors, axis=1))
    sim = (queries @ anchors.T) / norms
    sim = np.where(sim > 1, 1.0, sim)
    sim = np.where(sim < 0, 1e-6, sim)
    return sim


def _anchor_profile(embeds, anchor_embeds, regressor):
    """Return (predicted_dist, sim) of every row of `embeds` to every anchor.

    Both arrays have shape (len(embeds), len(anchor_embeds)). Distances are
    predicted from the similarities in a single vectorised `predict` call.
    """
    sim = _clamped_cosine_sim(embeds, anchor_embeds)
    if sim.size == 0:
        # Nothing to predict; keep the empty (0, n_anchors) shape.
        return np.zeros(sim.shape), sim
    dist = regressor.predict(sim.ravel()).reshape(sim.shape)
    return dist, sim


num_landmarks = len(landmarks)
anchor_embed = np.asarray(t2vecVal_Embed)[landmarks]    # one embedding row per anchor

# use gps seqs and embeddings to compute the distance and similarity
# between every pair of anchor trajs (flattened row-major: index i * n + j)
anchor_dist = np.zeros(num_landmarks * num_landmarks)
for i, idx1 in enumerate(landmarks):
    for j, idx2 in enumerate(landmarks):
        # Hausdorff distance between the two anchors' GPS sequences
        anchor_dist[i * num_landmarks + j] = tdist.hausdorff(valTrajs[idx1], valTrajs[idx2])
anchor_sim = _clamped_cosine_sim(anchor_embed, anchor_embed).ravel()

# simulate the distance from the similarity by isotonic regression, distance = f(similarity)
# One sample per distinct distance: its similarity is the mean over all pairs at that distance.
unique_dist = np.unique(anchor_dist)
unique_sim = np.array([np.mean(anchor_sim[anchor_dist == d]) for d in unique_dist])
iso_reg = IsotonicRegression(out_of_bounds="clip", increasing=False)
iso_reg.fit(unique_sim, unique_dist)    # similarity -> distance
# print(iso_reg.predict([0.5, 0.6, 0.7, 0.8, 0.9, 1.0]))    # test the iso_reg

# similarity and predicted distance between the anchor trajs and the train trajs
anchor2train_dist, anchor2train_sim = _anchor_profile(
    np.asarray(t2vecTrain_Embed)[:len(t2vecTrain_Cell)], anchor_embed, iso_reg
)
print(np.shape(anchor2train_dist), np.shape(anchor2train_sim))

# similarity and predicted distance between the anchor trajs and the val trajs
anchor2val_dist, anchor2val_sim = _anchor_profile(
    np.asarray(t2vecVal_Embed)[:len(t2vecVal_Cell)], anchor_embed, iso_reg
)
print(np.shape(anchor2val_dist), np.shape(anchor2val_sim))



(1000, 10) (1000, 10)
(10000, 10) (10000, 10)


In [6]:
# find the best val traj for each train traj
# Entry [i, j] is the L1 distance between the anchor profile of train traj i
# and the anchor profile of val traj j (once on predicted distances, once on
# similarities). Each row is computed with one vectorised expression instead
# of a Python loop over every (train, val) pair; going row by row keeps the
# temporary at (n_val, n_landmarks) rather than materialising the full
# (n_train, n_val, n_landmarks) cube.
diff_dist_list = np.zeros((len(anchor2train_dist), len(anchor2val_dist)))
diff_sim_list = np.zeros((len(anchor2train_sim), len(anchor2val_sim)))
for i in range(len(anchor2train_dist)):
    diff_dist_list[i] = np.abs(anchor2val_dist - anchor2train_dist[i]).sum(axis=1)
    diff_sim_list[i] = np.abs(anchor2val_sim - anchor2train_sim[i]).sum(axis=1)
print(np.shape(diff_dist_list), np.shape(diff_sim_list))

(1000, 10000) (1000, 10000)


In [16]:
# use relate_dist_list replace diff_dist_list (plain aliases, no copy is made)
relate_dist_list = diff_dist_list
relate_sim_list = diff_sim_list

# find the index list of val trajs for each train traj:
# row i lists the val-trajectory indices sorted by ascending difference to
# train traj i, ranked by predicted distance (val_idx_list) and by
# similarity (val_sim_list -- despite the name it also holds indices).
val_idx_list = np.argsort(relate_dist_list, axis=1).astype(np.int32)
val_sim_list = np.argsort(relate_sim_list, axis=1).astype(np.int32)

0.0 0.02873859728467551 0.032332606599789454


In [19]:
choose_num = 10    # index of the train traj to visualise
top_k = 1          # how many of its best-ranked val trajs to overlay

# show the best val traj for each train traj on the map
# Trajectory points are stored as (lng, lat) while folium wants [lat, lng],
# hence the index swap. The map is centred on the first point of the
# trajectory that is actually drawn, taking BOTH coordinates from that point.
start_point = trainTrajs[choose_num][0]
my_map = folium.Map(location = [float(start_point[1]), float(start_point[0])], zoom_start = 15,
                    tiles='https://mt.google.com/vt/lyrs=m&x={x}&y={y}&z={z}', attr='default')
for t in [choose_num]:
    # train traj in red
    traj1 = trainTrajs[t]
    for point in traj1:
        folium.CircleMarker(location = [point[1], point[0]], radius = 3, color = 'red').add_to(my_map)
    # its closest val traj(s), ranked by predicted distance, in blue
    for i in range(top_k):
        # print("anchor traj: %d, val traj: %d" % (t, val_idx_list[t][i]))
        traj2 = valTrajs[val_idx_list[t][i]]
        for point in traj2:
            folium.CircleMarker(location = [point[1], point[0]], radius = 3, color = 'blue').add_to(my_map)
my_map

In [8]:
def _container_shape(seq):
    """Shape of `seq`, falling back to ``(len(seq),)`` for ragged sequences.

    np.shape() has to build an array first; for a Python sequence of
    variable-length trajectories that emits a deprecation warning on older
    NumPy and raises ValueError on NumPy >= 1.24.
    """
    if isinstance(seq, np.ndarray):
        return seq.shape
    try:
        return np.shape(seq)
    except ValueError:
        return (len(seq),)


# Sanity check: outer shape of every loaded container.
for label, container in [
    ("t2vecTrain_Embed", t2vecTrain_Embed),
    ("t2vecTest_Embed", t2vecTest_Embed),
    ("t2vecVal_Embed", t2vecVal_Embed),
    ("t2vecTrain_Cell", t2vecTrain_Cell),
    ("t2vecTest_Cell", t2vecTest_Cell),
    ("t2vecVal_Cell", t2vecVal_Cell),
    ("trainTrajs", trainTrajs),
    ("testTrajs", testTrajs),
    ("valTrajs", valTrajs),
]:
    print(label + ": ", _container_shape(container))

# A single sample: its type, its shape and its first element.
print(type(t2vecTrain_Cell[0]))
print(type(trainTrajs[0]))
print(np.shape(t2vecTrain_Cell[0]))
print(np.shape(trainTrajs[0]))
print(t2vecTrain_Cell[0][0])
print(trainTrajs[0][0])

# hotcell2gps is keyed by the cell index as a string.
print(len(hotcell2gps))
print(len(hotcell2gps[str(4)]))
print(hotcell2gps[str(4)])

t2vecTrain_Embed:  (1000, 128)
t2vecTest_Embed:  (10000, 128)
t2vecVal_Embed:  (10000, 128)
t2vecTrain_Cell:  (1000,)
t2vecTest_Cell:  (10000,)
t2vecVal_Cell:  (10000,)
trainTrajs:  (1000,)
testTrajs:  (10000,)
valTrajs:  (10000,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(17,)
(38, 2)
83
[-8.627103 41.166954]
18862
2
[41.145731452148006, -8.610735333149446]


  result = asarray(a).shape
