In [20]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import gmplot
import torch
from torch import nn
from tqdm import tqdm
from tqdm import trange
import time
from tqdm import trange
import h3
from collections import OrderedDict
from sklearn.preprocessing import  OneHotEncoder

def geo_t_h3(data):
    h3_list =OrderedDict()

    for i in data:
        a = h3.geo_to_h3(i[0], i[1], 10)
        h3_list.setdefault(a)
    return h3_list
def h3_t_geo(data):
    new_list = []
    for i in data:
        i =h3.h3_to_geo(i)
        new_list.append(i)
    return new_list

# draw(,130,10)
# list,文件名，放大率
def draw(list,number,b):
    new_list = torch.tensor(list)
    lat = []
    lng = []
    for i in new_list:
        lat.append(i[0])
        lng.append(i[1])
    lat = torch.stack(lat)
    lng =torch.stack(lng)
    gmap = gmplot.GoogleMapPlotter(lat[0], lng[0], b)
    gmap.plot(lat, lng,color='r',lw=10)  #描绘轨迹点
    gmap.draw("user{}.html".format(number))   #显示图
    print("over")
# 这个提取出来有5个维度
def dataset(user,start,end,step):
    # user:第几个用户
    # filenumber:取前面几天数据
    # step：隔了多少步取一次
    userdata =  '/Users/zhuhe/PycharmProjects/python_spring01/Geolife Trajectories 1.3/Data/' + user + '/Trajectory/'
    filelist = os.listdir(userdata)  #返回指定路径下所有文件和文件夹的名字，并存放于一个列表中
    filelist.sort()
    names = ['lat','lng','zero','alt','days','date','time']
    df_list = [# f为文件索引号，header为列数，names为列表列名，index_col为行索引的列编号或列名
    pd.read_csv(userdata + f,header=6,names=names,index_col=False)
    for f in filelist[start:end]]
    df = pd.concat(df_list, ignore_index=True) #表格列字段不同的表合并
    df.drop(['zero', 'days'], axis=1, inplace=True) #drop函数默认删除行，列需要加axis = 1
    df_min = df.iloc[::step, :]
    return df_min
# 这个提取出来有2个维度
def synthetic_data(df_min):
    a =df_min['lat'].tolist()
    b = df_min['lng'].tolist()
    a = torch.tensor(a,dtype=torch.float,requires_grad=True).reshape((-1, 1))
    b = torch.tensor(b,dtype=torch.float,requires_grad=True).reshape((-1, 1))
    features = torch.concat([a,b],1)
    return features
# 将(lat,lng)变成哈希值
def generate_h3_list(data):
    alist = geo_t_h3(data)
    LIST = list(alist.keys())
    return np.array(LIST)


# 这个提取出来有5个维度
train_dataset = dataset("006",0,20,60)
test_dataset =  dataset("006",20,25,60)

# 这个提取出来有2个维度
train_data = synthetic_data(train_dataset)
test_data = synthetic_data(test_dataset)
# concat()
all_data =torch.concat([train_data,test_data],0)



Train_h3_list  = generate_h3_list(train_data)
TESETLIST  = generate_h3_list(test_data)
all_data_h3_list = generate_h3_list(all_data)
# 这个all_data_h3_list可以先shuffle一下


# 可以通过idnex找到对应的h3
vocab_reverse  = dict(enumerate(all_data_h3_list))
# 可以通过h3编码找到对应的index
vocab ={h3:i for i ,h3 in vocab_reverse.items()}

encoder = OneHotEncoder(sparse=False).fit(all_data_h3_list.reshape(-1,1))

#y是一个列表，k是步长
def dataloader(y,k):
    data =[]
    for i in range(len(y)-k+1):
        indata = y[i:i+k]
        outdata = y[i+k:i+k+1]
        data.append((indata,outdata))
    return data
train_dataloader = dataloader(Train_h3_list,10)
test_dataloader =dataloader(test_data,10)




# 提升：
# 1。如何生成能够进行批训练的dataloader
# 2。修改学习率,步子长短,取样的时间达到更好的效果
# 3。现在的预测只能预测下一个点，如果想预测轨迹，应当把预测的点也当作数据输入模型









In [21]:
class RNN(nn.Module):
    def __init__(self,vocab) -> None:
        super().__init__()
        self.vocab = vocab
        self.model =nn.LSTM(
            input_size=vocab,
            hidden_size=320,
            batch_first=True,
            bias=True
        )
        self.output = nn.Sequential(
            nn.Linear(320,vocab),
            nn.ReLU()
        )

    def forward(self,x):
        r_out,(h_out,c_out) = self.model(x.view(1,10,self.vocab) ,None)
        outdata = self.output(r_out[:,-1,:])
        return outdata
net = RNN(len(all_data_h3_list))
optimizer = torch.optim.SGD(net.parameters(),lr = 0.1,momentum=0.8)
optimizer_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
loss_function = nn.CrossEntropyLoss()


In [22]:
for epoch in trange(1000):
    loss_ =0
    for i , k in train_dataloader[:-1]:
        optimizer.zero_grad()
        a = encoder.transform(i.reshape(-1,1))
        k = encoder.transform(k.reshape(-1,1))
        a =torch.tensor(a).to(torch.float32)
        k =torch.tensor(k).to(torch.float32)
        loss = loss_function(net(a),k)
        loss.backward()
        optimizer.step()
        loss_ = loss_+loss
    print(loss_)

  0%|          | 1/1000 [00:02<40:31,  2.43s/it]

tensor(1585.1748, grad_fn=<AddBackward0>)


  0%|          | 2/1000 [00:04<37:42,  2.27s/it]

tensor(1584.8154, grad_fn=<AddBackward0>)


  0%|          | 3/1000 [00:06<36:46,  2.21s/it]

tensor(1584.4099, grad_fn=<AddBackward0>)


  0%|          | 4/1000 [00:08<36:22,  2.19s/it]

tensor(1584.1794, grad_fn=<AddBackward0>)


  0%|          | 5/1000 [00:11<36:02,  2.17s/it]

tensor(1583.7769, grad_fn=<AddBackward0>)


  1%|          | 6/1000 [00:13<35:49,  2.16s/it]

tensor(1583.3961, grad_fn=<AddBackward0>)


  1%|          | 7/1000 [00:15<35:39,  2.15s/it]

tensor(1582.9591, grad_fn=<AddBackward0>)


  1%|          | 8/1000 [00:17<35:45,  2.16s/it]

tensor(1582.4237, grad_fn=<AddBackward0>)


  1%|          | 9/1000 [00:19<36:06,  2.19s/it]

tensor(1581.7617, grad_fn=<AddBackward0>)


  1%|          | 10/1000 [00:21<35:50,  2.17s/it]

tensor(1580.8951, grad_fn=<AddBackward0>)


  1%|          | 11/1000 [00:24<35:37,  2.16s/it]

tensor(1579.6691, grad_fn=<AddBackward0>)


  1%|          | 12/1000 [00:26<35:28,  2.15s/it]

tensor(1577.9634, grad_fn=<AddBackward0>)


  1%|▏         | 13/1000 [00:28<36:26,  2.22s/it]

tensor(1575.8523, grad_fn=<AddBackward0>)


  1%|▏         | 14/1000 [00:30<36:37,  2.23s/it]

tensor(1571.4502, grad_fn=<AddBackward0>)


  2%|▏         | 15/1000 [00:33<37:09,  2.26s/it]

tensor(1561.0111, grad_fn=<AddBackward0>)


  2%|▏         | 15/1000 [00:33<36:41,  2.24s/it]


KeyboardInterrupt: 

In [23]:

test_dataloader = dataloader(TESETLIST,10)

# 能否根据当前的预测取进行下一步的预测
lat_l =[]
for i , k in test_dataloader[:-1]:
        a = encoder.transform(i.reshape(-1,1))
        k = encoder.transform(k.reshape(-1,1))
        a =torch.tensor(a).to(torch.float32)
        k =torch.tensor(k).to(torch.float32)
        out = net(a)
        out = out.detach().numpy()
        index = np.argmax(out)
        print(index)
        d =vocab_reverse.get(index)
        f = h3.h3_to_geo(str(d))
        lat_l.append(f)

draw(lat_l,150,10)





162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
162
over
