## 数据导入

输入数据：用户的位置信息（包括纬度和经度），用户信息 = {LATITUDE, LONGITUDE}

输出数据：用户信息 = {USER_ID, LATITUDE, LONGITUDE, theta, v, H, L}

In [2]:
import pandas as pd
from random import *

# Load the generated user positions, then upper-case the coordinate column
# names so the rest of the notebook can refer to LATITUDE / LONGITUDE.
user_data = pd.read_csv('./data/users-melbcbd-generated.csv')
user_data = user_data.rename(columns={'Latitude': 'LATITUDE', 'Longitude': 'LONGITUDE'})

### 车辆和行人的标注

0：行人，占80%

1：车辆，占20%

In [3]:
# Label column 'what': 0 = pedestrian, 1 = vehicle. Every user starts as a pedestrian.
# Take an explicit copy: pd.DataFrame(<DataFrame>) does not copy its input by
# default, so columns written below could otherwise alias user_data.
data_with_label = user_data.copy()
data_with_label['what'] = 0

In [4]:
# Randomly pick 20% of the users to be vehicles (unseeded, so each run differs).
cars = data_with_label.sample(frac=0.2, random_state=None)
# Reset the label first so re-running this cell does not accumulate more than 20% vehicles.
data_with_label['what'] = 0
# cars.index holds index *labels*, so select by label and by column name rather
# than feeding labels to the positional .iloc with a hard-coded column position.
data_with_label.loc[cars.index, 'what'] = 1
data_with_label[data_with_label['what'] == 1]

Unnamed: 0,LATITUDE,LONGITUDE,what
2,-37.819892,144.957305,1
3,-37.814524,144.953632,1
11,-37.815119,144.967232,1
20,-37.811498,144.957851,1
30,-37.814437,144.964244,1
...,...,...,...
781,-37.813241,144.959806,1
782,-37.812790,144.966180,1
793,-37.812350,144.970861,1
794,-37.814218,144.956719,1


### 生成用户运动初始角度

$\theta \in [0, 2\pi)$

In [4]:
import numpy as np
# One initial heading per user, drawn uniformly from [0, 2*pi) radians.
# The scalar draw is kept per row so the sequence of RNG calls is unchanged.
thetas = [np.random.random() * 2 * np.pi for _ in range(data_with_label.shape[0])]
data_with_label['theta'] = thetas
data_with_label

Unnamed: 0,LATITUDE,LONGITUDE,what,theta
0,-37.814619,144.974443,0,3.911996
1,-37.810140,144.970454,1,0.541161
2,-37.819892,144.957305,1,2.542401
3,-37.814524,144.953632,0,5.392975
4,-37.814100,144.963000,0,4.857976
...,...,...,...,...
811,-37.813049,144.957795,0,5.816375
812,-37.815577,144.963441,0,2.616584
813,-37.808372,144.969200,0,4.173688
814,-37.809588,144.968573,0,4.787927


### 生成速度大小

1. 行人 0.44704m/s~1.34112m/s
2. 车辆 2.68224m/s~10.72896m/s

In [5]:
# Speed bounds in m/s: *_p for pedestrians, *_c for vehicles (cars).
min_p, max_p = 0.44704, 1.34112
min_c, max_c = 2.68224, 10.72896


def set_random_v(row):
    """Draw a uniformly distributed random speed (m/s) for one user row.

    Args:
        row: mapping-like row exposing a 'what' entry; 0 marks a pedestrian,
            any other value is treated as a vehicle.

    Returns:
        A float in [min_p, max_p) for pedestrians, otherwise in [min_c, max_c).
    """
    is_pedestrian = row['what'] == 0
    low, high = (min_p, max_p) if is_pedestrian else (min_c, max_c)
    # Scale a single [0, 1) draw onto the selected interval.
    return low + (high - low) * np.random.random()

# Evaluate set_random_v once per user row and store the resulting speed in column 'v'.
data_with_label['v'] = data_with_label.apply(func=set_random_v, axis='columns')
data_with_label

Unnamed: 0,LATITUDE,LONGITUDE,what,theta,v
0,-37.814619,144.974443,0,3.911996,0.771104
1,-37.810140,144.970454,1,0.541161,9.747556
2,-37.819892,144.957305,1,2.542401,5.441698
3,-37.814524,144.953632,0,5.392975,0.758916
4,-37.814100,144.963000,0,4.857976,1.181466
...,...,...,...,...,...
811,-37.813049,144.957795,0,5.816375,1.112096
812,-37.815577,144.963441,0,2.616584,0.753113
813,-37.808372,144.969200,0,4.173688,0.920727
814,-37.809588,144.968573,0,4.787927,1.111458


### 生成用户初始QoS等级

设等级分为1~3三个等级，H为用户期待的最高等级，L为用户可接受的最低等级（1 ≤ L ≤ H）

In [6]:
# QoS levels run from min_level to max_level, both inclusive.
min_level, max_level = 1, 3
# Per-user accumulators: H = highest level expected, L = lowest level accepted.
H, L = [], []

def get_low(high, min_level=1):
    """Draw a user's lowest acceptable QoS level, uniformly from min_level..high.

    The previous int(round(random() * (high - min_level) + 1)) was not uniform:
    rounding gives the two end levels half the probability of each interior
    level, and its "+ 1" offset was only correct while min_level == 1.

    Args:
        high: the user's highest expected level (inclusive upper bound).
        min_level: smallest level that may be returned (inclusive); defaults to 1.

    Returns:
        An int L with min_level <= L <= high.

    Raises:
        ValueError: if high < min_level (raised by randint on an empty range).
    """
    # randint is inclusive on both ends and uniform over the integers.
    return randint(min_level, high)

# H: highest QoS level each user expects, uniform over min_level..max_level.
# randint is inclusive and uniform; the previous int(round(random() * span + 1))
# gave the two end levels half the weight of the middle one and hard-coded the
# "+ 1" offset instead of using min_level.
H = [randint(min_level, max_level) for _ in range(data_with_label.shape[0])]
# L: lowest level each user accepts, drawn by get_low so that it never exceeds that user's H.
L = [get_low(h_num) for h_num in H]

data_with_label["H"] = H
data_with_label["L"] = L

In [7]:
# The pedestrian/vehicle label is only needed while generating speeds; leave it out of the export frame.
result = data_with_label.drop(columns=['what'])

### 数据导出

In [8]:
# Add a 1-based USER_ID as the first column. The id range is derived from the
# frame length instead of a hard-coded 817, so a different number of input rows
# does not raise a length mismatch; the guard keeps the cell re-runnable, since
# insert() raises if the column already exists.
if 'USER_ID' not in result.columns:
    result.insert(0, 'USER_ID', range(1, len(result) + 1))
# utf-8-sig writes a BOM at the start of the file.
result.to_csv("./data/行人_id_速度_角度.csv", sep=',', index=False, header=True, encoding='utf-8-sig')