In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
color_pal = sns.color_palette()
plt.style.use('fivethirtyeight')
# Configuration
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
import folium
from folium.plugins import HeatMap,HeatMapWithTime

In [2]:
gps = pd.read_csv('junction_name_gps.csv')

In [3]:
gps.head()

Unnamed: 0,junction_name,longitude,latitude
0,青龙港路-民乐路,120.633223,31.421653
1,青龙港路-汇流街,120.645592,31.415857
2,民乐路-南天成路,120.634308,31.424413
3,北天成路-吴韵路,120.642567,31.424669
4,水景路-南天成路,120.638275,31.422365


In [4]:
df = pd.read_csv('交通出行优化赛题数据集(交叉口流量).csv',encoding="GBK")

In [5]:
df.head()


Unnamed: 0,junction_name,junction_approach,veh_turn_dir,phase_id,timestamp,volume
0,青龙港路-民乐路,A,l,7,230612000000,0
1,青龙港路-民乐路,A,r,13,230612000000,3
2,青龙港路-民乐路,B,r,14,230612000000,3
3,青龙港路-民乐路,B,s,2,230612000000,11
4,青龙港路-民乐路,D,l,10,230612000000,6


In [6]:
## preprocess the data
categorical_feature = ['junction_name','phase_id','junction_approach','veh_turn_dir']
def preprocess(df):
    df['timestamp'] = int(20e12)+df['timestamp']
    df['timestamp'] = df['timestamp'].astype('string')
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.sort_values(by = ['junction_name','phase_id','timestamp'],inplace = True)
    #df.set_index('timestamp',inplace = True)
    ##lgb can only handle categorical features
    for c in categorical_feature:
        df[c] = df[c].astype('category')
    return df

df = preprocess(df)

In [7]:
df.head()

Unnamed: 0,junction_name,junction_approach,veh_turn_dir,phase_id,timestamp,volume
26,北天成路-吴韵路,A,s,1,2023-06-12 00:00:00,0
140,北天成路-吴韵路,A,s,1,2023-06-12 00:15:00,0
254,北天成路-吴韵路,A,s,1,2023-06-12 00:30:00,0
368,北天成路-吴韵路,A,s,1,2023-06-12 00:45:00,0
482,北天成路-吴韵路,A,s,1,2023-06-12 01:00:00,0


In [9]:
traffic_flow = df.groupby(['junction_name','timestamp']).volume.sum().reset_index()
traffic_flow.head()

Unnamed: 0,junction_name,timestamp,volume
0,北天成路-吴韵路,2023-06-12 00:00:00,1
1,北天成路-吴韵路,2023-06-12 00:15:00,6
2,北天成路-吴韵路,2023-06-12 00:30:00,3
3,北天成路-吴韵路,2023-06-12 00:45:00,1
4,北天成路-吴韵路,2023-06-12 01:00:00,1


In [10]:
traffic_flow = traffic_flow.merge(gps, on = 'junction_name')
traffic_flow.head()

Unnamed: 0,junction_name,timestamp,volume,longitude,latitude
0,北天成路-吴韵路,2023-06-12 00:00:00,1,120.642567,31.424669
1,北天成路-吴韵路,2023-06-12 00:15:00,6,120.642567,31.424669
2,北天成路-吴韵路,2023-06-12 00:30:00,3,120.642567,31.424669
3,北天成路-吴韵路,2023-06-12 00:45:00,1,120.642567,31.424669
4,北天成路-吴韵路,2023-06-12 01:00:00,1,120.642567,31.424669


In [11]:
traffic_flow.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27930 entries, 0 to 27929
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   junction_name  27930 non-null  object        
 1   timestamp      27930 non-null  datetime64[ns]
 2   volume         27930 non-null  int64         
 3   longitude      27930 non-null  float64       
 4   latitude       27930 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(1)
memory usage: 1.3+ MB


In [62]:
map_hooray = folium.Map(location=[31.369189, 120.642391],
                    zoom_start = 15) 
heat_df = traffic_flow[traffic_flow.timestamp == '2023-06-22 19:30:00']
# Filter the DF for rows, then columns, then remove NaNs
heat_df = heat_df[['latitude', 'longitude']]
heat_df = heat_df.dropna(axis=0, subset=['latitude','longitude'])
# List comprehension to make out list of lists
heat_data = [[row['latitude'],row['longitude']] for index, row in heat_df.iterrows()]
# Plot it on the map
HeatMap(heat_data).add_to(map_hooray)
# Display the map
map_hooray

In [56]:
traffic_flow.head()

Unnamed: 0,junction_name,timestamp,volume,longitude,latitude
0,北天成路-吴韵路,2023-06-12 00:00:00,1,120.642567,31.424669
1,北天成路-吴韵路,2023-06-12 00:15:00,6,120.642567,31.424669
2,北天成路-吴韵路,2023-06-12 00:30:00,3,120.642567,31.424669
3,北天成路-吴韵路,2023-06-12 00:45:00,1,120.642567,31.424669
4,北天成路-吴韵路,2023-06-12 01:00:00,1,120.642567,31.424669


In [23]:
heat_data = [[[row['latitude'],row['longitude']] for index, row in traffic_flow[traffic_flow['volume'] == i].iterrows()] for i in range(0,200)]

In [25]:
# Create weight column, using date
# heat_df['Weight'] = df_acc['Date'].str[3:5]
# heat_df['Weight'] = heat_df['Weight'].astype(float)
# heat_df = heat_df.dropna(axis=0, subset=['Latitude','Longitude', 'Weight'])

# # List comprehension to make out list of lists
# heat_data = [[[row['Latitude'],row['Longitude']] for index, row in heat_df[heat_df['Weight'] == i].iterrows()] for i in range(0,13)]

# Plot it on the map
map_hooray = folium.Map(location=[31.369189, 120.642391],
                    zoom_start = 15) 
hm = HeatMapWithTime(heat_data,auto_play=True,max_opacity=0.8)
hm.add_to(map_hooray)
# Display the map
map_hooray
#map_hooray.save('./test.html') 


In [12]:
data = [[[traffic_flow['latitude'],traffic_flow['longitude']] for index, row in traffic_flow[traffic_flow['timestamp'] == i].iterrows()] for i in traffic_flow['timestamp'].unique()]

In [18]:
len(data)

1995

In [22]:
map_hooray = folium.Map(location=[31.369189, 120.642391],
                    zoom_start = 15) 
hm = HeatMapWithTime(data,auto_play=True,max_opacity=0.8)
hm.add_to(map_hooray)
# Display the map
map_hooray

In [12]:
list_index = traffic_flow.timestamp
weight_list = traffic_flow[['longitude','latitude','volume']].values.tolist()

In [13]:
weight_list

[[120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 6.0],
 [120.642567, 31.424669, 3.0],
 [120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 3.0],
 [120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 4.0],
 [120.642567, 31.424669, 4.0],
 [120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 3.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 2.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 1.0],
 [120.642567, 31.424669, 0.0],
 [120.642567, 31.424669, 6.0],
 [120.642567, 31.424669, 7.0],
 [120.642567, 31.424669, 7.0],
 [120.642567, 31.424669, 8.0],
 [120.642567, 31.424669, 21.0],
 [120.642567, 31.424669, 14.0],
 [120.642567, 31.424669, 26.0],
 [120.642567, 31.424669, 28.0],
 [120.642567, 31.424669, 37.0],
 [120.642567, 31.424669, 50.0],
 [120.642567, 31.424669, 43.0],
 [120.642567, 31.424669, 41.0],


In [91]:
# m = folium.Map([31.369189, 120.642391], zoom_start=15)

# hm = plugins.HeatMapWithTime(weight_list)

# hm.add_to(m)

# m
hm = plugins.HeatMapWithTime(heat_data,auto_play=True,max_opacity=0.8)
hm.add_to(map_hooray)
# Display the map
map_hooray

In [58]:
lat_long_list = []
for i in traffic_flow['timestamp'].unique():
    temp=[]
    for index, instance in traffic_flow[traffic_flow['timestamp'] == i].iterrows():
        temp.append([instance['latitude'],instance['longitude']])
    lat_long_list.append(temp)

In [75]:
#converted Timestamp into datetime format and using it as an index
traffic_flow['timestamp']= pd.to_datetime(traffic_flow['timestamp'])
time_index = []
for i in traffic_flow['timestamp'].unique():
    time_index.append(i)
date_strings = [pd.to_datetime(str(d)).strftime('%d/%m/%Y, %H:%M:%S') for d in time_index]

In [78]:
lat_long_list

[[[31.424669, 120.642567],
  [31.423231, 120.636657],
  [31.421364, 120.640191],
  [31.424413, 120.634308],
  [31.422365, 120.638275],
  [31.420145, 120.637137],
  [31.423213, 120.627128],
  [31.419227, 120.640792],
  [31.419227, 120.640792],
  [31.415857, 120.645592],
  [31.415857, 120.645592],
  [31.421653, 120.633223],
  [31.415857, 120.645592],
  [31.415857, 120.645592]],
 [[31.424669, 120.642567],
  [31.423231, 120.636657],
  [31.421364, 120.640191],
  [31.424413, 120.634308],
  [31.422365, 120.638275],
  [31.420145, 120.637137],
  [31.423213, 120.627128],
  [31.419227, 120.640792],
  [31.419227, 120.640792],
  [31.415857, 120.645592],
  [31.415857, 120.645592],
  [31.421653, 120.633223],
  [31.415857, 120.645592],
  [31.415857, 120.645592]],
 [[31.424669, 120.642567],
  [31.423231, 120.636657],
  [31.421364, 120.640191],
  [31.424413, 120.634308],
  [31.422365, 120.638275],
  [31.420145, 120.637137],
  [31.423213, 120.627128],
  [31.419227, 120.640792],
  [31.419227, 120.640792],

In [77]:
#Choosing the map type 
m = folium.Map(location=[31.369189, 120.642391],zoom_start = 12)
#Plot it on the map
HeatMapWithTime(lat_long_list,radius=5,auto_play=True,position='bottomright',name="cluster",index=time_index,max_opacity=0.7).add_to(m)
# Display the map
m

## Try out the heatmap with folium

In [74]:
import folium
from folium.plugins import HeatMap,HeatMapWithTime
# data=[[ 39.90403 , 116.407526 , 1500 ]]
# 数据：经纬度和权重
data = [[31.421653, 120.633223, 400], [31.415857, 120.645592, 1], [31.424413, 120.634308, 350],[31.424669, 120.642567, 300],[31.422365,120.638275,390],[31.421364,120.640191,270],[31.440828,120.634992,410],[31.423213,120.627128,1],
        [31.415857,120.645592,1],[31.423231,120.636657,290],[31.420145,120.637137,380],[31.415857,120.645592,450]]
map_osm = folium.Map(location=[31.369189, 120.642391], zoom_start=20)    # 绘制Map，开始缩放程度是12倍
HeatMap(data).add_to(map_osm)                       # 将热力图添加到前面建立的map里
map_osm
#map_osm.save('./文章提及城市分布.html')       # 将绘制好的地图保存为html文件



In [48]:
map_osm


## heatmap with time plugin example

In [28]:
import folium
import folium.plugins as plugins
import numpy as np

np.random.seed(3141592)
initial_data = np.random.normal(size=(100, 2)) * np.array([[1, 1]]) + np.array(
    [[48, 5]]
)

move_data = np.random.normal(size=(100, 2)) * 0.01

data = [(initial_data + move_data * i).tolist() for i in range(100)]

In [29]:
data

[[[47.504539036025584, 4.292407029209247],
  [47.69040904564529, 4.252424765823684],
  [46.56622363785289, 3.169893299473773],
  [48.512345988527414, 3.5575115701151936],
  [47.912356200438545, 4.541688488218782],
  [47.7449114720645, 6.410987139161764],
  [49.27575216702049, 3.583654594233588],
  [47.82845322406549, 3.8219354061097293],
  [48.04493261439567, 6.31593606743332],
  [46.476548975848104, 4.6875193522227105],
  [48.22998111187808, 4.330735827816389],
  [49.09176755111292, 5.097273923224413],
  [48.27319149417738, 5.658018973640203],
  [48.122983644592075, 5.780950867261258],
  [46.44643679370932, 4.755224680829496],
  [49.29166945103235, 6.129019031044898],
  [47.001037365055055, 6.290837811317019],
  [46.8662712198302, 5.128384470003526],
  [48.60872159648292, 4.685247191774352],
  [48.847472424367105, 4.528416690851091],
  [49.82969363846115, 3.7616826932379714],
  [47.41209462975057, 5.286337622398916],
  [47.61135491854893, 4.894699684890315],
  [47.977061435131525, 5.5

In [30]:
data[0]

[[47.504539036025584, 4.292407029209247],
 [47.69040904564529, 4.252424765823684],
 [46.56622363785289, 3.169893299473773],
 [48.512345988527414, 3.5575115701151936],
 [47.912356200438545, 4.541688488218782],
 [47.7449114720645, 6.410987139161764],
 [49.27575216702049, 3.583654594233588],
 [47.82845322406549, 3.8219354061097293],
 [48.04493261439567, 6.31593606743332],
 [46.476548975848104, 4.6875193522227105],
 [48.22998111187808, 4.330735827816389],
 [49.09176755111292, 5.097273923224413],
 [48.27319149417738, 5.658018973640203],
 [48.122983644592075, 5.780950867261258],
 [46.44643679370932, 4.755224680829496],
 [49.29166945103235, 6.129019031044898],
 [47.001037365055055, 6.290837811317019],
 [46.8662712198302, 5.128384470003526],
 [48.60872159648292, 4.685247191774352],
 [48.847472424367105, 4.528416690851091],
 [49.82969363846115, 3.7616826932379714],
 [47.41209462975057, 5.286337622398916],
 [47.61135491854893, 4.894699684890315],
 [47.977061435131525, 5.546227988747274],
 [47.59

In [31]:
data[0][0]

[47.504539036025584, 4.292407029209247]

In [32]:
time_ = 0
N = len(data)
itensify_factor = 30
for time_entry in data:
    time_ = time_+1
    for row in time_entry:
        weight = min(np.random.uniform()*(time_/(N))*itensify_factor, 1)
        row.append(weight)

In [33]:
traffic_flow.head()

Unnamed: 0,junction_name,timestamp,volume,longitude,latitude
0,北天成路-吴韵路,2023-06-12 00:00:00,1,120.642567,31.424669
1,北天成路-吴韵路,2023-06-12 00:15:00,6,120.642567,31.424669
2,北天成路-吴韵路,2023-06-12 00:30:00,3,120.642567,31.424669
3,北天成路-吴韵路,2023-06-12 00:45:00,1,120.642567,31.424669
4,北天成路-吴韵路,2023-06-12 01:00:00,1,120.642567,31.424669


In [34]:
data

[[[47.504539036025584, 4.292407029209247, 0.08119206228563171],
  [47.69040904564529, 4.252424765823684, 0.029459762729677666],
  [46.56622363785289, 3.169893299473773, 0.28671650142980554],
  [48.512345988527414, 3.5575115701151936, 0.21243902523354166],
  [47.912356200438545, 4.541688488218782, 0.04877536410729145],
  [47.7449114720645, 6.410987139161764, 0.02380908606846566],
  [49.27575216702049, 3.583654594233588, 0.2112170384204783],
  [47.82845322406549, 3.8219354061097293, 0.10040553756656544],
  [48.04493261439567, 6.31593606743332, 0.1773643063085945],
  [46.476548975848104, 4.6875193522227105, 0.15691874165796654],
  [48.22998111187808, 4.330735827816389, 0.06613829999277396],
  [49.09176755111292, 5.097273923224413, 0.27716615457028265],
  [48.27319149417738, 5.658018973640203, 0.20379769696940994],
  [48.122983644592075, 5.780950867261258, 0.12713709875190354],
  [46.44643679370932, 4.755224680829496, 0.08973629121939901],
  [49.29166945103235, 6.129019031044898, 0.2457472

In [39]:
data = [[[traffic_flow['latitude'],traffic_flow['longitude'],traffic_flow['volume']] for index, row in traffic_flow[traffic_flow['timestamp'] == i].iterrows()] for i in traffic_flow['timestamp'].unique()]

In [46]:
m = folium.Map([31.369189, 120.642391], zoom_start=6)

hm = plugins.HeatMapWithTime(data[0])

hm.add_to(m)

m

In [86]:
import time
import requests
#the loop will pull live data every 30 secs and append it into a dataframe
cumulative = pd.DataFrame()
startTime = time.time()
for i in range(int(number_of_times)):
    url = ("https://api.data.gov.sg/v1/transport/taxi-availability")
    response = requests.get(url)
    data = response.json()
    df = pd.io.json.json_normalize(data['features'])
    coordinateslist = df['geometry.coordinates'].tolist()
    df1 = pd.DataFrame(coordinateslist)
    result = df1.transpose()
    result.columns = ['coordinates']
    result['Timestamp'] = (df['properties.timestamp'][0])
    cumulative = cumulative.append(result) 
    time.sleep(30)
endTime = time.time()
elapsedTime = endTime - startTime
print("Elapsed Time = %s" % elapsedTime,'seconds')

Elapsed Time = 305.72159600257874 seconds


In [84]:
#input how long you wish to see
number_of_times = 10

In [80]:
cumulative