# Heat map for each week's clusters

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.cluster as cluster
import time
import pandas as pd
from datetime import datetime as dt
%matplotlib inline
# Plot styling used by the notebook.
sns.set_context('poster')
sns.set_color_codes()
plot_kwds = dict(alpha=0.25, s=80, linewidths=0)

# Columns of the raw file that are dropped once the day string has been derived.
_DROPPED_COLUMNS = [
    'id',
    'time',
    'speedoverground',
    'courseoverground',
    'trueheading',
    'navigationalstatus',
    'rateofturn',
]


def _unix_to_day(timestamp):
    """Format a Unix timestamp as a 'YYYYMMDD' string.

    ``datetime.fromtimestamp`` without a ``tz`` argument converts to the local
    time zone of the machine running the notebook, so the day boundaries
    depend on where this cell is executed.
    """
    return dt.fromtimestamp(timestamp).strftime('%Y%m%d')


# Load the dataset and convert the Unix time into the YearMonthDay format.
# NOTE: the new column is called 'weeks', but it holds one day string per row;
# the grouping into weeks happens later.
data = pd.read_csv('nari_dynamic.csv')
data['weeks'] = data['time'].map(_unix_to_day)
data = data.drop(columns=_DROPPED_COLUMNS)
data.head()

Unnamed: 0,lon,lat,weeks
0,-4.465718,48.38249,20151001
1,-4.496571,48.38242,20151001
2,-4.644325,48.092247,20151001
3,-4.485108,48.38132,20151001
4,-4.495441,48.38366,20151001


In [11]:
# Create a dataframe with one row per week and one column per day of that week.
DAYS_PER_WEEK = 7

# Distinct 'YYYYMMDD' day strings. They are sorted explicitly because
# `unique()` returns values in order of first appearance, while the weekly
# filtering uses column 0 and column 6 of each row as the first and last day
# of the week. Fixed-width 'YYYYMMDD' strings sort chronologically.
weeks = np.sort(data['weeks'].unique())

# Keep only complete weeks; trailing days that do not fill a whole week are
# left out. Deriving the count from the data (instead of hard-coding 26 weeks /
# 182 days) avoids a reshape error when the file covers fewer days.
n_full_weeks = len(weeks) // DAYS_PER_WEEK
weeks_df = pd.DataFrame(
    weeks[:n_full_weeks * DAYS_PER_WEEK].reshape(n_full_weeks, DAYS_PER_WEEK)
)
weeks_df

Unnamed: 0,0,1,2,3,4,5,6
0,20151001,20151002,20151003,20151004,20151005,20151006,20151007
1,20151008,20151009,20151010,20151011,20151012,20151013,20151014
2,20151015,20151016,20151017,20151018,20151019,20151020,20151021
3,20151022,20151023,20151024,20151025,20151026,20151027,20151028
4,20151029,20151030,20151031,20151101,20151102,20151103,20151104
5,20151105,20151106,20151107,20151108,20151109,20151110,20151111
6,20151112,20151113,20151114,20151115,20151116,20151117,20151118
7,20151119,20151120,20151121,20151122,20151123,20151124,20151125
8,20151126,20151127,20151128,20151129,20151130,20151201,20151202
9,20151203,20151204,20151205,20151206,20151207,20151208,20151209


In [41]:
import hdbscan

# Generic wrapper around a clustering estimator with a fit_predict() method.
def plot_clusters(data, algorithm, args=(), kwds=None):
    """Cluster ``data`` and return the label assigned to each sample.

    Despite its name, this function does not plot anything: it instantiates
    the estimator, fits it and returns the predicted labels.

    Parameters
    ----------
    data : array-like
        Samples handed unchanged to ``fit_predict``.
    algorithm : callable
        Estimator class (or factory) whose instances provide
        ``fit_predict(data)``.
    args : tuple, optional
        Positional arguments for ``algorithm``. Defaults to no arguments.
    kwds : dict, optional
        Keyword arguments for ``algorithm``. ``None`` (the default) means no
        keyword arguments.

    Returns
    -------
    Whatever ``fit_predict`` returns for ``data``.
    """
    if kwds is None:
        kwds = {}
    estimator = algorithm(*args, **kwds)
    return estimator.fit_predict(data)

In [42]:
import folium 
from folium import plugins
from folium.plugins import HeatMap
from collections import Counter

# Base map; the centre is given as [latitude, longitude].
_MAP_CENTRE = [48.38249, -4.4657183]
fm = folium.Map(location=_MAP_CENTRE, zoom_start=8)

# Filled by the weekly loop: one list of [lat, lon] pairs per week.
ht_lst = list()

In [43]:
# Loop through all the weeks, cluster each week's positions with HDBSCAN and
# collect the points that belong to the large clusters.
MIN_CLUSTER_SIZE = 50       # passed to HDBSCAN as min_cluster_size
MIN_POINTS_TO_KEEP = 1000   # clusters with fewer points are left off the map
NOISE_LABEL = -1            # label value that is excluded from the heat map

# Start from an empty list so that re-running this cell does not append the
# same weeks a second time.
ht_lst = []

# Convert the 'YYYYMMDD' strings to integers once instead of on every iteration.
day_numbers = data['weeks'].astype(int)

for index, row in weeks_df.iterrows():
    # Each row holds the seven days of one week; the first and last bound it.
    first_day = int(row.iloc[0])
    last_day = int(row.iloc[6])

    # Subset of this week with the attributes lat and lon (in that order,
    # which is the order the heat map entries are stored in).
    in_week = (day_numbers >= first_day) & (day_numbers <= last_day)
    week_points = data.loc[in_week, ['lat', 'lon']].dropna(axis=0, subset=['lat', 'lon'])

    if week_points.empty:
        # Nothing to cluster; keep an empty entry so that positions in
        # ht_lst still line up with the rows of weeks_df.
        ht_lst.append([])
        print(index)
        continue

    coords = week_points.values

    # Cluster the week with HDBSCAN.
    labels = plot_clusters(coords, hdbscan.HDBSCAN, (), {'min_cluster_size': MIN_CLUSTER_SIZE})

    # Filter the clustering result: keep labels with enough points, excluding
    # the noise label. value_counts() orders labels by decreasing count, and
    # that order is preserved in the output.
    label_counts = pd.Series(labels).value_counts()
    large_labels = label_counts[label_counts >= MIN_POINTS_TO_KEEP].index
    kept_labels = [label for label in large_labels if label != NOISE_LABEL]

    # Clustered data of this week: coordinates plus the assigned label.
    clustered = pd.DataFrame(coords, columns=['lat', 'lon'])
    clustered['label'] = labels

    # Gather the points cluster by cluster and concatenate once.
    # (DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.)
    cluster_frames = [
        clustered.loc[clustered['label'] == label, ['lat', 'lon']]
        for label in kept_labels
    ]
    if cluster_frames:
        ht_lst.append(pd.concat(cluster_frames).values.tolist())
    else:
        # No cluster passed the size filter for this week.
        ht_lst.append([])
    print(index)



0




1




2




3




4




5




6




7




8




9




10




11




12




13




14




15




16




17




18




19




20




21




22




23




24




25


In [44]:
# Display the collected result: one inner list per week, each holding the
# [lat, lon] pairs of that week's retained clusters. This prints the whole
# nested list, so the output can be very long.
ht_lst

[[[48.066086, -4.752425],
  [48.065834, -4.752868],
  [48.06556, -4.7533417],
  [48.065304, -4.753762999999999],
  [48.065094, -4.754096499999999],
  [48.0648, -4.754569999999999],
  [48.06454, -4.755007],
  [48.064293, -4.755456400000001],
  [48.064026, -4.755962],
  [48.063774, -4.756407],
  [48.06358, -4.756757],
  [48.06328, -4.757185],
  [48.062977000000004, -4.7575335999999995],
  [48.06266, -4.757865],
  [48.062313, -4.758215],
  [48.061993, -4.75855],
  [48.061737, -4.75882],
  [48.06138, -4.7591934],
  [48.061073, -4.759525],
  [48.060764, -4.7598286],
  [48.060413, -4.7602],
  [48.060093, -4.760546700000001],
  [48.059845, -4.76082],
  [48.059486, -4.7612185],
  [48.05916, -4.7615266],
  [48.058807, -4.761825],
  [48.058456, -4.7621082999999995],
  [48.058075, -4.7623935],
  [48.057724, -4.762630000000001],
  [48.05742, -4.76285],
  [48.05707, -4.763105],
  [48.05672, -4.7633667],
  [48.05637, -4.76361],
  [48.056057, -4.7638370000000005],
  [48.055653, -4.764141599999999],
 

In [45]:
# Interactive heat map built from ht_lst, which holds one entry per week's clusters.
hm = plugins.HeatMapWithTime(
    ht_lst,
    auto_play=True,
    max_opacity=0.8,
)
print('0')  # progress marker: heat-map layer created

hm.add_to(fm)
print('1')  # progress marker: layer attached to the base map

fm.save("all_weeks26_clust1000.html")
print('2')  # progress marker: HTML file written

0
1
2
