In [1]:
import pandas as pd
from geopy import distance
from tqdm import tqdm
import gmplot
from matplotlib import pyplot as plt
from sklearn.cluster import DBSCAN
import numpy as np
import requests

In [4]:
filepath = "/home/yash/Documents/CB/location_data/"
filename = "original_points/User0.json"

# Less -> More accuracy
accuracy_threshold = 50

# In seconds
tdiff_threshold = 1200

# In metres
geodist_threshold = 50

visit_threshold = 300

In [5]:
# Read file into dataframe
df = pd.read_json(filepath+filename, orient='records')

# Drop useless columns
df.drop(['activity', 'altitude', 'verticalAccuracy', 'heading', 'velocity'], axis=1, inplace=True)

# Rename weirdly named columns
df.rename(index = int, columns = {'latitudeE7':'latitude',
                                  'longitudeE7': 'longitude',
                                  'timestampMs' : 'timestamp'}, inplace =True)

# Apply accuracy filter
df = df[df['accuracy'] <= accuracy_threshold]

# Apply the time range filter
df = df[df['timestamp'].dt.year==2018]

# Arrange the dataframe in ascending time order
df = df.reindex(index=df.index[::-1])
df['index'] = list(range(df.shape[0])) 
df.set_index('index', inplace=True)

# Initialise time difference and geodist columns
df['tdiff'] = 0
df['geodist'] = 0

# Lat and Long in the form 19045972, 73025802
# Convert to 19.045972, 73.025802
df['latitude'] = df['latitude'].apply(lambda x: x/10000000)
df['longitude'] = df['longitude'].apply(lambda x: x/10000000)
df['location'] = list(zip(df.latitude, df.longitude))

# Add additional date and time related columns
df['day'] = df['timestamp'].dt.weekday
df['week'] = df['timestamp'].dt.week
df['month'] = df['timestamp'].dt.month
df['year'] = df['timestamp'].dt.year

df

Unnamed: 0_level_0,accuracy,latitude,longitude,timestamp,tdiff,geodist,location,day,week,month,year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,50,19.053380,72.847517,2018-01-03 10:57:29.239,0,0,"(19.0533798, 72.8475175)",2,1,1,2018
1,19,19.053036,72.847732,2018-01-03 10:57:45.000,0,0,"(19.0530355, 72.8477323)",2,1,1,2018
2,4,19.052977,72.847690,2018-01-03 10:58:01.000,0,0,"(19.0529768, 72.8476905)",2,1,1,2018
3,3,19.052977,72.847691,2018-01-03 10:58:17.000,0,0,"(19.0529767, 72.8476906)",2,1,1,2018
4,3,19.052975,72.847690,2018-01-03 10:58:33.000,0,0,"(19.0529748, 72.84769)",2,1,1,2018
5,3,19.052863,72.847347,2018-01-03 10:59:14.000,0,0,"(19.052863, 72.8473465)",2,1,1,2018
6,4,19.053597,72.846182,2018-01-03 10:59:36.000,0,0,"(19.0535968, 72.8461824)",2,1,1,2018
7,15,19.042293,72.840299,2018-01-03 11:01:41.000,0,0,"(19.0422935, 72.8402987)",2,1,1,2018
8,8,19.040786,72.841125,2018-01-03 11:01:56.000,0,0,"(19.0407862, 72.841125)",2,1,1,2018
9,23,19.033604,72.838718,2018-01-03 11:05:58.000,0,0,"(19.0336035, 72.8387185)",2,1,1,2018


In [6]:
# Returns time difference between 2 timestamps in seconds
def timeDiff(t1, t2):
    return round(pd.Timedelta(t2 - t1).seconds)

def geoDist(l1, l2):
    return round(distance.vincenty(l1, l2).m )

In [7]:
# Generate two series objects from the timestamps in such a way that both can be subtracts(timeDiff)
# Combine the two series objects wuth the function tdiff

ts = df['timestamp']
ts1 = ts.shift(periods=1)[1:]
ts2 = ts[1:]

df['tdiff'] = ts1.astype(object).combine(ts2, func=timeDiff)
df['tdiff'][0] = 0

location = df['location']
location1 = location.shift(periods=1)[1:]
location2 = location[1:]

df['geodist'] = location1.astype(object).combine(location2, func=geoDist)
df['geodist'][0] = 0

df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


Unnamed: 0_level_0,accuracy,latitude,longitude,timestamp,tdiff,geodist,location,day,week,month,year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,50,19.053380,72.847517,2018-01-03 10:57:29.239,0,0,"(19.0533798, 72.8475175)",2,1,1,2018
1,19,19.053036,72.847732,2018-01-03 10:57:45.000,15,44,"(19.0530355, 72.8477323)",2,1,1,2018
2,4,19.052977,72.847690,2018-01-03 10:58:01.000,16,8,"(19.0529768, 72.8476905)",2,1,1,2018
3,3,19.052977,72.847691,2018-01-03 10:58:17.000,16,0,"(19.0529767, 72.8476906)",2,1,1,2018
4,3,19.052975,72.847690,2018-01-03 10:58:33.000,16,0,"(19.0529748, 72.84769)",2,1,1,2018
5,3,19.052863,72.847347,2018-01-03 10:59:14.000,41,38,"(19.052863, 72.8473465)",2,1,1,2018
6,4,19.053597,72.846182,2018-01-03 10:59:36.000,22,147,"(19.0535968, 72.8461824)",2,1,1,2018
7,15,19.042293,72.840299,2018-01-03 11:01:41.000,125,1396,"(19.0422935, 72.8402987)",2,1,1,2018
8,8,19.040786,72.841125,2018-01-03 11:01:56.000,15,188,"(19.0407862, 72.841125)",2,1,1,2018
9,23,19.033604,72.838718,2018-01-03 11:05:58.000,242,834,"(19.0336035, 72.8387185)",2,1,1,2018


In [8]:
# Start processing each row and compare with the previous row's tdiff and geodist
# If it fits within tdiff and geodist bounds, consider the point to be a part of the staypoint

# Set the first entry to be the parent SP

df['visit_time'] = 0

parent = 0
visit_time = 0

for i in tqdm(df.index[1:]):
    
    # Check if the entry's tdiff and geodist fall within constraints
    
    # If yes, add its time to total time at that SP
    if (df['geodist'][i]<=geodist_threshold and df['tdiff'][i]<=tdiff_threshold):
        visit_time += df['tdiff'][i]
        df.drop(i, inplace=True)
    
    # If no, make the parent entry SP and set this to be the parent entry
    else:
        
        if visit_time < visit_threshold:
            df.drop(parent, inplace=True)
            
        else:
            # append parent to final df
            df['visit_time'][parent] = visit_time

        # Set new parent
        parent = i
        visit_time = 0

df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
100%|██████████| 32188/32188 [01:46<00:00, 301.05it/s]


Unnamed: 0_level_0,accuracy,latitude,longitude,timestamp,tdiff,geodist,location,day,week,month,year,visit_time
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10,28,19.033205,72.838990,2018-01-03 11:12:44.470,406,53,"(19.0332048, 72.8389898)",2,1,1,2018,876
14,25,19.033091,72.838848,2018-01-03 11:48:32.737,1271,20,"(19.033091, 72.838848)",2,1,1,2018,1197
22,20,19.055118,72.852087,2018-01-03 12:45:27.283,145,74,"(19.0551184, 72.8520868)",2,1,1,2018,1571
33,20,19.055118,72.852087,2018-01-03 14:45:06.261,5604,0,"(19.0551184, 72.8520868)",2,1,1,2018,5716
67,27,19.136537,72.831541,2018-01-06 11:34:30.178,29,102,"(19.1365367, 72.8315405)",5,1,1,2018,407
98,5,19.098726,72.845997,2018-01-10 09:03:26.000,15,66,"(19.0987264, 72.8459972)",2,2,1,2018,2849
252,24,19.097369,72.850002,2018-01-10 09:51:56.578,58,80,"(19.097369, 72.8500018)",2,2,1,2018,803
265,50,19.054599,72.852072,2018-01-10 10:25:34.945,405,63,"(19.054599, 72.852072)",2,2,1,2018,9039
320,26,19.054540,72.852506,2018-01-10 14:10:53.768,141,84,"(19.05454, 72.8525063)",2,2,1,2018,5083
344,35,19.054768,72.852029,2018-01-10 20:37:02.385,18073,28,"(19.0547683, 72.8520291)",2,2,1,2018,13675


In [16]:
df['index'] = list(range(df.shape[0])) 
df.set_index('index', inplace=True)

# Generate two series objects from the timestamps in such a way that both can be subtracts(timeDiff)
# Combine the two series objects with the function tdiff

ts = df['timestamp']
ts1 = ts.shift(periods=1)[1:]
ts2 = ts[1:]

df['tdiff'] = ts1.astype(object).combine(ts2, func=timeDiff)
df['tdiff'] = df['tdiff'] - df['visit_time'].shift(1)[1:]
df['tdiff'][0] = 0

location = df['location']
location1 = location.shift(periods=1)[1:]
location2 = location[1:]

df['geodist'] = location1.astype(object).combine(location2, func=geoDist)
df['geodist'][0] = 0

df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,accuracy,latitude,longitude,timestamp,tdiff,geodist,location,day,week,month,year,visit_time,cluster_id,tag
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,28,19.033205,72.838990,2018-01-03 11:12:44.470,0,0,"(19.0332048, 72.8389898)",2,1,1,2018,876,0,Unknown
1,25,19.033091,72.838848,2018-01-03 11:48:32.737,1272,20,"(19.033091, 72.838848)",2,1,1,2018,1197,0,Unknown
2,20,19.055118,72.852087,2018-01-03 12:45:27.283,2217,2808,"(19.0551184, 72.8520868)",2,1,1,2018,1571,-1,Unknown
3,20,19.055118,72.852087,2018-01-03 14:45:06.261,5607,0,"(19.0551184, 72.8520868)",2,1,1,2018,5716,0,Unknown
4,27,19.136537,72.831541,2018-01-06 11:34:30.178,69247,9268,"(19.1365367, 72.8315405)",5,1,1,2018,407,-1,Unknown
5,5,19.098726,72.845997,2018-01-10 09:03:26.000,76928,4453,"(19.0987264, 72.8459972)",2,2,1,2018,2849,-1,Unknown
6,24,19.097369,72.850002,2018-01-10 09:51:56.578,61,447,"(19.097369, 72.8500018)",2,2,1,2018,803,0,Unknown
7,50,19.054599,72.852072,2018-01-10 10:25:34.945,1215,4739,"(19.054599, 72.852072)",2,2,1,2018,9039,-1,Unknown
8,26,19.054540,72.852506,2018-01-10 14:10:53.768,4479,46,"(19.05454, 72.8525063)",2,2,1,2018,5083,0,Unknown
9,35,19.054768,72.852029,2018-01-10 20:37:02.385,18085,56,"(19.0547683, 72.8520291)",2,2,1,2018,13675,0,Unknown


In [43]:
"""
df.to_csv(filepath+'stay_points/Lx2018SP.csv')

gmap = gmplot.GoogleMapPlotter(19.05, 72.8, 16)
gmap.scatter(df['latitude'], df['longitude'], 'black' , marker=True)
gmap.draw('CB2018SP.html')
del gmap
"""

"\ngmap = gmplot.GoogleMapPlotter(19.05, 72.8, 16)\ngmap.scatter(df['latitude'], df['longitude'], 'black' , marker=True)\ngmap.draw('CB2018SP.html')\ndel gmap\n"

In [10]:
# Form clusters
# Takes dataframe of staypoints as parameter and adds a column cluster_id to it
# Uses DBSCAN, param: min_smaples and epsilon
def form_clusters(df, index):
  coords = df.iloc[index].as_matrix(columns=['latitude', 'longitude'])
  
  kms_per_radian = 6371.0088
  epsilon = 0.03 / kms_per_radian
  db = DBSCAN(eps=epsilon, min_samples=2, algorithm='ball_tree', metric='haversine').fit(np.radians(coords))
  cluster_labels = db.labels_
  num_clusters = len(set(cluster_labels))
  
  df.loc[index,'cluster_id'] = cluster_labels
  
  print('Number of clusters: {}'.format(num_clusters))

In [19]:
df['cluster_id'] = 0
df['tag'] = "Unknown"

dic = {}

for year in df.year.drop_duplicates():
    
    for week in df[df['year']==year].week.drop_duplicates():
        
        temp = df.loc[ (df['year']==year) & (df['week']==week) ]
        form_clusters(df, temp.index)
        temp = df.loc[ (df['year']==year) & (df['week']==week) ]
        
        if temp.cluster_id.max() >= 1:
        
            # Create a df to reduce the clusters to centroids

            centroids = pd.DataFrame()
            centroids['cluster_id'] = list(range(temp.cluster_id.max()+1)) 
            centroids.set_index('cluster_id', inplace=True)
            centroids['latitude'] = 0.0
            centroids['longitude'] = 0.0
            centroids['week_visit_time'] = 0
            centroids['week_frequency'] = 0
            centroids['tag'] = "Unknown"

            for i in centroids.index:
                rows = temp[temp['cluster_id']==i]
                centroids['latitude'][i] = rows['latitude'].mean()
                centroids['longitude'][i] = rows['longitude'].mean()
                centroids['week_visit_time'][i] = rows['visit_time'].sum()
                centroids['week_frequency'][i] = rows.shape[0]

            print(centroids)
            
            home_id = centroids['week_visit_time'].idxmax()
            t = centroids.drop(home_id)
            work_id = t['week_visit_time'].idxmax()
            print(home_id, work_id)
            df.loc[(df['year']==year) & (df['week']==week) & (df['cluster_id']==home_id) , 'tag'] = 'Home'
            df.loc[(df['year']==year) & (df['week']==week) & (df['cluster_id']==work_id) , 'tag']= 'Work'
            
df['tag'].value_counts()

Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.033148  72.838919             2073               2
1           19.055118  72.852087             7287               2
1 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

Number of clusters: 2
Number of clusters: 6
             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.107226  72.837036            94478               8
1           19.054746  72.852201            50781              23
2           19.054610  72.851601            20438               7
3           19.015464  72.828173             1445               2
4           19.086206  72.888870             5000               2
0 1
Number of clusters: 8
             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054748  72.852132            34317              19
1           19.107224  72.837042            67470               6
2           19.108114  72.839013             2972               3
3           19.054550  72.851562            15475               6
4           19.106899  72.836874            27364               2
5     

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054643  72.852210            12929               8
1           19.107241  72.837057            63457               4
2           19.055170  72.852116            34842               6
1 2
Number of clusters: 5

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054652  72.851935             2644               2
1           19.107148  72.836966            49758               3
2           19.055058  72.852136            55643               6
3           19.054447  72.852502             2273               2
2 1
Number of clusters: 5
             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.055184  72.852148           157012              23
1           19.054657  72.852239            12595               6
2           19.012815  72.825158              970               2
3           19.107178  72.836979            70151               5
0 3
Number of clusters: 7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.055022  72.852093            71058              12
1           19.055455  72.851010            39959               8
2           19.054634  72.852246            48821              12
3           19.065363  72.858951              721               2
4           19.012720  72.825550             5656               5
5           19.107239  72.837087            67674               5
0 5
Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054639  72.852164           182064              30
1           19.107212  72.837115            72800               9
0 1
Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054690  72.852166           120130              25
1           19.107207  72.837108           123592              11
2           19.045290  73.082461            48811               2
1 0
Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054749  72.852114           149080              46
1           19.107182  72.836989           115965              12
2           19.022509  72.856394             4772               2
0 1
Number of clusters: 6


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.107359  72.837154           115999              14
1           19.054705  72.852090           113830              24
2           19.086460  72.889545             6619               4
3           19.110029  72.837517            10790               4
4           19.086271  72.888677             3865               2
0 1
Number of clusters: 7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054772  72.852115           239246              42
1           19.022230  72.855739            34975               6
2           19.086426  72.889464             4966               2
3           19.086698  72.889922             5322               2
4           19.022153  72.856304            14405               3
5           19.022358  72.855432            21835               2
0 1
Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054698  72.852189           290293              42
1           19.107087  72.836876            78491               6
2           19.107192  72.837208            38043               3
0 1
Number of clusters: 3

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.107179  72.837028            91667               7
1           19.054665  72.852149           200621              37
1 0
Number of clusters: 5
             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054631  72.852174           171226              42
1           19.107147  72.837051            99702              13
2           19.110054  72.837600             5742               2
3           19.108375  72.838605             4787               2
0 1
Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054634  72.852110            70542              19
1           19.107181  72.837114           130382              11
1 0
Number of clusters: 3
             latitude  longitude  week_visit_time  week_frequency
cluster_id                                                       
0           19.054751  72.852060            48349              16
1           19.107123  72.837125            64151               6
1 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [27]:
df['cluster_id'] = 0
df['tag'] = "Unknown"

dic = 0

for year in df.year.drop_duplicates():
    
    for week in df[df['year']==year].week.drop_duplicates():
        
        temp = df.loc[ (df['year']==year) & (df['week']==week) ]
        form_clusters(df, temp.index)
        temp = df.loc[ (df['year']==year) & (df['week']==week) ]
        
        if temp.cluster_id.max() >= 1:
        
            # Create a df to reduce the clusters to centroids

            centroids = pd.DataFrame()
            centroids['cluster_id'] = list(range(temp.cluster_id.max()+1)) 
            centroids.set_index('cluster_id', inplace=True)
            centroids['latitude'] = 0.0
            centroids['longitude'] = 0.0
            centroids['week_visit_time'] = 0
            centroids['week_frequency'] = 0
            centroids['tag'] = "Unknown"

            for i in centroids.index:
                rows = temp[temp['cluster_id']==i]
                lat = centroids['latitude'][i] = rows['latitude'].mean()
                long = centroids['longitude'][i] = rows['longitude'].mean()
                centroids['week_visit_time'][i] = rows['visit_time'].sum()
                centroids['week_frequency'][i] = rows.shape[0]
                
                dic+=1
                places = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=' + str(lat) + ',' + str(long) 
                       + '&rankby=distance'+'&key=AIzaSyBdqlVT2vWz9wu_olDovWYI561da6QtKyk').json()['results']

            
            
            
print(dic)            
#df['tag'].value_counts()

Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 2
Number of clusters: 6


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 8


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 6


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of clusters: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


63


In [35]:
requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=19.054737,72.852247&radius=50&key=AIzaSyBdqlVT2vWz9wu_olDovWYI561da6QtKyk').json()['results']

[{'geometry': {'location': {'lat': 19.0759837, 'lng': 72.8776559},
   'viewport': {'northeast': {'lat': 19.2716339, 'lng': 72.9864994},
    'southwest': {'lat': 18.8928676, 'lng': 72.7758729}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/geocode-71.png',
  'id': '2e66c82c2e8d7149bd7e026089ff5dedb8346bd5',
  'name': 'Mumbai',
  'photos': [{'height': 1360,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/112673460654122926685/photos">Jaideep Chaudhary</a>'],
    'photo_reference': 'CmRaAAAAMKLgWkkrGrm2z0MbzRfZLMnsTdDo5FO4Pl1vbixBnLXV_NJMc9DkpfCY5d--aZ0R5yBtW1c_mJsOw1ouzm1HG36E-6VpTuX5E_fM0Tt7xQhML16oS-NDeCg9H3jfKH8yEhBIf0-snZSjbdqYXD5CdwvoGhQsrkk3AoDFN9IwgUHFc_4pA_GZcg',
    'width': 2048}],
  'place_id': 'ChIJwe1EZjDG5zsRaYxkjY_tpF0',
  'reference': 'CmRbAAAAEO64paVp2sljMyjY2aMsWMtxY6NpQb-ys1HrVTRE65pWgdbzREQgQLns_fWIfs98XT9CCPE5OppebbW4_nfOMPfNAFM5b2o02sXYAySPCW0c9BlCQn3mAB-yWOMpf0IiEhD62Q_8z5gpUp8PPbtxw3TzGhT7QQHnz2yofmTX731dKBo1ahGpNw',
  'sc