In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import anvil
import geohash
from locationpublic import *
import os
import sys
import eurekacluster
import math
from motifanalysis import *
import motif2
import folium
from geopy.distance import vincenty
from collections import Counter

In [53]:
def compute_gyration_radius(significant_places_geo_hash, location_history_geohash):
    """
    This function computes the total radius of gyration and k-radius gyration for an individual
    based on visitied locations.
    
    The total radius of gyration is used to characterize the typical distance travelled by an individual.
    The k-radius of gyration is the radius of gyration computed over the k-th most frequent locations.
    
    Parameters:
    -----------
    
    significant_places_geo_hash: a list of strings
        Geohash strings that store the visited locations/significant locations.
    
    location_history_geohash: a list of strings
        A list of geohash values that represents the full location history/gps record of an indiviual in time series.
    
    returns:
    --------
    float:
        The total radius of gyration
    DataFrame:
        k-th radius of gyration in descending order of frequented locations
    DataFrame:
        info for significant locations including geohash values, gps coordinates, count, k-th radius of gyration
        
    """
    significant_places_gps = [geohash.decode(x) for x in significant_places_geo_hash]
    num_places = len(significant_places_gps)
    num_points = len(location_history_geohash)
    
    # construct dataframe to store significant places' info
    significant_places = pd.DataFrame(columns =['geo_hash','gps','count','cnt_pct','gyration_radius','k'])
    significant_places.geo_hash = significant_places_geo_hash
    significant_places.gps = significant_places_gps
    significant_places.ix[:,'count'] = [0] * num_places
    significant_places.ix[:,'gyration_radius'] = [0] * num_places
    significant_places = significant_places.set_index('geo_hash')
    
    cnt = Counter(location_history_geohash)
    for v in cnt:
        if v in significant_places.index.tolist():
            significant_places.ix[v,'count'] = cnt[v]
            significant_places.ix[v,'cnt_pct'] = cnt[v] / num_points
            
    significant_places = significant_places.sort_values(by = 'count',ascending = False)
    significant_places.ix[:,'k'] = [ x+1 for x in range(num_places)]
    
    # compute tatal radius of gyration
    N = sum(significant_places['count'].tolist())
    r_cm = np.mean(significant_places.gps.tolist(),axis = 0)
    temp_sum = 0
    for i in range(num_places):
        r = significant_places.ix[i,'gps']
        temp_sum += significant_places.ix[i,'count'] * vincenty(r,r_cm).km**2
    r_total = math.sqrt(1/N * temp_sum)
    
    # compute k-radius of gyration

    for i in range(1,num_places+1):
        N = sum(significant_places.ix[:i,'count'])
        r_cm = np.mean(significant_places.ix[:i,'gps'].tolist(),axis = 0)
        temp_sum = 0
        for j in range(i):
            r = significant_places.ix[j,'gps']
            temp_sum += significant_places.ix[j,'count'] * vincenty(r,r_cm).km**2
        significant_places.ix[i-1,'gyration_radius'] = math.sqrt(1/N * temp_sum)
    
    significant_places['ratio_k'] = [ x/r_total for x in significant_places.gyration_radius.tolist()]
    
    return(r_total,significant_places.ix[:,['gyration_radius','k','ratio_k']].set_index('k'),significant_places)

In [13]:
uid = 'u066_rct@eureka.csv'
eureka = pd.read_csv(uid, usecols=['time', 'longitude', 'latitude'])
num_points = len(eureka)

In [16]:
# eureka = anvil.api.convert_time_zone(df = eureka,column_name = 'time',should_localize = 'America/New_York',
#                                     sort_index = True, to_timezone = 'America/New_York')
eureka = anvil.api.convert_time_zone(df = eureka,column_name = 'time')

In [17]:
eureka['geo_hash'] = compute_geo_hash(eureka, lat_c='latitude',lon_c='longitude', precision=7)
eureka_hash = pd.Series(filter_out_rare_points(eureka.geo_hash))
l = eureka_hash.dropna().unique().tolist()

In [54]:
r_total, r_k, df = compute_gyration_radius(l,eureka.geo_hash.tolist())

In [55]:
print(r_total)
print()
print(r_k)
print()
print(df)

141.45636369259844

   gyration_radius   ratio_k
k                           
1         0.000000  0.000000
2       174.433175  1.233124
3       118.816577  0.839952
4        92.143053  0.651389
5       141.456364  1.000000

                                              gps  count     cnt_pct  \
geo_hash                                                               
dr5xdrx   (40.73524475097656, -73.71894836425781)  10205     0.92445   
dru87xe   (43.67958068847656, -72.26325988769531)    152   0.0137694   
dr5xdrw   (40.73524475097656, -73.72032165527344)    129   0.0116858   
dr5xdrz   (40.73661804199219, -73.71894836425781)    108  0.00978349   
dru87xd   (43.67958068847656, -72.26463317871094)     79  0.00715645   

          gyration_radius  k   ratio_k  
geo_hash                                
dr5xdrx          0.000000  1  0.000000  
dru87xe        174.433175  2  1.233124  
dr5xdrw        118.816577  3  0.839952  
dr5xdrz         92.143053  4  0.651389  
dru87xd        141.456364