In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import anvil
import geohash
from locationpublic import *
import os
import sys
import eurekacluster
import math
from motifanalysis import *
import motif2
import folium
from geopy.distance import vincenty
from collections import Counter

In [352]:
def compute_gyration_radius(significant_places_geo_hash, location_history_geohash):
    
    significant_places_gps = [geohash.decode(x) for x in significant_places_geo_hash]
    num_places = len(significant_places_gps)
    num_points = len(location_history)
    
    # construct dataframe to store significant places' info
    significant_places = pd.DataFrame(columns =['geo_hash','gps','count','gyration_radius'])
    significant_places.geo_hash = significant_places_geo_hash
    significant_places.gps = significant_places_gps
    significant_places.ix[:,'count'] = [0] * num_places
    significant_places.ix[:,'gyration_radius'] = [0] * num_places
    significant_places = significant_places.set_index('geo_hash')
    
    cnt = Counter(location_history_geohash)
    for v in cnt:
        if v in significant_places.index.tolist():
            significant_places.ix[curr_loc,'count'] = cnt[v]
            
    significant_places = significant_places.sort_values(by = 'count',ascending = False)
    
    # compute tatal radius of gyration
    N = sum(significant_places['count'].tolist())
    r_cm = np.mean(significant_places.gps.tolist(),axis = 0)
    temp_sum = 0
    for i in range(num_places):
        r = significant_places.ix[i,'gps']
        temp_sum += significant_places.ix[i,'count'] * vincenty(r,r_cm).km**2
    r_total = math.sqrt(1/N * temp_sum)
    
    # compute k-radius of gyration

    for i in range(1,num_places+1):
        N = sum(significant_places.ix[:i,'count'])
        r_cm = np.mean(significant_places.ix[:i,'gps'].tolist(),axis = 0)
        temp_sum = 0
        for j in range(i):
            r = significant_places.ix[j,'gps']
            temp_sum += significant_places.ix[j,'count'] * vincenty(r,r_cm).km**2
        significant_places.ix[i-1,'gyration_radius'] = math.sqrt(1/N * temp_sum)
    
    return(r_total, significant_places.gyration_radius.tolist(),significant_places)

In [33]:
uid = 'u066_rct@eureka.csv'
eureka = pd.read_csv(uid, usecols=['time', 'longitude', 'latitude'])
num_points = len(eureka)

In [43]:
eureka = anvil.api.convert_time_zone(df = eureka,column_name = 'time',should_localize = 'America/New_York',
                                    sort_index = True, to_timezone = 'America/New_York')

In [65]:
eureka['geo_hash'] = compute_geo_hash(eureka, lat_c='latitude',lon_c='longitude', precision=7)
eureka_hash = pd.Series(filter_out_rare_points(eureka.geo_hash))
l = eureka_hash.dropna().unique().tolist()

In [110]:
x = [geohash.decode(t) for t in l]
num_places = len(l)

In [340]:
r_total, r_k, df = compute_gyration_radius(l,eureka.geo_hash.tolist())

In [344]:
print(r_total)
print()
print(df)

141.45636369259844

                                              gps  count       gyration_radius
geo_hash                                                                      
dr5xdrx   (40.73524475097656, -73.71894836425781)  10205  NaN         0.000000
dru87xe   (43.67958068847656, -72.26325988769531)    152  NaN       174.433175
dr5xdrw   (40.73524475097656, -73.72032165527344)    129  NaN       118.816577
dr5xdrz   (40.73661804199219, -73.71894836425781)    108  NaN        92.143053
dru87xd   (43.67958068847656, -72.26463317871094)     79  NaN       141.456364
