# Preparing Google Location History Data for Spatial Analysis

In [12]:
import os
import sys
import json
from pandas import DataFrame
from pprint import pprint
from datetime import datetime as dt
from zipfile import ZipFile, ZIP_DEFLATED

In [34]:
import arcpy
from arcpy import env
arcpy_version = arcpy.GetInstallInfo()['Version']
print ( 'arcpy version: {0}'.format(arcpy_version) )

arcpy version: 1.4


In [4]:
# Path to your downloaded data
location_history = os.path.join(r"C:\Users\admin\Documents\GitHub\location-history-heatmap\takeout-20170424T162558Z-001.zip")

location_history_exists = os.path.exists(location_history)
print ( 'Location History Exists: {0}'.format(location_history_exists) )
print ( 'Absolute Path to Location History:\n\t{0}'.format(location_history) )
dir_path = os.path.dirname(location_history)
print ( 'Path to Location History Parent Directory:\n\t{0}'.format(dir_path) )

Location History Exists: True
Absolute Path to Location History:
	C:\Users\admin\Documents\GitHub\location-history-heatmap\takeout-20170424T162558Z-001.zip
Path to Location History Parent Directory:
	C:\Users\admin\Documents\GitHub\location-history-heatmap


In [33]:
uncompressed_folder_name = "google_location_history"
path_to_uncompressed_folder = os.path.join(dir_path, uncompressed_folder_name)

if not os.path.exists(path_to_uncompressed_folder):
    os.mkdir(path_to_uncompressed_folder)

uncompress_folder_valid = os.path.exists(path_to_uncompressed_folder)
print ( 'Path to New Uncompressed Folder is Valid: {0}'.format(uncompress_folder_valid) )
print ( 'Path to New Uncompressed Folder:\n\t{0}'.format(path_to_uncompressed_folder) )
os.chdir(path_to_uncompressed_folder)
print ( 'Successfully Changed Current Working Directory: {0}'.format(os.getcwd() == path_to_uncompressed_folder))

Path to New Uncompressed Folder is Valid: True
Path to New Uncompressed Folder:
	C:\Users\admin\Documents\GitHub\location-history-heatmap\google_location_history
Successfully Changed Current Working Directory: True


In [13]:
# Unzip compressed data folder

compressed_location_history = ZipFile(location_history)
compressed_location_history.extractall()

In [6]:
"""
Function that finds the file LocationHistory.json in the uncompressed directory of downloaded data
"""

def getGoogleLocationHistoryJSON(inDir):
    for root, dirs, files in os.walk(inDir):
        for file in files:
            if file == 'LocationHistory.json':
                return os.path.join(root, file)
    return None


loc_hist_json = getGoogleLocationHistoryJSON(os.getcwd())
print ( 'Path to Location History: {}'.format(loc_hist_json) )

Path to Location History: C:\Users\admin\Documents\GitHub\location-history-heatmap\google_location_history\Takeout\Location History\LocationHistory.json


In [7]:
"""
Using the python std library json. Load json file to python dictionary.
"""

def loadJSONtoDict(inJSON):
    with open(inJSON, 'r') as data_file:
        return json.load(data_file)

# Loading JSON to dict
loc_hist_dict = loadJSONtoDict(loc_hist_json)

# The top level of the data only has one key which is 'locations'
print ( 'JSON Top Keys: {0}'.format(list(loc_hist_dict.copy().keys())) )

# Assign the value (list) of the 'locations' key 
locations_list = loc_hist_dict['locations']

# Find the number of locations present in data
num_locations = len(locations_list)
print ( 'Number of Locations: {0}'.format(num_locations) )

JSON Top Keys: ['locations']
Number of Locations: 282677


In [8]:
'''
This is model that Google uses for tracking your location so let's try to use the most of this data as we can.
'''

model = {
    "timestampMs": "1486008038391",
    "accuracy": 20,
    "altitude": 74,
    "heading": 81,
    "latitudeE7": 38.889282,
    "longitudeE7": -77.049171,
    "velocity": 16,
    "activitys": [{
        "timestampMs": "1486008038391",
        "extras": [ {
            "type" : "value",
            "name" : "vehicle_personal_confidence",
            "intVal" : 100
        }],
        "activities": [{
            "type" : "still",
            "confidence" : 55
        }, {
            "type" : "walking",
            "confidence" : 25
        }, {
            "type" : "running",
            "confidence" : 20
        }, {
            "type" : "onFoot",
            "confidence" : 40
        }, {
            "type" : "onBicycle",
            "confidence" : 80
        }, {
            "type" : "inVehicle",
            "confidence" : 85
        }, {
            "type" : "exitingVehicle",
            "confidence" : 15
        }, {
            "type" : "tilting",
            "confidence" : 50
        }, {
            "type" : "unknown",
            "confidence" : 30
        }]
    }]
}

In [118]:
''' 
Let's setup some getter to pull the information out of the dictionary in a reliable fashion. 
Either we are going to get the expected data or the getter will return a None type.
'''

def getDate(inLoc):
    ''' A shapefile stores dates in a date field with this format: yyyy-mm-dd '''
    try:
        evt_dt = dt.fromtimestamp(int(inLoc['timestampMs']) / 1000)
        return evt_dt.strftime('%Y-%m-%d')
    except:
        return '0000-01-01'
    
def getDateTime(inLoc):
    ''' A geodatabase formats the date as datetime yyyy-mm-dd hh:mm:ss AM or PM '''
    try:
        evt_dt = dt.fromtimestamp(int(inLoc['timestampMs']) / 1000)
        return evt_dt.strftime('%Y-%m-%d %H:%M:%S %p')
    except:
        return None
    
def getAccuracy(inLoc):
    try:
        return inLoc['accuracy']
    except:
        return 0
    
def getAltitude(inLoc):
    try:
        return inLoc['altitude']
    except:
        return 0

def getHeading(inLoc):
    try:
        return inLoc['heading']
    except:
        return 0
    
def getVelocity(inLoc):
    try:
        return inLoc['velocity']
    except:
        return 0

def getLatitude(inLoc):
    try:
        return int(inLoc['latitudeE7']) / 10000000.0
    except:
        return 0.0
    
def getLongitude(inLoc):
    try:
        return int(inLoc['longitudeE7']) / 10000000.0
    except:
        return 0.0

def getActivitys(inLoc):
    try:
        return inLoc['activitys'][0]
    except:
        return None
    
def getActivities(inLoc):
    try:
        activitys = getActivitys(inLoc)
        return activitys['activities']
    except:
        return None
    
def getMostProbableActivity(inLoc):
    try:
        activities = getActivities(inLoc)
        highest_probability = None
        for activity in activities:
            if highest_probability is None or activity['confidence'] > highest_probability['confidence']:
                highest_probability = activity
        return highest_probability
    except:
        return None

def getActivity(inLoc):
    try:
        activity = getMostProbableActivity(inLoc)
        return activity['type']
    except:
        return ''
    
def getActivityConfidence(inLoc):
    try:
        activity = getMostProbableActivity(inLoc)
        return activity['confidence']
    except:
        return 0

def getActivityDate(inLoc):
    ''' A shapefile stores dates in a date field with this format: yyyy-mm-dd '''
    try:
        activitys = getActivitys(inLoc)
        activity_dt = dt.fromtimestamp(int(activitys['timestampMs']) / 1000)
        return activity_dt.strftime('%Y-%m-%d')
    except:
        try:
            return getDate(inLoc)
        except:
            return '0000-01-01'
    
def getActivityDateTime(inLoc):
    ''' A geodatabase formats the date as datetime yyyy-mm-dd hh:mm:ss AM or PM '''
    try:
        activitys = getActivitys(inLoc)
        activity_dt = dt.fromtimestamp(int(activitys['timestampMs']) / 1000)
        return activity_dt.strftime('%Y-%m-%d %H:%M:%S %p')
    except:
        return None


print ( 'Date: {0}'.format(getDate(model)) )
print ( 'DateTime: {0}'.format(getDateTime(model)) )
print ( 'Activity: {0}'.format(getActivity(model)) )
print ( 'Activity Confidence: {0}'.format(getActivityConfidence(model)) )
print ( 'Activity Date: {0}'.format(getActivityDate(model)) )
print ( 'Activity DateTime: {0}'.format(getActivityDateTime(model)) )
print ( 'Accuracy: {0}'.format(getAccuracy(model)) )
print ( 'Lat: {0}'.format(getLatitude(model)) )
print ( 'Lon: {0}'.format(getLongitude(model)) )
print ( 'Altitude: {0}'.format(getAltitude(model)) )
print ( 'Heading: {0}'.format(getHeading(model)) )
print ( 'Velocity: {0}'.format(getVelocity(model)) )

Date: 2017-02-01
DateTime: 2017-02-01 23:00:38 PM
Activity: inVehicle
Activity Confidence: 85
Activity Date: 2017-02-01
Activity DateTime: 2017-02-01 23:00:38 PM
Accuracy: 20
Lat: 3.8e-06
Lon: -7.7e-06
Altitude: 74
Heading: 81
Velocity: 16


In [119]:
# Build rows of event data for insert cursor

def getEventDataList(event):
    result = []
    result.append(getLatitude(event))
    result.append(getLongitude(event))
    result.append(getAltitude(event))
    result.append(getAccuracy(event))
    result.append(getHeading(event))
    result.append(getVelocity(event))
    result.append(getDate(event))
    # result.append(getDateTime(event))  # not needed for shapefile
    result.append(getActivity(event))
    result.append(getActivityConfidence(event))
    result.append(getActivityDate(event))
    # result.append(getActivityDateTime(event))  # not needed for shapefile
    return result


In [120]:
"""
Building a List of Event Lists. This will be helpful when creating our shapefile.
"""

headers = ['Lat', 'Lon', 'Altitude', 'Accuracy', 'Heading', 'Velocity', 'Date', 'Activity', 'Confidence', 'Act_Date']
# excluded = ['DateTime', 'Act_DT']

location_table = []

for loc in locations_list:
    location_table.append(getEventDataList(loc))


In [121]:
"""
Looking at the data in a Pandas DataFrame
"""

df = DataFrame(location_table)
df.columns = headers
df

Unnamed: 0,Lat,Lon,Altitude,Accuracy,Heading,Velocity,Date,Activity,Confidence,Act_Date
0,38.944379,-77.319340,0,1000,0,0,2017-04-24,,0,2017-04-24
1,38.944379,-77.319340,0,1000,0,0,2017-04-24,still,75,2017-04-24
2,38.944379,-77.319340,0,1000,0,0,2017-04-24,,0,2017-04-24
3,38.945535,-77.324007,0,1200,0,0,2017-04-24,still,75,2017-04-24
4,38.945535,-77.324007,0,1200,0,0,2017-04-24,still,75,2017-04-24
5,38.945535,-77.324007,0,1200,0,0,2017-04-24,,0,2017-04-24
6,38.945535,-77.324007,0,1200,0,0,2017-04-24,still,75,2017-04-24
7,38.944379,-77.319340,0,1000,0,0,2017-04-24,still,75,2017-04-24
8,38.944379,-77.319340,0,1000,0,0,2017-04-24,tilting,100,2017-04-24
9,38.945535,-77.324007,0,1200,0,0,2017-04-24,onFoot,80,2017-04-24


In [130]:
'''
Set Spatial Refernece
GCS: WGS 1984 (WKID: 4326)
'''
sr = arcpy.SpatialReference(3395)
print ( sr.name )

WGS_1984_World_Mercator


In [128]:
"""
Creating a new Point Feature Class with the Fields we would like to populate:
http://pro.arcgis.com/en/pro-app/tool-reference/data-management/create-feature-class.htm
"""

# Set workspace
env.workspace = os.getcwd()

# Set local variables
out_path = env.workspace
out_name = "GoogleLocationHistory.shp"
geometry_type = "POINT"
template = "#"
has_m = "DISABLED"
has_z = "DISABLED"
spatial_reference = sr

fc = arcpy.CreateFeatureclass_management(out_path, out_name, geometry_type, template, has_m, has_z, spatial_reference)

arcpy.AddField_management(fc, 'Altitude', 'DOUBLE')
arcpy.AddField_management(fc, 'Accuracy', 'DOUBLE')
arcpy.AddField_management(fc, 'Heading', 'DOUBLE')
arcpy.AddField_management(fc, 'Velocity', 'DOUBLE')
arcpy.AddField_management(fc, 'Date', 'DATE')
arcpy.AddField_management(fc, 'Activity', 'TEXT')
arcpy.AddField_management(fc, 'Confidence', 'DOUBLE')
arcpy.AddField_management(fc, 'Act_Date', 'DATE')

<Result 'C:\\Users\\admin\\Documents\\GitHub\\location-history-heatmap\\google_location_history\\GoogleLocationHistory.shp'>

In [129]:
'''
Insert Location Events into our newly created Point Shapefile
'''

fields = ['SHAPE@Y', 'SHAPE@X', 'Altitude', 'Accuracy', 'Heading', 'Velocity', 'Date', 'Activity', 'Confidence', 'Act_Date']

with arcpy.da.InsertCursor(fc, fields) as iCur:
    for loc_row in location_table:
        iCur.insertRow(loc_row)
del iCur