In [1]:
import pandas as pd
import numpy as np
from pykalman import KalmanFilter
from statsmodels.nonparametric.smoothers_lowess import lowess
from math import cos, asin, sqrt, pi

In [2]:
# Load the recorded GPS log; the 'time' column is parsed as datetimes on read.
data = pd.read_csv(
    "./original_dataset/GPS_location.csv",
    parse_dates=['time'],
)

In [3]:
# code reference from exercise 3
# https://coursys.sfu.ca/2022fa-cmpt-353-d1/pages/Exercise3
# output the gpx file to show our path
def output_gpx(points, output_filename):
    """
    Write the track described by ``points`` to ``output_filename`` as a GPX file.

    Parameters
    ----------
    points : pandas.DataFrame
        Must provide 'lat' and 'lon' columns; one <trkpt> element is written
        per row, in row order, with both values formatted to 7 decimal places.
    output_filename : str or path-like
        Destination file; it is created or overwritten.

    Raises
    ------
    KeyError
        If ``points`` has no 'lat' or 'lon' column.
    """
    from xml.dom.minidom import getDOMImplementation

    # Document skeleton: <gpx><trk><trkseg> ... </trkseg></trk></gpx>
    doc = getDOMImplementation().createDocument(None, 'gpx', None)
    trk = doc.createElement('trk')
    doc.documentElement.appendChild(trk)
    trkseg = doc.createElement('trkseg')
    trk.appendChild(trkseg)

    # Iterate explicitly instead of using DataFrame.apply(axis=1) for its side
    # effects: on an empty frame pandas probes the callback with a dummy
    # all-NaN row, which would add a bogus lat="nan" lon="nan" track point.
    for lat, lon in zip(points['lat'], points['lon']):
        trkpt = doc.createElement('trkpt')
        trkpt.setAttribute('lat', '%.7f' % lat)
        trkpt.setAttribute('lon', '%.7f' % lon)
        trkseg.appendChild(trkpt)

    # The XML declaration carries no encoding attribute, so readers assume
    # UTF-8; write the file as UTF-8 regardless of the platform default.
    with open(output_filename, 'w', encoding='utf-8') as fh:
        doc.writexml(fh, indent=' ')

In [4]:
# Name the three CSV columns, then keep only the coordinates.
data.columns = ['timestamp', 'lat', 'lon']
data = data.drop(columns='timestamp')
# astype returns a new object rather than converting in place, so the result
# has to be assigned back for the float conversion to take effect.
data = data.astype({'lat': float, 'lon': float})
# Export the unfiltered track so it can be compared with the smoothed ones.
output_gpx(data, './clean_dataset/raw_gps.gpx')
data.head()

Unnamed: 0,lat,lon
0,49.27837,-122.910056
1,49.278377,-122.910004
2,49.278379,-122.909959
3,49.278377,-122.909924
4,49.278375,-122.909902


# KF 1: Kalman filter with parameters tuned by EM

In [5]:
# Approach adapted from:
# https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data

# Seed the filter with the first recorded (lat, lon) row.
initial_state = data.iloc[0]

# Identity matrices: the predicted state equals the previous state, and each
# observation is read as a direct measurement of the state.
transition_matrix = [[1, 0], [0, 1]]
observation_matrix = [[1, 0], [0, 1]]

# Build the filter and let 5 EM iterations tune its parameters from the
# observed track.
kf1 = KalmanFilter(
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    initial_state_mean=initial_state,
).em(data, n_iter=5)

# smooth() returns a pair; only its first element is kept here.
kalman_smoothed1, _ = kf1.smooth(data)

In [6]:
# Wrap the smoothed states in a DataFrame with the 'lat'/'lon' column names
# that output_gpx reads, then export the track.
kalman_smoothed1 = pd.DataFrame(kalman_smoothed1, columns=['lat', 'lon'])
output_gpx(kalman_smoothed1, './clean_dataset/smoothed_gps1.gpx')

# KF 2: Kalman filter with observation covariance set to 10× the KF 1 estimate

In [7]:
# Second filter: same matrices and starting point as KF 1, but the observation
# covariance is set to 10x the value held by kf1, and em_vars names only the
# transition and initial-state covariances for the 5 EM iterations.
kf2 = KalmanFilter(
    initial_state_mean=initial_state,
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    observation_covariance=kf1.observation_covariance * 10,
    em_vars=['transition_covariance', 'initial_state_covariance'],
).em(data, n_iter=5)

# smooth() returns a pair; only its first element is kept here.
kalman_smoothed2, _ = kf2.smooth(data)

In [8]:
# Wrap the smoothed states in a DataFrame with the 'lat'/'lon' column names
# that output_gpx reads, then export the track.
kalman_smoothed2 = pd.DataFrame(kalman_smoothed2, columns=['lat', 'lon'])
output_gpx(kalman_smoothed2, './clean_dataset/smoothed_gps2.gpx')

# Distance: total path length of each track, in metres

In [9]:
# reference: exercise 3

def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance in kilometres between (lat1, lon1) and (lat2, lon2).

    All coordinates are in decimal degrees. Scalars, NumPy arrays and pandas
    Series are accepted and combined element-wise, so whole columns can be
    processed in one call. NaN inputs give NaN outputs.

    Formula from:
    https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula/21623206
    """
    earth_diameter_km = 12742  # 2 * 6371 km mean radius
    p = np.pi / 180  # degrees -> radians
    a = (
        0.5 - np.cos((lat2 - lat1) * p) / 2
        + np.cos(lat1 * p) * np.cos(lat2 * p) * (1 - np.cos((lon2 - lon1) * p)) / 2
    )
    # Floating-point rounding can leave `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which is outside the domain of sqrt/arcsin.
    # Clipping keeps the result defined; NaN passes through unchanged.
    a = np.clip(a, 0.0, 1.0)
    return earth_diameter_km * np.arcsin(np.sqrt(a))


def distance(points):
    """
    Total length, in metres, of the path through ``points`` in row order.

    Parameters
    ----------
    points : pandas.DataFrame
        Must provide 'lat' and 'lon' columns in decimal degrees.

    Returns
    -------
    float
        Sum of the haversine distances between consecutive rows, times 1000
        (km -> m). Segments touching a NaN coordinate are skipped. Frames
        with fewer than two rows give 0.0.
    """
    lat = points['lat'].to_numpy(dtype=float)
    lon = points['lon'].to_numpy(dtype=float)
    # Pair every point with its predecessor via array slices; this replaces the
    # row-by-row DataFrame.apply with a single vectorised haversine call.
    segments_km = haversine(lat[1:], lon[1:], lat[:-1], lon[:-1])
    # nansum mirrors the NaN-skipping behaviour of pandas' Series.sum().
    return float(np.nansum(segments_km)) * 1000

In [10]:
# Path length of the unfiltered GPS track, in metres (distance() scales km by 1000).
raw_dist = distance(data)
raw_dist

380.0319688041719

In [11]:
# Path length of the KF 1 smoothed track, in metres (distance() scales km by 1000).
smoothed1_dist = distance(kalman_smoothed1)
smoothed1_dist

336.63219371607966

In [12]:
# Path length of the KF 2 smoothed track, in metres (distance() scales km by 1000).
smoothed2_dist = distance(kalman_smoothed2)
smoothed2_dist

318.9676655249059