In [4]:
from datetime import datetime
import itertools
import json
import math
import os
import sys

import importlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import scipy
from scipy.spatial import KDTree
import seaborn as sns
from sklearn import metrics
import torch
from torch.utils.data import DataLoader

from utils import data_utils, data_loader, shape_utils

importlib.reload(data_utils)
importlib.reload(data_loader)
importlib.reload(shape_utils)

from dotenv import load_dotenv
load_dotenv()

%matplotlib inline

RUN_FOLDER = "../results/debug/"

In [2]:
kcm_inputs = data_utils.load_all_inputs(RUN_FOLDER, "kcm/")
atb_inputs = data_utils.load_all_inputs(RUN_FOLDER, "atb/")

In [6]:
# Sample shingle ids and choose dist/time ranges to test
shingle_ids = pd.unique(kcm_inputs['test_traces']['shingle_id'])
shingle_id_sample = np.random.choice(shingle_ids, 5)
d_buffers = np.linspace(1,10000,20)
t_buffers = np.linspace(1,60*60*6,20)

# Collect metrics for each combination of d/t
target = np.empty((len(shingle_id_sample)))
adj = np.empty((len(shingle_id_sample), len(d_buffers), len(t_buffers)))
# Filter on shingle id
for i, id in enumerate(shingle_id_sample):
    print(i)
    shingle_data = kcm_inputs['test_traces'][kcm_inputs['test_traces']['shingle_id']==id]
    adjacent_data = kcm_inputs['test_traces'][kcm_inputs['test_traces']['shingle_id']!=id]
    # Filter adjacent points by distance
    for d, dist in enumerate(d_buffers):
        pt_indices = shape_utils.get_points_within_dist(adjacent_data[['x','y']].values, shingle_data[['x','y']].values, dist)
        candidates = adjacent_data.iloc[pt_indices].sort_values(['shingle_id', 'locationtime'])
        # Filter adjacent points by time
        for t, time in enumerate(t_buffers):
            t_min = np.min(shingle_data.locationtime) - time
            t_max = np.min(shingle_data.locationtime)
            candidates = candidates[candidates['locationtime'].between(t_min, t_max)]
            # Keep record of feature for each combination of dist and time buffers
            if candidates is None:
                adj[i,d,t] = np.nan
            else:
                adj[i,d,t] = np.mean(candidates.speed_m_s)
    # Shingle data is same regardless of t_buffer and d_buffer
    target[i] = np.mean(shingle_data.speed_m_s)
# Fill cases with no adjacent points with the average
adj[np.isnan(adj)] = np.mean(adj[~np.isnan(adj)])

0
1
2
3
4


In [7]:
# Rows, Columns
r_values = np.empty((adj.shape[1], adj.shape[2]))
for d in range(adj.shape[1]):
    for t in range(adj.shape[2]):
        R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
        r_values[d,t] = R2
r_values

  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_plt, y_plt = shape_utils.fit_poly(adj[:,d,t], target)
  R2, x_

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.07289969, 0.07289969, 0.07289969, 0.07289969, 0.07289969,
        0.07289969, 0.07289969, 0.07289969, 0.07289969, 0.07289969,
        0.07289969, 0.07289969, 0.07289969, 0.07289969, 0.07289969,
        0.07289969, 0.07289969, 0.07289969, 0.07289969, 0.07289969],
       [0.34438348, 0.34438348, 0.34438348, 0.34438348, 0.34438348,
        0.34438348, 0.34438348, 0.34438348, 0.34438348, 0.34438348,
        0.34438348, 0.34438348, 0.34438348, 0.34438348, 0.34438348,
        0.34438348, 0.34438348, 0.34438348, 0.34438348, 0.34438348],
       [0.3346522 , 0.3346522 , 0.3346522 , 0.3346522 , 0.3346522 ,
        0.3346522 , 0.3346522 , 0.3346522 , 0.3346522 , 0.3346522 ,
        0.3346522 , 0.3346522 , 0.3346522 , 0

In [8]:
fig = px.imshow(r_values, labels={"x":"Time","y":"Dist"})
fig.show()