# PDI Analysis

This script investigates the output of our live stream detections in order to gain useful insights.
The goal is to create a "Physical Distancing Index" (`PDI`) that shows the trends and changes at
a camera feed while removing noise and perturbations from unwanted effects.

This directory contains a CSV file, `counts.csv`, with historical data for 7 cameras.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
# Load the historical detection counts. Later cells read the "stream_name",
# "timestamp" and "count" columns of this table.
data = pd.read_csv("counts.csv")

# Preview the last 10 rows.
data.tail(10)

In [None]:
# Keep every row except "new_orleans" readings whose timestamp is at or after
# 1585500000 (2020-03-29 16:40 UTC when read as Unix seconds).
# TODO: document why that part of the new_orleans stream is excluded.
data = data.loc[
    (data["stream_name"] != "new_orleans") | (data["timestamp"] < 1585500000)
]

In [None]:
# Split the table into one {"timestamp": Series, "count": Series} entry per
# stream. groupby walks the frame once and yields the streams in sorted-name
# order, so the colour and legend order of the plots is the same on every run
# (iterating a set of strings is not ordered consistently between runs).
data_dict = {
    stream_name: {
        "timestamp": stream_rows["timestamp"],
        "count": stream_rows["count"],
    }
    for stream_name, stream_rows in data.groupby("stream_name")
}

print(list(data_dict.keys()))

In [None]:
# Scatter the raw detected-object counts of every stream against time.
plt.figure(figsize=(18, 7))
for stream_name, d in data_dict.items():
    timestamps, counts = d["timestamp"], d["count"]
    # timestamps are interpreted as Unix seconds
    datetimes = pd.to_datetime(timestamps, unit='s')
    plt.scatter(datetimes, counts, label=stream_name, alpha=0.7)
# the x axis shows dates (the label previously read "data")
plt.xlabel("date")
plt.ylabel("object count")
plt.xlim([pd.Timestamp('2020-02-01'), pd.Timestamp('2020-04-03')])
# counts above 100 fall outside the visible range
plt.ylim([0, 100])
plt.title("Detected Object Count VS Time for Live Streams Cameras")
# trailing semicolon suppresses the notebook's echo of the Legend object
plt.legend();

In [None]:
def plot_pdi(data_dict, pdi_func):
    """Plot each stream's PDI over time, scaled so that its peak equals 100.

    Args:
        data_dict: Mapping of stream name to a dict holding "timestamp"
            (interpreted as Unix seconds) and "count" entries.
        pdi_func: Callable taking ``(datetimes, counts)`` and returning
            ``(datetimes, pdis)``.

    Streams for which ``pdi_func`` returns no values are skipped, and a
    stream whose PDI peak is 0 is drawn as a flat line at 0 instead of
    dividing by zero.
    """
    plt.figure(figsize=(18, 7))
    for stream_name, d in data_dict.items():
        timestamps, counts = d["timestamp"], d["count"]
        datetimes = pd.to_datetime(timestamps, unit='s')
        datetimes, pdis = pdi_func(datetimes, counts)
        pdis = np.asarray(pdis, dtype=float)
        if pdis.size == 0:
            # nothing to draw; np.max raises on an empty array
            continue
        peak = np.max(pdis)
        if peak == 0:
            # every value is 0: avoid 0 / 0 turning the whole line into NaN
            normalized_pdis = np.zeros_like(pdis)
        else:
            normalized_pdis = pdis * 100 / peak
        plt.plot(datetimes, normalized_pdis, label=stream_name)
    plt.xlabel("date")
    # values are percentages of each stream's own peak, not raw counts
    plt.ylabel("PDI (% of stream peak)")
    plt.xlim([pd.Timestamp('2020-02-01'), pd.Timestamp('2020-04-03')])
    plt.ylim([0, 100])
    plt.legend()

In [None]:
def create_pdi_func(window_days, avg_fcn):
    """Build a PDI function that averages counts over a trailing time window.

    Args:
        window_days: Length of the trailing window, in days.
        avg_fcn: Callable that reduces the counts inside one window to a
            single number.

    Returns:
        A function ``pdi_func(datetimes, counts)`` returning
        ``(datetimes, pdis)``, where ``pdis[i]`` is ``avg_fcn`` applied to
        the counts whose datetime lies in
        ``[datetimes[i] - window_days, datetimes[i]]`` (both ends included).
    """
    def pdi_func(datetimes, counts):
        def trailing_window(window_end):
            # boolean mask of the samples inside the window ending here
            window_begin = window_end - pd.Timedelta(days=window_days)
            return (datetimes >= window_begin) & (datetimes <= window_end)

        pdis = np.zeros(len(counts))
        for idx, window_end in enumerate(datetimes):
            pdis[idx] = avg_fcn(counts[trailing_window(window_end)])

        return datetimes, pdis

    return pdi_func

In [None]:
# PDI v1: plain arithmetic mean of the counts over a trailing one-day window.

# length of the trailing averaging window, in days
WINDOW_DAYS = 1

# reducer applied to the counts inside each window
AVG_FCN = np.mean

pdi_func_v1 = create_pdi_func(window_days=WINDOW_DAYS, avg_fcn=AVG_FCN)

plot_pdi(data_dict, pdi_func=pdi_func_v1)

In [None]:
# number of days to average counts over
WINDOW_DAYS = 3

# order p of the power mean used for averaging
# (LP=2 -> euclidean norm divided by sqrt(n), i.e. the root mean square)
LP = 2


def power_mean(x, p=LP):
    """Return the power mean of order ``p``: ``mean(|x| ** p) ** (1 / p)``.

    ``p`` is bound to the value of ``LP`` when this cell runs, so rebinding
    ``LP`` in a later cell does not silently change ``pdi_func_v2``.
    """
    return np.linalg.norm(x, ord=p) / (len(x) ** (1 / p))


# a function that averages the counts within a window
AVG_FCN = power_mean

pdi_func_v2 = create_pdi_func(WINDOW_DAYS, AVG_FCN)

plot_pdi(data_dict, pdi_func_v2)

In [None]:
# width of the moving-average kernel, in samples (a falsy value disables it)
SMOOTHING_WIDTH = 40


def pdi_func_v3(datetimes, counts):
    """Compute the v2 PDI and smooth it with a centred moving average.

    The kernel is ``SMOOTHING_WIDTH`` samples wide, capped at the series
    length. Near both ends of the series the kernel hangs over the edge;
    each output is divided by the number of real samples under the kernel,
    so the curve is not pulled towards zero there.

    Returns:
        ``(datetimes, pdis)``; ``pdis`` is a list when smoothing was applied,
        otherwise whatever ``pdi_func_v2`` returned.
    """
    datetimes, pdis = pdi_func_v2(datetimes, counts)

    # an empty series would give a 0-length kernel, which np.convolve rejects
    if SMOOTHING_WIDTH and len(pdis):
        kernel_size = min(SMOOTHING_WIDTH, len(pdis))
        kernel = np.ones(kernel_size)
        # mode="same" zero-pads the input, so count how many real samples
        # each output position actually covers and average over those only
        window_sums = np.convolve(pdis, kernel, mode="same")
        window_sizes = np.convolve(np.ones(len(pdis)), kernel, mode="same")
        pdis = list(window_sums / window_sizes)

    return datetimes, pdis

plot_pdi(data_dict, pdi_func_v3)