# PDI Analysis

This script analyzes the output of our live-stream detections in order to gain useful insights.
The goal is to create a "Physical Distancing Index" (`PDI`) that shows the trends and changes in
a camera feed while filtering out noise and perturbations caused by unwanted effects.

This directory contains a CSV file, `counts.csv`, with historical detection counts for 7 cameras.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
# Load the historical detection records; later cells read the "stream_name",
# "timestamp" and "count" columns of this table.
data = pd.read_csv("counts.csv")

# Preview the last 10 rows.
data.tail(10)

In [None]:
# Split the table into one {"timestamp": ..., "count": ...} entry per stream.
# groupby partitions the rows in a single pass and yields the stream names in
# sorted order, so plot colours and legend order are stable between runs
# (iterating a set of strings is not). Each value is a Series that keeps the
# row index of the original table.
data_dict = {
    stream_name: {
        "timestamp": stream_rows["timestamp"],
        "count": stream_rows["count"],
    }
    for stream_name, stream_rows in data.groupby("stream_name")
}

print(list(data_dict))

In [None]:
# Raw detections: one scatter series per stream, all on a shared time axis.
plt.figure(figsize=(18, 7))
for stream_name, d in data_dict.items():
    timestamps, counts = d["timestamp"], d["count"]
    # timestamps are interpreted as seconds since the Unix epoch
    datetimes = pd.to_datetime(timestamps, unit='s')
    plt.scatter(datetimes, counts, label=stream_name)
plt.xlabel("date")  # the x-axis holds datetimes, not generic "data"
plt.ylabel("object count")
plt.xlim([pd.Timestamp('2020-02-01'), pd.Timestamp('2020-04-03')])
plt.ylim([0, 100])
plt.title("Detected Object Count VS Time for Live Streams Cameras")
plt.legend();

In [None]:
def plot_pdi(data_dict, pdi_func, *, xlim=("2020-02-01", "2020-04-03"), ylim=(0, 100)):
    """Plot one PDI curve per stream of ``data_dict`` on a single figure.

    Args:
        data_dict: mapping of stream name to a dict with a "timestamp" entry
            (interpreted as seconds since the Unix epoch) and a "count" entry
        pdi_func: callable taking ``(datetimes, counts)`` and returning the
            values to plot against ``datetimes``
        xlim: (start, end) of the x-axis; each bound is passed to
            ``pd.Timestamp``. Defaults to the range used so far.
        ylim: (low, high) of the y-axis
    """
    plt.figure(figsize=(18, 7))
    for stream_name, d in data_dict.items():
        datetimes = pd.to_datetime(d["timestamp"], unit='s')
        pdis = pdi_func(datetimes, d["count"])
        plt.plot(datetimes, pdis, label=stream_name)
    plt.xlabel("date")  # the x-axis holds datetimes, not generic "data"
    plt.ylabel("number of detected objects")
    plt.xlim([pd.Timestamp(bound) for bound in xlim])
    plt.ylim(list(ylim))
    plt.legend()

In [None]:
def average_over_time(window_days, avg_fcn):
    """Build a trailing-window averaging function for a count time series.

    Args:
        window_days: length of the trailing window, in days
        avg_fcn: callable that reduces the counts inside one window to a
            single value (for example ``np.mean``)

    Returns:
        a function ``func(datetimes, counts)`` returning an array of
        ``len(counts)`` values, where entry ``i`` is ``avg_fcn`` applied to
        the counts observed in the window ending at ``datetimes[i]``
    """
    def func(datetimes, counts):
        smoothed = np.zeros(len(counts))

        for idx, window_end in enumerate(datetimes):
            # the window is closed on both ends:
            # [window_end - window_days, window_end]
            window_start = window_end - pd.Timedelta(days=window_days)
            in_window = (datetimes >= window_start) & (datetimes <= window_end)

            smoothed[idx] = avg_fcn(counts[in_window])

        return smoothed

    return func

In [None]:
# Baseline PDI: plain arithmetic mean of the counts in a trailing 1-day window.
window_days = 1
avg_fcn = np.mean

plot_pdi(data_dict, average_over_time(window_days, avg_fcn))

In [None]:
# PDI variant: L^p-based average of the counts in a trailing 3-day window.
window_days = 3
LP = 1


def avg_fcn(x):
    """Return the L^p norm of ``x`` divided by ``len(x) ** (1 / LP)``.

    ``LP`` is read from the module-level variable at call time. For
    ``LP = 1`` the result is the mean of the absolute values of ``x``.
    """
    return np.linalg.norm(x, ord=LP) / (len(x) ** (1 / LP))


plot_pdi(data_dict, average_over_time(window_days, avg_fcn))