In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [86]:
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
#init_notebook_mode(connected=True)

import cufflinks as cf
cf.go_offline(connected=True)
cf.set_config_file(colorscale='plotly', world_readable=True)

# Extra options
# pd.options.display.max_rows = 30
# pd.options.display.max_columns = 25

# Show all code cells outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import os
from IPython.display import Image, display, HTML

In [3]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

Write the code for your interactive webpage in this notebook.

In [4]:
# store login data in login.py
%run login.py

In [5]:
# login query as multiline formatted string
# this assumes that login and pwd are defined 
# above

loginquery = f"""
mutation {{
  logIn(
      email:\"{login}\",
      password:\"{pwd}\") {{
    jwt {{
      token
      exp
    }}
  }}
}}
"""

In [6]:
import requests
url = 'https://api.numina.co/graphql'

mylogin = requests.post(url, json={'query': loginquery})
mylogin

<Response [200]>

In [7]:
token = mylogin.json()['data']['logIn']['jwt']['token']

In [8]:
expdate = mylogin.json()
expdate

{'data': {'logIn': {'jwt': {'exp': '2020-03-18T20:23:24.958824',
    'token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE1ODQ1NjMwMDQsImlhdCI6MTU4NDQ3NjYwNCwic3ViIjoyNzR9.EyPJWlqYx79vb1xVCh79lv7ydLQ07Fqq_m9Sr5EiE78'}}}}

In [9]:
device_ids = ['SWLSANDBOX1', 'SWLSANDBOX2', 'SWLSANDBOX3']

In [10]:
def get_zones(device_id):
    
    query_zones = """
    query {{
      behaviorZones (
        serialnos: "{0}"
        ) {{
        count
        edges {{
          node {{
            rawId
            text
          }}
        }}
      }}
    }}
    """.format(device_id)
    
    zones = requests.post(url, json={'query': query_zones}, headers = {'Authorization':token})
    
    df = pd.DataFrame([x['node'] for x in zones.json()['data']['behaviorZones']['edges']])
    df['device'] = device_id
    
    return df

In [11]:
zones_df = pd.concat([get_zones(device_ids[i]) for i in range(3)])

In [12]:
zones_df

Unnamed: 0,rawId,text,device
0,1899,Table,SWLSANDBOX1
1,1947,entrance,SWLSANDBOX1
2,1949,entrance1,SWLSANDBOX1
3,1956,modelzone,SWLSANDBOX1
4,1966,entrence alt,SWLSANDBOX1
5,1969,funny triangle,SWLSANDBOX1
6,1972,S Zone,SWLSANDBOX1
7,1973,stairs,SWLSANDBOX1
8,1975,leftZone,SWLSANDBOX1
9,1976,UpperLeft,SWLSANDBOX1


In [13]:
def get_dwell(func, ID, interval):
    '''
    func is either feedDwellTimeDistribution or zoneDwellTimeDistribution
    '''
    if func == 'feedDwellTimeDistribution':
        arg = 'serialnos: "{0}"'.format(ID)
    else:
        arg = 'zoneIds: {0}'.format(ID)
        
    query = """
    query {{
        {0}(
        {1},
        startTime: "2019-02-20T00:00:00",
        endTime: "2020-01-12T00:00:00",
        timezone: "America/New_York",
        objClasses: ["pedestrian"],
        interval: "{2}"
        ){{
        edges {{
          node {{
            time
            objClass
            pct100
            pct75
            pct50
            pct25
            mean
            count
          }}
        }}
      }}
    }}
    """.format(func, arg, interval)

    dwell = requests.post(url, json={'query': query}, 
                           headers = {'Authorization':token})
    
    df = pd.DataFrame([x['node'] for x in dwell.json()['data'][func]['edges']])
    if func == 'feedDwellTimeDistribution':
        df['device'] = ID
    else:
        df['zone'] = ID
    
    return df

In [14]:
feed_dwell_1d_df = pd.concat([get_dwell('feedDwellTimeDistribution', device_ids[i], '1d') 
                              for i in range(3)])

In [17]:
feed_dwell_1d_df[feed_dwell_1d_df['count']!=0].head()

Unnamed: 0,count,mean,objClass,pct100,pct25,pct50,pct75,time,device
0,3272,15.26,pedestrian,14.03,2.6,2.6,5.7,2019-02-20T00:00:00-05:00,SWLSANDBOX1
1,158,6.4,pedestrian,7.32,2.56,2.56,4.18,2019-02-21T00:00:00-05:00,SWLSANDBOX1
2,83,19.57,pedestrian,15.1,3.62,3.62,7.87,2019-02-22T00:00:00-05:00,SWLSANDBOX1
3,8,4.69,pedestrian,6.8,2.09,2.09,3.66,2019-02-23T00:00:00-05:00,SWLSANDBOX1
4,6,3.99,pedestrian,6.26,1.06,1.06,4.69,2019-02-24T00:00:00-05:00,SWLSANDBOX1


In [134]:
zone_dwell_1d_df = pd.concat([get_dwell('zoneDwellTimeDistribution', z)
                           for z in zones_df['rawId'].values])

In [135]:
zone_dwell_1d_df[zone_dwell_df['count']!=0]

Unnamed: 0,count,mean,objClass,pct100,pct25,pct50,pct75,time,zone
0,924,11.70,pedestrian,252.34,2.07,4.64,11.43,2019-02-20T00:00:00-05:00,1899
1,39,4.39,pedestrian,9.99,2.46,4.18,6.32,2019-02-21T00:00:00-05:00,1899
2,23,13.14,pedestrian,123.07,1.52,3.15,6.33,2019-02-22T00:00:00-05:00,1899
4,1,4.72,pedestrian,4.72,4.72,4.72,4.72,2019-02-24T00:00:00-05:00,1899
5,159,6.88,pedestrian,123.50,1.62,3.57,7.18,2019-02-25T00:00:00-05:00,1899
...,...,...,...,...,...,...,...,...,...
320,11,1.03,pedestrian,2.22,0.57,0.81,1.20,2020-01-06T00:00:00-05:00,1974
321,17,5.08,pedestrian,47.03,0.76,0.94,2.12,2020-01-07T00:00:00-05:00,1974
322,13,2.01,pedestrian,13.08,0.41,0.87,1.35,2020-01-08T00:00:00-05:00,1974
323,9,1.62,pedestrian,3.02,0.73,1.70,2.26,2020-01-09T00:00:00-05:00,1974


In [18]:
'''
def extract_time(df):
    df['year'] = df['time'].str[:4].astype(int)
    df['month'] = df['time'].str[5:7].astype(int)
    df['day'] = df['time'].str[8:10].astype(int)
    df['date'] = pd.to_datetime(df['time'].str[:10])
    df['hour'] = df['time'].str[11:13].astype(int)
    return df.drop('time', axis=1)
''';

In [40]:
'''
feed_dwell_df = extract_time(feed_dwell_df)
zone_dwell_df = extract_time(zone_dwell_df)
''';

In [136]:
# replace NaN with 0
feed_dwell_1d_df = feed_dwell_1d_df.fillna(0)
# zone_dwell_df = zone_dwell_df.fillna(0)

In [19]:
feed_dwell_1d_df['time'] = feed_dwell_1d_df['time'].str[:-6].apply(lambda x : pd.Timestamp(x))

In [138]:
# zone_dwell_1d_df['time'] = zone_dwell_1d_df['time'].str[:-6].apply(lambda x : pd.Timestamp(x))
# zone_dwell_1d_df.zone = zone_dwell_1d_df.zone.astype(str)

In [50]:
feed_dwell_1d_df

Unnamed: 0,count,mean,objClass,pct100,pct25,pct50,pct75,time,device
0,3272,15.26,pedestrian,14.03,2.60,2.60,5.70,2019-02-20,SWLSANDBOX1
1,158,6.40,pedestrian,7.32,2.56,2.56,4.18,2019-02-21,SWLSANDBOX1
2,83,19.57,pedestrian,15.10,3.62,3.62,7.87,2019-02-22,SWLSANDBOX1
3,8,4.69,pedestrian,6.80,2.09,2.09,3.66,2019-02-23,SWLSANDBOX1
4,6,3.99,pedestrian,6.26,1.06,1.06,4.69,2019-02-24,SWLSANDBOX1
...,...,...,...,...,...,...,...,...,...
321,820,10.53,pedestrian,10.36,2.12,2.12,4.68,2020-01-07,SWLSANDBOX3
322,80,5.96,pedestrian,5.23,2.08,2.08,3.64,2020-01-08,SWLSANDBOX3
323,109,5.17,pedestrian,5.30,2.59,2.59,4.14,2020-01-09,SWLSANDBOX3
324,96,8.98,pedestrian,7.85,3.10,3.10,4.71,2020-01-10,SWLSANDBOX3


In [87]:
def get_df(groupby):
    if groupby == 'device':
        return feed_dwell_1d_df.copy()
    else:
        return zone_dwell_1d_df.copy()

In [84]:
def plot_dwell_timeline(groupby, selected, metric, start_date, end_date):
    '''
    device_or_zone is either 'device' or 'zone';
    selected is a list of device rawIds or zone rawIds;
    metric is a value in ['mean', 'pct100', 'pct75', 'pct50', 'pct25']
    '''
    df = get_df(groupby)
        
    plot_df = df.loc[(df.time.dt.date >= start_date) & 
                     (df.time.dt.date <= end_date)].copy()
    
    fig = go.Figure()
    
    for s in selected:
        sub_df = plot_df[plot_df[groupby] == s]
        fig.add_trace(go.Scatter(x=sub_df.time, y=sub_df[metric], mode='lines', name=s))
    
    fig.update_layout(
        title=f"Dwell Time ({metric}) grouped by '{groupby}'",
        xaxis_title="time",
        yaxis_title=metric)
    
    fig.show()
    

In [85]:
_ = interact(plot_dwell_timeline, 
             groupby='device',
             selected=widgets.SelectMultiple(options=device_ids, value=device_ids, disabled=False),
             metric=widgets.Dropdown(options=['mean', 'pct100', 'pct75', 'pct50', 'pct25'], value='mean'), 
             start_date=widgets.DatePicker(value=pd.to_datetime('2019-02-20')),
             end_date=widgets.DatePicker(value=pd.to_datetime('2020-01-12')),
            )

interactive(children=(Text(value='device', description='groupby'), SelectMultiple(description='selected', inde…

- BOX1 = StreetScape;
- BOX2 = Under Raincoat;
- BOX3 = Outside

The days and devices with the highest mean dwell time are the following:
1. BOX1: 2019-08-17
2. BOX1: 2019-05-04
3. BOX3: 2019-11-14

In [67]:
feed_dwell_1d_df[feed_dwell_1d_df['mean'] >= 140].sort_values('mean', ascending=False)

Unnamed: 0,count,mean,objClass,pct100,pct25,pct50,pct75,time,device
178,75,343.66,pedestrian,36.02,3.65,3.65,10.36,2019-08-17,SWLSANDBOX1
73,128,340.78,pedestrian,46.36,6.8,6.8,15.13,2019-05-04,SWLSANDBOX1
267,605,145.88,pedestrian,13.49,3.67,3.67,6.25,2019-11-14,SWLSANDBOX3


In [97]:
feed_dwell_1d_df.groupby('device')['count'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
device,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SWLSANDBOX1,326.0,1161.288344,1552.170562,0.0,111.5,615.0,1721.75,14922.0
SWLSANDBOX2,326.0,129.0,201.519001,0.0,0.0,76.0,180.5,2047.0
SWLSANDBOX3,326.0,311.496933,470.222891,0.0,56.25,196.0,414.5,5625.0


In [101]:
def boxplot_dwell(groupby, column, bound_factor):
    df = get_df(groupby)
    
    q3 = df[column].quantile(0.75) 
    q1 = df[column].quantile(0.25)
    iqr = q3 - q1
    sub_df = df[(df[column] <= q3 + iqr*bound_factor) & 
                  ((df[column] >= q1 - iqr*bound_factor))]
    
    if column == 'count':
        title = f"distribution of count grouped by '{groupby}'" +\
        " with values {bound_factor} * IQR beyond Q1/Q3 removed"
    else:
        title = f"distribution of mean dwell time grouped by '{groupby}'" +\
        f" with values {bound_factor} * IQR beyond Q1/Q3 removed"
    
    fig = px.box(sub_df, x=groupby, y=column, points="all", title=title)

    fig.show()

In [102]:
_ = interact(boxplot_dwell, 
             groupby='device',
             column=widgets.RadioButtons(options=['count', 'mean'], value='count'),
             bound_factor=widgets.FloatSlider(
                 value=1.5,
                 min=-3,
                 max=10,
                 step=0.1,
                 disabled=False,
                 continuous_update=False,
                 orientation='horizontal',
                 readout=True,
                 readout_format='.1f')
            )

interactive(children=(Text(value='device', description='groupby'), RadioButtons(description='column', options=…