## Libraries

In [1]:
!pip install numpy
!pip install pandas
!pip install statsmodels

import datetime as dt # to discover week day
import os # output directory creation
import time as tm

import matplotlib.pyplot as plt
import numpy as np # math library
import pandas as pd # data manipulation library
import statsmodels as sm # statistical models
import statsmodels.api as sma # statistical models api

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


## Open dataset

In [2]:
# Location of the raw sensor readings (semicolon-separated, no header row).
DATASET_PATH = "data/all_data_sorted.csv"

# Column names assigned to the header-less file, in file order.
col_names = [
  'Sensor',
  'Date',
  'Time',
  'Lane',
  'Speed',
  'Max Speed',
  'Size'
]

# `sep` must be passed by keyword: pandas >= 2.0 rejects positional arguments
# after the file path.
alldata = pd.read_csv(DATASET_PATH, sep=';', header=None, names=col_names)

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
alldata.head(n=5)

Unnamed: 0,Sensor,Date,Time,Lane,Speed,Max Speed,Size
0,RSI128,2016/05/01,00:00:09,1,26.0,60.0,0.0
1,RSI131,2016/05/01,00:00:09,2,20.0,60.0,1.1
2,RSI132,2016/05/01,00:00:09,1,45.0,60.0,0.0
3,RSI131,2016/05/01,00:00:10,1,40.0,60.0,0.5
4,RSI129,2016/05/01,00:00:12,1,35.0,60.0,0.0


## Get sensor and remove unnecessary columns

In [4]:
# Sensor whose readings are analysed in the rest of the notebook.
SENSOR_ID = 'RSI128'

data = alldata[alldata['Sensor'] == SENSOR_ID]

In [5]:
# Keep only the columns used below (Date, Time, Speed).
unused_columns = ['Sensor', 'Lane', 'Max Speed', 'Size']
data = data.drop(unused_columns, axis=1)

## Create week day column from date

In [6]:
# Parse the calendar date.
data['Date'] = pd.to_datetime(data['Date'], format='%Y/%m/%d')

# Convert the 'HH:MM:SS' strings into whole seconds since midnight.
# Vectorised replacement for the previous row-by-row strptime/timedelta passes.
data['Time'] = pd.to_timedelta(data['Time']).dt.total_seconds().astype('int64')

# Day of the week following datetime.weekday(): Monday = 0 ... Sunday = 6.
data['WeekDay'] = data['Date'].dt.weekday.astype('int64')

In [7]:
# Preview the transformed frame (Date, Time in seconds, Speed, WeekDay).
data.head(n=5)

Unnamed: 0,Date,Time,Speed,WeekDay
0,2016-05-01,9,26.0,6
7,2016-05-01,18,32.0,6
39,2016-05-01,104,16.0,6
108,2016-05-01,206,22.0,6
160,2016-05-01,305,25.0,6


## Save dataset

In [8]:
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("dataset", exist_ok=True)

# `sep` is passed by keyword; positional use is deprecated in recent pandas.
data.to_csv("dataset/dataset.csv", sep=";", index=False)

## Flows utils

In [9]:
def get_day_size (flow_interval):
  """Return how many whole blocks of `flow_interval` seconds fit in one day."""
  seconds_per_day = 86_400  # 24 h * 60 min * 60 s
  return seconds_per_day // flow_interval

In [10]:
def get_week_size (flow_interval):
  """Return how many whole blocks of `flow_interval` seconds fit in one week."""
  seconds_per_week = 604_800  # 7 days * 24 h * 60 min * 60 s
  return seconds_per_week // flow_interval

In [11]:
def get_flow_data(n, accSpeed, weekDay):
  """Build one flow record for a single time block.

  Arguments:
    n: number of vehicles counted in the block
    accSpeed: sum of the speeds of those vehicles
    weekDay: integer in 0..6 selecting which of the seven indicators is set

  Returns:
    A 10-tuple (n, density, avgSpeed, w0, ..., w6) where avgSpeed is
    accSpeed / n (0 when n is 0), density is n / avgSpeed (0 when avgSpeed
    is 0) and w<i> is 1 when weekDay == i, otherwise 0.
  """
  # Guard both divisions: an empty block has neither a speed nor a density.
  if n:
    avgSpeed = accSpeed / n
  else:
    avgSpeed = 0

  if avgSpeed:
    density = n / avgSpeed
  else:
    density = 0

  # One-hot encoding of the week day.
  indicators = tuple(1 if weekDay == day else 0 for day in range(7))

  return (n, density, avgSpeed) + indicators

In [12]:
def get_flow (data, flow_interval):
  """Aggregate per-vehicle readings into fixed-length flow blocks.

  Every calendar day that appears in `data` yields exactly
  `get_day_size(flow_interval)` consecutive rows, one per block of
  `flow_interval` seconds. Blocks in which no vehicle was seen are emitted
  as zero measurements, so the position of a row always corresponds to the
  same time of day. Days with no rows at all in `data` produce no output.

  Arguments:
    data: DataFrame with 'Date', 'WeekDay', 'Time' and 'Speed' columns, with
      all rows of one day stored contiguously. 'Time' is expected in seconds
      since midnight and 'WeekDay' in the datetime.weekday() convention
      (Monday = 0 ... Sunday = 6).
    flow_interval: length of a block, in seconds

  Returns:
    DataFrame with the columns 'Flow', 'Density', 'AveSpeed' followed by the
    seven week-day indicators 'Monday' ... 'Sunday'. It is empty (columns
    only) when `data` has no rows.

  Raises:
    ValueError: if `flow_interval` does not fit at least once into a day.
  """
  # get_flow_data sets the indicator at index `weekDay`, and weekDay follows
  # datetime.weekday(), so the first indicator column is Monday, not Sunday.
  cols = [
    'Flow',
    'Density',
    'AveSpeed',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
  ]

  day_size = get_day_size(flow_interval)
  if day_size <= 0:
    raise ValueError("flow_interval must be positive and not longer than one day")

  date = np.asarray(data['Date'])
  weekDay = np.asarray(data['WeekDay'])
  time = np.asarray(data['Time'])
  speed = np.asarray(data['Speed'])

  flowData = []

  if len(date) == 0:
    return pd.DataFrame(flowData, columns=cols)

  def flush_day(counts, speedSums, dayOfWeek):
    # Emit every block of the finished day, including the empty ones.
    for n, acc in zip(counts, speedSums):
      flowData.append(get_flow_data(n, acc, dayOfWeek))

  dateControl = date[0]
  dayOfWeek = weekDay[0]
  counts = [0] * day_size
  speedSums = [0] * day_size

  for i in range(len(date)):
    if date[i] != dateControl: # day change: emit the finished day, start anew
      flush_day(counts, speedSums, dayOfWeek)
      dateControl = date[i]
      dayOfWeek = weekDay[i]
      counts = [0] * day_size
      speedSums = [0] * day_size

    # Block the reading falls into. When flow_interval does not divide the
    # day evenly, the leftover seconds are counted in the last block.
    block = min(int(time[i]) // flow_interval, day_size - 1)
    counts[block] += 1
    speedSums[block] += speed[i]

  # The last day is still pending once the loop ends.
  flush_day(counts, speedSums, dayOfWeek)

  flowData = pd.DataFrame(flowData, columns=cols)

  return flowData

## Get Flows

In [13]:
# Block lengths, in seconds, for which a flow data set is generated.
flows_intervals = [150, 300, 450, 900]

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("dataset", exist_ok=True)

for flow_interval in flows_intervals:
  flow_data = get_flow(data, flow_interval)
  week_size = get_week_size(flow_interval)

  # `sep` is passed by keyword; positional use is deprecated in recent pandas.
  flow_data.to_csv(f"dataset/dataset_flow_{flow_interval}.csv", sep=";", index=False)

## Plot flows

In [14]:
def plot_flow(flow_series, flow_interval):
  """ Plot of Flow

  Plot the flow from week to week and save one PNG per week under
  plots/flow/ (the directory is created when missing). A trailing partial
  week is plotted as well.

  Arguments:
    flow_series: an array of flows
    flow_interval: the interval in which the flow was made
  """

  week_size = get_week_size(flow_interval)
  # Ceiling division, so the last (possibly incomplete) week is not dropped.
  n = -(-len(flow_series) // week_size)

  # savefig does not create missing directories.
  os.makedirs("plots/flow", exist_ok=True)

  # Reset matplotlib's rc settings once, so every weekly figure is rendered
  # with the same defaults.
  plt.rcdefaults()

  for i in range(n):
    s = week_size * i
    e = min(s + week_size, len(flow_series))
    path = f"plots/flow/flow_{flow_interval}_week_{str(i+1).zfill(2)}"

    fig, ax = plt.subplots()
    ax.plot(flow_series[s:e])

    ax.set_title(f"Fluxo (Intervalo de {flow_interval} segundos) - Semana {i+1}")
    ax.set_ylabel('Fluxo')
    ax.set_xlabel('Tempo')

    fig.savefig(path + ".png", bbox_inches='tight')

    # Close only the figure created here, to free memory between weeks.
    plt.close(fig)

In [16]:
# Block lengths, in seconds, for which the weekly flow plots are produced.
flows_intervals = [150, 300, 450, 900]

for flow_interval in flows_intervals:
  # Rebuild the flow table for this interval and plot its 'Flow' column.
  flow_data = get_flow(data, flow_interval)
  plot_flow(flow_data.loc[:, 'Flow'], flow_interval)