## Libraries

In [1]:
!pip install numpy
!pip install pandas
!pip install statsmodels

import pandas as pd # data manipulation library
import numpy as np # math library
import datetime as dt # to discover week day
import statsmodels as sm # statistical models
import statsmodels.api as sma # statistical models api
import time as tm
import matplotlib.pyplot as plt

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


## Open dataset

In [2]:
DATASET_PATH = f"../../data/all_data_sorted.csv"

col_names = [
  'Sensor',
  'Date',
  'Time',
  'Lane',
  'Speed',
  'Max Speed',
  'Size'
]

alldata = pd.read_csv(DATASET_PATH, ';', header=None, names=col_names)

In [3]:
alldata.head()
#alldata['Lane'].value_counts()

Unnamed: 0,Sensor,Date,Time,Lane,Speed,Max Speed,Size
0,RSI128,2016/05/01,00:00:09,1,26.0,60.0,0.0
1,RSI131,2016/05/01,00:00:09,2,20.0,60.0,1.1
2,RSI132,2016/05/01,00:00:09,1,45.0,60.0,0.0
3,RSI131,2016/05/01,00:00:10,1,40.0,60.0,0.5
4,RSI129,2016/05/01,00:00:12,1,35.0,60.0,0.0


## Get sensor and remove unnecessary columns

In [4]:
data = alldata[alldata['Sensor'] == 'RSI128']
#data['Lane'].value_counts()

In [5]:
data = data.drop(columns=['Sensor','Lane','Max Speed','Size'])

## Create week day column from date

In [6]:
# Get datetime
data['Date'] = pd.to_datetime(data['Date'], format='%Y/%m/%d')

# Adjust type
f = lambda x : tm.strptime(x, '%H:%M:%S')
data['Time'] = data['Time'].apply(f)

g = lambda x : dt.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()
data['Time'] = data['Time'].apply(g)

h = lambda x : int(x)
data['Time'] = data['Time'].apply(h)

# Create week day from date
j = lambda x : x.weekday()
data['WeekDay'] = data['Date'].apply(j)

In [7]:
data.head()

Unnamed: 0,Date,Time,Speed,WeekDay
0,2016-05-01,9,26.0,6
7,2016-05-01,18,32.0,6
39,2016-05-01,104,16.0,6
108,2016-05-01,206,22.0,6
160,2016-05-01,305,25.0,6


## Save dataset

In [8]:
data.to_csv(f"dataset/dataset.csv", ";", index=False)

In [9]:
def get_day_size():
  return (24 * 60 * 60)

In [10]:
def get_week_size():
  return (7 * 24 * 60 * 60)

In [11]:
def get_time_data (data):
  weekDay = np.asarray(data['WeekDay'])
  time = np.asarray(data['Time'])
  speed = np.asarray(data['Speed'])
  
  timeData = []

  for i in range(len(time)):
    w = [(1 if weekDay[i] == j else 0) for j in range(7)]

    timeData.append((time[i], speed[i], w[0], w[1], w[2], w[3], w[4], w[5], w[6]))
      
  cols = [
    'Time',
    'Speed',
    'Sunday',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
  ]
  
  timeData = pd.DataFrame(timeData, columns=cols)
  
  return timeData

In [12]:
time_data = get_time_data(data)
time_data.to_csv(f"dataset/dataset_time.csv", ";", index=False)

## Plot flows

In [13]:
def plot_time(time_data):
  weekChange = "Friday"
  n = len(time_data)

  w = 1
  i = 0
  while i < n:
    s = i
  
    while (i != n) and (time_data.loc[i][weekChange] == 0):
      i += 1

    while (i != n) and (time_data.loc[i][weekChange] == 1):
      i += 1

    e = i - 1

    print(f"Semana {w} - {s} : {e}")
    path = f"plots/time/week_{str(w).zfill(2)}"

    plt.plot(time_data["Time"][s:e])

    plt.title(f"Tempo - Semana {w}")
    plt.ylabel('T')
    plt.xlabel('N')
    plt.rcdefaults()
    
    plt.savefig(path + ".png", bbox_inches='tight')
    
    plt.close('all')

    w += 1

In [14]:
len(time_data)

536879

In [15]:
plot_time(time_data)

Semana 1 - 0 : 39966
Semana 2 - 39967 : 80953
Semana 3 - 80954 : 119891
Semana 4 - 119892 : 156071
Semana 5 - 156072 : 196511
Semana 6 - 196512 : 240228
Semana 7 - 240229 : 283294
Semana 8 - 283295 : 324453
Semana 9 - 324454 : 365545
Semana 10 - 365546 : 408700
Semana 11 - 408701 : 450801
Semana 12 - 450802 : 492049
Semana 13 - 492050 : 532297
Semana 14 - 532298 : 536878
