## Traffic Data: Every 5 min

In [None]:
import numpy as np

# Where the dataset lives and which one to open.
data_path = "/home/qiyu/data/"
data_set = "traffic"
file_name = "".join([data_path, data_set, ".npy"])

# Load the array and report its shape and container type.
dat = np.load(file_name)
print(dat.shape)
print(type(dat))

In [None]:
%matplotlib inline
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

# Global matplotlib styling applied to every figure drawn after this cell.
params = {
   'axes.labelsize': 12,
   'font.size': 20,
   'legend.fontsize': 15,
   'xtick.labelsize': 15,
   'ytick.labelsize': 15,
   'text.usetex': False,
   'figure.figsize':[10,8]
   }
mpl.rcParams.update(params)

# One x position per row of `dat` (axis 0 is the time axis: the rest of the
# notebook reshapes axis 0 into days and plots `dat[:T, k]` against time).
x = np.arange(0, dat.shape[0])

# Draw one figure per column 200..209.
# The series must be taken along axis 0 so that it has the same length as `x`;
# slicing a row (`dat[i, :]`) yields dat.shape[1] values and makes plt.plot
# fail unless the array happens to be square.
# NOTE(review): assumes `dat` has at least 210 columns -- confirm.
# NOTE(review): `x` counts samples, not hours, although the label says 'Hour'.
for i in range(200, 210):
    plt.figure()
    plt.plot(x, dat[:, i])
    plt.xlabel('Hour')
    plt.ylabel('mph')

### Fill missing (zero) values

Replace each zero with the mean of the non-zero readings taken at the same time of day on the other days.

In [None]:
import numpy as np
def fill_zero_daily(x):
    """Replace the zero entries of ``x`` with the mean of its non-zero entries.

    The array is modified in place and also returned, so the result can be
    assigned straight back into the slice it was taken from.

    A boolean mask is used for the assignment, which is correct for arrays
    of any dimensionality (index lists built with ``np.argwhere`` only
    address the right elements when ``x`` is 1-D).

    Args:
        x: NumPy array in which the value 0 marks a missing reading.

    Returns:
        The same array object. If every entry is zero (or the array is
        empty) nothing is changed and ``'all zero'`` is printed.
    """
    zero_mask = np.asarray(x) == 0
    if zero_mask.all():
        # No valid reading to average over; leave the data untouched.
        print('all zero')
    else:
        x[zero_mask] = np.mean(x[~zero_mask])
    return x

def fill_zero(dat, steps_per_day=12 * 24):
    """Fill zero readings using the same time slot on other days.

    ``dat`` is viewed as ``(n_days, steps_per_day, n_dim)``; for every
    (time slot, column) pair the zeros across days are replaced by
    ``fill_zero_daily`` with the mean of the non-zero values of that pair.

    Args:
        dat: 2-D array whose second axis has ``n_dim`` columns. Its total
            size must be divisible by ``steps_per_day * n_dim``, otherwise
            the reshape raises ``ValueError``.
        steps_per_day: Number of consecutive rows that make up one day.
            Defaults to ``12 * 24`` (twelve samples per hour).

    Returns:
        A new array of shape ``(n_days, steps_per_day, n_dim)``; ``dat``
        itself is not modified.
    """
    n_dim = dat.shape[1]
    dat_reshape = dat.reshape((-1, steps_per_day, n_dim))

    # Work on a copy so the in-place filling never touches the caller's data.
    mat_nz = np.copy(dat_reshape)
    # Iterate over the copy's own shape (the original looped over an
    # undefined name `mat`, which raised NameError).
    for i in range(mat_nz.shape[1]):
        for j in range(mat_nz.shape[2]):
            mat_nz[:, i, j] = fill_zero_daily(mat_nz[:, i, j])
    return mat_nz

### Upsample the data: 72 copies of each day, shifted in 20-minute steps

In [None]:
# Impute the zero readings, then set up the per-day augmentation.
dat_nz = fill_zero(dat)
n_days = len(dat_nz)  # the first axis of the filled array indexes days
dat_aug = list()

def up_sample(x):
    """Return 72 cyclically shifted copies of one day.

    Copy ``k`` (for ``k = 0 .. 71``) is ``x`` rolled by ``-4 * k`` positions
    along axis 0; the copies are stacked on a new leading axis.
    """
    return np.array(
        [np.roll(x, -4 * shift, axis=0) for shift in range(24 * 3)]
    )
    
# Augment every day, collecting the stacks of shifted copies.
for i in range(n_days):
    dat_up = up_sample(dat_nz[i])
    dat_aug += [dat_up]

# Merge the per-day stacks along the first axis.
dat_aug = np.concatenate(dat_aug, axis=0)
print(dat_aug.shape)

# Keep only the first 15 entries of the last axis, then write to disk.
dat_aug_sub = dat_aug[..., :15]
print(dat_aug_sub.shape)
np.save('/home/qiyu/data/traffic_s2s.npy', dat_aug_sub)

### rescale and save

In [None]:
# Rescale the series. `rescale_ts` is not defined in this part of the
# notebook, so the exact transformation cannot be stated here.
dat = rescale_ts(dat)

import numpy as np
# np.save appends ".npy" when the name lacks it, so this writes to
# data_path + data_set + ".npy" -- the same path the array was loaded from,
# i.e. the file on disk is overwritten with the rescaled values.
np.save(data_path+data_set, dat)

In [None]:
import pandas as pd

# Wrap the array in a DataFrame to get per-column summary statistics;
# the last expression is what the notebook displays.
df = pd.DataFrame(data=dat)
df.describe()

### visualize traffic series

In [None]:
import matplotlib.ticker as ticker

# Style settings for the figure. This cell only defines the dict; it is not
# passed to mpl.rcParams.update here. The font key is 'font.size'
# ('text.fontsize' is not a valid rcParam name).
params = {
   'axes.labelsize': 20,
   'font.size': 20,
   'legend.fontsize': 12,
   'xtick.labelsize': 12,
   'ytick.labelsize': 12,
   'text.usetex': False,
   'figure.figsize': [4.5, 4.5]
   }
fig = plt.figure(figsize=(10,8))
ax = fig.gca()

# Plot the first T samples of columns 1 and 2.
data_mat = dat
T = 600
ax.plot(data_mat[:T,1], 'b')
ax.plot(data_mat[:T,2], 'c')
ax.set_xlabel("Time (minutes)")
ax.set_ylabel("Speed (m/h)")

# Label every 144th sample. plt.xticks installs fixed tick positions and
# labels, so no separate locator is needed, and the global `x` used by other
# cells is deliberately left untouched.
# NOTE(review): with 5-minute samples, 144 steps span 12 hours rather than
# the 6 hours these labels imply -- confirm the intended sampling/labels.
tick_spacing = 144
tick_positions = [k * tick_spacing for k in range(5)]
plt.xticks(tick_positions, ['00:00','06:00','12:00','18:00', '24:00'])

fig.savefig('traffic.png')

## Climate (TMAX) data: daily

In [None]:
import numpy as np

# Where the dataset lives and which one to open.
data_path = "/home/qiyu/data/"
data_set = "climate"
file_name = "".join([data_path, data_set, ".npy"])

# Load the array and report its shape and container type.
dat = np.load(file_name)
print(dat.shape)
print(type(dat))

In [None]:
    
# Three stacked panels sharing both axes, one per selected column.
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
# Plot rows 10000..10364 against their 0-based position. Letting matplotlib
# supply the x values avoids depending on a global `x` left over from an
# earlier cell (which another cell rebinds to an integer).
ax1.plot(dat[10000:10365, 1], 'b')
ax2.plot(dat[10000:10365, 5], 'g')
ax3.plot(dat[10000:10365, 10], 'm')
ax3.set_xlabel('Day', fontsize=18.0)
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
ax2.set_ylabel(r'$^\circ$ F', fontsize=18.0)

In [None]:
# Overwrite readings above 200 with -1, the value that the filling step
# below (`fill_missing_daily`) treats as "missing".
too_high = dat > 200
dat[too_high] = -1

In [None]:
import numpy as np
def fill_missing_daily(x):
    """Replace entries equal to -1 with the mean of all other entries.

    The array is modified in place and also returned, so the result can be
    assigned straight back into the slice it was taken from.

    A boolean mask is used for the assignment, which is correct for arrays
    of any dimensionality (index lists built with ``np.argwhere`` only
    address the right elements when ``x`` is 1-D).

    Args:
        x: NumPy array in which the value -1 marks a missing reading.

    Returns:
        The same array object. If every entry is -1 (or the array is
        empty) nothing is changed and ``'all missing'`` is printed.
    """
    missing_mask = np.asarray(x) == -1
    if missing_mask.all():
        # No valid reading to average over; leave the data untouched.
        print('all missing')
    else:
        x[missing_mask] = np.mean(x[~missing_mask])
    return x

def fill_missing(dat):
    """Fill -1 markers using the same position in the other 24-row groups.

    ``dat`` is viewed as ``(n_groups, 24, n_dim)`` and, for every
    (position, column) pair, the values across groups are passed through
    ``fill_missing_daily``. The input array is not modified; a filled copy
    of the reshaped data is returned.
    """
    n_dim = dat.shape[1]
    grouped = dat.reshape((-1, 24, n_dim))  # consecutive blocks of 24 rows

    # Copy first: the filling below writes in place.
    mat_nz = np.copy(grouped)
    n_slots, n_cols = mat_nz.shape[1], mat_nz.shape[2]
    for slot in range(n_slots):
        for col in range(n_cols):
            across_groups = mat_nz[:, slot, col]
            mat_nz[:, slot, col] = fill_missing_daily(across_groups)
    return mat_nz
# Impute the -1 markers and confirm the shape of the grouped result.
dat_nz = fill_missing(dat)
print(np.shape(dat_nz))

In [None]:
%matplotlib inline
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

# Global matplotlib styling. The font size key is 'font.size';
# 'text.fontsize' is not a valid rcParam and makes rcParams.update fail.
params = {
   'axes.labelsize': 12,
   'font.size': 12,
   'legend.fontsize': 12,
   'xtick.labelsize': 12,
   'ytick.labelsize': 12,
   'text.usetex': False
   }
mpl.rcParams.update(params)

# Unfilled data grouped into blocks of 24 rows (assumes 54 columns).
dat_reshape = dat.reshape((-1,24,54))

# Plot column 0 of the first ten groups of the filled data. `dat_nz` is the
# array returned by fill_missing; the name `mat_nz` exists only inside that
# function and is undefined at notebook level.
for i in range(0,10):
    plt.figure()
    plt.plot(dat_nz[i,:,0], 'r-')

In [None]:
# Number of groups along the first axis, and an empty collector for the
# augmented copies.
n_days = len(dat_nz)
dat_aug = list()

def up_sample(x):
    """Return 6 cyclically shifted copies of ``x``.

    Copy ``k`` (for ``k = 0 .. 5``) is ``x`` rolled by ``-4 * k`` positions
    along axis 0; the copies are stacked on a new leading axis.
    """
    shifted = [np.roll(x, -4 * k, axis=0) for k in range(6)]
    return np.array(shifted)
    
# Augment every group, collecting the stacks of shifted copies.
for i in range(n_days):
    dat_up = up_sample(dat_nz[i])
    dat_aug += [dat_up]

# Merge the per-group stacks along the first axis and write them to disk
# (all columns are kept).
dat_aug = np.concatenate(dat_aug, axis=0)
print(dat_aug.shape)
np.save('/home/qiyu/data/climate_s2s.npy', dat_aug)