In [10]:
import torch as t
import numpy as np

In [12]:
# Load the hourly bike-sharing CSV into a float32 NumPy array.
# The header row is skipped. Column 1 holds a date string, so its converter
# keeps only the characters from position 8 onward and parses them as a float
# (presumably the day-of-month of a 'YYYY-MM-DD' date -- confirm against the file).
bikes_numpy = np.loadtxt(
    './data/hour-fixed.csv',
    delimiter=',',
    skiprows=1,
    dtype=np.float32,
    converters={1: lambda date_field: float(date_field[8:])},
)

In [20]:
# Wrap the NumPy array in a tensor; from_numpy does not copy, so the tensor
# and bikes_numpy share the same underlying memory.
bikes = t.from_numpy(bikes_numpy)
# Display the size together with the per-dimension strides
(bikes.size(), bikes.stride())

(torch.Size([17520, 17]), (17, 1))

In [21]:
# Regroup the flat (row, column) table into a 3D (day, hour, column) tensor.
# Every 24 consecutive rows become one day, which relies on the rows having
# been sorted in advance; view() raises if the row count is not a multiple of 24.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
(daily_bikes.size(), daily_bikes.stride())

(torch.Size([730, 24, 17]), (408, 17, 1))

In [22]:
# Swap the last two dimensions so the tensor is ordered N x C x L:
# N = day, C = column (channel), L = hour of the day.
# Before transpose: day x hour x column
# After transpose:  day x column x hour
# transpose returns a view of the same storage, so only the strides change.
# NOTE: re-running this cell on its own swaps the two dimensions back.
daily_bikes = t.transpose(daily_bikes, 1, 2)
(daily_bikes.size(), daily_bikes.stride())

(torch.Size([730, 17, 24]), (408, 1, 17))

In [47]:
# One-hot encode the weather situation stored in channel 9.
# The weather level runs from 1 (really good weather) to 4 (really bad),
# so the encoding needs 4 channels: a zero tensor of shape (day, 4, hour).
daily_weather_onehot = t.zeros(daily_bikes.shape[0], 4,
                               daily_bikes.shape[2])
# Scatter 1.0 along the channel dimension (dim=1). scatter_ needs an int64
# index with the same number of dimensions as the target, hence long() and
# unsqueeze(1); subtracting 1 maps the 1-based levels onto indices 0..3.
daily_weather_onehot.scatter_(dim=1,
                              index=daily_bikes[:, 9, :].unsqueeze(1).long() - 1,
                              value=1.0)
# Append the one-hot channels along the channel dimension (dim=1).
# Only the first bikes.shape[1] channels (the original CSV columns) are taken
# from daily_bikes, so re-running this cell replaces the one-hot channels
# instead of stacking another copy of them on every execution.
daily_bikes = t.cat((daily_bikes[:, :bikes.shape[1], :], daily_weather_onehot),
                    dim=1)

In [49]:
# Min-max rescaling: map the temperature channel (index 10) onto [0, 1].
# `temperature` is a view into daily_bikes, so after the in-place assignment
# below it shows the rescaled values as well.
temperature = daily_bikes[:, 10, :]
temp_min = temperature.min()
temp_max = temperature.max()
daily_bikes[:, 10, :] = (temperature - temp_min) / (temp_max - temp_min)
# Alternative: standardize by subtracting the mean and dividing by the standard deviation