In [1]:
import numpy as np
import torch
# Compact tensor printing: show 2 edge items per dimension, summarize tensors
# with more than 50 elements, and wrap output at 75 characters.
torch.set_printoptions(edgeitems=2, threshold=50, linewidth=75)

In [2]:
def _day_of_month(date_field):
    """Convert the raw text of column 1 to a float built from characters 8-9.

    NOTE(review): presumably the field is a date such as YYYY-MM-DD, so this
    slice is the day of the month (the displayed column shows 1 and 31) --
    confirm against the CSV.
    """
    return float(date_field[8:10])


# Load the hourly CSV as float32, skipping the header row; column 1 is not
# numeric text, so it goes through the converter above.
bikes_numpy = np.loadtxt(
    "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv",
    dtype=np.float32,
    delimiter=",",
    skiprows=1,
    converters={1: _day_of_month},
)
# from_numpy shares memory with the NumPy array instead of copying it.
bikes = torch.from_numpy(bikes_numpy)
bikes

tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
        [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
        ...,
        [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
        [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]])

In [7]:
# One row per hour, 17 columns of data.
bikes.shape, bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [9]:
# Regroup the hourly rows into days with view(): 24 consecutive rows form one
# day, the column count is taken from bikes.shape[1] (17), and -1 lets
# PyTorch compute the remaining dimension (the number of days) automatically.
n_columns = bikes.shape[1]
daily_bikes = bikes.view(-1, 24, n_columns)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [10]:
# Transpose practice: a small 2 x 5 x 3 tensor holding the integers 0..29.
sample1 = torch.arange(2 * 5 * 3).view(2, 5, 3)
sample1

tensor([[[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11],
         [12, 13, 14]],

        [[15, 16, 17],
         [18, 19, 20],
         [21, 22, 23],
         [24, 25, 26],
         [27, 28, 29]]])

In [11]:
# Shape before any transpose: (2, 5, 3).
sample1.shape

torch.Size([2, 5, 3])

In [12]:
sample1.transpose(1,0).shape # swap dim 0 and dim 1

torch.Size([5, 2, 3])

In [16]:
sample1.transpose(0, 1).shape # same result: the argument order does not matter

torch.Size([5, 2, 3])

In [13]:
sample1.transpose(1, 2).shape # swap dim 1 and dim 2

torch.Size([2, 3, 5])

In [14]:
sample1.transpose(2, 1).shape # same swap with the arguments reversed

torch.Size([2, 3, 5])

In [15]:
# Show the values after swapping dim 1 and dim 2: each 5 x 3 slice becomes 3 x 5.
sample1.transpose(1, 2)

tensor([[[ 0,  3,  6,  9, 12],
         [ 1,  4,  7, 10, 13],
         [ 2,  5,  8, 11, 14]],

        [[15, 18, 21, 24, 27],
         [16, 19, 22, 25, 28],
         [17, 20, 23, 26, 29]]])

In [17]:
# transpose() swaps dim 1 and dim 2, giving a (days, columns, hours) layout.
# The daily view is rebuilt from `bikes` rather than re-assigning
# `daily_bikes = daily_bikes.transpose(1, 2)`, so re-running this cell does not
# swap the two dimensions back again. Both forms are views on the same storage
# and produce the same shape and strides.
daily_bikes = bikes.view(-1, 24, bikes.shape[1]).transpose(1, 2)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [19]:
# Shape of the first 24 rows of the hourly table.
bikes[:24].shape

torch.Size([24, 17])

In [20]:
# Take the first 24 hourly rows (the first day) and cast them to
# 64-bit integers, which is what long() means in PyTorch.
first_day = bikes[:24].to(torch.int64)
first_day.dtype

torch.int64

In [21]:
# The integer cast keeps the shape: 24 rows, 17 columns.
first_day.shape

torch.Size([24, 17])

In [22]:
# Zero-filled target for the one-hot encoding: one row per row of first_day
# and 4 columns, one per weather category.
weather_onehot = torch.zeros(first_day.size(0), 4)
# Column 9 holds the weather value of each row.
first_day[:, 9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2,
        2, 2])

In [23]:
# Every row, as a single-column matrix (unsqueeze adds a dimension of size 1).
first_day[:,9].unsqueeze(1)

tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [2],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [2],
        [2],
        [2],
        [2],
        [2],
        [3],
        [3],
        [2],
        [2],
        [2],
        [2]])

In [24]:
# Shape of the weather column after unsqueeze(1): 24 rows, 1 column.
first_day[:,9].unsqueeze(1).shape

torch.Size([24, 1])

In [25]:
# The weather values run from 1 to 4, so subtract 1 to obtain the column
# indices 0 to 3 that scatter_ needs.
weather_index = first_day[:, 9].long().unsqueeze(1) - 1
# Write 1.0 into the indexed column of every row (in place).
weather_onehot.scatter_(dim=1, index=weather_index, value=1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [26]:
# One row per hour of the first day, one column per weather category.
weather_onehot.shape

torch.Size([24, 4])

In [27]:
# Shape of the first-day slice, for comparison with weather_onehot.
bikes[:24].shape

torch.Size([24, 17])

In [28]:
# Studying torch.cat
# https://qiita.com/Haaamaaaaa/items/709d774698082e9d342d
# Three random tensors that agree in dims 1 and 2 (3 x 4) and differ only in
# dim 0 (2, 2 and 5).
input_list = [torch.randn(rows, 3, 4) for rows in (2, 2, 5)]
input1, input2, input3 = input_list


In [30]:
# Concatenate along dim 0. This succeeds because the tensors have matching
# sizes in dim 1 and dim 2.
output1 = torch.cat(input_list, 0)
print(output1.shape)  # torch.Size([9, 3, 4])

torch.Size([9, 3, 4])


In [32]:
# Concatenate along dim 1. This fails because the tensors differ in dim 0
# (2, 2 and 5), and torch.cat requires every dimension except the
# concatenation one to match. The error is caught and printed so that the
# demonstration stays visible while the notebook still runs top to bottom.
try:
    output1 = torch.cat(input_list, dim=1)
    print(output1.size())
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: Sizes of tensors must match except in dimension 1. Got 2 and 5 in dimension 0 (The offending index is 2)

In [33]:
# Both tensors have 24 rows, so they can be concatenated along dim 1.
weather_onehot.shape, bikes[:24].shape

(torch.Size([24, 4]), torch.Size([24, 17]))

In [34]:
# NOTE: the result is not assigned to any variable, which is probably a mistake.
torch.cat((bikes[:24], weather_onehot), 1)[:1] # show only the first row

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          6.0000,  0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,
          3.0000, 13.0000, 16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

In [37]:
# Neither input was modified: torch.cat returned a new tensor.
weather_onehot.shape, bikes[:24].shape

(torch.Size([24, 4]), torch.Size([24, 17]))

In [38]:
# Zero tensor for the per-day one-hot encoding: same first and last dimension
# as daily_bikes, with 4 weather categories along dim 1.
n_days = daily_bikes.shape[0]
n_hours = daily_bikes.shape[2]
daily_weather_onehot = torch.zeros(n_days, 4, n_hours)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [39]:
# Same one-hot encoding for every day at once: row 9 of each day holds the
# weather values, shifted by 1 to become indices along dim 1.
daily_weather_index = daily_bikes[:, 9, :].long().unsqueeze(1) - 1
daily_weather_onehot.scatter_(1, daily_weather_index, 1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [40]:
# Dim 0 and dim 2 match daily_weather_onehot, so the two can be joined on dim 1.
daily_bikes.shape

torch.Size([730, 17, 24])

In [41]:
# Concatenate along dim 1, appending the one-hot weather channels.
# NOTE(review): this re-assigns daily_bikes from itself, so running the cell
# again concatenates another copy of daily_weather_onehot.
daily_bikes = torch.cat((daily_bikes, daily_weather_onehot), dim=1)

In [12]:
# Treat weather as a continuous value and rescale it to the 0-1 range
# (values 1..4 map to 0..1).
# NOTE(review): the slice is overwritten in place, so running this cell twice
# rescales the already-rescaled values.
daily_bikes[:, 9, :] = (daily_bikes[:, 9, :] - 1.0) / 3.0

In [45]:
# Min-max scaling: map row 10 of every day onto the 0-1 range, in place.
temp = daily_bikes[:, 10, :]
temp_min, temp_max = temp.min(), temp.max()
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [43]:
# Standardization: subtract the mean of row 10 across all days and divide by
# its standard deviation, in place.
temp = daily_bikes[:, 10, :]
daily_bikes[:, 10, :] = (temp - temp.mean()) / temp.std()