In [1]:
import numpy as np
import torch
# Compact tensor reprs: summarise tensors with more than 50 elements,
# show 2 items at each edge of a summarised dimension, wrap at 100 characters.
torch.set_printoptions(linewidth=100, threshold=50, edgeitems=2)

In [3]:
# Load the hourly bike-sharing table as float32, skipping the header row.
# Column 1 (0-based) holds a date string; the converter keeps only characters
# 8-9 of it and parses them as a float (presumably the day of the month for
# YYYY-MM-DD dates) so the whole table fits a single numeric dtype.
bikes_numpy = np.loadtxt(
    "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv",
    dtype=np.float32,
    delimiter=",",
    skiprows=1,
    converters={1: lambda date_field: float(date_field[8:10])},
)
# Wrap the NumPy array as a tensor.
bikes = torch.from_numpy(bikes_numpy)
# Display the data together with its shape.
bikes, bikes.shape

(tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
         [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
         ...,
         [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
         [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]]),
 torch.Size([17520, 17]))

- `../data/p1ch4/bike-sharing-dataset/hour-fixed.csv`：CSV文件的路径。
- `dtype=np.float32`：指定数据类型为`float32`。
- `delimiter=","`：指定CSV文件中的分隔符为逗号。
- `skiprows=1`：跳过CSV文件的第一行（标题行）。
- `converters={1: lambda x: float(x[8:10])}`：定义了一个转换器，用于转换索引为1的列（列索引从0开始，即第二列，存放日期字符串）。该转换器是一个lambda函数，从该列的每个元素中提取索引为8到9的子字符串（若日期格式为`YYYY-MM-DD`，即其中的“日”），并将其转换为浮点数。

In [4]:
# Inspect the 2-D layout: the size of each dimension and the element step per dimension.
bikes.size(), bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [5]:
# Regroup the flat hourly rows into 3 axes (day, hour, column): every 24
# consecutive rows form one day. -1 lets view() infer the number of days.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
daily_bikes.size(), daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

view(-1, 24, bikes.shape[1])将bikes张量重新构造为一个三维张量，其中第一个维度的大小自动推断，使得每个子张量都具有24行（对应一天中的24个小时），且列数与原始bikes张量的列数相同。

返回的daily_bikes张量的形状为(N, 24, C)，其中N表示自动推断的第一个维度大小（天数），24表示每个子张量的行数（每天的小时数），C表示原始bikes张量的列数。

In [20]:
# We have N sequences (days) of L hours, each with C channels. To obtain the
# N x C x L ordering, swap the hour and channel axes.
# The tensor is rebuilt from `bikes` instead of transposing `daily_bikes`
# itself: a self-referential transpose would flip the axes back every second
# time this cell is executed.
daily_bikes = bikes.view(-1, 24, bikes.shape[1]).transpose(1, 2)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [None]:
# 我们初始化一个零填充矩阵，其中行数等于一天中的小时数，列数等于天气级别的数量

In [21]:
# Integer copy of the first 24 rows (one day), plus an all-zero matrix with one
# row per hour and 4 columns that will receive the one-hot weather encoding.
first_day = bikes[:24].to(torch.long)
weather_onehot = torch.zeros(first_day.size(0), 4)
# Column 9 is the one used as the weather level below.
first_day, first_day.size(), first_day[:, 9]

(tensor([[ 1,  1,  ..., 13, 16],
         [ 2,  1,  ..., 32, 40],
         ...,
         [23,  1,  ..., 17, 28],
         [24,  1,  ..., 24, 39]]),
 torch.Size([24, 17]),
 tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2]))

然后我们根据每一行对应的级别将它们分散到矩阵中。还记得我们在前几节中使用unsqueeze来添加一个单例维度吗？这里需要将数值减1，因为天气情况的取值范围是1到4，而索引是从0开始的。

In [22]:
# One-hot encode in place: for each row, write 1.0 into the column selected by
# that row's weather level. The level is shifted by -1 to become a 0-based
# column index, and unsqueeze(1) gives the index the same rank as the target.
weather_onehot.scatter_(1, (first_day[:, 9].long() - 1).unsqueeze(1), 1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [23]:
# Append the four one-hot columns to the first day's rows and show the first row.
torch.cat([bikes[:24], weather_onehot], dim=1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,  0.0000,  1.0000,
          0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000, 16.0000,  1.0000,  0.0000,  0.0000,
          0.0000]])

将上面的推广到整个数据集中

In [24]:
# Zero tensor with 4 channels, sharing its first and last dimension sizes with daily_bikes.
daily_weather_onehot = torch.zeros(daily_bikes.size(0), 4, daily_bikes.size(2))
daily_weather_onehot.size()

torch.Size([730, 4, 24])

In [25]:
# Scatter along dim 1: channel 9 of daily_bikes, cast to integers and shifted
# to 0-based, selects which of the 4 channels receives 1.0 at each position.
daily_weather_onehot.scatter_(1, (daily_bikes[:, 9, :].long() - 1).unsqueeze(1), 1.0)
daily_weather_onehot.size()

torch.Size([730, 4, 24])

In [27]:
# Append the one-hot weather channels along dim 1.
# Only the first bikes.shape[1] channels of daily_bikes are kept before
# concatenating, so re-running this cell does not stack another copy of the
# one-hot channels on top of the previous result.
daily_bikes = torch.cat((daily_bikes[:, :bikes.shape[1]], daily_weather_onehot), dim=1)

In [28]:
# Show the combined tensor together with its shape.
daily_bikes, daily_bikes.size()

(tensor([[[1.0000e+00, 2.0000e+00,  ..., 2.3000e+01, 2.4000e+01],
          [1.0000e+00, 1.0000e+00,  ..., 1.0000e+00, 1.0000e+00],
          ...,
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],
 
         [[2.5000e+01, 2.6000e+01,  ..., 4.6000e+01, 4.7000e+01],
          [2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00],
          ...,
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],
 
         ...,
 
         [[1.7332e+04, 1.7333e+04,  ..., 1.7354e+04, 1.7355e+04],
          [3.0000e+01, 3.0000e+01,  ..., 3.0000e+01, 3.0000e+01],
          ...,
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],
 
         [[1.7356e+04, 1.7357e+04,  ..., 1.7378e+04, 1.7379e+04],
          [3.1000e+01, 3.1000e+01,  ..., 3.1000e+01, 3.1000e+01],
      

In [29]:
# Rescale channel 9 with (x - 1) / 3, which maps the levels 1..4 onto [0, 1].
# The levels are read from column 9 of `bikes` rather than from daily_bikes
# itself, so re-running the cell gives the same values instead of rescaling
# data that has already been rescaled.
daily_bikes[:, 9, :] = (bikes[:, 9].reshape(-1, daily_bikes.shape[2]) - 1.0) / 3.0
daily_bikes

tensor([[[1.0000e+00, 2.0000e+00,  ..., 2.3000e+01, 2.4000e+01],
         [1.0000e+00, 1.0000e+00,  ..., 1.0000e+00, 1.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[2.5000e+01, 2.6000e+01,  ..., 4.6000e+01, 4.7000e+01],
         [2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        ...,

        [[1.7332e+04, 1.7333e+04,  ..., 1.7354e+04, 1.7355e+04],
         [3.0000e+01, 3.0000e+01,  ..., 3.0000e+01, 3.0000e+01],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[1.7356e+04, 1.7357e+04,  ..., 1.7378e+04, 1.7379e+04],
         [3.1000e+01, 3.1000e+01,  ..., 3.1000e+01, 3.1000e+01],
         ...,
         [0.00

In [35]:
# Select channel 10 across the whole first and last dimensions. Indexing with
# an integer drops the channel axis, so `temp` is 2-D: (days, hours).
temp = daily_bikes[:, 10]
daily_bikes.size(), temp, temp.size()

(torch.Size([730, 21, 24]),
 tensor([[0.2245, 0.2041,  ..., 0.3878, 0.4490],
         [0.4490, 0.4286,  ..., 0.2245, 0.2041],
         ...,
         [0.2449, 0.2449,  ..., 0.1837, 0.1837],
         [0.1633, 0.1633,  ..., 0.2449, 0.2449]]),
 torch.Size([730, 24]))

In [36]:
# Smallest and largest value of `temp` over all of its elements.
temp_min = temp.min()
temp_max = temp.max()
temp_min, temp_max

(tensor(0.), tensor(1.))

In [43]:
# Min-max scale channel 10 to the range [0, 1].
# Both the values and their extremes are taken from column 10 of `bikes`
# rather than from daily_bikes, so the result does not depend on how often, or
# in which order, this and the neighbouring cells have been executed.
raw_temp = bikes[:, 10].reshape(-1, daily_bikes.shape[2])
daily_bikes[:, 10, :] = (raw_temp - raw_temp.min()) / (raw_temp.max() - raw_temp.min())
daily_bikes

tensor([[[1.0000e+00, 2.0000e+00,  ..., 2.3000e+01, 2.4000e+01],
         [1.0000e+00, 1.0000e+00,  ..., 1.0000e+00, 1.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[2.5000e+01, 2.6000e+01,  ..., 4.6000e+01, 4.7000e+01],
         [2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        ...,

        [[1.7332e+04, 1.7333e+04,  ..., 1.7354e+04, 1.7355e+04],
         [3.0000e+01, 3.0000e+01,  ..., 3.0000e+01, 3.0000e+01],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[1.7356e+04, 1.7357e+04,  ..., 1.7378e+04, 1.7379e+04],
         [3.1000e+01, 3.1000e+01,  ..., 3.1000e+01, 3.1000e+01],
         ...,
         [0.00

In [42]:
# Alternative scaling: subtract the mean, then divide by the standard deviation.
temp = daily_bikes[:, 10]
temp_mean = temp.mean()
temp_std = temp.std()
daily_bikes[:, 10] = (temp - temp_mean) / temp_std
daily_bikes[:, 10]

tensor([[-1.3213, -1.4248,  ..., -0.4932, -0.1827],
        [-0.1827, -0.2862,  ..., -1.3213, -1.4248],
        ...,
        [-1.2178, -1.2178,  ..., -1.5284, -1.5284],
        [-1.6319, -1.6319,  ..., -1.2178, -1.2178]])