#读取时间序列

In [4]:
import torch
import numpy as np
# Keep printed tensors compact: show 2 items at each edge of a summarized
# dimension, summarize any tensor with more than 50 elements, and wrap
# printed lines at 75 characters.
torch.set_printoptions(edgeitems=2, threshold=50,linewidth=75)

In [5]:
import csv

# Single definition of the dataset location, shared by both reads below.
bikes_csv_path = "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv"

# Load every column as float32, skipping the header row.
# Column 1 ('dteday') is a date string, so it cannot be parsed as a float
# directly; the converter keeps only characters 8-9 of it (presumably the
# day of the month of a YYYY-MM-DD date — confirm against the file).
bikes_numpy = np.loadtxt(
    bikes_csv_path,
    dtype=np.float32,
    delimiter=",",
    skiprows=1,
    converters={1: lambda x: float(x[8:10])})

# Read just the header row to recover the column names. The context manager
# closes the file handle deterministically (the bare open() call leaked it),
# and newline="" is the mode the csv module asks for.
with open(bikes_csv_path, newline="") as csv_file:
    col_list = next(csv.reader(csv_file))
col_list

['instant',
 'dteday',
 'season',
 'yr',
 'mnth',
 'hr',
 'holiday',
 'weekday',
 'workingday',
 'weathersit',
 'temp',
 'atemp',
 'hum',
 'windspeed',
 'casual',
 'registered',
 'cnt']

In [6]:
# Wrap the loaded NumPy array as a torch tensor. torch.from_numpy does not
# copy: the tensor and bikes_numpy share the same underlying memory.
bikes=torch.from_numpy(bikes_numpy)
bikes

tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
        [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
        ...,
        [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
        [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]])

In [7]:
# Inspect the tensor's shape together with its strides (the number of
# elements to step over to reach the next entry along each dimension).
bikes.shape,bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [8]:
# Regroup the hourly rows into days: (rows, C) -> (days, 24, C).
# Passing -1 lets view() infer the number of days from the row count.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
# The last dimension holds the per-hour features.
daily_bikes.size(), daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [9]:
# Swap the hour and feature axes: (days, 24, C) -> (days, C, 24).
# The tensor is rebuilt from `bikes` rather than transposing `daily_bikes`
# onto itself, so re-running this cell always produces the same layout
# (the self-referential form flipped the axes back on every second run).
daily_bikes = bikes.view(-1, 24, bikes.shape[1]).transpose(1, 2)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [10]:
# Take the first 24 rows (one day of hourly records) as 64-bit integers.
first_day = bikes[:24].to(torch.int64)
# One row per hour and one column per weather state; 4 states are assumed.
weather_onehot = torch.zeros(first_day.size(0), 4)
# Column index 9 ('weathersit' in col_list) is the weather state code.
first_day[:, 9], first_day.size()


(tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2,
         2, 2]),
 torch.Size([24, 17]))

In [11]:
# One-hot encode the first day's weather codes in place.
# The codes start at 1, so subtracting 1 turns them into 0-based column
# indices; unsqueeze(1) then appends an axis after the first one, turning
# the 1-D code vector into an (N, 1) column that scatter_ can use along
# dim 1.
weather_onehot.scatter_(
    dim=1,
    index=(first_day[:, 9] - 1).long().unsqueeze(1),
    value=1.0,
)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [12]:
# Allocate the one-hot weather tensor for every day. It takes its day and
# hour extents from daily_bikes (dims 0 and 2) and uses 4 weather channels
# in dim 1.
daily_weather_onehot = torch.zeros(
    daily_bikes.size(0), 4, daily_bikes.size(2)
)

In [13]:
# Write a 1 into the channel selected by each hour's weather code.
# The codes in channel 9 start at 1, so they are shifted to 0-based channel
# indices; unsqueeze(1) inserts the channel axis so the index tensor has
# the same rank as the target.
daily_weather_onehot.scatter_(
    dim=1,
    index=(daily_bikes[:, 9, :].long() - 1).unsqueeze(1),
    value=1.0,
)
daily_weather_onehot.size()


torch.Size([730, 4, 24])

In [14]:
# Append the one-hot weather channels along dim 1 (the feature axis).
# Only the first bikes.shape[1] channels of daily_bikes (the original CSV
# features) are kept before concatenating, so re-running this cell replaces
# the one-hot channels instead of stacking another 4 on top each time.
daily_bikes = torch.cat(
    (daily_bikes[:, :bikes.shape[1], :], daily_weather_onehot), dim=1)

In [15]:
# Check the shape after concatenating the one-hot weather channels.
daily_bikes.shape

torch.Size([730, 21, 24])

In [17]:
# Min-max rescale the weather code channel (index 9): subtract the minimum
# and divide by (maximum - minimum). The constants 1.0 and 3.0 assume the
# codes span 1..4, which maps them onto [0, 1].
# The raw codes are read from `bikes` instead of from daily_bikes itself,
# so re-running this cell does not rescale already-rescaled values. This
# relies on `bikes` being untouched, which holds once daily_bikes has been
# replaced by the independent tensor returned from torch.cat.
weather_raw = bikes[:, 9].reshape(-1, 24)
daily_bikes[:, 9, :] = (weather_raw - 1.0) / 3.0
daily_bikes

tensor([[[1.0000e+00, 2.0000e+00,  ..., 2.3000e+01, 2.4000e+01],
         [1.0000e+00, 1.0000e+00,  ..., 1.0000e+00, 1.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[2.5000e+01, 2.6000e+01,  ..., 4.6000e+01, 4.7000e+01],
         [2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        ...,

        [[1.7332e+04, 1.7333e+04,  ..., 1.7354e+04, 1.7355e+04],
         [3.0000e+01, 3.0000e+01,  ..., 3.0000e+01, 3.0000e+01],
         ...,
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00]],

        [[1.7356e+04, 1.7357e+04,  ..., 1.7378e+04, 1.7379e+04],
         [3.1000e+01, 3.1000e+01,  ..., 3.1000e+01, 3.1000e+01],
         ...,
         [0.00

In [20]:
# Min-max rescale the temperature channel (index 10, 'temp' in col_list)
# so that its values fall between 0 and 1.
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [21]:
# Standardize the temperature channel (index 10): subtract its mean and
# divide by its standard deviation.
# NOTE(review): when the cells run in order this operates on values the
# preceding min-max cell has already rescaled — confirm both steps are
# meant to be applied rather than shown as alternatives.
temp = daily_bikes[:, 10, :]
daily_bikes[:, 10, :] = (temp - temp.mean()) / temp.std()