## Created by <a href="https://github.com/yunsuxiaozi">yunsuxiaozi</a> 2024/7/1

In [1]:
import pandas as pd#导入csv文件的库
import numpy as np#矩阵运算与科学计算的库
import torch#深度学习库,pytorch
import torch.nn as nn#neural network,神经网络
import torch.nn.functional as F#神经网络函数库
import torch.optim as optim#一个实现了各种优化算法的库
import gc#垃圾回收模块
import warnings#避免一些可以忽略的报错
warnings.filterwarnings('ignore')#filterwarnings()方法是用于设置警告过滤器的方法，它可以控制警告信息的输出方式和级别。

#根据前T个时刻的数据预测后S个时刻的数据,一个batch为B个数据,一个数据有N个特征
B,T,S,N=128,128,16,1

#设置随机种子
import random
def seed_everything(seed):
    torch.backends.cudnn.deterministic = True#将cuda加速的随机数生成器设为确定性模式
    torch.backends.cudnn.benchmark = False#关闭CuDNN框架的自动寻找最优卷积算法的功能，以避免不同的算法对结果产生影响
    torch.manual_seed(seed)#pytorch的随机种子
    np.random.seed(seed)#numpy的随机种子
    random.seed(seed)#python内置的随机种子
seed_everything(seed=2024)

In [2]:
train=pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/train.csv")
print(f"len(train):{len(train)}")
test=pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/test.csv")
print(f"len(test):{len(test)}")
submission=pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/sample_submission.csv")
train.head()

len(train):3000888
len(test):28512


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


In [3]:
class Embedding(nn.Module):
    def __init__(self,input_dim=128,embed_dim=128,dropout=0.1):
        super(Embedding,self).__init__()
        self.input_dim=input_dim
        self.embed_dim=embed_dim
        self.head=nn.Sequential(
            nn.Linear(self.input_dim,self.embed_dim)
        )
        self.dropout=nn.Dropout(dropout)
        
    def forward(self,x):
        return self.dropout(self.head(x))

In [4]:
#多头注意力机制
class MultiHeadSelfAttention(nn.Module):
    #定义初始化函数,dim_in是embedding的维度，d_model是输出的维度
    def __init__(self,dim_in=256,d_model=256,num_heads=4):

        super(MultiHeadSelfAttention,self).__init__()
        self.padidx=2#中英文padding在字典里的下标
        self.dim_in,self.d_model,self.num_heads=dim_in,d_model,num_heads
        
        #向量的维度必须被头的个数整除,否则会抛出异常.
        assert d_model %num_heads==0,"d_model must be multiple of num_heads"
        
        #定义线性变换矩阵
        self.linear_q=nn.Linear(dim_in,d_model)
        self.linear_k=nn.Linear(dim_in,d_model)
        self.linear_v=nn.Linear(dim_in,d_model)
        self.scale=1/np.sqrt(d_model//num_heads)
        #最后的线性层
        self.fc=nn.Linear(d_model,d_model)
        
    def forward(self,x):#x是input,x_copy是每个位置在字典中(中英文)的idx,为了去除padding项的影响.
        batch,n,dim_in=x.shape#(batch_size,max_len,embedding)
        
        assert dim_in==self.dim_in#检查一下输入的embedding维度是否正确
        
        nh=self.num_heads#几个头
        
        dk=self.d_model//nh
        #对x进行线性映射变成(batch_size,max_len,d_model) -> (batch_size,num_heads,max_len,d_model//num_heads)
        q=self.linear_q(x).reshape(batch,n,nh,dk).transpose(1,2)
        k=self.linear_k(x).reshape(batch,n,nh,dk).transpose(1,2)
        v=self.linear_v(x).reshape(batch,n,nh,dk).transpose(1,2)
        
        #q:(batch_size,num_heads,max_len,d_model//num_heads)
        #k.transpose(2,3):(batch_size,num_heads,d_model//num_heads,max_len)
        #dist:(batch_size,num_heads,max_len,max_len) (每个词和每个词的注意力)
        dist=torch.matmul(q,k.transpose(2,3))*self.scale
        
        #softmax  (batch_size,num_heads,max_len,max_len)
        dist=torch.softmax(dist,dim=-1)
        
        #v:(batch_size,num_heads,max_len,d_model//num_heads)
        #att:(batch_size,num_heads,max_len,d_model//num_heads)
        att=torch.matmul(dist,v)
        
        #(batch_size,max_len,num_heads,d_model//num_heads)->(batch_size,max_len,d_model)
        att=att.transpose(1,2).reshape(batch,n,self.d_model)
        
        #output:(batch_size,max_len,d_model)
        output=self.fc(att)
        
        return output

In [5]:
#前馈神经网络,捕获复杂的非线性关系.
class FeedForward(nn.Module):
    def __init__(self, dim=256, hidden_dim=256, dropout=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim), 
            nn.Dropout(dropout)
        )
    #将x传入神经网络中得到输出
    def forward(self, x):
        return self.net(x)

In [6]:
class Model(nn.Module):
    def __init__(self,embed_dim=256,L=6):
        super(Model,self).__init__()
        self.embed_dim=embed_dim#对时间维度映射的维度
        self.L=L#有几层att+feed
        self.encodeembedding=Embedding(input_dim=T,embed_dim=self.embed_dim,dropout=0.1)
        self.decodeembedding=Embedding(input_dim=self.embed_dim,embed_dim=S,dropout=0)
        
        self.att=[MultiHeadSelfAttention(dim_in=self.embed_dim,d_model=self.embed_dim,num_heads=4)for i in range(self.L)]
        self.feed=[FeedForward(dim=self.embed_dim,hidden_dim=self.embed_dim,dropout=0.1) for i in range(self.L)]
        self.laynorm=[nn.LayerNorm(self.embed_dim) for i in range(2*self.L)]
        
    def forward(self,x):#(Batch_size,Time,N_features)
        #对第一个维度求均值,保留这个维度同时不参与计算图
        #(Batch_size,Time,N_features)->(Batch_size,1,N_features)
        means = x.mean(1, keepdim=True).detach()
        #对第一个维度计算标准差,保留维度,使用有偏估计(分母为n而不是n-1),加上1e-15防止分母为0
        stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-15)
        x = (x - means)/stdev

        x=torch.transpose(x,1,2)#(Batch_size,N_features,T)
        #MLP
        x=self.encodeembedding(x)#(Batch_size,N_features,embed_dim) 
        for i in range(self.L):
            x=x+self.att[i](x)
            x=self.laynorm[2*i](x)
            x=x+self.feed[i](x)
            x=self.laynorm[2*i+1](x)
        #MLP
        x=self.decodeembedding(x)#(Batch_size,N_features,T)
        x=torch.transpose(x,1,2)#(Batch_size,N_features,T)
        x = x * (stdev.repeat(1, S, 1))
        x = x + (means.repeat(1, S, 1))
        return x

In [7]:
def loss_fn(y_true,y_pred):#torch.tensor
    return torch.mean((y_true-y_pred)**2)
#这里使用官方的评估指标
def metric(y_true,y_pred):#np.array
    y_true=y_true.reshape(-1,y_true.shape[-1])
    y_pred=y_pred.reshape(-1,y_pred.shape[-1])
    return np.sqrt(np.mean((np.log1p(y_true)-np.log1p(y_pred))**2))
family=['AUTOMOTIVE', 'BABY CARE', 'BEAUTY', 'BEVERAGES', 'BOOKS',
       'BREAD/BAKERY', 'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS',
       'FROZEN FOODS', 'GROCERY I', 'GROCERY II', 'HARDWARE',
       'HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES',
       'HOME CARE', 'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE',
       'LIQUOR,WINE,BEER', 'MAGAZINES', 'MEATS', 'PERSONAL CARE',
       'PET SUPPLIES', 'PLAYERS AND ELECTRONICS', 'POULTRY',
       'PREPARED FOODS', 'PRODUCE', 'SCHOOL AND OFFICE SUPPLIES',
       'SEAFOOD']
store_nbr=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
for i in range(len(family)):
    for j in range(len(store_nbr)):
        tmp_train=train[(train['family']==family[i])&(train['store_nbr']==store_nbr[j])]
        tmp_test=test[(test['family']==family[i])&(test['store_nbr']==store_nbr[j])]
        data=tmp_train['sales'].values.reshape(-1,1)#(len(train),1)
        X,y=[],[]
        for k in range(len(data)-T-len(tmp_test)):
            X.append(data[k:k+T])
            y.append(data[k+T:k+T+len(tmp_test)])
        split=int(len(X)*0.8)
        #train_X,valid_X,train_y,valid_y=np.array(X[:split]),np.array(X[split:]),np.array(y[:split]),np.array(y[split:])
        
        #full_data no valid 
        train_X,valid_X,train_y,valid_y=np.array(X),np.array(X[split:]),np.array(y),np.array(y[split:])
        
        #print(f"train_X.shape:{train_X.shape},train_y.shape:{train_y.shape},valid_X.shape:{valid_X.shape}")
        #模型设置
        model=Model()
        num_epochs=30
        batch_size=128
        #优化器设置
        optimizer=optim.Adam(model.parameters(),lr=0.00003,betas=(0.5,0.999))
        for epoch in range(num_epochs):
            #模型设置为训练状态
            model.train()
            #将梯度清空
            optimizer.zero_grad()
            #每次训练之前先打乱顺序
            random_index=np.arange(len(train_X))
            np.random.shuffle(random_index)
            train_X,train_y=train_X[random_index],train_y[random_index]
            train_loss=0.0
            for idx in range(0,len(train_X),batch_size):
                train_X1=torch.Tensor(train_X[idx:idx+batch_size])
                train_y1=torch.Tensor(train_y[idx:idx+batch_size])
                train_pred=model(train_X1)
                loss=loss_fn(train_y1,train_pred)
                #反向传播
                loss.backward()
                #优化器进行优化(梯度下降,降低误差)
                optimizer.step()
                train_loss+=loss
             #模型设置为评估模式
            model.eval()
            with torch.no_grad():
                valid_loss=0.00
                valid_preds=np.zeros((len(valid_y),S,N))
                for idx in range(0,len(valid_X),batch_size):
                    valid_X1=torch.Tensor(valid_X[idx:idx+batch_size])
                    valid_y1=torch.Tensor(valid_y[idx:idx+batch_size])
                    valid_pred=model(valid_X1)
                    loss=loss_fn(valid_y1,valid_pred)
                    valid_loss+=loss
                    valid_preds[idx:idx+batch_size]=valid_pred.detach().numpy()
                valid_preds=np.clip(valid_preds,0,1e20)
            torch.cuda.empty_cache()
        print(f"{family[i]},{store_nbr[j]},train_loss:{train_loss/(len(train_X)//batch_size)},valid_loss:{valid_loss/(len(valid_X)//batch_size)},metric:{metric(valid_y,valid_preds)}")
        #根据训练数据tmp_train预测len(tmp_test)的销售额
        model.eval()
        with torch.no_grad():
            test_preds=model(torch.Tensor([data[-T:]]))
            test_preds=test_preds.detach().numpy().reshape(-1)
            submission.loc[tmp_test.index,'sales']=np.clip(test_preds,0,1e20)
submission.to_csv("transformer.csv",index=None)
submission.head()

AUTOMOTIVE,1,train_loss:7.933492183685303,valid_loss:11.501394271850586,metric:0.6085181618331207
AUTOMOTIVE,2,train_loss:12.22263240814209,valid_loss:19.031681060791016,metric:0.5660508521669589
AUTOMOTIVE,3,train_loss:24.778066635131836,valid_loss:37.82584762573242,metric:0.5043246946706692
AUTOMOTIVE,4,train_loss:8.642611503601074,valid_loss:11.989980697631836,metric:0.5855278104895081
AUTOMOTIVE,5,train_loss:10.296677589416504,valid_loss:15.049962997436523,metric:0.5597993952423861
AUTOMOTIVE,6,train_loss:13.39505386352539,valid_loss:17.534029006958008,metric:0.5737387206666097
AUTOMOTIVE,7,train_loss:13.05517292022705,valid_loss:17.067466735839844,metric:0.6158717486079978
AUTOMOTIVE,8,train_loss:19.20771026611328,valid_loss:25.81452178955078,metric:0.6407711652691015
AUTOMOTIVE,9,train_loss:64.50350189208984,valid_loss:56.08516311645508,metric:0.5623162383150795
AUTOMOTIVE,10,train_loss:7.610986232757568,valid_loss:8.939112663269043,metric:0.7126113947536341
AUTOMOTIVE,11,train_l

Unnamed: 0,id,sales
0,3000888,3.365715
1,3000889,4.348152e-08
2,3000890,3.258557
3,3000891,2305.117
4,3000892,0.0
