# 数据集构造

## data_provider/uea.py
- 数据集构造时一些功能函数

In [1]:
def collate_fn(data, max_len=None):
    """
    Collate a list of (features, label) samples into one zero-padded batch.

    Args:
        data: list of (X, y) pairs; X is indexed as (seq_len, feat_dim) and y is a tensor.
        max_len: padded sequence length. When None, the longest sequence in the
            batch is used; longer sequences are truncated to `max_len`.
    Returns:
        X: (batch_size, max_len, feat_dim) tensor, zero-filled past each sequence's end.
        targets: labels stacked along a new leading batch dimension.
        padding_masks: (batch_size, max_len) boolean tensor, True means "keep"
            (real data), False means padding.
    """
    n_samples = len(data)
    features, labels = zip(*data)

    # True length of every sequence in the batch.
    seq_lengths = [sample.shape[0] for sample in features]
    if max_len is None:
        max_len = max(seq_lengths)

    # (seq_len, feat_dim) per sample -> (batch_size, max_len, feat_dim)
    feat_dim = features[0].shape[-1]
    X = torch.zeros(n_samples, max_len, feat_dim)
    for row, (sample, seq_len) in enumerate(zip(features, seq_lengths)):
        keep = min(seq_len, max_len)
        X[row, :keep, :] = sample[:keep, :]

    # (batch_size, num_labels)
    targets = torch.stack(labels, dim=0)

    length_tensor = torch.tensor(seq_lengths, dtype=torch.int16)
    padding_masks = padding_mask(length_tensor, max_len=max_len)
    return X, targets, padding_masks

def padding_mask(lengths, max_len=None):
    """
    Build a boolean mask marking the real (non-padded) positions of each sequence.

    The mask covers the sequence axis only, not the feature axis.

    Args:
        lengths: 1-D integer tensor with the true length of every sequence.
        max_len: padded sequence length. When None (or 0) the largest value in
            `lengths` is used.
    Returns:
        (batch_size, max_len) boolean tensor; True where the position index is
        smaller than the sequence length, False on padded positions.
    """
    batch_size = lengths.numel()
    # torch.Tensor has no `max_val()` method; `max()` is the intended reduction.
    max_len = max_len or int(lengths.max())
    positions = torch.arange(0, max_len, device=lengths.device).type_as(lengths)
    return positions.repeat(batch_size, 1).lt(lengths.unsqueeze(1))

class Normalizer(object):
    """
    Normalizes a dataframe either across ALL contained rows (time steps) or
    separately per sample, depending on `norm_type`.
    """

    def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
        """
        Args:
            norm_type: choose from:
                "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
                "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. across only its own rows)
            mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values
        """
        self.norm_type = norm_type
        self.mean = mean
        self.std = std
        self.min_val = min_val
        self.max_val = max_val

    def normalize(self, df):
        """
        Args:
            df: input dataframe; for the "per_sample_*" modes rows that share an
                index value are treated as one sample.
        Returns:
            df: normalized dataframe
        Raises:
            NameError: if `norm_type` is not one of the supported modes.
        """
        # Added to every denominator so constant columns/samples do not divide by zero.
        eps = np.finfo(float).eps

        if self.norm_type == "standardization":
            if self.mean is None:
                # Statistics are fitted on first use and cached for later calls.
                self.mean = df.mean()
                self.std = df.std()
            return (df - self.mean) / (self.std + eps)

        if self.norm_type == "minmax":
            if self.max_val is None:
                self.max_val = df.max()
                self.min_val = df.min()
            return (df - self.min_val) / (self.max_val - self.min_val + eps)

        if self.norm_type == "per_sample_std":
            grouped = df.groupby(by=df.index)
            # eps added for consistency with the other modes: a constant sample
            # has std 0 and would otherwise turn into NaN.
            return (df - grouped.transform('mean')) / (grouped.transform('std') + eps)

        if self.norm_type == "per_sample_minmax":
            grouped = df.groupby(by=df.index)
            min_vals = grouped.transform('min')
            return (df - min_vals) / (grouped.transform('max') - min_vals + eps)

        raise (NameError(f'Normalize method "{self.norm_type}" not implemented'))

def interpolate_missing(y):
    """
    Fill NaN entries of pd.Series `y` by linear interpolation (both directions).

    A series without NaN values is returned unchanged (same object).
    """
    has_gaps = y.isna().any()
    if not has_gaps:
        return y
    return y.interpolate(method='linear', limit_direction='both')


def subsample(y, limit=256, factor=2):
    """
    Keep every `factor`-th element of a Series that is longer than `limit`.

    Shorter (or equal-length) input is returned untouched; the subsampled
    result gets a fresh 0..n-1 index.
    """
    if len(y) <= limit:
        return y
    thinned = y[::factor]
    return thinned.reset_index(drop=True)

## data_provider/data_loader.py
- 数据集制作

In [2]:
import os
import numpy as np
import pandas as pd
import glob
import re
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')

# from data_provider.uea import subsample, interpolate_missing, Normalizer

In [3]:
class MyDataset_classifier(Dataset):
    """
    Classification dataset backed by `<split>_data.npy` / `<split>_label.csv`
    files located in `args.root_path`.

    Args:
        args: configuration object; only `root_path` is read here.
        flag: which split to load, one of "train", "val" or "test".
            The test split has no label file.
    """

    # flag -> (data file, label file or None when the split is unlabeled)
    _SPLIT_FILES = {
        "train": ("train_data.npy", "train_label.csv"),
        "val": ("val_data.npy", "val_label.csv"),
        "test": ("test_data.npy", None),
    }

    def __init__(self, args, flag=None):
        self.args = args
        self.flag = flag
        self.root_path = self.args.root_path
        self.all_df, self.labels_df = self.load_data()
        # A fresh Normalizer is fitted on whichever split is loaded, so every
        # split is scaled with its own statistics.
        normalizer = Normalizer()
        self.feature_df = normalizer.normalize(self.all_df)

    def load_data(self):
        """
        Load the arrays of the split selected by `self.flag`.

        Returns:
            (data, labels): `data` is the array stored in the .npy file and
            `labels` the values of the label CSV, or None for the test split.
        Raises:
            ValueError: if `self.flag` is not "train", "val" or "test".
        """
        if self.flag not in self._SPLIT_FILES:
            # Previously this only printed a message and then crashed with
            # UnboundLocalError on the return statement.
            raise ValueError(
                f'请指定读取的文件: flag must be one of {sorted(self._SPLIT_FILES)}, got {self.flag!r}')

        self.data_path, label_file = self._SPLIT_FILES[self.flag]
        df = np.load(os.path.join(self.root_path, self.data_path))
        if label_file is None:
            return df, None

        self.label_path = label_file
        label = pd.read_csv(os.path.join(self.root_path, self.label_path)).values
        print(label.shape)
        return df, label

    def instance_norm(self, case):
        """
        Normalize a single sample tensor and return the result.

        NOTE(review): the mean is taken over dim 0 while the variance is taken
        over dim 1 -- the axes differ; confirm this is intended before changing
        it, since existing checkpoints depend on this preprocessing.
        """
        mean = case.mean(0, keepdim=True)
        case = case - mean
        stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5)
        case /= stdev
        return case

    def __getitem__(self, ind):
        """Return (normalized features, flat label tensor) for sample `ind`."""
        data = self.feature_df[ind]  # presumably [seq_len, n_features] -- TODO confirm

        if self.labels_df is not None:
            label = self.labels_df[ind]
        else:
            # Unlabeled split: a dummy zero label keeps the collate step uniform.
            label = np.zeros(1)
        return self.instance_norm(torch.from_numpy(data)), torch.from_numpy(label).reshape(-1,)

    def __len__(self):
        return len(self.feature_df)

In [4]:
class Config:
    """Minimal settings holder used to try out the dataset in this notebook."""

    def __init__(self):
        # Directory with the .npy / .csv files, and the loader batch size.
        self.root_path, self.batch_size = "./user_data/", 128
        
# Quick check: build the training dataset and print its size and the shapes
# of the first sample.
args = Config()
data = MyDataset_classifier(args = args, flag="train")

print(len(data), "\n",   # author's note: original file size is 2877305; 5 fewer because sep_len is 5 (NOTE(review): looks stale for this dataset -- confirm)
      len(data[0]), "\n", 
      data[0][0].shape,"\n", 
      data[0][1].shape, "\n", 
     )

(11106, 1)
11106 
 2 
 torch.Size([180, 42]) 
 torch.Size([1]) 



# data_provider/data_factory.py

In [5]:
from torch.utils.data import DataLoader

In [6]:
def data_provider(args, flag):
    """
    Build the dataset for the split `flag` together with its DataLoader.

    Shuffling and dropping of the last incomplete batch are enabled for every
    split except 'test'.

    Args:
        args: configuration object; `batch_size` is read here and the object is
            forwarded to the dataset.
        flag: split name passed to the dataset ('test' switches off shuffle/drop_last).
    Returns:
        (dataset, data_loader)
    """
    is_test = flag == 'test'

    dataset = MyDataset_classifier(args=args, flag=flag)
    loader = DataLoader(
        dataset,
        batch_size=args.batch_size,   # batch size comes from the config
        shuffle=not is_test,
        num_workers=0,                # hard-coded on purpose
        drop_last=not is_test,        # discard a trailing batch that cannot be filled
        collate_fn=lambda batch: collate_fn(batch),
    )
    return dataset, loader

In [7]:
# Quick check of the training loader: r1 is the dataset, r2 the DataLoader.
r1, r2 = data_provider(args, "train")

# Stop at the second batch (index 1); `i` then holds that batch.
for e1, i in enumerate(r2):
    if e1==1:
        break
        
# Shapes of the batch features, labels and padding mask.
i[0].shape, i[1].shape, i[2].shape

(11106, 1)


(torch.Size([128, 180, 42]), torch.Size([128, 1]), torch.Size([128, 180]))

# my_model/TimesNet.py
- 对预测类任务
    - 因为模型好像都是 seq-to-seq 模型，所以 c_out = enc_in = len(features)（其实好像不必严格相等，但 c_out 要大于 enc_in），因为模型输出最后只保留 pred_len 那一段
- 对分类任务
    - enc_in = len(features)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from layers.Embed import DataEmbedding
from layers.Conv_Blocks import Inception_Block_V1

In [9]:
def FFT_for_Period(x, k=2):
    """
    Find the `k` dominant periods of a batch of series and their amplitudes.

    Args:
        x: tensor indexed as [B, T, C]; the FFT runs along dim 1.
        k: number of strongest frequencies to keep.
    Returns:
        period: numpy array of length k, `T // frequency_index` for each selected frequency.
        weights: [B, k] tensor with the channel-averaged amplitude of each
            selected frequency, per batch element.
    """
    spectrum = torch.fft.rfft(x, dim=1)
    amplitude = abs(spectrum)

    # Average amplitude over batch and channels: one value per frequency bin.
    freq_strength = amplitude.mean(0).mean(-1)
    freq_strength[0] = 0  # ignore the zero-frequency (constant) component

    top_idx = torch.topk(freq_strength, k)[1]
    top_idx = top_idx.detach().cpu().numpy()

    period = x.shape[1] // top_idx
    return period, amplitude.mean(-1)[:, top_idx]

class TimesBlock(nn.Module):
    """
    One TimesNet block: fold the series into a 2-D layout for each of the
    top-k periods, run a shared 2-D convolution stack over it, and merge the
    k results weighted by the amplitude of their frequency.

    Args:
        configs: object providing seq_len, pred_len, top_k, d_model, d_ff and num_kernels.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k

        # parameter-efficient design
        self.conv = nn.Sequential(
            Inception_Block_V1(configs.d_model, configs.d_ff, num_kernels=configs.num_kernels),
            nn.GELU(),
            Inception_Block_V1(configs.d_ff, configs.d_model, num_kernels=configs.num_kernels)
        )

    def forward(self, x):
        """
        Args:
            x: tensor of shape [B, T, N].
        Returns:
            Tensor of the same shape [B, T, N] (aggregated output plus residual).
        """
        B, T, N = x.size()
        period_list, period_weight = FFT_for_Period(x, self.k)

        res = []
        for i in range(self.k):
            period = period_list[i]

            # Zero-pad the time axis up to the next multiple of `period`.
            if T % period != 0:
                length = ((T // period) + 1) * period
                padding = torch.zeros([B, (length - T), N], device=x.device)
                out = torch.cat([x, padding], dim=1)
            else:
                length = T
                out = x

            # [B, length, N] -> [B, N, length // period, period]
            out = out.reshape(B, length // period, period, N).permute(0, 3, 1, 2).contiguous()

            # 2D conv: from 1d Variation to 2d Variation
            out = self.conv(out)

            # Back to [B, length, N]; the feature dimension is unchanged.
            out = out.permute(0, 2, 3, 1).reshape(B, -1, N)

            # Cut the padding off again. Slicing to `length` kept the padded
            # steps, so branches whose period does not divide T had a different
            # length and could neither be stacked nor added to `x`.
            res.append(out[:, :T, :])

        res = torch.stack(res, dim=-1)  # [B, T, N, k]

        # adaptive aggregation
        period_weight = F.softmax(period_weight, dim=1)
        period_weight = period_weight.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)

        res = torch.sum(res * period_weight, -1)  # weight each branch by its frequency amplitude
        res = res + x                             # residual connection
        return res
    
class Model(nn.Module):
    """
    TimesNet model; in this file only the classification head is implemented.

    Paper link: https://openreview.net/pdf?id=ju_Uqw384Oq

    Args:
        configs: object providing task_name, seq_len, pred_len, label_len,
            e_layers, enc_in, d_model, embed, freq, dropout and, depending on
            the task, c_out or num_class.
    """

    def __init__(self, configs):
        super().__init__()
        self.configs = configs
        self.task_name = configs.task_name

        self.seq_len = configs.seq_len

        # pred_len is not needed by the classification task
        self.pred_len = configs.pred_len

        # label_len is stored but not used by this model
        self.label_len = configs.label_len

        self.model = nn.ModuleList([TimesBlock(configs) for _ in range(configs.e_layers)])

        # enc_in is the number of input features. x_mark_enc is not fed to the
        # embedding in classification(), so embed/freq have no effect there.
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed,
                                           configs.freq, configs.dropout)
        self.layer = configs.e_layers
        self.layer_norm = nn.LayerNorm(configs.d_model)

        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            self.predict_linear = nn.Linear(self.seq_len, self.pred_len + self.seq_len)
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)

        if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)

        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def classification(self, x_enc, x_mark_enc):
        """
        Args:
            x_enc: input batch fed to the embedding, expected as [B, T, enc_in].
            x_mark_enc: padding mask (not the time features used by forecasting);
                multiplied onto the features after a trailing axis is added.
                May be None, in which case no masking is applied.
        Returns:
            (batch_size, num_classes) logits.
        """
        # No time-mark features are passed to the embedding.
        enc_out = self.enc_embedding(x_enc, None)  # [B,T,C]

        for i in range(self.layer):
            enc_out = self.layer_norm(self.model[i](enc_out))

        output = self.act(enc_out)
        output = self.dropout(output)

        # zero-out padding embeddings; skipped when the caller gives no mask,
        # which is the default of forward().
        if x_mark_enc is not None:
            output = output * x_mark_enc.unsqueeze(-1)

        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc=None):
        """
        Dispatch on `task_name`.

        Returns:
            [B, num_class] logits for 'classification'; None for an unknown task name.
        Raises:
            NotImplementedError: for the forecasting, imputation and anomaly
                detection tasks, whose methods are not defined on this class.
        """
        if self.task_name == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]

        if self.task_name in ('long_term_forecast', 'short_term_forecast',
                              'imputation', 'anomaly_detection'):
            # These branches used to call methods and variables that do not
            # exist here (forecast, imputation, anomaly_detection, x_dec, ...).
            raise NotImplementedError(
                f"task '{self.task_name}' is not implemented in this Model; "
                "only 'classification' is available")
        return None

# exp/exp_basic.py

In [10]:
import os
import torch
# from models import Autoformer, Transformer, TimesNet, Nonstationary_Transformer, DLinear, FEDformer, \
#     Informer, LightTS, Reformer, ETSformer, Pyraformer, PatchTST, MICN, Crossformer, FiLM, iTransformer, \
#     Koopa, TiDE, FreTS, TimeMixer, TSMixer, SegRNN, MambaSimple, Mamba, TemporalFusionTransformer
# from my_model import TimesNet

class Exp_Basic(object):
    """
    Base class for experiments: picks the device, builds the model through
    the subclass hook and moves it onto that device.
    """

    def __init__(self, args):
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        """Subclass hook that must return the model to train/evaluate."""
        raise NotImplementedError

    def _acquire_device(self):
        """Return the torch device selected by `args.use_gpu` / `args.gpu`."""
        if not self.args.use_gpu:
            cpu_device = torch.device('cpu')
            print('Use CPU')
            return cpu_device

        # Expose either the single chosen GPU or the whole device list.
        if self.args.use_multi_gpu:
            visible = self.args.devices
        else:
            visible = str(self.args.gpu)
        os.environ["CUDA_VISIBLE_DEVICES"] = visible

        gpu_name = 'cuda:{}'.format(self.args.gpu)
        gpu_device = torch.device(gpu_name)
        print('Use GPU: cuda:{}'.format(self.args.gpu))
        return gpu_device

    def _get_data(self):
        return None

    def vali(self):
        return None

    def train(self):
        return None

    def test(self):
        return None

# ./exp/classification.py
- enc_in = features_dim
- pred_len = 0

In [11]:
# from data_provider.data_factory import data_provider
# from exp.exp_basic import Exp_Basic

from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
import pdb
warnings.filterwarnings('ignore')

In [12]:
class Exp_Classification(Exp_Basic):
    """Training, validation and inference driver for the classification task."""

    def __init__(self, args):
        super(Exp_Classification, self).__init__(args)

    def _build_model(self):
        """Create the model in float32, wrapped in DataParallel for multi-GPU runs."""
        model = Model(self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return (dataset, data_loader) for the split named `flag`."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        return optim.RAdam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        return nn.CrossEntropyLoss()

    def vali(self, vali_data, vali_loader, criterion):
        """
        Evaluate the model on the validation loader (called from `train`).

        Returns:
            (mean batch loss, accuracy over all validation samples)
        """
        total_loss = []
        preds = []
        trues = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, label, pad_mask in vali_loader:
                batch_x = batch_x.float().to(self.device)
                pad_mask = pad_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, pad_mask)

                # The loss is computed on CPU copies, as before.
                pred = outputs.detach().cpu()
                # squeeze(-1), as in train(): a bare squeeze() would also drop
                # the batch axis for a batch holding a single sample.
                loss = criterion(pred, label.long().squeeze(-1).cpu())
                total_loss.append(loss.item())

                preds.append(outputs.detach())
                trues.append(label)

        total_loss = np.average(total_loss)
        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        # (total_samples,) int class index for each sample
        predictions = torch.argmax(probs, dim=1).cpu().numpy()
        trues = trues.flatten().cpu().numpy()
        accuracy = cal_accuracy(predictions, trues)
        self.model.train()
        return total_loss, accuracy

    def train(self, setting):
        """
        Train the model and write checkpoints below `args.checkpoints/setting`.

        Early stopping is driven by the negated validation accuracy; the final
        weights are additionally saved as 'final_checkpoint.pth'.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        time_now = time.time()
        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, label, pad_mask) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                pad_mask = pad_mask.float().to(self.device)
                label = label.to(self.device)

                # The model's forward takes only (x, padding_mask) in this file.
                outputs = self.model(batch_x, pad_mask)
                loss = criterion(outputs, label.long().squeeze(-1))
                train_loss.append(loss.item())

                # Progress is logged on every iteration.
                if (i + 1) % 1 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0)
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f}"
                  .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy))

            # Checkpoints are written inside the early-stopping helper.
            early_stopping(-val_accuracy, self.model, path, epoch)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            if (epoch + 1) % 3 == 0:  # adjust the learning rate every 3 epochs
                adjust_learning_rate(model_optim, epoch + 1, self.args)

        # Always keep the last weights, whether or not early stopping fired.
        final_model_path = os.path.join(path, 'final_checkpoint.pth')
        torch.save(self.model.state_dict(), final_model_path)

    def test(self, setting, test=1):
        """
        Predict labels for the test split and write them to
        './results/<setting>/predictions_label.csv'.

        Args:
            setting: experiment name; selects the checkpoint and result folders.
            test: when truthy, load 'checkpoint_10.pth' from the checkpoint
                folder of `setting` before predicting (the file name is
                hard-coded and has to be adapted by hand).
        """
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model, 需要自定义checkpint文件名')
            # Read from the folder train() saves to; map_location lets a
            # checkpoint saved on GPU be loaded on the current device.
            checkpoint_file = os.path.join(self.args.checkpoints, setting, 'checkpoint_10.pth')
            self.model.load_state_dict(torch.load(checkpoint_file, map_location=self.device))
        preds = []
        trues = []

        self.model.eval()
        with torch.no_grad():
            for batch_x, label, pad_mask in test_loader:
                batch_x = batch_x.float().to(self.device)
                pad_mask = pad_mask.float().to(self.device)
                label = label.to(self.device)
                outputs = self.model(batch_x, pad_mask)
                preds.append(outputs.detach())
                trues.append(label)

        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        print('test shape:', preds.shape, trues.shape)

        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        # (total_samples,) int class index for each sample
        predictions = torch.argmax(probs, dim=1).cpu().numpy()

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        pd.Series(predictions).to_csv(os.path.join(folder_path, "predictions_label.csv"))

# run.py

In [13]:
# from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
# from exp.exp_imputation import Exp_Imputation
# from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast
# from exp.exp_anomaly_detection import Exp_Anomaly_Detection
# from exp.exp_classification import Exp_Classification
import argparse
import os
import torch
from utils.print_args import print_args
import random
import numpy as np

In [14]:
'''
参数解析：
    - 做测试，就把is_training设置为0
    - features，target ：都是forecasting任务特有的，但也不算，反正不好用
    - freq，embed：如果输入的数据有时间戳，可以指定这两个参数做时间的embed，但是如果没传入时间戳特征，
                    这两个参数不管就行（指定了也不用）
'''

if __name__ == '__main__':
    # Fix all random seeds for reproducibility.
    fix_seed = 2021
    random.seed(fix_seed)
    torch.manual_seed(fix_seed)
    np.random.seed(fix_seed)

    parser = argparse.ArgumentParser(description='TimesNet')

    # basic config
    parser.add_argument('--task_name', type=str, default='classification',
                        help='task name, options:[long_term_forecast, short_term_forecast,imputation, classification, anomaly_detection]')
    parser.add_argument('--is_training', type=int, default=0, help='status')
    parser.add_argument('--model', type=str, default='TimesNet',
                        help='model name, options: [Autoformer, Transformer, TimesNet]')

    # data loader
    parser.add_argument('--root_path', type=str, default='./user_data/')
    parser.add_argument('--features', type=str, default='MS',
                        help='forecasting任务特有, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT',
                        help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='不传入x_mark，freq用不上。freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
    parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

    # forecasting task (--seq_len is declared with the classification options below)
    parser.add_argument('--label_len', type=int, default=128, help='start token length')
    parser.add_argument('--pred_len', type=int, default=64, help='prediction sequence length')
    parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)

    # inputation task
    parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio')

    # anomaly detection task
    # '%' must be escaped as '%%': argparse %-formats help strings, and a bare
    # '%)' is an invalid format specifier.
    parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%%)')

    # classification task
    parser.add_argument('--seq_len', type=int, default=180,
                        help='说是forecasting的参数，但是classification也要用')
    parser.add_argument('--num_class', type=int, default=3, help='几分类任务')

    # model define
    parser.add_argument('--expand', type=int, default=2, help='expansion factor for Mamba')
    parser.add_argument('--d_conv', type=int, default=4, help='conv kernel size for Mamba')
    parser.add_argument('--top_k', type=int, default=5,
                        help='for TimesBlock，提取前几个主要成分周期')
    parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
    parser.add_argument('--enc_in', type=int, default=42,
                        help='等于特征数，encoder input size')
    parser.add_argument('--dec_in', type=int, default=7,
                        help='TimesNet不使用decoder, decoder input size')
    parser.add_argument('--c_out', type=int, default=2,
                        help='inputation和forecasting任务的，输出维度设置为和enc_in一样，output size')

    parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=5,
                        help='多头自注意机制的数量')
    parser.add_argument('--e_layers', type=int, default=2,
                        help='编码器由几层堆叠组成，每一层包括自注意力机制和前馈神经网络')
    parser.add_argument('--d_layers', type=int, default=1,
                        help='解码器由几层堆叠组成，每一层包括自注意力机制、编码器-解码器注意力机制和前馈神经网络')
    parser.add_argument('--d_ff', type=int, default=512,
                        help='前馈神经网络（Feed Forward Network）的隐藏层维度')

    parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
    parser.add_argument('--factor', type=int, default=1, help='attn factor')
    parser.add_argument('--distil', action='store_false', default=True,
                        help='是否使用蒸馏:将深层网络简化为较浅层网络的方法，以减少计算量和模型复杂度')
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--embed', type=str, default='timeF',
                        help='time features encoding, options:[timeF, fixed, learned]')
    parser.add_argument('--activation', type=str, default='gelu', help='activation')
    parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')

    parser.add_argument('--channel_independence', type=int, default=1,
                        help='0: channel dependence 1: channel independence for FreTS model')
    parser.add_argument('--decomp_method', type=str, default='moving_avg',
                        help='method of series decompsition, only support moving_avg or dft_decomp')
    parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0')
    parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers')
    parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size')

    parser.add_argument('--down_sampling_method', type=str, default=None,
                        help='down sampling method, only support avg, max, conv')
    parser.add_argument('--seg_len', type=int, default=48,
                        help='the length of segmen-wise iteration of SegRNN')

    # optimization
    parser.add_argument('--itr', type=int, default=1, help='实验次数')
    parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
    parser.add_argument('--batch_size', type=int, default=256, help='batch size of train input data')
    parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
    parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
    parser.add_argument('--loss', type=str, default='MSE', help='loss function')
    parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
    parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

    # GPU
    # NOTE(review): type=bool turns every non-empty string (even "False") into
    # True; left as is to keep the command-line interface unchanged.
    parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
    parser.add_argument('--gpu', type=int, default=0, help='gpu')
    parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
    parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

    # de-stationary projector params
    parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
                        help='hidden layer dimensions of projector (List)')
    parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')

    # metrics (dtw)
    parser.add_argument('--use_dtw', type=bool, default=False,
                        help='the controller of using dtw metric (dtw is time consuming, not suggested unless necessary)')

    # Augmentation
    parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment")
    parser.add_argument('--seed', type=int, default=2, help="Randomization seed")
    parser.add_argument('--jitter', default=False, action="store_true", help="Jitter preset augmentation")
    parser.add_argument('--scaling', default=False, action="store_true", help="Scaling preset augmentation")
    parser.add_argument('--permutation', default=False, action="store_true", help="Equal Length Permutation preset augmentation")
    parser.add_argument('--randompermutation', default=False, action="store_true", help="Random Length Permutation preset augmentation")
    parser.add_argument('--magwarp', default=False, action="store_true", help="Magnitude warp preset augmentation")
    parser.add_argument('--timewarp', default=False, action="store_true", help="Time warp preset augmentation")
    parser.add_argument('--windowslice', default=False, action="store_true", help="Window slice preset augmentation")
    parser.add_argument('--windowwarp', default=False, action="store_true", help="Window warp preset augmentation")
    parser.add_argument('--rotation', default=False, action="store_true", help="Rotation preset augmentation")
    parser.add_argument('--spawner', default=False, action="store_true", help="SPAWNER preset augmentation")
    parser.add_argument('--dtwwarp', default=False, action="store_true", help="DTW warp preset augmentation")
    parser.add_argument('--shapedtwwarp', default=False, action="store_true", help="Shape DTW warp preset augmentation")
    parser.add_argument('--wdba', default=False, action="store_true", help="Weighted DBA preset augmentation")
    parser.add_argument('--discdtw', default=False, action="store_true", help="Discrimitive DTW warp preset augmentation")
    parser.add_argument('--discsdtw', default=False, action="store_true", help="Discrimitive shapeDTW warp preset augmentation")
    parser.add_argument('--extra_tag', type=str, default="", help="Anything extra")

    # The argument list is passed explicitly instead of being read from
    # sys.argv (presumably because this runs inside a notebook -- adjust the
    # list to change settings).
    args = parser.parse_args(["--use_amp"])
    # Use the GPU only when it is both requested and available. A second
    # assignment used to overwrite this and ignore --use_gpu.
    args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False

    print(torch.cuda.is_available())

    if args.use_gpu and args.use_multi_gpu:
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = [int(id_) for id_ in device_ids]
        args.gpu = args.device_ids[0]

    print('Args in experiment:')
    print_args(args)

    # Only Exp_Classification is defined in this file; the other experiment
    # classes come from imports that are commented out.
    if args.task_name == 'long_term_forecast':
        Exp = Exp_Long_Term_Forecast
    elif args.task_name == 'short_term_forecast':
        Exp = Exp_Short_Term_Forecast
    elif args.task_name == 'imputation':
        Exp = Exp_Imputation
    elif args.task_name == 'anomaly_detection':
        Exp = Exp_Anomaly_Detection
    elif args.task_name == 'classification':
        Exp = Exp_Classification
    else:
        Exp = Exp_Long_Term_Forecast

    # Experiment name; also used as checkpoint / result folder name.
    setting = '{}_sl{}'.format(
        args.model,
        args.seq_len)

    if args.is_training:
        for ii in range(args.itr):
            exp = Exp(args)  # set experiments
            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)
    else:
        exp = Exp(args)  # set experiments
        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
        torch.cuda.empty_cache()

True
Args in experiment:
[1mBasic Config[0m
  Task Name:          classification      Is Training:        0                   
  Model:              TimesNet            

[1mData Loader[0m
  Root Path:          ./user_data/        
  Checkpoints:        ./checkpoints/      

[1mclassification Task[0m
  Seq Len:            180                 

[1mModel Parameters[0m
  Top k:              5                   Num Kernels:        6                   
  Enc In:             42                  Dec In:             7                   
  C Out:              2                   d model:            512                 
  n heads:            5                   e layers:           2                   
  d layers:           1                   d FF:               512                 
  Moving Avg:         25                  Factor:             1                   
  Distil:             1                   Dropout:            0.1                 
  Embed:              timeF               

In [15]:
# Read the saved predictions, name the two columns and export them as the
# submission file.
# NOTE(review): this results folder name does not follow the
# '{model}_sl{seq_len}' setting built in the run cell -- confirm the path.
pred_label = pd.read_csv("./results/classification_TimesNet_sl180_ll128_pl64_dm512_nh5_el2_dl1_df512_expand2_dc4_fc1_ebtimeF_dtTrue_test_0/predictions_label.csv")
pred_label.columns = ["id", "label"]
pred_label.to_csv("../out_put/timesnet.csv",index=None)

In [16]:
# Number of predictions per class.
pred_label["label"].value_counts()

label
0    501
2    366
1    288
Name: count, dtype: int64

In [17]:
# (rows, columns) of the prediction table.
pred_label.shape

(1155, 2)

In [18]:
# Display the prediction table.
pred_label

Unnamed: 0,id,label
0,0,2
1,1,2
2,2,1
3,3,0
4,4,1
...,...,...
1150,1150,1
1151,1151,0
1152,1152,0
1153,1153,0
