In [None]:
import warnings
warnings.filterwarnings("ignore")
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy.signal import resample
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf


In [3]:
def one_hot(labels, num_classes):
    labels = np.squeeze(labels)
    if labels.ndim==0:
        arr = np.zeros(num_classes)
        arr[labels]=1
        return arr
    batch_size = labels.shape[0]
    idxs = np.arange(0, batch_size, 1)
    arr = np.zeros([batch_size, num_classes])
    arr[idxs, labels] = 1
    return arr


In [4]:
def jitter(x, snr_db):
    """
    根据信噪比添加噪声
    :param x:
    :param snr_db:
    :return:
    """
    # 随机选择信噪比
    assert isinstance(snr_db, list)
    snr_db_low = snr_db[0]
    snr_db_up = snr_db[1]
    snr_db = np.random.randint(snr_db_low, snr_db_up, (1,))[0]

    snr = 10 ** (snr_db / 10)
    Xp = np.sum(x ** 2, axis=0, keepdims=True) / x.shape[0]  # 计算信号功率
    Np = Xp / snr  # 计算噪声功率
    n = np.random.normal(size=x.shape, scale=np.sqrt(Np), loc=0.0)  # 计算噪声 loc均值，scale方差
    xn = x + n
    return xn


In [5]:
def standardization(X):
    # x1 = X.transpose(0, 1, 3, 2)
    x1 = X
    x2 = x1.reshape(-1, x1.shape[-1])
    mean = [8.03889039e-03, -6.41381949e-02, 2.37856977e-02, 8.64949391e-01,
            2.80964889e+00, 7.83041714e+00, 6.44853358e-01, 9.78580749e+00]
    std = [0.6120893, 0.53693888, 0.7116134, 3.22046385, 3.01195336, 2.61300056, 0.87194132, 0.68427254]
    mu=np.array(mean)
    sigma=np.array(std)
    x3 = ((x2 - mu) / (sigma))
    # x4 = x3.reshape(x1.shape).transpose(0, 1, 3, 2)
    x4 = x3.reshape(x1.shape)
    return x4


# 构造训练集Dataset

In [6]:
class XWDataset(object):
    def __init__(self,data_path,with_label=True,n_classes=19,**kwargs):

        self.data_path=data_path
        self.with_label=with_label #测试集无标签导入
        self.n_classes=n_classes
        #增加参数 with_nosie,
        self.with_nosie=kwargs.get("with_nosie",False)
        self.noise_SNR_db=kwargs.get("noise_SNR_db",[5,15])
        if self.with_nosie:
            print("添加随机噪声,SNR_db:{}".format(self.noise_SNR_db))
        self.load_dataset()

    @property
    def data(self):
        if self.with_label==True:
            return self.X,self.Y
        else:
            return self.X

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, index):
        '''Generate one  of data'''

        x = self.X[int(index)]
        if self.with_label == True:
            y=self.Y[int(index)]
            y=one_hot(y,self.n_classes)
            return x,y
        else:
            return x
    @property
    def dim(self):
        return tuple(self.X.shape[1:])

    def load_dataset(self):
        df = pd.read_csv(self.data_path)
        # print(df.head())
        df = df.sort_values(['fragment_id', 'time_point'])
        ###特征提取
        df['mod'] = (df.acc_x ** 2 + df.acc_y ** 2 + df.acc_z ** 2) ** .5
        df['modg'] = (df.acc_xg ** 2 + df.acc_yg ** 2 + df.acc_zg ** 2) ** .5
        ###数据读取

        num = np.unique(df["fragment_id"]).shape[0]
        X_shape = (num,1, 60, 8)
        X = np.zeros(X_shape)
        for i in tqdm(range(X_shape[0])):
            tmp = df[df.fragment_id == i][:60]
            if self.with_label:
                arr = resample(tmp.drop(['fragment_id', 'time_point', 'behavior_id'],
                                        axis=1), 60, np.array(tmp.time_point))[0]
                X[i, 0, :, :] = arr
            else:
                arr = resample(tmp.drop(['fragment_id', 'time_point',],
                                        axis=1), 60, np.array(tmp.time_point))[0]
                X[i, 0, :, :] = arr
        ###############################################
        if self.with_label:
            #标准化
            X=standardization(X)
            Y = np.array(df.groupby("fragment_id")["behavior_id"].min())
            if self.with_nosie:
                X1 = jitter(X, self.noise_SNR_db)
                X = np.concatenate([X, X1], axis=0)
                Y = np.concatenate([Y, Y], axis=0)
            self.X ,self.Y=X,Y
        else:
            # 标准化
            X = standardization(X)
            self.X=X
        self.fragment_ids = df.groupby("fragment_id")["fragment_id"].min()
        self.time_points = df.groupby("fragment_id")["time_point"]
        self.indexes = np.arange(self.X.shape[0])

    def stratifiedKFold(self,fold=5):
        kfold = StratifiedKFold(fold, shuffle=True)
        self.X_copy,self.Y_copy=self.X.copy(),self.Y.copy()
        self.train_valid_idxs=[ (train_idx,valid_idx) for train_idx,valid_idx in kfold.split(self.X_copy,self.Y_copy) ]

    def get_valid_data(self,index):
        """
        :param index:
        :return:  重新划分训练集和验证集 , 并返回验证集数据
        """
        train_idx,valid_idx= self.train_valid_idxs[index]
        X,Y= self.X_copy[train_idx],self.Y_copy[train_idx]
        self.X, self.Y=X,Y
        self.valid_X,self.valid_Y=self.X_copy[valid_idx],self.Y_copy[valid_idx]
        return self.valid_X,self.valid_Y
    

In [17]:
train_data=XWDataset(os.path.join("./data","sensor_train.csv"),with_label=True)
test_data=XWDataset(os.path.join("./data","sensor_test.csv"),with_label=False,)

  0%|          | 0/7292 [00:00<?, ?it/s]  0%|          | 15/7292 [00:00<00:49, 147.75it/s]  1%|          | 60/7292 [00:00<00:39, 185.03it/s]  1%|▏         | 94/7292 [00:00<00:33, 213.94it/s]  2%|▏         | 129/7292 [00:00<00:29, 241.68it/s]  2%|▏         | 155/7292 [00:00<00:29, 244.39it/s]  3%|▎         | 191/7292 [00:00<00:26, 268.63it/s]  3%|▎         | 225/7292 [00:00<00:24, 285.94it/s]  4%|▎         | 264/7292 [00:00<00:22, 309.58it/s]  4%|▍         | 297/7292 [00:00<00:22, 307.52it/s]  5%|▍         | 329/7292 [00:01<00:22, 310.10it/s]  5%|▍         | 364/7292 [00:01<00:21, 319.30it/s]  5%|▌         | 398/7292 [00:01<00:21, 322.84it/s]  6%|▌         | 438/7292 [00:01<00:20, 341.94it/s]  6%|▋         | 473/7292 [00:01<00:20, 331.75it/s]  7%|▋         | 509/7292 [00:01<00:20, 335.03it/s]  7%|▋         | 543/7292 [00:01<00:20, 334.51it/s]  8%|▊         | 578/7292 [00:01<00:19, 337.06it/s]  8%|▊         | 612/7292 [00:01<00:21, 307.67it/s]  9%|▉         | 647/7292

(1, 60, 8)

In [18]:
feature,label=next(iter(train_data))
print(feature.shape,label.shape)

(1, 60, 8) (19,)


In [8]:
mapping = {0: 'A_0', 1: 'A_1', 2: 'A_2', 3: 'A_3',
           4: 'D_4', 5: 'A_5', 6: 'B_1', 7: 'B_5',
           8: 'B_2', 9: 'B_3', 10: 'B_0', 11: 'A_6',
           12: 'C_1', 13: 'C_3', 14: 'C_0', 15: 'B_6',
           16: 'C_2', 17: 'C_5', 18: 'C_6'}
def get_acc_combo():
    def combo(y, y_pred):
        # 数值ID与行为编码的对应关系
        mapping = {0: 'A_0', 1: 'A_1', 2: 'A_2', 3: 'A_3',
            4: 'D_4', 5: 'A_5', 6: 'B_1',7: 'B_5',
            8: 'B_2', 9: 'B_3', 10: 'B_0', 11: 'A_6',
            12: 'C_1', 13: 'C_3', 14: 'C_0', 15: 'B_6',
            16: 'C_2', 17: 'C_5', 18: 'C_6'}
        # 将行为ID转为编码
        code_y, code_y_pred = mapping[int(y)], mapping[int(y_pred)]
        if code_y == code_y_pred: #编码完全相同得分1.0
            return 1.0
        elif code_y.split("_")[0] == code_y_pred.split("_")[0]: #编码仅字母部分相同得分1.0/7
            return 1.0/7
        elif code_y.split("_")[1] == code_y_pred.split("_")[1]: #编码仅数字部分相同得分1.0/3
            return 1.0/3
        else:
            return 0.0
    confusionMatrix=np.zeros((19,19))
    for i in range(19):
        for j in range(19):
            confusionMatrix[i,j]=combo(i,j)
    def acc_combo(y, y_pred):
        y=np.argmax(y,axis=1)
        y_pred = np.argmax(y_pred, axis=1)
        scores=confusionMatrix[y,y_pred]
        return np.mean(scores)
    return acc_combo

def get_acc_func():
    confusionMatrix=np.zeros((19,19))
    for i in range(19):
            confusionMatrix[i,i]=1
    def acc_func(y, y_pred):
        y=np.argmax(y,axis=1)
        y_pred = np.argmax(y_pred, axis=1)
        scores=confusionMatrix[y,y_pred]
        return np.mean(scores)
    return acc_func
acc_combo_func=get_acc_combo()
acc_func=get_acc_func()


# 构造评价指标

In [9]:
class XWMetrics(object):

    def __init__(self):
        pass
        self.labels=[]
        self.scores=[]

    def reset(self):
        self.labels=[]
        self.scores=[]

    def add_batch(self,labels,scores):
        self.labels.append(labels)
        self.scores.append(scores)

    def apply(self):

        labels=np.concatenate(self.labels,axis=0)
        scores=np.concatenate(self.scores,axis=0)
        acc_combo=acc_combo_func(labels,scores)
        acc=acc_func(labels,scores)
        return {"acc":acc,"acc_combo":acc_combo}

# 将pytorch构造成类似keras的训练操作

In [10]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import  pandas as pd
import  os
from tqdm import tqdm
from collections import Iterable
import numpy as np
class Agent(object):
    def __init__(self,model,device_info,save_dir):
        
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        self.save_dir=save_dir
        self.device =device_info["device"]
        # self.saver=None
        self.model=model
        self.ParallelModel = torch.nn.DataParallel(model, device_ids= device_info["device_ids"])
        self.ParallelModel.to(self.device)
        
    def summary(self):
        print(self.model)

    def compile(self,loss_dict,optimizer,metrics):
        self.loss_dict=loss_dict
        self.optimizer=optimizer
        self.metrics=metrics

    def fit_generator(self,dataloader,epochs, validation_data,reduceLR=None,earlyStopping=None,**kwargs):
        metric=self.metrics
        loss_dict= self.loss_dict
        valid_acc=[]
        for epoch  in range(epochs):
            print("epoch:{}-lr:{:.8f}".format(epoch,self.optimizer.state_dict()['param_groups'][0]['lr'])+"-"*5)
            #train
            phase="train"
            self.model.train()
            metric.reset()
            result_epoch=self.iter_on_a_epoch(phase,dataloader,loss_dict,metric)

            # log
            s = 'phase:{}-'.format(phase)
            for key, val in result_epoch.items():
                if not isinstance(val, Iterable):
                    s += ",{}:{:.4f}".format(key, val)
            print(s)

            #valid
            phase = "valid"
            metric.reset()
            self.model.eval()
            valid_dataloader=DataLoader(validation_data,batch_size=1024,drop_last=False)
            result_epoch = self.iter_on_a_epoch( phase, valid_dataloader, loss_dict, metric)
            valid_acc.append(result_epoch["acc_metrics"])
            # log
            s = 'phase:{}---'.format( phase)
            for key, val in result_epoch.items():
                if not isinstance(val, Iterable):
                    s += ",{}:{:.4f}".format(key, val)
            print(s)

            #保存模型
            # 保存验证集准确率>0.7的当前最高准确率权重
            if (valid_acc[-1] > 0.7 and valid_acc[-1] == max(valid_acc)) or (epoch==epochs-1):
                save_name="epo_{}-score_{:.5f}.pth".format(epoch, valid_acc[-1])
                self.save_model(save_name)
            # recude lr
            if reduceLR is not  None:
                epoch_loss = sum([val for key, val in result_epoch.items() if "loss" in key])
                reduceLR.step(valid_acc[-1], epoch)
            # earlyStopping
            if earlyStopping is not None:
                earlyStopping.step()

    def iter_on_a_epoch(self, phase, dataloader,loss_dict, metric, **kwargs):
        assert  phase in ["train","valid","test"]
        result_epoch = {"count": 0,}
        metric.reset()
        # for cnt_batch, batch in zip(tqdm(range(1, len(dataloader) + 1)), dataloader):
        for cnt_batch, batch in zip(range(1, len(dataloader) + 1), dataloader):
            result_batch = self.iter_on_a_batch(batch, loss_dict=loss_dict, phase=phase)
            #返回结果
            score_batch,label_batch,img_batch=result_batch["score_batch"],result_batch["label_batch"],result_batch["img_batch"]

            metric.add_batch(label_batch.astype(np.float),score_batch.astype(np.float))
            # print(np.array(metric.labels).shape)
            # 返回损失
            result_epoch["count"] += label_batch.shape[0]
            for key, val in result_batch["loss"].items():
                key = key + "_loss"
                if key not in result_epoch.keys(): result_epoch[key] = []
                result_epoch[key].append(val)
            # ###### 打印loss
            # if phase == "train":
            #     cul_lr = self.optimizer_ft.state_dict()['param_groups'][0]['lr']
            #     s = "epoch:{},batch:{},lr:{:.5f}".format(epoch, cnt_batch, float(cul_lr))
            #     for key, loss in result["loss"].items():
            #         s += ",{}:{:.4f}".format(key, float(loss))
            #     # self.logger.info(s)
            #     print(s)

        # 将所有loss平均
        for key, val in result_epoch.items():
            if "loss" in key:
                result_epoch[key] = np.array(val).sum() / len(val)

        metric_dict=metric.apply()
        for key,val in metric_dict.items():
            key=key+"_metrics"
            result_epoch[key]=val
        return result_epoch

    def iter_on_a_batch(self, batch,  phase,loss_dict):
        assert phase in ["train", "valid", "test",],print(phase)
        # self.model.setMode("segment")
        img_tensor, label_tensor = batch
        model=self.ParallelModel
        optimizer=self.optimizer
        device=self.device
        # forward
        img_rensor = self.type_tran(img_tensor)

        label_tensor =self.type_tran(label_tensor)
        score_tensor = model(img_rensor)
        # update_mask_batch=mask_tensor.detach().cpu().numpy()
        ###### cul loss
        losses = dict()
        if phase in ["train", "valid", "test"]:
            for name,loss in loss_dict.items():
                loss_val = loss(score_tensor, label_tensor)

                losses[name] = loss_val
        ##### backward
        if phase in ["train"]:
            assert isinstance(losses, dict)
            model.zero_grad()
            loss_sum = sum(list(losses.values()))
            loss_sum.backward()
            optimizer.step()
        #### return

        score_tensor=score_tensor.softmax(dim=-1)
        img_batch = img_rensor.detach().cpu().numpy()
        label_batch = label_tensor.detach().cpu().numpy()
        score_batch = score_tensor.detach().cpu().numpy()
        result = {"img_batch": img_batch,"label_batch": label_batch, "score_batch": score_batch}
        if phase in ["train", "valid", "test"]:
            sum_loss = 0
            for key, loss in losses.items():
                losses[key] = float(loss)
                sum_loss += float(loss)
            # losses["sum"] = sum_loss
        result["loss"] = losses
        return result

    def load_weights(self,load_name):
        save_dir = self.save_dir + "/model/"
        load_path=os.path.join(save_dir,load_name)
        if os.path.exists(load_path):
            pthfile = torch.load(load_path)
            # print(pthfile.keys())
            self.model.load_state_dict(pthfile, strict=True)
            print("load weights from {}".format(load_path))
        else:
            raise  Exception("Load model falied, {} is not existing!!!".format(load_path))

    def save_model(self,save_name):
        save_dir=self.save_dir+"/model/"
        if not  os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path=os.path.join(save_dir,save_name)
        print("save weights to {}".format(save_path))
        torch.save(self.model.state_dict(),save_path)


    def load_best_model(self):

        load_names=[  name for name in os.listdir(self.save_dir+"/model/") if name.endswith(".pth")]
        load_name = sorted(load_names, key=lambda x: float(x.split(".")[-2]),
                           reverse=True)[0]
        self.load_weights(load_name)

    def predict(self,data,phase,batch_size=1024):
        # valid
        dataloader = DataLoader(data, batch_size=batch_size, drop_last=False,shuffle=False)
        score_batchs=[]
        result_epoch = {"count": 0,}
        for cnt_batch, batch in zip(tqdm(range(1, len(dataloader) + 1)), dataloader):
            result_batch = self.infer_on_a_batch(batch)
            #返回结果
            score_batch,img_batch=result_batch["score_batch"],result_batch["img_batch"]
            score_batchs.append(score_batch)
            # 返回损失
            result_epoch["count"] += score_batch.shape[0]
        dim=score_batchs[0].shape[-1]
        score_array=np.concatenate(score_batchs,axis=0)

        df = pd.DataFrame(score_array)
        df.to_csv(self.save_dir + "/{}_score.csv".format(phase))

        return score_array

    def infer_on_a_batch(self, batch):
        img_tensor = batch
        # forward
        img_rensor = self.type_tran(img_tensor)
        score_tensor = self.ParallelModel(img_rensor)
        score_tensor=score_tensor.softmax(dim=-1)
        #### return
        img_batch = img_rensor.detach().cpu().numpy()
        score_batch = score_tensor.detach().cpu().numpy()
        result = {"img_batch": img_batch, "score_batch": score_batch}
        return result

    def type_tran(self,data):
        return  data.to(torch.float32).to(self.device)

# 构造CNN模型

In [11]:
class Model(nn.Module):
    def __init__(self, num_classes=19):
        super(Model, self).__init__()

        # input: 1, num, features_num
        base_channel=64
        self.features = nn.Sequential(
            # 1
            nn.Conv2d(1, base_channel, kernel_size=(3, 3),stride=(1,1),padding=(1,1)),
            nn.BatchNorm2d(base_channel),
            nn.ReLU(inplace=True),
            # 2
            nn.Conv2d(base_channel, base_channel*2,kernel_size=(3, 3), stride=(1,1),padding=(1,1)),
            nn.BatchNorm2d(base_channel*2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2,2),
            # 3
            nn.Conv2d(base_channel*2, base_channel*4, kernel_size=(3, 3), stride=(1,1),padding=(1,1)),
            nn.BatchNorm2d(base_channel*4),
            nn.ReLU(inplace=True),
            nn.Conv2d(base_channel * 4, base_channel * 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(base_channel * 4),
            nn.ReLU(inplace=True),
            # last
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Dropout(0.3),
        )
        self.classier = nn.Linear(base_channel*4, num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)

        x = x.view(x.shape[0], -1)
        x = self.classier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv2d):
                # nn.init.constant_(m.weight, 0)
                # nn.init.normal_(m.weight, 0, 0.01)
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
                    

In [15]:
class CELoss(nn.Module):

    def __init__(self, reduction='mean'):
        super().__init__()

        self.log_softmax = nn.LogSoftmax(dim=1)
        self.nllloss=  nn.NLLLoss(reduction=reduction)
    def forward(self, x, target):

        if x.size(0) != target.size(0):
            raise ValueError('Expected input batchsize ({}) to match target batch_size({})'
                             .format(x.size(0), target.size(0)))

        if x.dim() < 2:
            raise ValueError('Expected input tensor to have least 2 dimensions(got {})'
                             .format(x.size(0)))

        if x.dim() != 2:
            raise ValueError('Only 2 dimension tensor are implemented, (got {})'
                             .format(x.size()))

        x = self.log_softmax(x)
        target=torch.argmax(target,dim=-1)
        loss=self.nllloss(x,target=target)
        return loss
    

In [16]:
import random
random.seed(1)
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import os
import time
from torch.optim import SGD, lr_scheduler, Adam

timer = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
SAVE_DIR="./save_{}/".format(timer)
if not os.path.exists(SAVE_DIR):
    os.makedirs(SAVE_DIR)

data_dir="./data"
sub=pd.read_csv("./data/submit.csv")
EPOCH=150
BATCH_SIZE=512

DEVICE_INFO=dict(
    gpu_num=torch.cuda.device_count(),
    device_ids = range(0, torch.cuda.device_count(), 1),
    #device = "cpu",
    device = torch.device("cuda:0") if torch.cuda.is_available() else "cpu",
    index_cuda=0, )

NUM_CLASSES = 19

ACTIVATION="relu"

METRICS=XWMetrics()
train_data=XWDataset(os.path.join(data_dir,"sensor_train.csv"),with_label=True)
test_data=XWDataset(os.path.join(data_dir,"sensor_test.csv"),with_label=False,)
proba_t = np.zeros((len(test_data), NUM_CLASSES))
folds=5
train_data.stratifiedKFold(folds)
for fold in range(folds):
	#定义模型
    #划分训练集和验证集 并返回验证集数据
    model=Model(num_classes=NUM_CLASSES)
    save_dir=os.path.join(SAVE_DIR,"flod_{}".format(fold))
    agent=Agent(model=model,device_info=DEVICE_INFO, save_dir=save_dir)
    earlyStopping = None
	
	#定义损失
    LOSS={ "celoss":CELoss() }
	
	#定义优化器
    OPTIM=Adam(model.parameters(), lr=0.001, weight_decay=0.001)
	
	#定义损失
    reduceLR = lr_scheduler.ReduceLROnPlateau(OPTIM, mode="max", factor=0.5, patience=8, verbose=True)
	
    agent.compile(loss_dict=LOSS,optimizer=OPTIM, metrics=METRICS)
    agent.summary()
	
	#生成验证集
    valid_X,valid_Y=train_data.get_valid_data(fold)
    valid_Y=one_hot(valid_Y,NUM_CLASSES)
    valid_data = [(valid_X[i],valid_Y[i]) for i in range(valid_X.shape[0])]

    train_generator=DataLoader(train_data,batch_size=BATCH_SIZE,shuffle=True,num_workers=0)
	#生成验证集
    agent.fit_generator(train_generator, epochs=EPOCH,
                                  validation_data=valid_data,
                            reduceLR=reduceLR,
                            earlyStopping=earlyStopping)

    agent.load_best_model()
    test_X=[test_data.data[i] for i in range(test_data.data.shape[0])]
    scores_test= agent.predict(test_X,batch_size=1024,phase="test")
    proba_t+=scores_test/5.
sub.behavior_id = np.argmax(proba_t, axis=1)
sub.to_csv(SAVE_DIR+'submit.csv', index=False)

  0%|          | 0/7292 [00:00<?, ?it/s]  0%|          | 22/7292 [00:00<00:33, 217.81it/s]  1%|          | 61/7292 [00:00<00:28, 250.59it/s]  1%|          | 88/7292 [00:00<00:28, 255.38it/s]  2%|▏         | 128/7292 [00:00<00:25, 286.42it/s]  2%|▏         | 163/7292 [00:00<00:23, 301.36it/s]  3%|▎         | 204/7292 [00:00<00:21, 326.60it/s]  3%|▎         | 246/7292 [00:00<00:20, 349.07it/s]  4%|▍         | 288/7292 [00:00<00:19, 367.69it/s]  5%|▍         | 335/7292 [00:00<00:17, 391.43it/s]  5%|▌         | 375/7292 [00:01<00:17, 388.22it/s]  6%|▌         | 415/7292 [00:01<00:20, 328.08it/s]  6%|▋         | 459/7292 [00:01<00:19, 355.17it/s]  7%|▋         | 497/7292 [00:01<00:18, 360.21it/s]  7%|▋         | 539/7292 [00:01<00:17, 375.17it/s]  8%|▊         | 581/7292 [00:01<00:17, 387.37it/s]  9%|▊         | 622/7292 [00:01<00:16, 392.78it/s]  9%|▉         | 662/7292 [00:01<00:23, 280.62it/s] 10%|▉         | 695/7292 [00:02<00:22, 288.41it/s] 10%|▉         | 728/7292

Model(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): AdaptiveMaxPool2d(output_size=(1, 1))
    (14): Dropout(p=0.3, inplace=False)
  )
  (classier):

RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cpu