In [1]:
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
import zipfile

In [2]:
# Open the homework archive and list the files it contains.
# NOTE(review): the ZipFile handle is kept open because the next cell calls
# zipf.extractall(); it is never closed explicitly in this notebook.
zipf = zipfile.ZipFile("../Dataset/ml2022spring-hw1.zip")
namelist = zipf.namelist()
print(namelist)

['covid.test.csv', 'covid.train.csv']


In [3]:
# Unpack every member of the archive into the hw1 data directory.
zipf.extractall('../Dataset/hw1')

In [4]:
# Locations of the extracted training and test CSV files.
train_data_path = "../Dataset/hw1/covid.train.csv"
test_data_path = "../Dataset/hw1/covid.test.csv"

In [5]:
# Load both CSV files into DataFrames.
train_csv = pd.read_csv(train_data_path)
test_csv = pd.read_csv(test_data_path)

In [6]:
# Preview the first rows of the raw training data.
train_csv.head()

Unnamed: 0,id,AL,AK,AZ,AR,CA,CO,CT,FL,GA,...,work_outside_home.4,shop.4,restaurant.4,spent_time.4,large_event.4,public_transit.4,anxious.4,depressed.4,worried_finances.4,tested_positive.4
0,0,0,0,0,0,0,0,0,1,0,...,31.113209,67.394551,36.674291,40.743132,17.842221,4.093712,10.440071,8.627117,37.329512,7.456154
1,1,0,0,0,0,0,1,0,0,0,...,33.920257,64.39838,34.612238,44.035688,17.808103,4.924935,10.172662,9.954333,32.508881,8.010957
2,2,0,0,0,0,0,0,0,0,0,...,31.604604,62.101064,26.521875,36.746453,13.903667,7.313833,10.388712,7.956139,36.745588,2.906977
3,3,0,0,0,0,0,0,0,0,0,...,35.115738,67.93552,38.022492,48.434809,27.134876,3.101904,10.498683,8.231522,38.680162,12.575816
4,4,0,0,0,0,0,0,0,0,0,...,35.129714,69.934592,38.242368,49.095933,22.683709,4.59462,9.878927,9.46929,28.344123,21.428589


共118列：
+ 0：id
+ 1~37: State
+ 38~41: COVID-like illness  (5天)
+ 42~49: Behavior Indicators  (5天)
+ 50~52: Mental Health Indicators  (5天)
+ 53: Tested Positive Cases  (5天)

$$1 + 37 + 4\times 5 + 8\times 5 + 3\times 5 + 1\times 5 = 118$$

## Dataset
+ 统计信息
+ 缺失值
+ 特征相关性
+ 特征规约

In [7]:
# Drop the first column ('id') of the training data; keep everything else.
coulmns = train_csv.columns
train_dataset = train_csv[coulmns[1:]]

# Drop the first column of the test data in the same way.
coulmns = test_csv.columns
test_dataset = test_csv[coulmns[1:]]

In [8]:
# Count the NaN values in every column of the training data.
nacount = train_dataset.isna().sum()
# Print the total number of NaN entries over all columns ...
print(f"Nan元素总数：{nacount.sum()}")

# ... followed by the per-column counts.
print(nacount)

Nan元素总数：0
AL                    0
AK                    0
AZ                    0
AR                    0
CA                    0
                     ..
public_transit.4      0
anxious.4             0
depressed.4           0
worried_finances.4    0
tested_positive.4     0
Length: 117, dtype: int64


In [9]:
import csv

def save_pred(preds, save_path):
    """Write predictions to ``save_path`` as a two-column CSV file.

    The file starts with the header row ``id,tested_positive`` and then holds
    one row per prediction, where ``id`` is the zero-based position of the
    prediction in ``preds``.

    Parameters
    ----------
    preds : iterable
        Predicted values, in submission order.
    save_path : str or path-like
        Destination file; an existing file is overwritten.
    """
    # The csv module expects files opened with newline=''; without it the
    # writer's own '\r\n' terminator is translated again on Windows and every
    # data row is followed by an empty line.
    with open(save_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'tested_positive'])
        writer.writerows(enumerate(preds))

### Baseline

**All original features, linear regression**

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

def cross_val(model, x, y):
    """Print the mean RMSE of ``model`` over a 10-fold cross-validation.

    ``cross_val_score`` reports the negated MSE of every fold; each value is
    turned into an RMSE and the average over the folds is printed.
    """
    neg_mse_per_fold = cross_val_score(
        model,
        x,
        y,
        scoring='neg_mean_squared_error',
        cv=10,
    )
    rmse_per_fold = np.sqrt(-neg_mse_per_fold)
    print(np.mean(rmse_per_fold))

In [11]:
lin_reg = LinearRegression()
# Use all original features: every column but the last as input, the last
# column as the regression target.
cross_val(lin_reg, train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1])

1.0519071309202654


In [12]:
# Fit on the whole training set, predict the test set and save the result.

lin_reg = LinearRegression()
lin_reg.fit(train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1])
preds = lin_reg.predict(test_dataset)

save_pred(preds, './plain_line_reg.txt')

**All original features, decision tree**

In [13]:
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor()

# Use all original features (last column is the target).
cross_val(tree_reg, train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1])

1.630425929431865


In [14]:
# Fit a decision tree on the whole training set, predict the test set and
# save the result.

tree_reg = DecisionTreeRegressor()
tree_reg.fit(train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1])
preds = tree_reg.predict(test_dataset)

save_pred(preds, './plain_tree_reg.txt')

#### Feature Selection

In [15]:
# Pairwise correlation matrix of all columns.
corr_matrix = train_dataset.corr()

# Correlation of every column with tested_positive.4, sorted in descending order.
positive4_coor = corr_matrix['tested_positive.4'].sort_values(ascending=False)
# Only correlations above +0.5 pass the mask (strong negative correlations do
# not); the count printed below includes tested_positive.4 itself.
mask = positive4_coor > 0.5
print(f'相关性大于0.5的特征数：{sum(mask)}')
# print(positive4_coor[mask])

相关性大于0.5的特征数：35


**Selected Original features, linear regression**

In [16]:
lin_reg = LinearRegression()
# Use only the original features that are strongly correlated with the target.
selcted_columns = list(positive4_coor.index[mask])
# The first selected column is used as the target, the remaining ones as inputs.
cross_val(lin_reg, train_dataset[selcted_columns[1:]], train_dataset[selcted_columns[0]])

1.049216997777881


In [17]:
# Fit on the selected features, predict the test set and save the result.

lin_reg = LinearRegression()
lin_reg.fit(train_dataset[selcted_columns[1:]], train_dataset[selcted_columns[0]])
preds = lin_reg.predict(test_dataset[selcted_columns[1:]])

save_pred(preds, './plain_line_reg_with_feature_selction.txt')

**Selected Original features, Decision Tree**

In [18]:
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor()
# Use only the original features that are strongly correlated with the target.
selcted_columns = list(positive4_coor.index[mask])
cross_val(tree_reg, train_dataset[selcted_columns[1:]], train_dataset[selcted_columns[0]])

1.5786687146977054


In [19]:
# Fit a decision tree on the selected features, predict the test set and save
# the result.

# This cell produces the *tree* submission, so the estimator has to be a
# DecisionTreeRegressor (a LinearRegression here would only duplicate the
# linear-regression submission under a misleading file name).
tree_reg = DecisionTreeRegressor()
tree_reg.fit(train_dataset[selcted_columns[1:]], train_dataset[selcted_columns[0]])
preds = tree_reg.predict(test_dataset[selcted_columns[1:]])

save_pred(preds, './plain_tree_reg_with_feature_selction.txt')

**Feature Scaling**

In [20]:
from sklearn.base import BaseEstimator, TransformerMixin

In [21]:
class NormalScaler(BaseEstimator, TransformerMixin):
    """Z-score standardisation of DataFrame columns.

    A column is scaled when its name does not contain ``skip`` and its
    maximum is greater than 1; all other columns are passed through unchanged.

    Parameters
    ----------
    skip : str or None, default=None
        Substring of the column names that must not be scaled. ``None``
        means that no column is excluded by name.

    Notes
    -----
    ``fit`` stores the mean and standard deviation of every scaled column in
    ``stats_`` so that other data can be transformed with the same
    statistics. If ``transform`` is called on an instance that was never
    fitted, it falls back to the statistics of the frame it receives.
    """

    def __init__(self, skip=None):
        self.skip = skip

    def _column_stats(self, x):
        """Return ``{column: (mean, std)}`` for the columns of ``x`` to scale."""
        stats = {}
        for col in x.columns:
            if self.skip is not None and self.skip in str(col):
                continue
            if x[col].max() > 1:
                std = x[col].std()
                # A constant or single-row column has no usable spread;
                # divide by 1 instead of producing NaN/inf values.
                stats[col] = (x[col].mean(), std if std > 0 else 1.0)
        return stats

    def fit(self, x, y=None):
        """Learn the per-column mean and standard deviation from ``x``."""
        self.stats_ = self._column_stats(x)
        return self

    def transform(self, x):
        """Return a standardised copy of ``x``; ``x`` itself is not modified."""
        stats = getattr(self, 'stats_', None)
        if stats is None:
            # Never fitted: scale the frame with its own statistics.
            stats = self._column_stats(x)

        scaled = x.copy()
        for col, (mean, std) in stats.items():
            if col in scaled.columns:
                scaled[col] = (scaled[col] - mean) / std
        return scaled


class MaxminScaler(BaseEstimator, TransformerMixin):
    """Min-max scaling of DataFrame columns.

    A column is scaled when its name does not contain ``skip`` and its
    maximum is greater than 1; all other columns are passed through unchanged.

    Parameters
    ----------
    skip : str or None, default=None
        Substring of the column names that must not be scaled. ``None``
        means that no column is excluded by name.

    Notes
    -----
    ``fit`` stores the minimum and the value range of every scaled column in
    ``stats_`` so that other data can be transformed with the same
    statistics. If ``transform`` is called on an instance that was never
    fitted, it falls back to the statistics of the frame it receives.
    """

    def __init__(self, skip=None):
        self.skip = skip

    def _column_stats(self, x):
        """Return ``{column: (min, range)}`` for the columns of ``x`` to scale."""
        stats = {}
        for col in x.columns:
            if self.skip is not None and self.skip in str(col):
                continue
            max_v = x[col].max()
            if max_v > 1:
                min_v = x[col].min()
                span = max_v - min_v
                # A constant column has a zero range; divide by 1 instead of
                # producing NaN/inf values.
                stats[col] = (min_v, span if span > 0 else 1)
        return stats

    def fit(self, x, y=None):
        """Learn the per-column minimum and value range from ``x``."""
        self.stats_ = self._column_stats(x)
        return self

    def transform(self, x):
        """Return a min-max scaled copy of ``x``; ``x`` itself is not modified."""
        stats = getattr(self, 'stats_', None)
        if stats is None:
            # Never fitted: scale the frame with its own statistics.
            stats = self._column_stats(x)

        scaled = x.copy()
        for col, (min_v, span) in stats.items():
            if col in scaled.columns:
                scaled[col] = (scaled[col] - min_v) / span
        return scaled

In [22]:
from sklearn.pipeline import Pipeline

# scikit-learn expects a pipeline to be fitted before transform() is called,
# so each pipeline is fitted on the training data first and then applied to
# both the training and the test data.
normal_scaled_pipeline = Pipeline([('std_scaler', NormalScaler('tested_positive'))])
normal_scaled_pipeline.fit(train_dataset)
train_dataset_normal_scaled = normal_scaled_pipeline.transform(train_dataset.copy())
test_dataset_normal_scaled = normal_scaled_pipeline.transform(test_dataset.copy())

maxmin_scaled_pipeline = Pipeline([('maxmin_scaler', MaxminScaler('tested_positive'))])
maxmin_scaled_pipeline.fit(train_dataset)
train_dataset_maxmin_scaled = maxmin_scaled_pipeline.transform(train_dataset.copy())
test_dataset_maxmin_scaled = maxmin_scaled_pipeline.transform(test_dataset.copy())

In [23]:
# Preview the training data after NormalScaler was applied.
train_dataset_normal_scaled.head()

Unnamed: 0,AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,...,work_outside_home.4,shop.4,restaurant.4,spent_time.4,large_event.4,public_transit.4,anxious.4,depressed.4,worried_finances.4,tested_positive.4
0,0,0,0,0,0,0,0,1,0,0,...,-0.497704,0.375942,0.45434,-1.121859,-0.626113,-0.281985,-0.671085,-0.854542,0.691637,7.456154
1,0,0,0,0,0,1,0,0,0,0,...,0.235301,-0.845236,-0.052472,-0.221926,-0.635917,0.108189,-0.78887,-0.024633,-0.642577,8.010957
2,0,0,0,0,0,0,0,0,0,0,...,-0.369386,-1.781575,-2.040928,-2.214246,-1.75786,1.229533,-0.693707,-1.274106,0.530023,2.906977
3,0,0,0,0,0,0,0,0,0,0,...,0.547478,0.596431,0.785702,0.980457,2.04414,-0.747538,-0.645268,-1.101909,1.065458,12.575816
4,0,0,0,0,0,0,0,0,0,1,...,0.551127,1.411212,0.839743,1.161158,0.765093,-0.04686,-0.91825,-0.327931,-1.795264,21.428589


In [24]:
# Preview the training data after MaxminScaler was applied.
train_dataset_maxmin_scaled.head()

Unnamed: 0,AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,...,work_outside_home.4,shop.4,restaurant.4,spent_time.4,large_event.4,public_transit.4,anxious.4,depressed.4,worried_finances.4,tested_positive.4
0,0,0,0,0,0,0,0,1,0,0,...,0.327454,0.62273,0.65406,0.297424,0.373811,0.184499,0.252049,0.257307,0.608567,7.456154
1,0,0,0,0,0,1,0,0,0,0,...,0.467725,0.405868,0.560692,0.475805,0.372223,0.242983,0.230257,0.397139,0.37825,8.010957
2,0,0,0,0,0,0,0,0,0,0,...,0.35201,0.239588,0.194364,0.080897,0.190477,0.411063,0.247863,0.186614,0.580669,2.906977
3,0,0,0,0,0,0,0,0,0,0,...,0.527464,0.661885,0.715106,0.714135,0.806369,0.114716,0.256825,0.215628,0.673097,12.575816
4,0,0,0,0,0,0,0,0,0,1,...,0.528163,0.806577,0.725062,0.749953,0.599174,0.219742,0.206319,0.346036,0.179269,21.428589


In [25]:
# Cross-validate linear regression on the NormalScaler output ...
lin_reg = LinearRegression()
cross_val(lin_reg, train_dataset_normal_scaled.iloc[:, :-1], train_dataset_normal_scaled.iloc[:, -1])

# ... and on the MaxminScaler output (last column is the target in both cases).
lin_reg = LinearRegression()
cross_val(lin_reg, train_dataset_maxmin_scaled.iloc[:, :-1], train_dataset_maxmin_scaled.iloc[:, -1])

1.0519556275693465
1.051955627569344


In [26]:
# Fit linear regression on the NormalScaler output, predict the scaled test
# set and save the result.

lin_reg = LinearRegression()
lin_reg.fit(train_dataset_normal_scaled.iloc[:, :-1], train_dataset_normal_scaled.iloc[:, -1])
preds = lin_reg.predict(test_dataset_normal_scaled)

save_pred(preds, './line_reg_with_normal_scaled_features.txt')

In [27]:
# Fit linear regression on the MaxminScaler output, predict the scaled test
# set and save the result.

lin_reg = LinearRegression()
lin_reg.fit(train_dataset_maxmin_scaled.iloc[:, :-1], train_dataset_maxmin_scaled.iloc[:, -1])
preds = lin_reg.predict(test_dataset_maxmin_scaled)

# The file name states what was actually trained: a linear regression on
# min-max scaled features (not a tree on normal-scaled features).
save_pred(preds, './line_reg_with_maxmin_scaled_features.txt')

In [28]:
# Pairwise correlation matrix of all columns of train_dataset_normal_scaled.
corr_matrix = train_dataset_normal_scaled.corr()

# Correlation of every column with tested_positive.4, sorted in descending order.
positive4_coor = corr_matrix['tested_positive.4'].sort_values(ascending=False)

# Sweep 100 correlation thresholds between 0.1 and 0.9 and record the mean
# 10-fold RMSE of a linear regression for each of them.
scores = []
thrs = []
for thr in np.linspace(0.1, 0.9, num=100):
    mask = positive4_coor > thr
    # print(f'相关性大于{thr}的特征数：{sum(mask)}')
    thrs.append(thr)
    lin_reg = LinearRegression()
    # Use only the features whose correlation exceeds the current threshold;
    # the first selected column is the target, the rest are the inputs.
    selcted_columns = list(positive4_coor.index[mask])
    x, y = train_dataset_normal_scaled[selcted_columns[1:]], train_dataset_normal_scaled[selcted_columns[0]]
    score_list = cross_val_score(lin_reg, x, y, scoring='neg_mean_squared_error', cv=10)
    scores.append(np.mean(np.sqrt(-score_list)))

# Report the lowest RMSE together with its position and threshold.
print(f"minimal score: {min(scores)}, index {np.argmin(scores)}, thr: {thrs[np.argmin(scores)]}")

minimal score: 1.0468164063875016, index 65, thr: 0.6252525252525253


In [29]:
# Refit with the best threshold found by the sweep, predict the test set and
# save the result.

# Pairwise correlation matrix of all columns of train_dataset_normal_scaled.
corr_matrix = train_dataset_normal_scaled.corr()
# Correlation of every column with tested_positive.4, sorted in descending order.
positive4_coor = corr_matrix['tested_positive.4'].sort_values(ascending=False)

# `thrs` and `scores` come from the threshold sweep cell.
mask = positive4_coor > thrs[np.argmin(scores)]
# Use only the features whose correlation exceeds the best threshold.
selcted_columns = list(positive4_coor.index[mask])
x = train_dataset_normal_scaled[selcted_columns[1:]]
y = train_dataset_normal_scaled[selcted_columns[0]]

lin_reg = LinearRegression()
lin_reg.fit(x, y)
preds = lin_reg.predict(test_dataset_normal_scaled[selcted_columns[1:]])

save_pred(preds, './line_reg_with_selected_normal_scaled_feature.txt')

### Deep Learning

In [30]:
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import pandas as pd
import sklearn
import torch
from tqdm import tqdm
from pathlib import Path
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [31]:
def same_seed(seed):
    """Make runs repeatable: seed NumPy and PyTorch, pin cuDNN behaviour.

    The CUDA generators are seeded as well when a CUDA device is available.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

### Dataloader

In [32]:
def train_valid_split(dataset, valid_ratio, seed):
    """Randomly split ``dataset`` into a training and a validation part.

    ``int(valid_ratio * len(dataset))`` samples go to the validation part and
    the rest to the training part. The split is drawn from a generator seeded
    with ``seed``; both parts are returned as NumPy arrays.
    """
    n_total = len(dataset)
    n_valid = int(valid_ratio * n_total)
    rng = torch.Generator().manual_seed(seed)
    parts = random_split(dataset, [n_total - n_valid, n_valid], generator=rng)
    train_part, valid_part = parts
    return np.array(train_part), np.array(valid_part)


class COVID19Dataset(Dataset):
    """Dataset over NumPy features ``x`` and, optionally, NumPy targets ``y``.

    With targets, the samples are stored in a random order drawn from NumPy's
    global random state and indexing yields ``(features, target)`` pairs.
    Without targets, the original order is kept and indexing yields the
    features only.
    """

    def __init__(self, x, y=None):
        if y is None:
            self.y = None
            self.x = torch.from_numpy(x)
            return

        # The same permutation is applied to features and targets.
        order = np.random.permutation(len(x))
        self.y = torch.from_numpy(y[order])
        self.x = torch.from_numpy(x[order])

    def __len__(self):
        return len(self.x)

    def __getitem__(self, item):
        features = self.x[item]
        return features if self.y is None else (features, self.y[item])

## Model

In [33]:
class Model(torch.nn.Module):
    """Regressor with one hidden layer of 32 units.

    Forward pass: Linear(input_channel, 32) -> BatchNorm1d -> Dropout(0.2)
    -> LeakyReLU -> Linear(32, 1); the output has one value per sample.
    """

    def __init__(self, input_channel):
        super().__init__()
        nn = torch.nn
        # Attribute names are part of the saved state_dict keys.
        self.linear1 = nn.Linear(input_channel, 32)
        self.bn = nn.BatchNorm1d(32)
        self.dropout = nn.Dropout(p=0.2)
        self.act = nn.LeakyReLU()
        self.linear2 = nn.Linear(32, 1)

    def forward(self, x):
        hidden = self.bn(self.linear1(x))
        hidden = self.act(self.dropout(hidden))
        return self.linear2(hidden)

#### Training

**select features with correlation matrix**

In [34]:
from sklearn.model_selection import train_test_split

same_seed(77)
dataset = pd.read_csv(train_data_path)
dataset = dataset[dataset.columns[1:]]  # remove 'id' column
# Correlation of every column with tested_positive.4, sorted in descending order.
corr_matrix = dataset.corr()
target_coor = corr_matrix['tested_positive.4'].sort_values(ascending=False)
# Keep the columns whose correlation is above +0.5 (the printed count
# includes tested_positive.4 itself).
mask = target_coor > 0.5
print(f"selected features num: {np.sum(mask)}")
selected_feature_idx = list(target_coor.index[mask])

# Skip the first selected column when building the inputs; the target is the
# last column of the data frame.
x_dataset = dataset[selected_feature_idx[1:]]
y_dataset = dataset.iloc[:, -1]
# Hold out 20% of the rows for validation.
x_train, x_val, y_train, y_val = train_test_split(x_dataset, y_dataset, test_size=0.2, random_state=77)
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)


selected features num: 35
(2159, 34) (540, 34) (2159,) (540,)


**select features with sklearn**

In [35]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression

dataset = pd.read_csv(train_data_path)
dataset = dataset[dataset.columns[1:]]  # remove 'id' column
# Split inputs and target explicitly. Scoring must not see the target column
# as a candidate feature, and `y` must be defined in this cell instead of
# being picked up from whatever an earlier cell left behind.
features = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
x = (features - features.min()) / (features.max() - features.min())  # normalization for feature selection
# Only the per-feature scores are used below, so `k` does not influence the
# ranking that is printed and kept.
bestfeatures = SelectKBest(score_func=f_regression, k=5)
fit = bestfeatures.fit(x, y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(x.columns)
# concat two dataframes for better visualization
featureScores = pd.concat([dfcolumns, dfscores], axis=1)
featureScores.columns = ['Specs', 'Score']  # naming the dataframe columns
print(featureScores.nlargest(15, 'Score'))  # print 15 best features
bestSelectedFeature = featureScores.nlargest(15, 'Score')

                 Specs         Score
100  tested_positive.3  90072.434014
84   tested_positive.2  42336.373701
68   tested_positive.1  26889.703770
52     tested_positive  18870.558114
103     hh_cmnty_cli.4  11290.799197
87      hh_cmnty_cli.3  10849.626387
104   nohh_cmnty_cli.4  10420.334481
71      hh_cmnty_cli.2  10365.261059
88    nohh_cmnty_cli.3  10055.850241
55      hh_cmnty_cli.1   9859.626910
72    nohh_cmnty_cli.2   9636.425489
39        hh_cmnty_cli   9330.742363
56    nohh_cmnty_cli.1   9180.163057
40      nohh_cmnty_cli   8703.901285
102              ili.4   7857.108153


In [36]:
# Inputs: the columns listed in bestSelectedFeature['Specs']; target: the
# last column of the data frame.
x_dataset = dataset[bestSelectedFeature['Specs']]
y_dataset = dataset.iloc[:, -1]

# Hold out 20% of the rows for validation. These assignments replace the
# x_train/x_val/y_train/y_val created by the correlation-based cell.
x_train, x_val, y_train, y_val = train_test_split(x_dataset, y_dataset, test_size=0.2, random_state=77)
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)

(2159, 15) (540, 15) (2159,) (540,)


## hyper-parameters

In [37]:
batch_size = 32  # mini-batch size of the training DataLoader
tot_epoch = 500  # the training loop iterates over range(tot_epoch)
patient = 50  # early-stopping patience: compared against the count of epochs without a better validation loss
lr = 1e-5  # learning rate passed to the optimizer
weight_decay = 1e-4  # weight decay passed to the optimizer


In [38]:
# dataloader, model, loss function, optimizer

# Training batches are reshuffled every epoch; the last, smaller batch is kept.
train_loader = DataLoader(COVID19Dataset(x_train.values, y_train.values), 
                          batch_size=batch_size, 
                          shuffle=True, 
                          num_workers=0, 
                          drop_last=False)

# Validation uses its own fixed batch size and no shuffling.
val_loader = DataLoader(COVID19Dataset(x_val.values, y_val.values), 
                        batch_size=8, 
                        shuffle=False)

# Run on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The input width equals the number of selected feature columns.
model = Model(input_channel=x_dataset.shape[1]).to(device=device)
loss_fcn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay, nesterov=True)
# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

In [39]:
# training
#
# Every epoch makes one pass over train_loader, then evaluates on val_loader.
# The weights with the lowest validation loss so far are written to
# './best_val_model.pth'. Training stops as soon as the validation loss has
# failed to improve for more than `patient` consecutive epochs.

early_stop_cnt = 0
best_val_loss = float('inf')
for epoch in range(tot_epoch):
    with tqdm(train_loader, total=len(train_loader), ncols=100) as tbar:
        tbar.set_description(f"{epoch+1}/{tot_epoch}")

        # ---- one pass over the training data ----
        model.train()  # validation below switches to eval mode, so reset it here
        for batch_x, batch_y in train_loader:
            preds = model(batch_x.float().to(device)).squeeze(dim=1)
            # MSELoss takes (input, target)
            loss = loss_fcn(preds, batch_y.float().to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            tbar.update(1)

        # ---- validation after the last training batch ----
        model.eval()
        tot_mse = []
        with torch.no_grad():  # no gradients are needed for evaluation
            for val_batch_x, val_batch_y in val_loader:
                preds = model(val_batch_x.float().to(device)).squeeze(dim=1)
                tot_mse.append(loss_fcn(preds, val_batch_y.float().to(device)).item())
        cur_lr = optimizer.state_dict()['param_groups'][0]['lr']
        val_loss = np.mean(tot_mse)  # mean of the per-batch validation losses
        # `loss` is the loss of the last training batch of this epoch.
        tbar.set_postfix_str(f'train loss {loss.item():.3f} ; val loss {val_loss:.3f}; lr {cur_lr:.2e}')

        if best_val_loss > val_loss:
            early_stop_cnt = 0
            best_val_loss = val_loss
            torch.save(model.state_dict(), './best_val_model.pth')
            print(f'saved model ValAcc: {val_loss:.3f}')
        else:
            early_stop_cnt += 1

    # Past this point the original loop never saved another checkpoint, so the
    # remaining epochs only cost time; stop instead.
    if early_stop_cnt > patient:
        print(f'early stopping at epoch {epoch+1}: best val loss {best_val_loss:.3f}')
        break

1/500: 100%|████| 68/68 [00:00<00:00, 347.09it/s, train loss 108.993 ; val loss 92.497; lr 1.00e-05]


saved model ValAcc: 92.497


2/500: 100%|█████| 68/68 [00:00<00:00, 448.43it/s, train loss 57.204 ; val loss 70.328; lr 1.00e-05]


saved model ValAcc: 70.328


3/500: 100%|█████| 68/68 [00:00<00:00, 424.51it/s, train loss 91.030 ; val loss 56.526; lr 1.00e-05]


saved model ValAcc: 56.526


4/500: 100%|█████| 68/68 [00:00<00:00, 333.42it/s, train loss 50.281 ; val loss 46.446; lr 1.00e-05]


saved model ValAcc: 46.446


5/500: 100%|█████| 68/68 [00:00<00:00, 480.70it/s, train loss 34.584 ; val loss 37.753; lr 1.00e-05]


saved model ValAcc: 37.753


6/500: 100%|█████| 68/68 [00:00<00:00, 464.70it/s, train loss 14.027 ; val loss 33.536; lr 1.00e-05]


saved model ValAcc: 33.536


7/500: 100%|█████| 68/68 [00:00<00:00, 482.53it/s, train loss 25.223 ; val loss 25.828; lr 1.00e-05]


saved model ValAcc: 25.828


8/500: 100%|█████| 68/68 [00:00<00:00, 461.53it/s, train loss 28.729 ; val loss 20.225; lr 1.00e-05]


saved model ValAcc: 20.225


9/500: 100%|█████| 68/68 [00:00<00:00, 485.01it/s, train loss 10.840 ; val loss 14.235; lr 1.00e-05]


saved model ValAcc: 14.235


10/500: 100%|████| 68/68 [00:00<00:00, 493.32it/s, train loss 27.393 ; val loss 14.858; lr 1.00e-05]
11/500: 100%|████| 68/68 [00:00<00:00, 466.97it/s, train loss 63.225 ; val loss 11.342; lr 1.00e-05]


saved model ValAcc: 11.342


12/500: 100%|█████| 68/68 [00:00<00:00, 471.50it/s, train loss 31.134 ; val loss 9.209; lr 1.00e-05]


saved model ValAcc: 9.209


13/500: 100%|█████| 68/68 [00:00<00:00, 478.14it/s, train loss 6.052 ; val loss 12.102; lr 1.00e-05]
14/500: 100%|█████| 68/68 [00:00<00:00, 507.96it/s, train loss 7.559 ; val loss 11.478; lr 1.00e-05]
15/500: 100%|██████| 68/68 [00:00<00:00, 499.45it/s, train loss 6.407 ; val loss 7.172; lr 1.00e-05]


saved model ValAcc: 7.172


16/500: 100%|█████| 68/68 [00:00<00:00, 500.27it/s, train loss 10.473 ; val loss 7.504; lr 1.00e-05]
17/500: 100%|█████| 68/68 [00:00<00:00, 535.44it/s, train loss 6.436 ; val loss 10.386; lr 1.00e-05]
18/500: 100%|█████| 68/68 [00:00<00:00, 516.52it/s, train loss 11.583 ; val loss 6.907; lr 1.00e-05]


saved model ValAcc: 6.907


19/500: 100%|█████| 68/68 [00:00<00:00, 498.93it/s, train loss 16.100 ; val loss 5.897; lr 1.00e-05]


saved model ValAcc: 5.897


20/500: 100%|██████| 68/68 [00:00<00:00, 520.81it/s, train loss 6.437 ; val loss 7.019; lr 1.00e-05]
21/500: 100%|█████| 68/68 [00:00<00:00, 521.09it/s, train loss 13.062 ; val loss 5.837; lr 1.00e-05]


saved model ValAcc: 5.837


22/500: 100%|█████| 68/68 [00:00<00:00, 476.72it/s, train loss 31.860 ; val loss 9.726; lr 1.00e-05]
23/500: 100%|██████| 68/68 [00:00<00:00, 465.20it/s, train loss 5.611 ; val loss 7.999; lr 1.00e-05]
24/500: 100%|█████| 68/68 [00:00<00:00, 512.04it/s, train loss 12.498 ; val loss 6.893; lr 1.00e-05]
25/500: 100%|██████| 68/68 [00:00<00:00, 528.65it/s, train loss 7.562 ; val loss 5.839; lr 1.00e-05]
26/500: 100%|██████| 68/68 [00:00<00:00, 497.33it/s, train loss 9.370 ; val loss 4.884; lr 1.00e-05]


saved model ValAcc: 4.884


27/500: 100%|█████| 68/68 [00:00<00:00, 502.80it/s, train loss 11.866 ; val loss 5.747; lr 1.00e-05]
28/500: 100%|██████| 68/68 [00:00<00:00, 524.45it/s, train loss 4.361 ; val loss 4.711; lr 1.00e-05]


saved model ValAcc: 4.711


29/500: 100%|█████| 68/68 [00:00<00:00, 508.40it/s, train loss 18.002 ; val loss 4.673; lr 1.00e-05]


saved model ValAcc: 4.673


30/500: 100%|██████| 68/68 [00:00<00:00, 536.78it/s, train loss 7.642 ; val loss 4.950; lr 1.00e-05]
31/500: 100%|██████| 68/68 [00:00<00:00, 523.65it/s, train loss 6.650 ; val loss 5.352; lr 1.00e-05]
32/500: 100%|██████| 68/68 [00:00<00:00, 541.93it/s, train loss 8.448 ; val loss 4.961; lr 1.00e-05]
33/500: 100%|█████| 68/68 [00:00<00:00, 510.20it/s, train loss 10.888 ; val loss 5.309; lr 1.00e-05]
34/500: 100%|█████| 68/68 [00:00<00:00, 465.68it/s, train loss 18.610 ; val loss 4.545; lr 1.00e-05]


saved model ValAcc: 4.545


35/500: 100%|█████| 68/68 [00:00<00:00, 503.26it/s, train loss 10.804 ; val loss 5.479; lr 1.00e-05]
36/500: 100%|██████| 68/68 [00:00<00:00, 521.33it/s, train loss 8.570 ; val loss 4.037; lr 1.00e-05]


saved model ValAcc: 4.037


37/500: 100%|█████| 68/68 [00:00<00:00, 516.05it/s, train loss 19.448 ; val loss 6.981; lr 1.00e-05]
38/500: 100%|██████| 68/68 [00:00<00:00, 503.38it/s, train loss 2.640 ; val loss 4.265; lr 1.00e-05]
39/500: 100%|██████| 68/68 [00:00<00:00, 526.94it/s, train loss 9.956 ; val loss 3.969; lr 1.00e-05]


saved model ValAcc: 3.969


40/500: 100%|██████| 68/68 [00:00<00:00, 263.17it/s, train loss 6.242 ; val loss 4.069; lr 1.00e-05]
41/500: 100%|█████| 68/68 [00:00<00:00, 507.08it/s, train loss 11.792 ; val loss 6.945; lr 1.00e-05]
42/500: 100%|██████| 68/68 [00:00<00:00, 509.20it/s, train loss 7.026 ; val loss 5.241; lr 1.00e-05]
43/500: 100%|██████| 68/68 [00:00<00:00, 544.20it/s, train loss 5.052 ; val loss 4.424; lr 1.00e-05]
44/500: 100%|█████| 68/68 [00:00<00:00, 430.34it/s, train loss 19.346 ; val loss 4.144; lr 1.00e-05]
45/500: 100%|█████| 68/68 [00:00<00:00, 427.41it/s, train loss 11.073 ; val loss 5.442; lr 1.00e-05]
46/500: 100%|█████| 68/68 [00:00<00:00, 442.80it/s, train loss 14.454 ; val loss 4.950; lr 1.00e-05]
47/500: 100%|█████| 68/68 [00:00<00:00, 505.06it/s, train loss 10.665 ; val loss 4.937; lr 1.00e-05]
48/500: 100%|██████| 68/68 [00:00<00:00, 514.33it/s, train loss 5.121 ; val loss 4.455; lr 1.00e-05]
49/500: 100%|█████| 68/68 [00:00<00:00, 302.73it/s, train loss 15.115 ; val loss 4.062; lr 

saved model ValAcc: 3.799


51/500: 100%|██████| 68/68 [00:00<00:00, 541.05it/s, train loss 3.924 ; val loss 4.025; lr 1.00e-05]
52/500: 100%|█████| 68/68 [00:00<00:00, 542.25it/s, train loss 19.581 ; val loss 4.233; lr 1.00e-05]
53/500: 100%|██████| 68/68 [00:00<00:00, 551.58it/s, train loss 9.181 ; val loss 4.250; lr 1.00e-05]
54/500: 100%|█████| 68/68 [00:00<00:00, 389.76it/s, train loss 16.541 ; val loss 5.437; lr 1.00e-05]
55/500: 100%|█████| 68/68 [00:00<00:00, 485.03it/s, train loss 17.507 ; val loss 3.798; lr 1.00e-05]


saved model ValAcc: 3.798


56/500: 100%|██████| 68/68 [00:00<00:00, 415.59it/s, train loss 9.041 ; val loss 4.411; lr 1.00e-05]
57/500: 100%|██████| 68/68 [00:00<00:00, 470.22it/s, train loss 4.049 ; val loss 4.097; lr 1.00e-05]
58/500: 100%|██████| 68/68 [00:00<00:00, 490.10it/s, train loss 8.278 ; val loss 3.912; lr 1.00e-05]
59/500: 100%|██████| 68/68 [00:00<00:00, 477.49it/s, train loss 9.917 ; val loss 3.548; lr 1.00e-05]


saved model ValAcc: 3.548


60/500: 100%|██████| 68/68 [00:00<00:00, 306.05it/s, train loss 8.233 ; val loss 4.403; lr 1.00e-05]
61/500: 100%|██████| 68/68 [00:00<00:00, 515.43it/s, train loss 4.108 ; val loss 3.853; lr 1.00e-05]
62/500: 100%|██████| 68/68 [00:00<00:00, 509.24it/s, train loss 3.874 ; val loss 4.116; lr 1.00e-05]
63/500: 100%|██████| 68/68 [00:00<00:00, 523.06it/s, train loss 5.555 ; val loss 4.831; lr 1.00e-05]
64/500: 100%|██████| 68/68 [00:00<00:00, 421.18it/s, train loss 7.792 ; val loss 3.494; lr 1.00e-05]


saved model ValAcc: 3.494


65/500: 100%|██████| 68/68 [00:00<00:00, 502.08it/s, train loss 8.396 ; val loss 6.040; lr 1.00e-05]
66/500: 100%|██████| 68/68 [00:00<00:00, 498.52it/s, train loss 4.725 ; val loss 4.696; lr 1.00e-05]
67/500: 100%|██████| 68/68 [00:00<00:00, 417.34it/s, train loss 1.799 ; val loss 3.833; lr 1.00e-05]
68/500: 100%|██████| 68/68 [00:00<00:00, 417.58it/s, train loss 9.739 ; val loss 3.418; lr 1.00e-05]


saved model ValAcc: 3.418


69/500: 100%|██████| 68/68 [00:00<00:00, 472.32it/s, train loss 4.040 ; val loss 3.220; lr 1.00e-05]


saved model ValAcc: 3.220


70/500: 100%|█████| 68/68 [00:00<00:00, 487.95it/s, train loss 30.347 ; val loss 3.391; lr 1.00e-05]
71/500: 100%|██████| 68/68 [00:00<00:00, 555.68it/s, train loss 6.222 ; val loss 3.629; lr 1.00e-05]
72/500: 100%|█████| 68/68 [00:00<00:00, 539.45it/s, train loss 22.314 ; val loss 4.150; lr 1.00e-05]
73/500: 100%|█████| 68/68 [00:00<00:00, 529.22it/s, train loss 13.199 ; val loss 4.057; lr 1.00e-05]
74/500: 100%|█████| 68/68 [00:00<00:00, 498.57it/s, train loss 35.616 ; val loss 3.333; lr 1.00e-05]
75/500: 100%|█████| 68/68 [00:00<00:00, 510.86it/s, train loss 10.472 ; val loss 3.166; lr 1.00e-05]


saved model ValAcc: 3.166


76/500: 100%|██████| 68/68 [00:00<00:00, 502.87it/s, train loss 7.538 ; val loss 3.328; lr 1.00e-05]
77/500: 100%|█████| 68/68 [00:00<00:00, 507.78it/s, train loss 11.892 ; val loss 3.276; lr 1.00e-05]
78/500: 100%|██████| 68/68 [00:00<00:00, 506.09it/s, train loss 9.284 ; val loss 4.038; lr 1.00e-05]
79/500: 100%|██████| 68/68 [00:00<00:00, 518.48it/s, train loss 7.200 ; val loss 4.535; lr 1.00e-05]
80/500: 100%|██████| 68/68 [00:00<00:00, 513.96it/s, train loss 4.821 ; val loss 3.710; lr 1.00e-05]
81/500: 100%|██████| 68/68 [00:00<00:00, 506.33it/s, train loss 2.761 ; val loss 3.393; lr 1.00e-05]
82/500: 100%|██████| 68/68 [00:00<00:00, 518.84it/s, train loss 3.711 ; val loss 3.541; lr 1.00e-05]
83/500: 100%|██████| 68/68 [00:00<00:00, 551.34it/s, train loss 3.719 ; val loss 4.965; lr 1.00e-05]
84/500: 100%|██████| 68/68 [00:00<00:00, 490.40it/s, train loss 8.498 ; val loss 3.657; lr 1.00e-05]
85/500: 100%|██████| 68/68 [00:00<00:00, 488.84it/s, train loss 8.218 ; val loss 3.065; lr 

saved model ValAcc: 3.065


86/500: 100%|█████| 68/68 [00:00<00:00, 493.70it/s, train loss 13.345 ; val loss 3.157; lr 1.00e-05]
87/500: 100%|██████| 68/68 [00:00<00:00, 501.49it/s, train loss 4.844 ; val loss 3.062; lr 1.00e-05]


saved model ValAcc: 3.062


88/500: 100%|█████| 68/68 [00:00<00:00, 485.56it/s, train loss 18.881 ; val loss 4.490; lr 1.00e-05]
89/500: 100%|██████| 68/68 [00:00<00:00, 492.16it/s, train loss 5.524 ; val loss 3.370; lr 1.00e-05]
90/500: 100%|██████| 68/68 [00:00<00:00, 507.64it/s, train loss 5.987 ; val loss 3.428; lr 1.00e-05]
91/500: 100%|██████| 68/68 [00:00<00:00, 503.94it/s, train loss 8.044 ; val loss 3.443; lr 1.00e-05]
92/500: 100%|█████| 68/68 [00:00<00:00, 516.30it/s, train loss 23.810 ; val loss 3.843; lr 1.00e-05]
93/500: 100%|██████| 68/68 [00:00<00:00, 564.01it/s, train loss 2.835 ; val loss 3.331; lr 1.00e-05]
94/500: 100%|█████| 68/68 [00:00<00:00, 529.68it/s, train loss 10.511 ; val loss 3.105; lr 1.00e-05]
95/500: 100%|██████| 68/68 [00:00<00:00, 500.25it/s, train loss 5.506 ; val loss 4.062; lr 1.00e-05]
96/500: 100%|██████| 68/68 [00:00<00:00, 497.36it/s, train loss 2.882 ; val loss 3.688; lr 1.00e-05]
97/500: 100%|██████| 68/68 [00:00<00:00, 500.25it/s, train loss 2.676 ; val loss 3.040; lr 

saved model ValAcc: 3.040


98/500: 100%|██████| 68/68 [00:00<00:00, 501.85it/s, train loss 7.144 ; val loss 3.255; lr 1.00e-05]
99/500: 100%|██████| 68/68 [00:00<00:00, 460.94it/s, train loss 4.325 ; val loss 3.147; lr 1.00e-05]
100/500: 100%|█████| 68/68 [00:00<00:00, 527.32it/s, train loss 3.884 ; val loss 2.900; lr 1.00e-05]


saved model ValAcc: 2.900


101/500: 100%|████| 68/68 [00:00<00:00, 488.77it/s, train loss 12.455 ; val loss 3.522; lr 1.00e-05]
102/500: 100%|████| 68/68 [00:00<00:00, 517.29it/s, train loss 20.567 ; val loss 2.853; lr 1.00e-05]


saved model ValAcc: 2.853


103/500: 100%|████| 68/68 [00:00<00:00, 485.40it/s, train loss 17.939 ; val loss 4.828; lr 1.00e-05]
104/500: 100%|█████| 68/68 [00:00<00:00, 520.67it/s, train loss 4.871 ; val loss 6.529; lr 1.00e-05]
105/500: 100%|████| 68/68 [00:00<00:00, 528.06it/s, train loss 12.909 ; val loss 3.108; lr 1.00e-05]
106/500: 100%|█████| 68/68 [00:00<00:00, 503.24it/s, train loss 8.500 ; val loss 3.019; lr 1.00e-05]
107/500: 100%|████| 68/68 [00:00<00:00, 503.81it/s, train loss 11.024 ; val loss 2.746; lr 1.00e-05]


saved model ValAcc: 2.746


108/500: 100%|█████| 68/68 [00:00<00:00, 515.71it/s, train loss 3.502 ; val loss 2.856; lr 1.00e-05]
109/500: 100%|█████| 68/68 [00:00<00:00, 506.02it/s, train loss 3.998 ; val loss 6.972; lr 1.00e-05]
110/500: 100%|█████| 68/68 [00:00<00:00, 557.10it/s, train loss 7.475 ; val loss 2.700; lr 1.00e-05]


saved model ValAcc: 2.700


111/500: 100%|█████| 68/68 [00:00<00:00, 535.27it/s, train loss 8.228 ; val loss 3.187; lr 1.00e-05]
112/500: 100%|█████| 68/68 [00:00<00:00, 555.73it/s, train loss 6.087 ; val loss 2.651; lr 1.00e-05]


saved model ValAcc: 2.651


113/500: 100%|█████| 68/68 [00:00<00:00, 534.87it/s, train loss 3.541 ; val loss 3.465; lr 1.00e-05]
114/500: 100%|████| 68/68 [00:00<00:00, 540.89it/s, train loss 12.714 ; val loss 2.724; lr 1.00e-05]
115/500: 100%|████| 68/68 [00:00<00:00, 548.21it/s, train loss 15.374 ; val loss 2.570; lr 1.00e-05]


saved model ValAcc: 2.570


116/500: 100%|█████| 68/68 [00:00<00:00, 535.98it/s, train loss 9.808 ; val loss 2.804; lr 1.00e-05]
117/500: 100%|█████| 68/68 [00:00<00:00, 519.36it/s, train loss 4.663 ; val loss 3.923; lr 1.00e-05]
118/500: 100%|█████| 68/68 [00:00<00:00, 531.97it/s, train loss 7.958 ; val loss 3.109; lr 1.00e-05]
119/500: 100%|████| 68/68 [00:00<00:00, 563.15it/s, train loss 11.189 ; val loss 2.846; lr 1.00e-05]
120/500: 100%|█████| 68/68 [00:00<00:00, 455.91it/s, train loss 3.459 ; val loss 3.945; lr 1.00e-05]
121/500: 100%|████| 68/68 [00:00<00:00, 535.62it/s, train loss 12.645 ; val loss 2.896; lr 1.00e-05]
122/500: 100%|█████| 68/68 [00:00<00:00, 550.60it/s, train loss 5.917 ; val loss 2.726; lr 1.00e-05]
123/500: 100%|█████| 68/68 [00:00<00:00, 526.66it/s, train loss 7.766 ; val loss 2.766; lr 1.00e-05]
124/500: 100%|█████| 68/68 [00:00<00:00, 555.04it/s, train loss 4.543 ; val loss 2.927; lr 1.00e-05]
125/500: 100%|█████| 68/68 [00:00<00:00, 540.62it/s, train loss 5.954 ; val loss 2.929; lr 

saved model ValAcc: 2.492


131/500: 100%|█████| 68/68 [00:00<00:00, 492.16it/s, train loss 7.036 ; val loss 2.511; lr 1.00e-05]
132/500: 100%|█████| 68/68 [00:00<00:00, 535.80it/s, train loss 4.273 ; val loss 2.817; lr 1.00e-05]
133/500: 100%|█████| 68/68 [00:00<00:00, 516.47it/s, train loss 4.422 ; val loss 2.592; lr 1.00e-05]
134/500: 100%|█████| 68/68 [00:00<00:00, 466.99it/s, train loss 3.827 ; val loss 2.638; lr 1.00e-05]
135/500: 100%|█████| 68/68 [00:00<00:00, 477.74it/s, train loss 3.442 ; val loss 2.788; lr 1.00e-05]
136/500: 100%|█████| 68/68 [00:00<00:00, 475.92it/s, train loss 7.813 ; val loss 4.109; lr 1.00e-05]
137/500: 100%|█████| 68/68 [00:00<00:00, 477.33it/s, train loss 9.203 ; val loss 3.027; lr 1.00e-05]
138/500: 100%|█████| 68/68 [00:00<00:00, 511.33it/s, train loss 6.162 ; val loss 3.229; lr 1.00e-05]
139/500: 100%|█████| 68/68 [00:00<00:00, 504.36it/s, train loss 5.664 ; val loss 3.767; lr 1.00e-05]
140/500: 100%|█████| 68/68 [00:00<00:00, 484.62it/s, train loss 7.554 ; val loss 3.075; lr 

saved model ValAcc: 2.477


142/500: 100%|█████| 68/68 [00:00<00:00, 491.20it/s, train loss 5.879 ; val loss 2.429; lr 1.00e-05]


saved model ValAcc: 2.429


143/500: 100%|█████| 68/68 [00:00<00:00, 452.45it/s, train loss 7.467 ; val loss 4.437; lr 1.00e-05]
144/500: 100%|████| 68/68 [00:00<00:00, 489.81it/s, train loss 17.386 ; val loss 2.481; lr 1.00e-05]
145/500: 100%|█████| 68/68 [00:00<00:00, 469.72it/s, train loss 7.832 ; val loss 2.985; lr 1.00e-05]
146/500: 100%|█████| 68/68 [00:00<00:00, 500.70it/s, train loss 5.190 ; val loss 3.516; lr 1.00e-05]
147/500: 100%|█████| 68/68 [00:00<00:00, 466.85it/s, train loss 5.668 ; val loss 3.450; lr 1.00e-05]
148/500: 100%|█████| 68/68 [00:00<00:00, 496.53it/s, train loss 4.271 ; val loss 2.341; lr 1.00e-05]


saved model ValAcc: 2.341


149/500: 100%|█████| 68/68 [00:00<00:00, 505.93it/s, train loss 5.881 ; val loss 2.401; lr 1.00e-05]
150/500: 100%|█████| 68/68 [00:00<00:00, 517.95it/s, train loss 4.957 ; val loss 3.379; lr 1.00e-05]
151/500: 100%|█████| 68/68 [00:00<00:00, 507.25it/s, train loss 1.913 ; val loss 3.404; lr 1.00e-05]
152/500: 100%|█████| 68/68 [00:00<00:00, 506.89it/s, train loss 3.503 ; val loss 2.347; lr 1.00e-05]
153/500: 100%|█████| 68/68 [00:00<00:00, 496.59it/s, train loss 9.602 ; val loss 2.510; lr 1.00e-05]
154/500: 100%|█████| 68/68 [00:00<00:00, 500.31it/s, train loss 5.945 ; val loss 2.231; lr 1.00e-05]


saved model ValAcc: 2.231


155/500: 100%|█████| 68/68 [00:00<00:00, 475.12it/s, train loss 7.436 ; val loss 2.861; lr 1.00e-05]
156/500: 100%|████| 68/68 [00:00<00:00, 523.21it/s, train loss 11.289 ; val loss 2.486; lr 1.00e-05]
157/500: 100%|█████| 68/68 [00:00<00:00, 548.66it/s, train loss 2.864 ; val loss 2.291; lr 1.00e-05]
158/500: 100%|█████| 68/68 [00:00<00:00, 351.84it/s, train loss 5.193 ; val loss 2.694; lr 1.00e-05]
159/500: 100%|█████| 68/68 [00:00<00:00, 504.14it/s, train loss 4.781 ; val loss 2.350; lr 1.00e-05]
160/500: 100%|█████| 68/68 [00:00<00:00, 457.63it/s, train loss 4.578 ; val loss 2.171; lr 1.00e-05]


saved model ValAcc: 2.171


161/500: 100%|█████| 68/68 [00:00<00:00, 475.22it/s, train loss 6.210 ; val loss 4.602; lr 1.00e-05]
162/500: 100%|█████| 68/68 [00:00<00:00, 557.72it/s, train loss 2.148 ; val loss 4.998; lr 1.00e-05]
163/500: 100%|█████| 68/68 [00:00<00:00, 553.95it/s, train loss 8.153 ; val loss 3.078; lr 1.00e-05]
164/500: 100%|█████| 68/68 [00:00<00:00, 543.42it/s, train loss 2.950 ; val loss 2.525; lr 1.00e-05]
165/500: 100%|████| 68/68 [00:00<00:00, 552.28it/s, train loss 19.931 ; val loss 2.614; lr 1.00e-05]
166/500: 100%|█████| 68/68 [00:00<00:00, 470.67it/s, train loss 5.352 ; val loss 2.364; lr 1.00e-05]
167/500: 100%|████| 68/68 [00:00<00:00, 513.33it/s, train loss 11.035 ; val loss 3.454; lr 1.00e-05]
168/500: 100%|█████| 68/68 [00:00<00:00, 502.85it/s, train loss 4.615 ; val loss 2.185; lr 1.00e-05]
169/500: 100%|█████| 68/68 [00:00<00:00, 517.43it/s, train loss 8.260 ; val loss 2.079; lr 1.00e-05]


saved model ValAcc: 2.079


170/500: 100%|█████| 68/68 [00:00<00:00, 505.03it/s, train loss 2.842 ; val loss 3.079; lr 1.00e-05]
171/500: 100%|████| 68/68 [00:00<00:00, 483.90it/s, train loss 21.228 ; val loss 2.550; lr 1.00e-05]
172/500: 100%|█████| 68/68 [00:00<00:00, 475.96it/s, train loss 4.872 ; val loss 2.452; lr 1.00e-05]
173/500: 100%|█████| 68/68 [00:00<00:00, 505.43it/s, train loss 9.313 ; val loss 2.535; lr 1.00e-05]
174/500: 100%|████| 68/68 [00:00<00:00, 502.23it/s, train loss 11.947 ; val loss 3.049; lr 1.00e-05]
175/500: 100%|█████| 68/68 [00:00<00:00, 515.09it/s, train loss 3.596 ; val loss 2.151; lr 1.00e-05]
176/500: 100%|█████| 68/68 [00:00<00:00, 494.31it/s, train loss 2.623 ; val loss 2.233; lr 1.00e-05]
177/500: 100%|█████| 68/68 [00:00<00:00, 469.60it/s, train loss 7.170 ; val loss 2.183; lr 1.00e-05]
178/500: 100%|████| 68/68 [00:00<00:00, 511.95it/s, train loss 15.732 ; val loss 2.163; lr 1.00e-05]
179/500: 100%|█████| 68/68 [00:00<00:00, 550.16it/s, train loss 9.659 ; val loss 2.031; lr 

saved model ValAcc: 2.031


180/500: 100%|████| 68/68 [00:00<00:00, 535.98it/s, train loss 15.217 ; val loss 2.281; lr 1.00e-05]
181/500: 100%|█████| 68/68 [00:00<00:00, 532.25it/s, train loss 5.943 ; val loss 2.498; lr 1.00e-05]
182/500: 100%|████| 68/68 [00:00<00:00, 550.60it/s, train loss 11.750 ; val loss 2.641; lr 1.00e-05]
183/500: 100%|█████| 68/68 [00:00<00:00, 441.32it/s, train loss 4.018 ; val loss 2.229; lr 1.00e-05]
184/500: 100%|█████| 68/68 [00:00<00:00, 538.59it/s, train loss 8.028 ; val loss 2.615; lr 1.00e-05]
185/500: 100%|█████| 68/68 [00:00<00:00, 515.35it/s, train loss 4.845 ; val loss 2.285; lr 1.00e-05]
186/500: 100%|█████| 68/68 [00:00<00:00, 469.73it/s, train loss 7.876 ; val loss 2.067; lr 1.00e-05]
187/500: 100%|████| 68/68 [00:00<00:00, 404.59it/s, train loss 13.429 ; val loss 2.388; lr 1.00e-05]
188/500: 100%|█████| 68/68 [00:00<00:00, 490.54it/s, train loss 9.330 ; val loss 2.820; lr 1.00e-05]
189/500: 100%|█████| 68/68 [00:00<00:00, 494.25it/s, train loss 5.094 ; val loss 2.811; lr 

saved model ValAcc: 1.983


198/500: 100%|█████| 68/68 [00:00<00:00, 515.56it/s, train loss 8.831 ; val loss 2.526; lr 1.00e-05]
199/500: 100%|█████| 68/68 [00:00<00:00, 537.06it/s, train loss 3.187 ; val loss 2.220; lr 1.00e-05]
200/500: 100%|█████| 68/68 [00:00<00:00, 547.91it/s, train loss 1.471 ; val loss 2.136; lr 1.00e-05]
201/500: 100%|█████| 68/68 [00:00<00:00, 537.23it/s, train loss 4.325 ; val loss 1.982; lr 1.00e-05]


saved model ValAcc: 1.982


202/500: 100%|█████| 68/68 [00:00<00:00, 324.72it/s, train loss 4.695 ; val loss 2.015; lr 1.00e-05]
203/500: 100%|████| 68/68 [00:00<00:00, 501.84it/s, train loss 11.685 ; val loss 1.996; lr 1.00e-05]
204/500: 100%|█████| 68/68 [00:00<00:00, 509.57it/s, train loss 3.497 ; val loss 2.098; lr 1.00e-05]
205/500: 100%|█████| 68/68 [00:00<00:00, 517.72it/s, train loss 3.573 ; val loss 2.190; lr 1.00e-05]
206/500: 100%|█████| 68/68 [00:00<00:00, 514.97it/s, train loss 7.562 ; val loss 2.328; lr 1.00e-05]
207/500: 100%|████| 68/68 [00:00<00:00, 542.14it/s, train loss 12.779 ; val loss 2.838; lr 1.00e-05]
208/500: 100%|█████| 68/68 [00:00<00:00, 544.10it/s, train loss 2.707 ; val loss 2.165; lr 1.00e-05]
209/500: 100%|█████| 68/68 [00:00<00:00, 532.98it/s, train loss 4.181 ; val loss 1.916; lr 1.00e-05]


saved model ValAcc: 1.916


210/500: 100%|█████| 68/68 [00:00<00:00, 507.13it/s, train loss 7.614 ; val loss 2.497; lr 1.00e-05]
211/500: 100%|█████| 68/68 [00:00<00:00, 546.54it/s, train loss 5.816 ; val loss 1.950; lr 1.00e-05]
212/500: 100%|█████| 68/68 [00:00<00:00, 534.90it/s, train loss 1.211 ; val loss 1.831; lr 1.00e-05]


saved model ValAcc: 1.831


213/500: 100%|█████| 68/68 [00:00<00:00, 489.90it/s, train loss 4.488 ; val loss 2.003; lr 1.00e-05]
214/500: 100%|█████| 68/68 [00:00<00:00, 543.96it/s, train loss 3.778 ; val loss 1.894; lr 1.00e-05]
215/500: 100%|█████| 68/68 [00:00<00:00, 503.58it/s, train loss 5.124 ; val loss 2.799; lr 1.00e-05]
216/500: 100%|█████| 68/68 [00:00<00:00, 547.17it/s, train loss 2.344 ; val loss 1.874; lr 1.00e-05]
217/500: 100%|█████| 68/68 [00:00<00:00, 545.80it/s, train loss 6.284 ; val loss 1.895; lr 1.00e-05]
218/500: 100%|████| 68/68 [00:00<00:00, 536.70it/s, train loss 12.421 ; val loss 2.041; lr 1.00e-05]
219/500: 100%|█████| 68/68 [00:00<00:00, 541.13it/s, train loss 5.065 ; val loss 1.878; lr 1.00e-05]
220/500: 100%|█████| 68/68 [00:00<00:00, 545.00it/s, train loss 4.734 ; val loss 1.999; lr 1.00e-05]
221/500: 100%|█████| 68/68 [00:00<00:00, 552.61it/s, train loss 2.879 ; val loss 2.127; lr 1.00e-05]
222/500: 100%|█████| 68/68 [00:00<00:00, 546.68it/s, train loss 2.533 ; val loss 1.970; lr 

saved model ValAcc: 1.831


233/500: 100%|█████| 68/68 [00:00<00:00, 501.33it/s, train loss 4.029 ; val loss 1.996; lr 1.00e-05]
234/500: 100%|████| 68/68 [00:00<00:00, 549.52it/s, train loss 13.076 ; val loss 2.550; lr 1.00e-05]
235/500: 100%|████| 68/68 [00:00<00:00, 532.93it/s, train loss 11.870 ; val loss 1.841; lr 1.00e-05]
236/500: 100%|█████| 68/68 [00:00<00:00, 549.49it/s, train loss 6.727 ; val loss 2.221; lr 1.00e-05]
237/500: 100%|█████| 68/68 [00:00<00:00, 549.57it/s, train loss 2.615 ; val loss 1.866; lr 1.00e-05]
238/500: 100%|█████| 68/68 [00:00<00:00, 549.66it/s, train loss 2.261 ; val loss 2.047; lr 1.00e-05]
239/500: 100%|█████| 68/68 [00:00<00:00, 541.84it/s, train loss 8.715 ; val loss 1.976; lr 1.00e-05]
240/500: 100%|████| 68/68 [00:00<00:00, 542.72it/s, train loss 23.942 ; val loss 1.789; lr 1.00e-05]


saved model ValAcc: 1.789


241/500: 100%|████| 68/68 [00:00<00:00, 538.47it/s, train loss 14.980 ; val loss 1.844; lr 1.00e-05]
242/500: 100%|█████| 68/68 [00:00<00:00, 512.48it/s, train loss 4.429 ; val loss 2.050; lr 1.00e-05]
243/500: 100%|█████| 68/68 [00:00<00:00, 539.39it/s, train loss 5.436 ; val loss 2.762; lr 1.00e-05]
244/500: 100%|█████| 68/68 [00:00<00:00, 541.92it/s, train loss 4.376 ; val loss 1.959; lr 1.00e-05]
245/500: 100%|████| 68/68 [00:00<00:00, 543.25it/s, train loss 11.361 ; val loss 2.148; lr 1.00e-05]
246/500: 100%|█████| 68/68 [00:00<00:00, 537.61it/s, train loss 2.926 ; val loss 1.872; lr 1.00e-05]
247/500: 100%|█████| 68/68 [00:00<00:00, 538.40it/s, train loss 6.440 ; val loss 1.693; lr 1.00e-05]


saved model ValAcc: 1.693


248/500: 100%|█████| 68/68 [00:00<00:00, 509.45it/s, train loss 8.785 ; val loss 2.320; lr 1.00e-05]
249/500: 100%|█████| 68/68 [00:00<00:00, 522.85it/s, train loss 9.034 ; val loss 1.768; lr 1.00e-05]
250/500: 100%|█████| 68/68 [00:00<00:00, 549.12it/s, train loss 1.535 ; val loss 1.895; lr 1.00e-05]
251/500: 100%|█████| 68/68 [00:00<00:00, 511.81it/s, train loss 3.297 ; val loss 2.018; lr 1.00e-05]
252/500: 100%|█████| 68/68 [00:00<00:00, 557.25it/s, train loss 5.185 ; val loss 2.315; lr 1.00e-05]
253/500: 100%|█████| 68/68 [00:00<00:00, 553.93it/s, train loss 5.686 ; val loss 1.809; lr 1.00e-05]
254/500: 100%|█████| 68/68 [00:00<00:00, 521.56it/s, train loss 7.253 ; val loss 1.966; lr 1.00e-05]
255/500: 100%|█████| 68/68 [00:00<00:00, 430.78it/s, train loss 7.310 ; val loss 2.167; lr 1.00e-05]
256/500: 100%|█████| 68/68 [00:00<00:00, 320.79it/s, train loss 8.187 ; val loss 2.119; lr 1.00e-05]
257/500: 100%|█████| 68/68 [00:00<00:00, 545.90it/s, train loss 3.688 ; val loss 1.729; lr 

saved model ValAcc: 1.649


271/500: 100%|█████| 68/68 [00:00<00:00, 502.06it/s, train loss 2.368 ; val loss 1.691; lr 1.00e-05]
272/500: 100%|█████| 68/68 [00:00<00:00, 410.64it/s, train loss 8.977 ; val loss 1.821; lr 1.00e-05]
273/500: 100%|█████| 68/68 [00:00<00:00, 538.99it/s, train loss 2.653 ; val loss 1.653; lr 1.00e-05]
274/500: 100%|████| 68/68 [00:00<00:00, 548.54it/s, train loss 32.531 ; val loss 1.697; lr 1.00e-05]
275/500: 100%|████| 68/68 [00:00<00:00, 541.71it/s, train loss 22.923 ; val loss 1.953; lr 1.00e-05]
276/500: 100%|█████| 68/68 [00:00<00:00, 549.22it/s, train loss 2.913 ; val loss 1.917; lr 1.00e-05]
277/500: 100%|█████| 68/68 [00:00<00:00, 515.91it/s, train loss 4.438 ; val loss 2.050; lr 1.00e-05]
278/500: 100%|█████| 68/68 [00:00<00:00, 542.01it/s, train loss 3.172 ; val loss 1.807; lr 1.00e-05]
279/500: 100%|█████| 68/68 [00:00<00:00, 548.28it/s, train loss 7.033 ; val loss 1.675; lr 1.00e-05]
280/500: 100%|█████| 68/68 [00:00<00:00, 548.99it/s, train loss 4.950 ; val loss 1.663; lr 

saved model ValAcc: 1.627


293/500: 100%|████| 68/68 [00:00<00:00, 510.98it/s, train loss 21.952 ; val loss 1.763; lr 1.00e-05]
294/500: 100%|█████| 68/68 [00:00<00:00, 525.13it/s, train loss 7.345 ; val loss 1.690; lr 1.00e-05]
295/500: 100%|████| 68/68 [00:00<00:00, 552.82it/s, train loss 14.144 ; val loss 2.613; lr 1.00e-05]
296/500: 100%|████| 68/68 [00:00<00:00, 544.56it/s, train loss 15.701 ; val loss 2.296; lr 1.00e-05]
297/500: 100%|████| 68/68 [00:00<00:00, 547.40it/s, train loss 11.069 ; val loss 1.685; lr 1.00e-05]
298/500: 100%|█████| 68/68 [00:00<00:00, 545.86it/s, train loss 1.140 ; val loss 1.715; lr 1.00e-05]
299/500: 100%|████| 68/68 [00:00<00:00, 549.72it/s, train loss 17.467 ; val loss 2.935; lr 1.00e-05]
300/500: 100%|█████| 68/68 [00:00<00:00, 535.05it/s, train loss 3.606 ; val loss 1.720; lr 1.00e-05]
301/500: 100%|█████| 68/68 [00:00<00:00, 537.66it/s, train loss 2.232 ; val loss 1.624; lr 1.00e-05]


saved model ValAcc: 1.624


302/500: 100%|█████| 68/68 [00:00<00:00, 506.81it/s, train loss 1.999 ; val loss 2.276; lr 1.00e-05]
303/500: 100%|█████| 68/68 [00:00<00:00, 541.33it/s, train loss 4.825 ; val loss 1.650; lr 1.00e-05]
304/500: 100%|█████| 68/68 [00:00<00:00, 547.61it/s, train loss 3.056 ; val loss 1.928; lr 1.00e-05]
305/500: 100%|████| 68/68 [00:00<00:00, 548.52it/s, train loss 30.488 ; val loss 1.671; lr 1.00e-05]
306/500: 100%|█████| 68/68 [00:00<00:00, 537.51it/s, train loss 2.279 ; val loss 1.616; lr 1.00e-05]


saved model ValAcc: 1.616


307/500: 100%|████| 68/68 [00:00<00:00, 504.11it/s, train loss 15.540 ; val loss 1.711; lr 1.00e-05]
308/500: 100%|█████| 68/68 [00:00<00:00, 551.10it/s, train loss 9.827 ; val loss 2.192; lr 1.00e-05]
309/500: 100%|█████| 68/68 [00:00<00:00, 539.49it/s, train loss 2.131 ; val loss 1.603; lr 1.00e-05]


saved model ValAcc: 1.603


310/500: 100%|█████| 68/68 [00:00<00:00, 507.35it/s, train loss 5.192 ; val loss 1.619; lr 1.00e-05]
311/500: 100%|█████| 68/68 [00:00<00:00, 548.68it/s, train loss 3.409 ; val loss 1.752; lr 1.00e-05]
312/500: 100%|█████| 68/68 [00:00<00:00, 541.76it/s, train loss 9.957 ; val loss 1.594; lr 1.00e-05]


saved model ValAcc: 1.594


313/500: 100%|████| 68/68 [00:00<00:00, 510.47it/s, train loss 31.739 ; val loss 1.761; lr 1.00e-05]
314/500: 100%|█████| 68/68 [00:00<00:00, 534.31it/s, train loss 1.879 ; val loss 1.530; lr 1.00e-05]


saved model ValAcc: 1.530


315/500: 100%|████| 68/68 [00:00<00:00, 504.41it/s, train loss 11.400 ; val loss 1.615; lr 1.00e-05]
316/500: 100%|████| 68/68 [00:00<00:00, 545.57it/s, train loss 10.521 ; val loss 1.524; lr 1.00e-05]


saved model ValAcc: 1.524


317/500: 100%|█████| 68/68 [00:00<00:00, 519.93it/s, train loss 6.225 ; val loss 2.371; lr 1.00e-05]
318/500: 100%|█████| 68/68 [00:00<00:00, 536.00it/s, train loss 2.599 ; val loss 1.800; lr 1.00e-05]
319/500: 100%|█████| 68/68 [00:00<00:00, 532.82it/s, train loss 5.867 ; val loss 1.743; lr 1.00e-05]
320/500: 100%|█████| 68/68 [00:00<00:00, 540.27it/s, train loss 5.634 ; val loss 1.716; lr 1.00e-05]
321/500: 100%|█████| 68/68 [00:00<00:00, 540.46it/s, train loss 6.153 ; val loss 1.853; lr 1.00e-05]
322/500: 100%|████| 68/68 [00:00<00:00, 523.62it/s, train loss 17.738 ; val loss 1.679; lr 1.00e-05]
323/500: 100%|█████| 68/68 [00:00<00:00, 509.75it/s, train loss 7.108 ; val loss 1.770; lr 1.00e-05]
324/500: 100%|█████| 68/68 [00:00<00:00, 520.83it/s, train loss 4.727 ; val loss 2.030; lr 1.00e-05]
325/500: 100%|█████| 68/68 [00:00<00:00, 547.64it/s, train loss 1.584 ; val loss 1.629; lr 1.00e-05]
326/500: 100%|█████| 68/68 [00:00<00:00, 543.62it/s, train loss 3.283 ; val loss 1.571; lr 

saved model ValAcc: 1.500


329/500: 100%|████| 68/68 [00:00<00:00, 528.81it/s, train loss 14.870 ; val loss 1.611; lr 1.00e-05]
330/500: 100%|█████| 68/68 [00:00<00:00, 534.48it/s, train loss 7.933 ; val loss 1.627; lr 1.00e-05]
331/500: 100%|████| 68/68 [00:00<00:00, 359.36it/s, train loss 12.098 ; val loss 1.947; lr 1.00e-05]
332/500: 100%|█████| 68/68 [00:00<00:00, 427.10it/s, train loss 1.062 ; val loss 1.853; lr 1.00e-05]
333/500: 100%|█████| 68/68 [00:00<00:00, 541.86it/s, train loss 2.817 ; val loss 1.657; lr 1.00e-05]
334/500: 100%|█████| 68/68 [00:00<00:00, 530.90it/s, train loss 3.046 ; val loss 1.742; lr 1.00e-05]
335/500: 100%|████| 68/68 [00:00<00:00, 542.70it/s, train loss 11.436 ; val loss 2.643; lr 1.00e-05]
336/500: 100%|█████| 68/68 [00:00<00:00, 544.44it/s, train loss 2.098 ; val loss 1.518; lr 1.00e-05]
337/500: 100%|█████| 68/68 [00:00<00:00, 549.10it/s, train loss 5.025 ; val loss 1.599; lr 1.00e-05]
338/500: 100%|█████| 68/68 [00:00<00:00, 536.47it/s, train loss 1.171 ; val loss 1.624; lr 

saved model ValAcc: 1.495


362/500: 100%|████| 68/68 [00:00<00:00, 508.73it/s, train loss 19.558 ; val loss 1.488; lr 1.00e-05]


saved model ValAcc: 1.488


363/500: 100%|█████| 68/68 [00:00<00:00, 533.54it/s, train loss 8.682 ; val loss 1.599; lr 1.00e-05]
364/500: 100%|████| 68/68 [00:00<00:00, 523.02it/s, train loss 18.038 ; val loss 1.612; lr 1.00e-05]
365/500: 100%|████| 68/68 [00:00<00:00, 404.74it/s, train loss 10.512 ; val loss 1.625; lr 1.00e-05]
366/500: 100%|█████| 68/68 [00:00<00:00, 515.79it/s, train loss 2.624 ; val loss 1.528; lr 1.00e-05]
367/500: 100%|█████| 68/68 [00:00<00:00, 543.88it/s, train loss 9.629 ; val loss 1.563; lr 1.00e-05]
368/500: 100%|█████| 68/68 [00:00<00:00, 513.29it/s, train loss 5.329 ; val loss 1.478; lr 1.00e-05]


saved model ValAcc: 1.478


369/500: 100%|█████| 68/68 [00:00<00:00, 490.65it/s, train loss 4.134 ; val loss 1.613; lr 1.00e-05]
370/500: 100%|█████| 68/68 [00:00<00:00, 546.95it/s, train loss 4.348 ; val loss 1.767; lr 1.00e-05]
371/500: 100%|█████| 68/68 [00:00<00:00, 548.16it/s, train loss 3.427 ; val loss 1.544; lr 1.00e-05]
372/500: 100%|█████| 68/68 [00:00<00:00, 539.68it/s, train loss 4.459 ; val loss 1.565; lr 1.00e-05]
373/500: 100%|█████| 68/68 [00:00<00:00, 548.03it/s, train loss 2.609 ; val loss 1.587; lr 1.00e-05]
374/500: 100%|█████| 68/68 [00:00<00:00, 552.36it/s, train loss 2.603 ; val loss 1.747; lr 1.00e-05]
375/500: 100%|█████| 68/68 [00:00<00:00, 534.53it/s, train loss 8.001 ; val loss 1.883; lr 1.00e-05]
376/500: 100%|█████| 68/68 [00:00<00:00, 364.82it/s, train loss 5.139 ; val loss 1.676; lr 1.00e-05]
377/500: 100%|█████| 68/68 [00:00<00:00, 482.56it/s, train loss 3.438 ; val loss 1.461; lr 1.00e-05]


saved model ValAcc: 1.461


378/500: 100%|█████| 68/68 [00:00<00:00, 481.62it/s, train loss 1.409 ; val loss 1.470; lr 1.00e-05]
379/500: 100%|█████| 68/68 [00:00<00:00, 498.82it/s, train loss 5.230 ; val loss 1.541; lr 1.00e-05]
380/500: 100%|█████| 68/68 [00:00<00:00, 550.79it/s, train loss 2.324 ; val loss 1.532; lr 1.00e-05]
381/500: 100%|█████| 68/68 [00:00<00:00, 539.86it/s, train loss 3.609 ; val loss 1.434; lr 1.00e-05]


saved model ValAcc: 1.434


382/500: 100%|█████| 68/68 [00:00<00:00, 506.07it/s, train loss 0.905 ; val loss 1.664; lr 1.00e-05]
383/500: 100%|█████| 68/68 [00:00<00:00, 540.87it/s, train loss 2.971 ; val loss 1.543; lr 1.00e-05]
384/500: 100%|█████| 68/68 [00:00<00:00, 545.54it/s, train loss 5.512 ; val loss 1.483; lr 1.00e-05]
385/500: 100%|█████| 68/68 [00:00<00:00, 549.28it/s, train loss 8.329 ; val loss 1.647; lr 1.00e-05]
386/500: 100%|█████| 68/68 [00:00<00:00, 543.19it/s, train loss 2.564 ; val loss 1.853; lr 1.00e-05]
387/500: 100%|█████| 68/68 [00:00<00:00, 542.39it/s, train loss 7.384 ; val loss 1.464; lr 1.00e-05]
388/500: 100%|████| 68/68 [00:00<00:00, 553.54it/s, train loss 13.829 ; val loss 1.684; lr 1.00e-05]
389/500: 100%|█████| 68/68 [00:00<00:00, 546.90it/s, train loss 3.539 ; val loss 1.509; lr 1.00e-05]
390/500: 100%|█████| 68/68 [00:00<00:00, 547.77it/s, train loss 7.733 ; val loss 1.525; lr 1.00e-05]
391/500: 100%|█████| 68/68 [00:00<00:00, 520.67it/s, train loss 9.729 ; val loss 2.399; lr 

saved model ValAcc: 1.432


404/500: 100%|█████| 68/68 [00:00<00:00, 502.37it/s, train loss 5.588 ; val loss 1.767; lr 1.00e-05]
405/500: 100%|█████| 68/68 [00:00<00:00, 538.92it/s, train loss 1.897 ; val loss 1.430; lr 1.00e-05]


saved model ValAcc: 1.430


406/500: 100%|█████| 68/68 [00:00<00:00, 526.74it/s, train loss 2.660 ; val loss 1.511; lr 1.00e-05]
407/500: 100%|█████| 68/68 [00:00<00:00, 544.68it/s, train loss 2.913 ; val loss 1.458; lr 1.00e-05]
408/500: 100%|████| 68/68 [00:00<00:00, 535.64it/s, train loss 20.278 ; val loss 1.876; lr 1.00e-05]
409/500: 100%|█████| 68/68 [00:00<00:00, 537.43it/s, train loss 3.494 ; val loss 1.676; lr 1.00e-05]
410/500: 100%|█████| 68/68 [00:00<00:00, 549.10it/s, train loss 2.897 ; val loss 1.489; lr 1.00e-05]
411/500: 100%|█████| 68/68 [00:00<00:00, 546.70it/s, train loss 6.104 ; val loss 1.432; lr 1.00e-05]
412/500: 100%|█████| 68/68 [00:00<00:00, 545.16it/s, train loss 1.428 ; val loss 1.822; lr 1.00e-05]
413/500: 100%|█████| 68/68 [00:00<00:00, 541.19it/s, train loss 8.387 ; val loss 1.469; lr 1.00e-05]
414/500: 100%|█████| 68/68 [00:00<00:00, 524.95it/s, train loss 1.490 ; val loss 1.542; lr 1.00e-05]
415/500: 100%|█████| 68/68 [00:00<00:00, 532.45it/s, train loss 4.772 ; val loss 1.407; lr 

saved model ValAcc: 1.407


416/500: 100%|█████| 68/68 [00:00<00:00, 522.19it/s, train loss 8.185 ; val loss 1.425; lr 1.00e-05]
417/500: 100%|████| 68/68 [00:00<00:00, 540.13it/s, train loss 18.299 ; val loss 1.577; lr 1.00e-05]
418/500: 100%|████| 68/68 [00:00<00:00, 549.89it/s, train loss 15.129 ; val loss 1.554; lr 1.00e-05]
419/500: 100%|█████| 68/68 [00:00<00:00, 544.59it/s, train loss 2.212 ; val loss 1.411; lr 1.00e-05]
420/500: 100%|█████| 68/68 [00:00<00:00, 551.43it/s, train loss 4.246 ; val loss 1.593; lr 1.00e-05]
421/500: 100%|█████| 68/68 [00:00<00:00, 545.71it/s, train loss 3.269 ; val loss 1.420; lr 1.00e-05]
422/500: 100%|█████| 68/68 [00:00<00:00, 509.72it/s, train loss 3.893 ; val loss 1.432; lr 1.00e-05]
423/500: 100%|█████| 68/68 [00:00<00:00, 398.78it/s, train loss 8.495 ; val loss 1.620; lr 1.00e-05]
424/500: 100%|█████| 68/68 [00:00<00:00, 533.49it/s, train loss 3.478 ; val loss 1.454; lr 1.00e-05]
425/500: 100%|█████| 68/68 [00:00<00:00, 539.20it/s, train loss 8.707 ; val loss 1.462; lr 

saved model ValAcc: 1.401


438/500: 100%|█████| 68/68 [00:00<00:00, 504.62it/s, train loss 2.453 ; val loss 1.943; lr 1.00e-05]
439/500: 100%|█████| 68/68 [00:00<00:00, 551.94it/s, train loss 7.712 ; val loss 1.428; lr 1.00e-05]
440/500: 100%|█████| 68/68 [00:00<00:00, 536.39it/s, train loss 2.380 ; val loss 1.477; lr 1.00e-05]
441/500: 100%|█████| 68/68 [00:00<00:00, 373.91it/s, train loss 4.450 ; val loss 1.527; lr 1.00e-05]
442/500: 100%|█████| 68/68 [00:00<00:00, 475.40it/s, train loss 7.073 ; val loss 1.395; lr 1.00e-05]


saved model ValAcc: 1.395


443/500: 100%|█████| 68/68 [00:00<00:00, 521.44it/s, train loss 2.327 ; val loss 1.561; lr 1.00e-05]
444/500: 100%|█████| 68/68 [00:00<00:00, 528.27it/s, train loss 6.570 ; val loss 1.403; lr 1.00e-05]
445/500: 100%|█████| 68/68 [00:00<00:00, 550.14it/s, train loss 9.032 ; val loss 1.894; lr 1.00e-05]
446/500: 100%|█████| 68/68 [00:00<00:00, 357.33it/s, train loss 5.926 ; val loss 1.498; lr 1.00e-05]
447/500: 100%|█████| 68/68 [00:00<00:00, 542.73it/s, train loss 6.293 ; val loss 2.025; lr 1.00e-05]
448/500: 100%|█████| 68/68 [00:00<00:00, 550.22it/s, train loss 6.334 ; val loss 1.585; lr 1.00e-05]
449/500: 100%|█████| 68/68 [00:00<00:00, 547.30it/s, train loss 2.313 ; val loss 1.394; lr 1.00e-05]


saved model ValAcc: 1.394


450/500: 100%|█████| 68/68 [00:00<00:00, 519.30it/s, train loss 2.309 ; val loss 1.419; lr 1.00e-05]
451/500: 100%|█████| 68/68 [00:00<00:00, 542.98it/s, train loss 4.389 ; val loss 1.712; lr 1.00e-05]
452/500: 100%|█████| 68/68 [00:00<00:00, 538.53it/s, train loss 2.688 ; val loss 1.398; lr 1.00e-05]
453/500: 100%|█████| 68/68 [00:00<00:00, 545.19it/s, train loss 2.077 ; val loss 1.476; lr 1.00e-05]
454/500: 100%|█████| 68/68 [00:00<00:00, 535.69it/s, train loss 1.895 ; val loss 1.825; lr 1.00e-05]
455/500: 100%|█████| 68/68 [00:00<00:00, 552.34it/s, train loss 4.895 ; val loss 1.884; lr 1.00e-05]
456/500: 100%|█████| 68/68 [00:00<00:00, 545.52it/s, train loss 3.664 ; val loss 1.387; lr 1.00e-05]


saved model ValAcc: 1.387


457/500: 100%|████| 68/68 [00:00<00:00, 499.86it/s, train loss 17.024 ; val loss 1.395; lr 1.00e-05]
458/500: 100%|████| 68/68 [00:00<00:00, 551.61it/s, train loss 11.150 ; val loss 1.660; lr 1.00e-05]
459/500: 100%|█████| 68/68 [00:00<00:00, 503.09it/s, train loss 4.177 ; val loss 1.465; lr 1.00e-05]
460/500: 100%|████| 68/68 [00:00<00:00, 548.30it/s, train loss 11.204 ; val loss 1.493; lr 1.00e-05]
461/500: 100%|█████| 68/68 [00:00<00:00, 536.83it/s, train loss 2.534 ; val loss 1.369; lr 1.00e-05]


saved model ValAcc: 1.369


462/500: 100%|████| 68/68 [00:00<00:00, 498.06it/s, train loss 10.285 ; val loss 1.573; lr 1.00e-05]
463/500: 100%|█████| 68/68 [00:00<00:00, 548.67it/s, train loss 4.257 ; val loss 1.567; lr 1.00e-05]
464/500: 100%|████| 68/68 [00:00<00:00, 544.89it/s, train loss 13.752 ; val loss 1.658; lr 1.00e-05]
465/500: 100%|█████| 68/68 [00:00<00:00, 547.68it/s, train loss 9.301 ; val loss 1.372; lr 1.00e-05]
466/500: 100%|█████| 68/68 [00:00<00:00, 547.58it/s, train loss 8.677 ; val loss 1.572; lr 1.00e-05]
467/500: 100%|█████| 68/68 [00:00<00:00, 537.31it/s, train loss 3.561 ; val loss 1.426; lr 1.00e-05]
468/500: 100%|█████| 68/68 [00:00<00:00, 546.64it/s, train loss 5.131 ; val loss 1.443; lr 1.00e-05]
469/500: 100%|█████| 68/68 [00:00<00:00, 543.63it/s, train loss 1.351 ; val loss 1.424; lr 1.00e-05]
470/500: 100%|████| 68/68 [00:00<00:00, 543.52it/s, train loss 18.002 ; val loss 1.586; lr 1.00e-05]
471/500: 100%|█████| 68/68 [00:00<00:00, 550.27it/s, train loss 1.695 ; val loss 1.615; lr 

saved model ValAcc: 1.362


475/500: 100%|█████| 68/68 [00:00<00:00, 488.79it/s, train loss 4.214 ; val loss 1.469; lr 1.00e-05]
476/500: 100%|█████| 68/68 [00:00<00:00, 336.46it/s, train loss 2.676 ; val loss 1.384; lr 1.00e-05]
477/500: 100%|████| 68/68 [00:00<00:00, 426.28it/s, train loss 12.727 ; val loss 1.425; lr 1.00e-05]
478/500: 100%|████| 68/68 [00:00<00:00, 503.73it/s, train loss 10.092 ; val loss 1.411; lr 1.00e-05]
479/500: 100%|████| 68/68 [00:00<00:00, 541.65it/s, train loss 11.731 ; val loss 1.517; lr 1.00e-05]
480/500: 100%|█████| 68/68 [00:00<00:00, 461.70it/s, train loss 4.515 ; val loss 1.390; lr 1.00e-05]
481/500: 100%|█████| 68/68 [00:00<00:00, 539.85it/s, train loss 7.477 ; val loss 1.393; lr 1.00e-05]
482/500: 100%|█████| 68/68 [00:00<00:00, 548.17it/s, train loss 4.874 ; val loss 1.393; lr 1.00e-05]
483/500: 100%|█████| 68/68 [00:00<00:00, 543.82it/s, train loss 6.127 ; val loss 1.827; lr 1.00e-05]
484/500: 100%|█████| 68/68 [00:00<00:00, 552.76it/s, train loss 5.072 ; val loss 1.492; lr 

saved model ValAcc: 1.352


486/500: 100%|█████| 68/68 [00:00<00:00, 504.01it/s, train loss 1.964 ; val loss 1.440; lr 1.00e-05]
487/500: 100%|█████| 68/68 [00:00<00:00, 548.51it/s, train loss 2.213 ; val loss 1.358; lr 1.00e-05]
488/500: 100%|████| 68/68 [00:00<00:00, 547.45it/s, train loss 10.130 ; val loss 1.733; lr 1.00e-05]
489/500: 100%|█████| 68/68 [00:00<00:00, 540.71it/s, train loss 4.123 ; val loss 1.497; lr 1.00e-05]
490/500: 100%|█████| 68/68 [00:00<00:00, 540.65it/s, train loss 2.037 ; val loss 1.388; lr 1.00e-05]
491/500: 100%|█████| 68/68 [00:00<00:00, 551.87it/s, train loss 6.367 ; val loss 1.392; lr 1.00e-05]
492/500: 100%|█████| 68/68 [00:00<00:00, 535.29it/s, train loss 4.781 ; val loss 1.410; lr 1.00e-05]
493/500: 100%|█████| 68/68 [00:00<00:00, 543.16it/s, train loss 4.829 ; val loss 1.368; lr 1.00e-05]
494/500: 100%|█████| 68/68 [00:00<00:00, 547.04it/s, train loss 7.020 ; val loss 1.543; lr 1.00e-05]
495/500: 100%|█████| 68/68 [00:00<00:00, 546.05it/s, train loss 4.647 ; val loss 1.471; lr 

In [40]:
# Disabled debugging aid: when uncommented, it prints the model's predictions
# followed by the targets for every validation batch, with a line of '='
# characters between batches.
# for j, (x, y) in enumerate(val_loader):
#     model.eval()
#     preds = model(x.float().to(device)).squeeze(dim=1)
#     print(preds, y, sep='\n')
#     print(f"{'=' * 80}")

In [41]:
# Predict on the test CSV with the weights stored in './best_val_model.pth'
# and write the predictions to a text file.

# Re-read the raw test file, drop its first column ('id'), then keep only the
# columns listed in bestSelectedFeature['Specs'].
# NOTE(review): presumably these are the same features the checkpoint was
# trained on -- confirm against the training cell that produced the checkpoint.
test_dataset = pd.read_csv(test_data_path)
test_dataset = test_dataset[test_dataset.columns[1:]]
test_dataset = test_dataset[bestSelectedFeature['Specs']]

# shuffle=False keeps the predictions in the same order as the rows of the CSV.
test_loader = DataLoader(COVID19Dataset(test_dataset.values, None), batch_size=16, shuffle=False)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine can still be loaded on a CPU-only one.
model.load_state_dict(torch.load('./best_val_model.pth', map_location=device))
model.eval()
preds_all = []
with torch.no_grad():  # inference only: skip building the autograd graph
    for x in test_loader:
        x = x.float().to(device)
        # squeeze(dim=1) mirrors the training loop and always leaves a 1-D
        # tensor. A bare squeeze() would collapse a final batch of size 1 into
        # a 0-d array, which extend() cannot iterate over.
        preds = model(x).squeeze(dim=1).cpu().numpy()
        preds_all.extend(preds)


save_pred(preds_all, "./dl_selected_original_feature.txt")

#### 使用所有(feature scaled)数据

In [42]:
# Build the feature-scaled training data: load, scale, select features by
# correlation with the target, then split off a validation subset.

same_seed(77)

# Load the training CSV and discard its first column ('id').
dataset = pd.read_csv(train_data_path)
dataset = dataset.loc[:, dataset.columns[1:]]

# Apply the project's scaling pipeline to a copy of the frame.
# NOTE(review): MaxminScaler is defined elsewhere; the 'tested_positive'
# argument presumably names the columns that are left unscaled (the preview of
# x_train shows tested_positive* in raw units) -- confirm.
feature_process_pipeline = Pipeline([('maxmin_scaler', MaxminScaler('tested_positive'))])
dataset = feature_process_pipeline.transform(dataset.copy())

# Correlation of every column with the target, strongest first.
corr_matrix = dataset.corr()
target_coor = corr_matrix['tested_positive.4'].sort_values(ascending=False)

# Keep only the columns whose correlation with the target exceeds 0.5.
mask = target_coor > 0.5
selected_feature_idx = target_coor.index[mask].tolist()

# The leading entry is skipped: it is expected to be the target itself, which
# sorts first because of its self-correlation. The target is the last column.
x_dataset = dataset[selected_feature_idx[1:]]
y_dataset = dataset.iloc[:, -1]

# Split into training and validation sets (80% / 20%).
x_train, x_val, y_train, y_val = train_test_split(
    x_dataset,
    y_dataset,
    test_size=0.2,
    random_state=77,
)
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)

(2159, 34) (540, 34) (2159,) (540,)


In [43]:
# Show the first rows of the training features produced by the split above.
x_train.head()

Unnamed: 0,tested_positive.3,tested_positive.2,tested_positive.1,tested_positive,hh_cmnty_cli.4,hh_cmnty_cli.3,nohh_cmnty_cli.4,hh_cmnty_cli.2,nohh_cmnty_cli.3,hh_cmnty_cli.1,...,anxious.4,anxious.3,anxious.2,anxious.1,anxious,work_outside_home.4,work_outside_home.3,work_outside_home.2,work_outside_home.1,work_outside_home
1611,5.720446,5.066153,3.728124,3.217874,0.030578,0.038501,0.043391,0.035927,0.050363,0.03626,...,0.349364,0.315327,0.365267,0.352714,0.315041,0.12502,0.101719,0.108746,0.125541,0.118981
1738,5.238175,6.132241,8.019026,7.42582,0.12311,0.137675,0.101318,0.140786,0.106964,0.132884,...,0.424925,0.448514,0.426557,0.41761,0.46968,0.461224,0.470054,0.477118,0.463857,0.506354
1156,14.071856,14.583333,14.583333,13.4375,0.795198,0.790392,0.810744,0.811431,0.795247,0.801507,...,0.61092,0.637113,0.735003,0.739021,0.779809,0.432208,0.454133,0.552732,0.601344,0.591493
530,12.980782,10.194185,8.715588,9.047263,0.19316,0.193023,0.159913,0.176219,0.154867,0.181626,...,0.393338,0.43717,0.466091,0.519418,0.507335,0.498154,0.50657,0.537812,0.517657,0.528373
614,2.909232,3.374709,3.514739,3.488372,0.060224,0.056132,0.050647,0.057334,0.046105,0.058849,...,0.29007,0.281768,0.294648,0.268472,0.224005,0.152179,0.156876,0.128048,0.107788,0.110595


In [44]:
# List the names of the features that survived the correlation filter.
x_train.columns

Index(['tested_positive.3', 'tested_positive.2', 'tested_positive.1',
       'tested_positive', 'hh_cmnty_cli.4', 'hh_cmnty_cli.3',
       'nohh_cmnty_cli.4', 'hh_cmnty_cli.2', 'nohh_cmnty_cli.3',
       'hh_cmnty_cli.1', 'nohh_cmnty_cli.2', 'hh_cmnty_cli',
       'nohh_cmnty_cli.1', 'nohh_cmnty_cli', 'ili.4', 'cli.4', 'ili.3',
       'cli.3', 'ili.2', 'cli.2', 'ili.1', 'cli.1', 'ili', 'cli', 'anxious.4',
       'anxious.3', 'anxious.2', 'anxious.1', 'anxious', 'work_outside_home.4',
       'work_outside_home.3', 'work_outside_home.2', 'work_outside_home.1',
       'work_outside_home'],
      dtype='object')

In [45]:
# Final fit on the feature-scaled data: the model is trained on every labelled
# row (x_dataset / y_dataset), not only on the x_train split.
train_loader = DataLoader(COVID19Dataset(x_dataset.values, y_dataset.values),   # use all of the training data
                            batch_size=16,
                            shuffle=True,
                            num_workers=0,
                            drop_last=True)

# Monitoring only: x_val / y_val are a subset of x_dataset, so these rows are
# also seen during training and the reported "val loss" is not a held-out score.
val_loader = DataLoader(COVID19Dataset(x_val.values, y_val.values),
                        batch_size=8,
                        shuffle=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# One input channel per selected feature; entry 0 of selected_feature_idx is
# skipped, matching how x_dataset was built.
model = Model(input_channel=len(selected_feature_idx[1:])).to(device=device)
loss_fcn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9, weight_decay=1e-4, nesterov=True)
# Multiply the learning rate by 0.1 after epoch 100 and again after epoch 200.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200], gamma=0.1)

total_epoch = 300
for epoch in range(total_epoch):
    with tqdm(train_loader, total=len(train_loader), ncols=100) as tbar:
        tbar.set_description(f"{epoch+1}/{total_epoch}")
        # Iterating the bar itself advances it once per batch.
        for i, (x, y) in enumerate(tbar):
            # Re-enter training mode every step because the periodic
            # validation pass below switches the model to eval mode.
            model.train()
            x = x.float().to(device)
            preds = model(x).squeeze(dim=1)
            # MSELoss takes (input, target); the value is symmetric, but this
            # follows the documented argument order.
            loss = loss_fcn(preds, y.float().to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 100 batches, evaluate on val_loader and refresh the bar.
            if i % 100 == 0:
                model.eval()
                tot_mse = []
                with torch.no_grad():  # no gradients are needed for monitoring
                    # Distinct names so the training batch (x, y, preds) is
                    # not overwritten by the validation batches.
                    for val_x, val_y in val_loader:
                        val_preds = model(val_x.float().to(device)).squeeze(dim=1)
                        tot_mse.append(loss_fcn(val_preds, val_y.float().to(device)).cpu().numpy())
                tbar.set_postfix_str(f'train loss {loss.item():.3f} ; val loss {np.mean(tot_mse):.3f}; lr {lr_scheduler.get_last_lr()[0]:.2e}')

    # The milestones are counted in epochs, so step the scheduler once per epoch.
    lr_scheduler.step()

1/300: 100%|████| 168/168 [00:00<00:00, 605.29it/s, train loss 17.000 ; val loss 8.537; lr 1.00e-04]
2/300: 100%|████| 168/168 [00:00<00:00, 596.74it/s, train loss 16.180 ; val loss 6.300; lr 1.00e-04]
3/300: 100%|█████| 168/168 [00:00<00:00, 508.32it/s, train loss 8.894 ; val loss 5.893; lr 1.00e-04]
4/300: 100%|█████| 168/168 [00:00<00:00, 603.22it/s, train loss 6.480 ; val loss 4.060; lr 1.00e-04]
5/300: 100%|█████| 168/168 [00:00<00:00, 582.98it/s, train loss 9.465 ; val loss 2.196; lr 1.00e-04]
6/300: 100%|█████| 168/168 [00:00<00:00, 601.84it/s, train loss 5.034 ; val loss 2.149; lr 1.00e-04]
7/300: 100%|█████| 168/168 [00:00<00:00, 603.06it/s, train loss 7.280 ; val loss 1.594; lr 1.00e-04]
8/300: 100%|█████| 168/168 [00:00<00:00, 589.41it/s, train loss 4.363 ; val loss 1.467; lr 1.00e-04]
9/300: 100%|█████| 168/168 [00:00<00:00, 603.29it/s, train loss 6.542 ; val loss 1.536; lr 1.00e-04]
10/300: 100%|████| 168/168 [00:00<00:00, 612.90it/s, train loss 2.678 ; val loss 1.397; lr 

#### Testing

In [46]:
# for j, (x, y) in enumerate(val_loader):
#     model.eval()
#     preds = model(x.float().to(device)).squeeze(dim=1)
#     print(preds, y, sep='\n')
#     print(f"{'=' * 80}")

In [47]:
# Reload the raw test CSV and run it through the feature pipeline.
test_raw = pd.read_csv(test_data_path)
test_no_id = test_raw[test_raw.columns[1:]]  # drop the leading 'id' column
test_processed = feature_process_pipeline.transform(test_no_id.copy())

# Keep only the selected feature columns. The first entry of
# selected_feature_idx is skipped, as it is when sizing the model input
# (presumably it is the target column -- TODO confirm).
test_dataset = test_processed[selected_feature_idx[1:]]
test_loader = DataLoader(COVID19Dataset(test_dataset.values, None), batch_size=16, shuffle=False)

model.eval()
preds_all = []
# Inference only: disable autograd so no computation graph is built.
with torch.no_grad():
    for x in test_loader:
        x = x.float().to(device)
        # Squeeze only dim 1 (as in training) so a final batch of size 1 stays
        # 1-D; a bare squeeze() would yield a 0-d array that extend() cannot
        # iterate over.
        preds = model(x).squeeze(dim=1).cpu().numpy()
        preds_all.extend(preds)

save_pred(preds_all, "./dl_selected_maxmin_normalized_feature.txt")