In [1]:
import os
import numpy as np
import pandas as pd


import torch 
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms as tfs
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader,random_split,TensorDataset

In [2]:
df = pd.read_pickle("../features.pkl")

features_ori = np.zeros([3688,384],dtype=np.float64)
for i,f in enumerate(df['feature']):
    features_ori[i] = f

In [3]:
len(df['feature'])

3688

In [4]:
features_zero = (features_ori-features_ori.mean(axis=0))/(features_ori.std(axis=0)+0.000001)
features = features_zero[:,:]

fea_dim = len(features[0])
print(fea_dim)
MyModel =  nn.Sequential(
            nn.Linear(fea_dim,32),
            nn.ReLU(),
            nn.Linear(32,64),
            nn.ReLU(),
            nn.Linear(64,1),
        )




384


In [5]:

dataset = TensorDataset(torch.tensor(features,dtype=torch.float),torch.tensor(df['label'].values.astype(np.int)))
train_size = int(len(dataset)*0.7)
val_size = (len(dataset) - train_size)

test_size = len(dataset) - train_size- val_size
train_set,val_set,test_set = random_split(dataset,[train_size,val_size,test_size])

train_loader = DataLoader(train_set,batch_size=128,shuffle=True)
val_loader = DataLoader(val_set,batch_size=64,shuffle=False)
test_loader = DataLoader(test_set,batch_size=64,shuffle=False)

In [6]:

def train_epoch(model,data_loader,criterion,optimizer,device):
    
    # 训练
    model.train()
    model.to(device)
    criterion = criterion.to(device)
#     optimizer = optim.Adam(model.parameters(),lr = 0.01)
    for i,(fea,target) in enumerate(data_loader):
        
        optimizer.zero_grad()
        
        fea = fea.to(device)
        target = target.to(device)
        
        out = model(fea).squeeze()
        # 可能有问题
        loss = criterion(out,target.float())
        loss.backward()
        optimizer.step()
    print('finished 1 train epoch')
        
def validate(model,data_loader,device,show=False):
    correct = 0.
    total_num = 0.
    
    model.eval()
    model.to(device)
    for i,(img,target) in enumerate(data_loader):
        img = img.to(device)
        target = target.to(device)
        out = model(img).squeeze()
        
        correct += torch.sum((torch.round(nn.Sigmoid()(out))==target)).cpu().item()
        total_num += len(target)
        
        if show:
            print(nn.Sigmoid()(out))
            print(target)
            
    print(correct,total_num)
    print('accuracy: %f\n'%(correct/total_num))
    return correct/total_num
        
    
def train(model,train_loader,test_loader,criterion,optimizer,epoch_num=1, device='cpu'):
    best = 0.
    for i in range(epoch_num):
        train_epoch(model,train_loader,criterion,optimizer,device)
        print('epoch %d'%i)
        acc = validate(model,test_loader,device)
        if acc > best:
            best = acc
#             torch.save(model,'./best_model_epoch%d_acc_%.3f.pkl'%(i,best))
#     torch.save(model,'./last_epoch.pkl')


device = torch.device('cuda:0')
model = MyModel
optimizer = optim.Adam(model.parameters(),lr = 0.001)
criterion = torch.nn.BCEWithLogitsLoss()



In [7]:
train(model,train_loader,val_loader,criterion,optimizer,200,device)

finished 1 train epoch
epoch 0
735.0 1107.0
accuracy: 0.663957

finished 1 train epoch
epoch 1
734.0 1107.0
accuracy: 0.663053

finished 1 train epoch
epoch 2
734.0 1107.0
accuracy: 0.663053

finished 1 train epoch
epoch 3
726.0 1107.0
accuracy: 0.655827

finished 1 train epoch
epoch 4
723.0 1107.0
accuracy: 0.653117

finished 1 train epoch
epoch 5
717.0 1107.0
accuracy: 0.647696

finished 1 train epoch
epoch 6
694.0 1107.0
accuracy: 0.626920

finished 1 train epoch
epoch 7
686.0 1107.0
accuracy: 0.619693

finished 1 train epoch
epoch 8
688.0 1107.0
accuracy: 0.621500

finished 1 train epoch
epoch 9
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 10
672.0 1107.0
accuracy: 0.607046

finished 1 train epoch
epoch 11
687.0 1107.0
accuracy: 0.620596

finished 1 train epoch
epoch 12
663.0 1107.0
accuracy: 0.598916

finished 1 train epoch
epoch 13
664.0 1107.0
accuracy: 0.599819

finished 1 train epoch
epoch 14
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 15
663

finished 1 train epoch
epoch 127
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 128
666.0 1107.0
accuracy: 0.601626

finished 1 train epoch
epoch 129
667.0 1107.0
accuracy: 0.602529

finished 1 train epoch
epoch 130
668.0 1107.0
accuracy: 0.603433

finished 1 train epoch
epoch 131
663.0 1107.0
accuracy: 0.598916

finished 1 train epoch
epoch 132
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 133
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 134
665.0 1107.0
accuracy: 0.600723

finished 1 train epoch
epoch 135
667.0 1107.0
accuracy: 0.602529

finished 1 train epoch
epoch 136
667.0 1107.0
accuracy: 0.602529

finished 1 train epoch
epoch 137
668.0 1107.0
accuracy: 0.603433

finished 1 train epoch
epoch 138
667.0 1107.0
accuracy: 0.602529

finished 1 train epoch
epoch 139
666.0 1107.0
accuracy: 0.601626

finished 1 train epoch
epoch 140
667.0 1107.0
accuracy: 0.602529

finished 1 train epoch
epoch 141
667.0 1107.0
accuracy: 0.602529

finished 1

In [58]:
validate(model,val_loader,device,show=True)

tensor([3.0363e-01, 1.1769e-05, 2.8642e-08, 1.0000e+00, 1.3245e-13, 2.6972e-17,
        6.1149e-08, 9.8163e-01, 1.0000e+00, 1.3167e-01, 7.2368e-01, 1.5811e-02,
        3.8579e-09, 9.3560e-11, 1.0000e+00, 9.9996e-01, 1.1236e-10, 1.0000e+00,
        9.9996e-01, 1.0000e+00, 4.3980e-01, 3.8597e-01, 1.1731e-22, 9.9995e-01,
        1.6373e-03, 1.0535e-15, 1.0000e+00, 2.9101e-08, 1.7422e-08, 4.1866e-03,
        6.2611e-10, 6.7406e-01, 7.4260e-12, 1.0000e+00, 1.8196e-18, 7.8738e-16,
        1.3146e-11, 7.8760e-01, 1.9414e-03, 4.0261e-01, 5.8733e-05, 2.9760e-06,
        2.0073e-03, 2.7675e-03, 1.0000e+00, 9.8660e-01, 7.7495e-07, 7.2315e-09,
        1.0000e+00, 8.2817e-02, 9.9956e-01, 2.7288e-24, 8.9131e-12, 1.0000e+00,
        5.9866e-10, 1.4928e-08, 4.2155e-10, 9.4661e-13, 4.8067e-02, 2.5203e-24,
        5.6830e-09, 1.9579e-14, 2.7072e-01, 4.4020e-06], device='cuda:0',
       grad_fn=<SigmoidBackward>)
tensor([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
        0, 1

0.5998193315266486

In [35]:
df['label'].sum()/3689

0.3347790729194904

In [123]:
features[:2]

array([[ 7.375495e-02,  1.222110e-03,  7.253284e-02,  1.930000e+02,
         2.570000e+02,  5.142023e-03,  4.413058e-06,  4.049791e-03,
         4.490844e-05,  6.731100e-03,  5.514662e+00,  4.483424e+01,
        -1.208101e+00, -2.961012e+01,  2.840202e+01,  4.040000e+02,
         1.930000e+02, -1.001011e+01,  1.116773e-02, -1.277413e+01,
         1.781583e+01,  4.513614e+00, -1.168356e+00,  5.066848e+00,
         5.941076e+00, -1.710257e+01,  2.304364e+01,  6.400000e+01,
         3.110000e+02, -3.466938e+00, -1.350412e-02, -1.246681e-01,
         1.319619e+01,  4.115194e+00, -7.652906e-01,  3.820773e+00,
         1.214566e+01, -1.795498e+01,  3.010064e+01,  3.830000e+02,
         3.060000e+02, -3.996887e+00, -1.041718e-02, -1.418636e+00,
         3.018254e+01,  5.692740e+00,  1.423583e-01,  2.692385e+00,
         7.340120e+00, -2.195029e+01,  2.929041e+01,  2.700000e+02,
         3.250000e+02, -5.176172e+00, -1.876240e-02, -5.324774e-01,
         2.430781e+01,  5.614697e+00, -6.989497e

AttributeError: 'Sequential' object has no attribute 'layer'

In [165]:
features[:10,15]

array([404., 385., 178.,   4., 462., 363., 131., 378.,  88., 348.])

541    0
570    0
514    0
534    0
522    0
524    0
539    0
552    0
590    0
521    1
Name: label, dtype: object

In [7]:
train_size

2581

In [8]:
val_size

1107

In [13]:
len(df[df["label"]==1])

1235

In [14]:
len(df[df["label"]==0])

2453

In [15]:
1235+2453

3688