In [63]:
import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time
 
import numpy as np
import matplotlib.pyplot as plt
import os
 
# Channel-wise mean / std applied by Normalize in every split (the standard
# ImageNet statistics expected by torchvision's pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _eval_pipeline():
    """Build the deterministic preprocessing used for validation and test images."""
    return transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


def _train_pipeline():
    """Build the augmenting preprocessing used for training images."""
    augmentations = [
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.RandomHorizontalFlip(),
    ]
    # After augmentation, crop to the 224x224 input size and normalize exactly
    # like the evaluation pipeline does.
    finishing = [
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]
    return transforms.Compose(augmentations + finishing)


# One preprocessing pipeline per dataset split, keyed by split name.
image_transforms = {
    'train': _train_pipeline(),
    'valid': _eval_pipeline(),
    'test': _eval_pipeline(),
}

In [64]:
import pandas as pd
import numpy as np
from glob import glob
from tqdm import tqdm_notebook
from skimage import io

In [65]:
# Load the three split tables; the first CSV column becomes the index.
TRAIN, VAL, TEST = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("TRAIN.csv", "VAL.csv", "TEST.csv")
)

In [66]:
# Inspect the index of the training table; these values are later used as
# image file names (name + ".jpg").
TRAIN.index

Index(['ISIC_0010569', 'ISIC_0066469', 'ISIC_0069513',
       'ISIC_0013758_downsampled', 'ISIC_0068400', 'ISIC_0027268',
       'ISIC_0068116', 'ISIC_0034084', 'ISIC_0067254', 'ISIC_0028725',
       ...
       'ISIC_0031091', 'ISIC_0025045', 'ISIC_0060924', 'ISIC_0072727',
       'ISIC_0000407_downsampled', 'ISIC_0026569', 'ISIC_0066910',
       'ISIC_0061182', 'ISIC_0033768', 'ISIC_0027568'],
      dtype='object', name='image', length=14943)

In [77]:
# Root folder of the ISIC 2019 (384x384 JPEG) data. Set the ISIC2019_ROOT
# environment variable to run the notebook on another machine; the default is
# the original author's local path.
DATASET_ROOT = os.environ.get(
    "ISIC2019_ROOT",
    "/Users/tangyujin/Desktop/09-半监督/03_data/ISIC2019 384x384 jpeg",
)

# Every directory keeps a trailing separator (the final "" component) because
# other cells build file paths by plain string concatenation.
data_dir = os.path.join(DATASET_ROOT, "train", "")           # flat folder with all source images
TRAIN_dir = os.path.join(DATASET_ROOT, "data", "train", "")  # per-class folders, training split
TEST_dir = os.path.join(DATASET_ROOT, "data", "test", "")    # per-class folders, test split
VAL_dir = os.path.join(DATASET_ROOT, "data", "val", "")      # per-class folders, validation split

In [69]:
import os
import imageio

In [53]:
# Sort the images of one split into <split dir>/<class name>/ folders, the
# layout that datasets.ImageFolder reads. This cell processes the TRAIN split;
# other splits can be handled by passing `df` and `out_dir` explicitly.
def make_class_image(class_name, df=None, out_dir=None):
    """Copy every image labelled `class_name` into its own class folder.

    Args:
        class_name: Label column to select; rows where it equals 1.0 are copied.
        df: Split table indexed by image name. Defaults to the global TRAIN.
        out_dir: Directory that receives the class folder. Defaults to the
            global TRAIN_dir.

    Source files are read from the global `data_dir` as `<name>.jpg`.
    """
    # Local imports keep this cell self-contained.
    import shutil
    from tqdm.notebook import tqdm  # replaces the deprecated tqdm.tqdm_notebook

    if df is None:
        df = TRAIN
    if out_dir is None:
        out_dir = TRAIN_dir

    class_df = df[df[class_name] == 1.0]
    class_dir = os.path.join(out_dir, class_name)
    # makedirs also creates missing parent folders and tolerates re-runs.
    os.makedirs(class_dir, exist_ok=True)

    for name in tqdm(class_df.index, desc=class_name):
        file_name = name + ".jpg"
        # A byte-for-byte copy avoids decoding and lossily re-encoding the JPEG.
        shutil.copyfile(os.path.join(data_dir, file_name),
                        os.path.join(class_dir, file_name))


for class_name in ("MEL", "BCC", "SCC", "NV"):
    make_class_image(class_name)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/3165 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/2326 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/440 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/9012 [00:00<?, ?it/s]

In [54]:
# Sort the images of one split into <split dir>/<class name>/ folders, the
# layout that datasets.ImageFolder reads. This cell processes the VAL split;
# other splits can be handled by passing `df` and `out_dir` explicitly.
def make_class_image(class_name, df=None, out_dir=None):
    """Copy every image labelled `class_name` into its own class folder.

    Args:
        class_name: Label column to select; rows where it equals 1.0 are copied.
        df: Split table indexed by image name. Defaults to the global VAL.
        out_dir: Directory that receives the class folder. Defaults to the
            global VAL_dir.

    Source files are read from the global `data_dir` as `<name>.jpg`.
    """
    # Local imports keep this cell self-contained.
    import shutil
    from tqdm.notebook import tqdm  # replaces the deprecated tqdm.tqdm_notebook

    if df is None:
        df = VAL
    if out_dir is None:
        out_dir = VAL_dir

    class_df = df[df[class_name] == 1.0]
    class_dir = os.path.join(out_dir, class_name)
    # makedirs also creates missing parent folders and tolerates re-runs.
    os.makedirs(class_dir, exist_ok=True)

    for name in tqdm(class_df.index, desc=class_name):
        file_name = name + ".jpg"
        # A byte-for-byte copy avoids decoding and lossily re-encoding the JPEG.
        shutil.copyfile(os.path.join(data_dir, file_name),
                        os.path.join(class_dir, file_name))


for class_name in ("MEL", "BCC", "SCC", "NV"):
    make_class_image(class_name)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/461 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/339 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/64 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/1313 [00:00<?, ?it/s]

In [55]:
# Sort the images of one split into <split dir>/<class name>/ folders, the
# layout that datasets.ImageFolder reads. This cell processes the TEST split;
# other splits can be handled by passing `df` and `out_dir` explicitly.
def make_class_image(class_name, df=None, out_dir=None):
    """Copy every image labelled `class_name` into its own class folder.

    Args:
        class_name: Label column to select; rows where it equals 1.0 are copied.
        df: Split table indexed by image name. Defaults to the global TEST.
        out_dir: Directory that receives the class folder. Defaults to the
            global TEST_dir.

    Source files are read from the global `data_dir` as `<name>.jpg`.
    """
    # Local imports keep this cell self-contained.
    import shutil
    from tqdm.notebook import tqdm  # replaces the deprecated tqdm.tqdm_notebook

    if df is None:
        df = TEST
    if out_dir is None:
        out_dir = TEST_dir

    class_df = df[df[class_name] == 1.0]
    class_dir = os.path.join(out_dir, class_name)
    # makedirs also creates missing parent folders and tolerates re-runs.
    os.makedirs(class_dir, exist_ok=True)

    for name in tqdm(class_df.index, desc=class_name):
        file_name = name + ".jpg"
        # A byte-for-byte copy avoids decoding and lossily re-encoding the JPEG.
        shutil.copyfile(os.path.join(data_dir, file_name),
                        os.path.join(class_dir, file_name))


for class_name in ("MEL", "BCC", "SCC", "NV"):
    make_class_image(class_name)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/896 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/658 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/124 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/2550 [00:00<?, ?it/s]

In [70]:
# Per-split accumulators for inputs and targets: each image*List collects the
# images read for the split, each target*List the matching label rows.
imageTrainList, targetTrainList = [], []
imageValList, targetValList = [], []
imageTestList, targetTestList = [], []

In [71]:
from tqdm.notebook import tqdm  # replaces the deprecated tqdm.tqdm_notebook


def _load_split(split_df):
    """Read every image named in `split_df.index` together with its label row.

    Args:
        split_df: Split table indexed by image name; each row holds the labels.

    Returns:
        (images, targets): two lists in index order. `images` holds the arrays
        returned by `io.imread`, `targets` the matching label rows as numpy
        arrays.
    """
    images, targets = [], []
    # Walking index and row values together avoids one `.loc` lookup per image
    # and still yields one row per image if an index value is duplicated.
    rows = zip(split_df.index, split_df.to_numpy())
    for name, label_row in tqdm(rows, total=len(split_df)):
        images.append(io.imread(data_dir + name + ".jpg"))  # image file is looked up by index value
        targets.append(label_row)
    return images, targets


# The lists are rebuilt from scratch, so re-running this cell never appends
# duplicates to lists left over from an earlier run.
imageTrainList, targetTrainList = _load_split(TRAIN)
imageValList, targetValList = _load_split(VAL)
imageTestList, targetTestList = _load_split(TEST)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


  0%|          | 0/14943 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


  0%|          | 0/2177 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


  0%|          | 0/4228 [00:00<?, ?it/s]

In [72]:
# View the collected training label rows as a table (one row per image).
pd.DataFrame(targetTrainList)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
14938,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14939,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14940,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14941,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [73]:
# Per-column label totals for the training split.
# NOTE(review): the non-zero totals equal the per-class image counts shown by
# the TRAIN make_class_image progress bars (MEL 3165, NV 9012, BCC 2326,
# SCC 440), so columns 0/1/2/7 are presumably MEL/NV/BCC/SCC. Confirm against
# the CSV header.
pd.DataFrame(targetTrainList).sum()

0    3165.0
1    9012.0
2    2326.0
3       0.0
4       0.0
5       0.0
6       0.0
7     440.0
8       0.0
dtype: float64

In [78]:
# ImageFolder requires the images under each split directory to be grouped
# into sub-folders named after their label (this is what its error message
# asks for); the make_class_image cells create that layout.
batch_size = 64

data = {
    'train': datasets.ImageFolder(root=TRAIN_dir, transform=image_transforms['train']),
    'valid': datasets.ImageFolder(root=VAL_dir, transform=image_transforms['valid']),
    'test': datasets.ImageFolder(root=TEST_dir, transform=image_transforms['test']),
}

# ImageFolder numbers classes by folder name, separately for each split. If a
# split were missing a class folder, its label indices would silently shift.
for split_name in ('valid', 'test'):
    assert data[split_name].class_to_idx == data['train'].class_to_idx, (
        "class folders of the '{}' split differ from the training split".format(split_name)
    )

# Taken from the folders on disk rather than hard-coded, so it always matches
# the labels the loaders actually produce.
num_classes = len(data['train'].classes)

train_data_size = len(data['train'])
valid_data_size = len(data['valid'])
test_data_size = len(data['test'])

# Only the training loader is shuffled; evaluation order does not affect the
# averaged metrics, so validation and test stay deterministic.
train_data = DataLoader(data['train'], batch_size=batch_size, shuffle=True)
valid_data = DataLoader(data['valid'], batch_size=batch_size, shuffle=False)
test_data = DataLoader(data['test'], batch_size=batch_size, shuffle=False)

print(train_data_size, valid_data_size, test_data_size)

14943 2177 4228


In [57]:
# ResNet-50 with pretrained weights; the checkpoint is downloaded into the
# torch hub cache when it is not already present (see the cell output).
resnet50 = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/tangyujin/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [58]:
# Freeze all existing weights of the network so they receive no gradients.
resnet50.requires_grad_(False)

In [59]:
# Input width of the classifier head. On a re-run `resnet50.fc` is already the
# Sequential built below, so read the width from its first Linear layer instead
# of failing on a missing `in_features` attribute.
if isinstance(resnet50.fc, nn.Linear):
    fc_inputs = resnet50.fc.in_features
else:
    fc_inputs = resnet50.fc[0].in_features

# One output per class folder found by ImageFolder, so the head always matches
# the labels produced by the data loaders.
fc_outputs = len(data['train'].classes)

# The new layers are created with requires_grad=True, so they remain trainable
# even though the existing weights were frozen beforehand.
resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, fc_outputs),
    nn.LogSoftmax(dim=1)  # log-probabilities, as consumed by nn.NLLLoss
)

In [None]:
# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so the notebook also runs on machines without CUDA. `device` is kept as a
# notebook-level name because the training code moves its batches with
# `.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resnet50 = resnet50.to(device)

In [None]:
# Negative log-likelihood loss; the model's head ends in LogSoftmax, so its
# outputs are already log-probabilities.
loss_func = nn.NLLLoss()
# Adam with its default settings. All parameters of resnet50 are handed over,
# including the ones whose requires_grad was switched off.
optimizer = optim.Adam(resnet50.parameters())

In [None]:
def _run_epoch(model, loader, loss_function, device, optimizer=None):
    """Run one full pass over `loader`.

    The pass trains the model when `optimizer` is given; otherwise it only
    evaluates, with gradient tracking switched off.

    Returns:
        (loss_sum, n_correct): the loss summed over all samples (batch mean
        times batch size) and the number of correctly classified samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    n_correct = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                # Gradients accumulate across backward() calls, so reset them
                # before every step.
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = loss_function(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch is averaged correctly later on.
            loss_sum += loss.item() * inputs.size(0)

            predictions = outputs.argmax(dim=1)
            n_correct += (predictions == labels.view_as(predictions)).sum().item()

    return loss_sum, n_correct


def train_and_valid(model, loss_function, optimizer, epochs=25,
                    train_loader=None, valid_loader=None, device=None,
                    save_dir='models', save_prefix=None):
    """Train `model`, then validate and checkpoint it after every epoch.

    Args:
        model: Network to train; it is moved to `device`.
        loss_function: Callable computing the loss from (outputs, labels).
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of passes over the training data.
        train_loader: Loader of (inputs, labels) training batches. Defaults to
            the notebook-level `train_data`.
        valid_loader: Loader of validation batches. Defaults to the
            notebook-level `valid_data`.
        device: Device to run on. Defaults to "cuda:0" when CUDA is available,
            otherwise the CPU.
        save_dir: Directory for the per-epoch checkpoints; created if missing.
        save_prefix: Prefix of the checkpoint file names. Defaults to the
            notebook-level `dataset` name when one is defined, else 'resnet50'.

    Returns:
        (model, history), where `history` holds one
        [avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc] entry
        per epoch, with accuracies as fractions in [0, 1].
    """
    if train_loader is None:
        train_loader = train_data
    if valid_loader is None:
        valid_loader = valid_data
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if save_prefix is None:
        # Keep the original file naming when the notebook defines `dataset`.
        save_prefix = globals().get('dataset', 'resnet50')

    # Sizes come from the loaders themselves so the averages stay correct for
    # whichever loaders are passed in; max(..., 1) guards an empty dataset.
    n_train = max(len(train_loader.dataset), 1)
    n_valid = max(len(valid_loader.dataset), 1)

    os.makedirs(save_dir, exist_ok=True)
    model = model.to(device)

    history = []
    best_acc = 0.0
    best_epoch = 0

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))

        train_loss, train_correct = _run_epoch(
            model, train_loader, loss_function, device, optimizer=optimizer)
        valid_loss, valid_correct = _run_epoch(
            model, valid_loader, loss_function, device)

        avg_train_loss = train_loss / n_train
        avg_train_acc = train_correct / n_train

        avg_valid_loss = valid_loss / n_valid
        avg_valid_acc = valid_correct / n_valid

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])

        if best_acc < avg_valid_acc:
            best_acc = avg_valid_acc
            best_epoch = epoch + 1

        epoch_end = time.time()

        print("Epoch: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation: Loss: {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(
            epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start
        ))
        print("Best Accuracy for validation : {:.4f} at epoch {:03d}".format(best_acc, best_epoch))

        # One checkpoint of the whole model per epoch.
        checkpoint_name = '{}_model_{}.pt'.format(save_prefix, epoch + 1)
        torch.save(model, os.path.join(save_dir, checkpoint_name))
    return model, history