##画像を20種類に分類するタスクです
https://signate.jp/competitions/108

In [16]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import (Dataset, 
                              DataLoader,
                              TensorDataset)
import tqdm

In [126]:
label_data=pd.read_csv("label_master.tsv",sep="\t",header=0)
label_data

Unnamed: 0,label_id,label_name
0,0,aquatic_mammals
1,1,fish
2,2,flowers
3,3,food_containers
4,4,fruit_and_vegetables
5,5,household_electrical_devices
6,6,household_furniture
7,7,insects
8,8,large_carnivores
9,9,large_man-made_outdoor_things


In [18]:
train_label=pd.read_csv("train_master.tsv",sep='\t',header=0)
train_label.head()

Unnamed: 0,file_name,label_id
0,train_00000.png,11
1,train_00001.png,15
2,train_00002.png,4
3,train_00003.png,14
4,train_00004.png,1


In [19]:
#train_dataをtrain_dataとval_dataに分ける（提出用のテストデータは教師データがない）

import shutil,os

modes=["train", "val"]
os.makedirs(r"C:\Users\heste\machine_learning\pytorch\ImagePractice\val",exist_ok=True)
dgpath=r"C:\Users\heste\machine_learning\pytorch\ImagePractice"

for i in range(40000,50000):
    img_path="train_{0:05d}.png".format(i)
    a=os.path.join(dgpath,modes[0],img_path)
    b=os.path.join(dgpath,modes[1],img_path)
    shutil.move(a,b)
    

In [20]:
#trainとvalに0-19のラベルフォルダーを作る

lalabel=list(train_label['label_id'])
for mode in modes:   
    for i in range(20):
        path=r"{}\{}".format(mode,i)
        os.makedirs(path,exist_ok=True)
        

In [21]:
#画像をラベルフォルダーに格納する


dgpath=r"C:\Users\heste\machine_learning\pytorch\ImagePractice"
for mode in modes:
    if mode=='train':
        for i in range(40000):
            img_path="train_{0:05d}.png".format(i)
            label_path="{}".format(lalabel[i])
            first_=os.path.join(dgpath, mode, img_path)
            next_=os.path.join(dgpath, mode, label_path, img_path)
            shutil.move(first_, next_)
    else:
        for i in range(40000,50000):
            img_path="train_{0:05d}.png".format(i)
            label_path="{}".format(lalabel[i])
            first_=os.path.join(dgpath, mode, img_path)
            next_=os.path.join(dgpath, mode, label_path, img_path)
            shutil.move(first_, next_)

In [31]:
#画像のデータセットを作る

from torchvision.datasets import ImageFolder
from torchvision import transforms
import sys,os
sys.path.append(os.pardir)

# ImageFolder関数を使用してDatasetを作成する
train_imgs = ImageFolder(
    "train/",
    transform=transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
))
val_imgs = ImageFolder(
    "val/",
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
))

# DataLoaderを作成
train_loader = DataLoader(
    train_imgs, batch_size=40, shuffle=True)
val_loader = DataLoader(
    val_imgs, batch_size=40, shuffle=False)

In [80]:
ll=iter(train_loader)
xp=next(ll)
xp[0].type()

'torch.FloatTensor'

In [26]:
from torchvision import models
print(models.resnet18())

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [27]:
#モデルの作成
from torchvision import models

# 事前学習済みのresnet18をロード
net = models.resnet18(pretrained=True)

# すべてのパラメータを微分対象外にする
for p in net.parameters():
    p.requires_grad=False
    
# 最後の線形層を付け替える
fc_input_dim = net.fc.in_features
net.fc = nn.Linear(fc_input_dim, 20)

①学習時にconv層毎回計算するタイプ

In [33]:
#学習の補助関数作成
def eval_net(net, data_loader, device="cpu"):
    # DropoutやBatchNormを無効化
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        # toメソッドで計算を実行するデバイスに転送する
        x = x.to(device)
        y = y.to(device)
        # 確率が最大のクラスを予測
        # ここではforward（推論）の計算だけなので微分を保存しない
        with torch.no_grad():
            _, y_pred = net(x).max(1)
        ys.append(y)
        ypreds.append(y_pred)
    # ミニバッチごとの予測結果などを1つにまとめる
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    # 予測精度を計算
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.item()

def train_net(net, train_loader, test_loader,
              only_fc=True,
              optimizer_cls=optim.Adam,
              loss_fn=nn.CrossEntropyLoss(),
              n_iter=10, device="cpu"):
    train_losses = []
    train_acc = []
    val_acc = []
    if only_fc:
        # 最後の線形層のパラメータのみを、
        # optimizerに渡す
        optimizer = optimizer_cls(net.fc.parameters())
    else:
        optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        # ネットワークを訓練モードにする
        net.train()
        n = 0
        n_acc = 0
        # 非常に時間がかかるのでtqdmを使用してプログレスバーを出す
        for i, (xx, yy) in tqdm.tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = xx.to(device)
            yy = yy.to(device)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().item()
        train_losses.append(running_loss / i)
        # 訓練データの予測精度
        train_acc.append(n_acc / n)
        # 検証データの予測精度
        val_acc.append(eval_net(net, test_loader, device))
        # このepochでの結果を表示
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [None]:
# ネットワークの全パラメータをGPUに転送
net#.to("cuda:0")

# 訓練を実行
train_net(net, train_loader, val_loader, n_iter=20)# device="cuda:0")

①の学習結果、CrossEntropyLoss=2.14, train_acc=0.35, val=acc=0.36 程度でした

②conv層の計算をあらかじめしておいて、学習時は全結合層のみ計算するタイプ

In [None]:
class IdentityLayer(nn.Module):
    def forward(self, x):
        return x
    
cal_net = models.resnet18(pretrained=True)
for p in cal_net.parameters():
    p.requires_grad=False
cal_net.fc = IdentityLayer()


In [None]:
#ResNet18のconv層による計算をあらかじめして、それを画像データの代わりに特徴量とする
def pre_eval(unet, img_tensor, device="cpu"):
    unet.eval()
    img_tensor=img_tensor.to(device)
    unet.to(device)
    pred_y = unet(img_tensor)
    return pred_y.detach()
    
  

In [137]:
pd.read_csv("sample_submit.csv",header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,test_00000.png,0.036151,0.014607,0.034591,0.079934,0.053778,0.011690,0.039329,0.079885,0.047027,...,0.062593,0.037947,0.080989,0.066377,0.020849,0.057315,0.056351,0.033405,0.078505,0.053430
1,test_00001.png,0.033393,0.037818,0.021027,0.043661,0.082885,0.041105,0.069309,0.059735,0.023879,...,0.077569,0.053666,0.044468,0.040490,0.090077,0.050215,0.052718,0.067940,0.030642,0.078394
2,test_00002.png,0.098902,0.047091,0.022525,0.059422,0.038273,0.011877,0.086344,0.054882,0.046612,...,0.028493,0.027183,0.092749,0.076549,0.026914,0.099406,0.032409,0.038710,0.000611,0.060666
3,test_00003.png,0.054705,0.080344,0.047048,0.083701,0.043293,0.068830,0.026523,0.076391,0.008649,...,0.020802,0.054486,0.065079,0.060174,0.045692,0.052243,0.006735,0.036344,0.070731,0.026432
4,test_00004.png,0.087472,0.045188,0.091187,0.003265,0.110322,0.102048,0.048273,0.080005,0.050142,...,0.096636,0.038774,0.050342,0.044085,0.057429,0.023512,0.018324,0.002596,0.030469,0.001106
5,test_00005.png,0.024683,0.065744,0.077615,0.046113,0.059905,0.099605,0.043327,0.006000,0.011979,...,0.042550,0.022934,0.057004,0.020315,0.046078,0.086747,0.005502,0.094937,0.049692,0.087786
6,test_00006.png,0.075007,0.033773,0.080307,0.044577,0.038601,0.028002,0.077726,0.008767,0.086582,...,0.004459,0.064445,0.063074,0.088214,0.023182,0.056242,0.056795,0.058184,0.019113,0.022420
7,test_00007.png,0.072468,0.039549,0.021179,0.014118,0.098646,0.046889,0.006402,0.089835,0.067446,...,0.092106,0.057349,0.063792,0.082661,0.004543,0.027652,0.066910,0.058867,0.023153,0.040974
8,test_00008.png,0.019808,0.057558,0.050134,0.015984,0.080800,0.043501,0.069915,0.003914,0.075305,...,0.040010,0.068089,0.056534,0.038834,0.044472,0.076316,0.063137,0.074565,0.032309,0.042868
9,test_00009.png,0.010335,0.046566,0.028081,0.075470,0.034927,0.078965,0.073302,0.054055,0.042573,...,0.057014,0.065897,0.037600,0.058659,0.034870,0.079438,0.073363,0.064722,0.000482,0.013864


In [121]:
#testデータに対して上記のようなデータフレームを作る
from PIL import Image

p_fn=nn.Softmax()

array_2d=[]

for i in range(10000):
    im_path="test_{0:05d}.png".format(i)
    path=r"C:\Users\heste\machine_learning\pytorch\ImagePractice\test\test_{0:05d}.png".format(i)
    
    im=Image.open(path)
    p=np.asarray(im, dtype=float)
    #netはバッチでデータを受け取るので一番外側にリストを作る
    p=torch.tensor([p.transpose(2,0,1)], requires_grad=False)
    #tensorはdouble型になっているのでfloat型に直す
    p=p.float()
    
    value=list(p_fn(net(p)).detach().numpy().squeeze())
    value.insert(0,im_path)
    array_2d.append(value)

sub_df=pd.DataFrame(array_2d)




In [136]:
sub_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,test_00000.png,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,8.866682e-15,0.0,0.0,0.0,0.0,0.0
1,test_00001.png,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.978292e-31,0.0,0.0,2.570496e-16,0.0,0.0
2,test_00002.png,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.231394e-42,0.0,0.0,9.133869e-32,0.0,0.0
3,test_00003.png,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.3920119999999996e-38,0.0,0.0,0.0,0.0,0.0
4,test_00004.png,1.0,2.757135e-35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [138]:
#submitファイル作成
sub_df.to_csv('2020-03-36submit.csv',header=None,index=None)