In [0]:
# Mount Google Drive at /content/gdrive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [1]:
import torch
# Report whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

False

In [0]:
# Set this to your own project path.
# NOTE: the bare `cd` relies on IPython automagic being enabled.
cd "/content/gdrive/My Drive/ObjectDetection/"

In [2]:
import os

In [3]:
# List the contents of the current working directory.
!ls

[31mObjectDetection.ipynb[m[m [34mdata[m[m                  [34mutils[m[m
[34m__pycache__[m[m           [34mlayers[m[m
[34mcollect[m[m               ssd.py


In [3]:
from data import *
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np

In [4]:
# Whether GPU execution is requested.
gpu = True

# Mini-batch size used by the data loader.
batch_size = 12

# Optimiser settings: base learning rate, its decay factor, and weight decay.
lr_ = 5e-4
gamma_ = 0.1
weight_decay = 5e-4

In [5]:
# Use CUDA float tensors as the default only when a CUDA device exists AND
# GPU execution was requested; in every other case use CPU float tensors.
use_cuda_tensors = torch.cuda.is_available() and gpu
torch.set_default_tensor_type(
    'torch.cuda.FloatTensor' if use_cuda_tensors else 'torch.FloatTensor'
)

In [6]:
# Select the VOC configuration and build the detection dataset, applying the
# SSD augmentation pipeline sized to cfg['min_dim'].
cfg = voc
train_transform = SSDAugmentation(cfg['min_dim'], MEANS)
dataset = VOCDetection(root=VOC_ROOT, transform=train_transform)

In [7]:
# Choose the compute device, build the SSD model in 'train' phase, and move
# it onto that device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
net = ssd_net.to(device)

  self.priors = Variable(self.priorbox.forward(), volatile=True)


In [8]:
# Load the VGG16 weights file into ssd_net.vgg.
# map_location='cpu' lets a checkpoint that was saved from a GPU be read on a
# CPU-only runtime; load_state_dict then copies the values into the model's
# existing parameters, whichever device they are on.
vgg_weights = torch.load('weights/vgg16.pth', map_location='cpu')
ssd_net.vgg.load_state_dict(vgg_weights)

FileNotFoundError: [Errno 2] No such file or directory: 'weights/vgg16.pth'

In [0]:
# Print the module structure of the network.
print(net)

In [0]:
# When GPU execution is requested, wrap the model in DataParallel and enable
# cuDNN's benchmark mode.
if gpu:
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

In [0]:
def adjust_learning_rate(optimizer, gamma, step):
    """Set every parameter group's learning rate to ``lr_ * gamma ** step``.

    ``lr_`` is the notebook-level base learning rate.

    Args:
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        gamma: Multiplicative decay factor.
        step: Exponent applied to ``gamma``.
    """
    decayed_lr = lr_ * (gamma ** step)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr

def xavier(param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Args:
        param: Tensor to initialise.
    """
    init.xavier_uniform_(param)

def weights_init(m):
    """Initialise a convolution layer in place; suitable for ``Module.apply``.

    ``nn.Conv2d`` modules get Xavier-uniform weights and a zeroed bias. Any
    other module type is left untouched.

    Args:
        m: Module to (possibly) initialise.
    """
    if not isinstance(m, nn.Conv2d):
        return
    xavier(m.weight.data)
    # Conv2d(bias=False) stores ``bias`` as None, so only zero a real bias.
    if m.bias is not None:
        m.bias.data.zero_()

In [0]:
# When training a new model, initialise the convolution parameters of the
# extra layers and of the localisation and classification heads.
ssd_net.extras.apply(weights_init)
ssd_net.loc.apply(weights_init)
ssd_net.conf.apply(weights_init)

In [0]:
# Configure the loss function.
# The last positional argument was the raw `gpu` flag; it is presumably
# MultiBoxLoss's use-GPU switch (TODO confirm in layers/modules). Pass the
# effective value so a CPU-only runtime with gpu=True is not asked to use
# CUDA, matching how the default tensor type and `device` are chosen.
use_gpu = gpu and torch.cuda.is_available()
criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                         False, use_gpu)

In [0]:
# Configure the optimiser: SGD with momentum over all network parameters.
optimizer = optim.SGD(
    net.parameters(),
    lr=lr_,
    momentum=0.9,
    weight_decay=weight_decay,
)

In [0]:
# Switch the network to training mode and reset the bookkeeping counters.
net.train()
loc_loss = 0
conf_loss = 0
epoch = 0

# Number of iterations treated as one epoch. Clamped to at least 1 so that a
# dataset smaller than one batch cannot make the training loop's
# `iteration % epoch_size` divide by zero.
epoch_size = max(1, len(dataset) // batch_size)
print('dataset_size', len(dataset))
print('epoch_size', epoch_size)
print('Training SSD on:', dataset.name)

# Number of learning-rate decay steps applied so far.
step_index = 0

In [0]:
# Build the data loader for the training data.
data_loader = data.DataLoader(
    dataset,
    batch_size,
    shuffle=True,
    num_workers=4,
    collate_fn=detection_collate,
    pin_memory=True,
)

In [0]:
# Start training.
batch_iterator = None
# Loop over iterations. NOTE: the count is fixed at 1000 here rather than
# being read from cfg['max_iter'].
for iteration in range(0, 1000):
    # At the start of training, or once an epoch's worth of iterations has
    # run, create a fresh iterator over data_loader and reset the per-epoch
    # loss accumulators. Compare with None explicitly instead of
    # truth-testing the iterator object.
    if batch_iterator is None or iteration % epoch_size == 0:
        batch_iterator = iter(data_loader)
        loc_loss = 0
        conf_loss = 0
        epoch += 1

    # Decay the learning rate at the configured iteration milestones.
    if iteration in cfg['lr_steps']:
        step_index += 1
        adjust_learning_rate(optimizer, gamma_, step_index)

    # Load one batch of training data.
    images, targets = next(batch_iterator)

    # Move the images to the compute device.
    images = images.to(device)
    # Move the annotations to the compute device.
    targets = [ann.to(device) for ann in targets]

    # forward
    t0 = time.time()
    # Forward pass.
    out = net(images)
    # Reset the gradients.
    optimizer.zero_grad()
    # Compute the localisation and confidence losses.
    loss_l, loss_c = criterion(out, targets)
    loss = loss_l + loss_c
    # Back-propagate.
    loss.backward()
    # Update the parameters.
    optimizer.step()
    t1 = time.time()
    # Accumulate the per-epoch losses.
    loc_loss += loss_l.item()
    conf_loss += loss_c.item()

    # Log progress every 10 iterations.
    if iteration % 10 == 0:
        print('timer: %.4f sec.' % (t1 - t0))
        print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

# Save the trained model. Create the target directory first so that a missing
# 'weights/' folder cannot throw away the whole training run at the very end.
os.makedirs('weights', exist_ok=True)
torch.save(ssd_net.state_dict(),
           'weights/ssd.pth')

In [0]:
# Download the SSD300 checkpoint file that the inference cells load.
!wget https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth

In [0]:
# Build a second SSD instance in 'test' phase, move it to the compute device,
# and load the downloaded checkpoint into it.
ssd_net_test = build_ssd('test', cfg['min_dim'], cfg['num_classes'])
net_test = ssd_net_test.to(device)
net_test.load_weights('ssd300_mAP_77.43_v2.pth')
# Alternative: load the weights saved by the training cell instead.
#net_test.load_weights('weights/ssd.pth')

In [0]:
%matplotlib inline
from matplotlib import pyplot as plt

from data import VOCDetection, VOC_ROOT, VOCAnnotationTransform
# here we specify year (07 or 12) and dataset ('test', 'val', 'train') 
testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None, VOCAnnotationTransform())
# クラスVOCDetectionはindexをキーに画像を取得する
img_id = 2586
image = testset.pull_image(img_id)

# cv2のチャンネルの順番はBGR（青、緑、赤）なので、RGB（赤、緑、青）に入れ替える
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# View the sampled input image before transform
plt.figure(figsize=(10,10))
plt.imshow(rgb_image)
plt.axis('off')
plt.show()

In [0]:
# Resize the image to 300x300 and work in float32.
resized = cv2.resize(image, (300, 300)).astype(np.float32)
# Subtract the per-channel mean values, in place.
resized -= (104.0, 117.0, 123.0)
# Reverse the channel axis (cv2's BGR order -> RGB). copy() materialises the
# reversed view so it can be handed to torch.from_numpy afterwards.
x = resized.astype(np.float32)[:, :, ::-1].copy()
plt.axis('off')
plt.imshow(x)
# Reorder HWC [300, 300, 3] into CHW [3, 300, 300].
x = torch.from_numpy(x).permute(2, 0, 1)

In [0]:
# Add a batch dimension at axis 0 ([3, 300, 300] -> [1, 3, 300, 300]) and
# move the batch to the compute device.
xx = x.unsqueeze(0).to(device)
# Evaluation mode, so dropout is not applied.
net_test.eval()
# Run the forward pass without building a computation graph.
with torch.no_grad():
    y = net_test(xx)

In [0]:
from data import VOC_CLASSES as labels
top_k=10

plt.figure(figsize=(10,10))
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
plt.axis('off')
plt.imshow(rgb_image)  # plot the image for matplotlib
currentAxis = plt.gca()
# Store the inference result in `detections`.
detections = y.data
# scale each detection back up to the image
scale = torch.Tensor(rgb_image.shape[1::-1]).repeat(2)
# Number of box slots stored per class; bounds the inner loop below.
max_boxes = detections.size(2)
# Loop over class index i.
# NOTE(review): labels[i-1] implies index 0 is a background class with no
# label of its own — confirm; for i == 0 it would wrap to the last label.
for i in range(detections.size(1)):
    j = 0
    # Draw every box whose confidence is at least 0.6.
    # j indexes the per-class boxes, and detections[0,i,j] is laid out as
    # [conf, xmin, ymin, xmax, ymax]. The `j < max_boxes` guard stops the scan
    # at the end of the tensor when every stored box passes the threshold,
    # instead of raising IndexError.
    while j < max_boxes and detections[0,i,j,0] >= 0.6:
        score = detections[0,i,j,0]
        label_name = labels[i-1]
        display_txt = '%s: %.2f'%(label_name, score)
        pt = (detections[0,i,j,1:]*scale).cpu().numpy()
        coords = (pt[0], pt[1]), pt[2]-pt[0]+1, pt[3]-pt[1]+1
        color = colors[i]
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False,
                                            edgecolor=color, linewidth=2))
        currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor':color, 'alpha':0.5})
        j+=1