In [75]:
from __future__ import annotations
from typing import Final as const, Any

In [76]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn
#from torchvision.datasets import CocoDetection
from torchvision.datasets import VOCDetection
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import functional as F

In [77]:
from glob import glob

## CNNモデルの作成

In [78]:
voc_type_train_datas: const[str] = './pascal-voc--learning-daatset'

In [79]:
dataset = VOCDetection(
    voc_type_train_datas, 
    year='2012', 
    image_set='train', 
    download=True,
    transform=F.to_tensor
)

data_loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4)


Using downloaded and verified file: ./pascal-voc--learning-daatset/VOCtrainval_11-May-2012.tar
Extracting ./pascal-voc--learning-daatset/VOCtrainval_11-May-2012.tar to ./pascal-voc--learning-daatset


In [80]:
#モデルの設定

# モデルの定義
model = fasterrcnn_resnet50_fpn(pretrained=True)

# 分類器の出力数をチェックマークのクラス数に合わせて変更
num_classes = 3  # 1 class (checkmark) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [81]:
#モデルの設定

# モデルの定義
model = fasterrcnn_resnet50_fpn(pretrained=True)

# 分類器の出力数をチェックマークのクラス数に合わせて変更
num_classes = 2  # 1 class (checkmark) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [82]:
#学習の設定
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 10

In [86]:
#学習の実行
for epoch in range(num_epochs):
    print(f'Epoch : {epoch}')
    model.train()
    i = 0    
    for imgs, targets in data_loader:
        imgs = list(img.to(device) for img in imgs)
        # FIXME:多分データセットの読み方が間違ってる
        #targets = [{k: v for k, v in t.items()} for t in targets]
        loss_dict = model(imgs, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        i += 1
        if i % 50 == 0:
            print(f"Iteration #{i} loss: {losses.item()}")

Epoch : 0
annotation


TypeError: string indices must be integers

In [None]:
# 訓練済みモデルの保存
torch.save(model.state_dict(), "path_to_save_your_model.pth")

## モデルを利用したチェックマークの検出

In [None]:
#保存したモデルの読み込み
model_path: const[str] = "path_to_your_model.pth"
model = fasterrcnn_resnet50_fpn(pretrained=False) #Fast-RCNNを指定
model.load_state_dict(torch.load(model_path))

#学習モードから評価モードへの切り替え
model.eval()

In [None]:
from PIL import Image
def predict(path: str) -> Any:
    """モデルに画像を入力し、予測結果を返す関数"""
    img = Image(path).open()
    img = F.to_tensor(img)

    #推論モードなので、学習は不要。誤差逆伝搬をしない
    with torch.no_grad():
        prediction = model(img)
    
    return prediction


In [None]:
test = predict("./sheet.jpg")