In [1]:
"""必要なモジュールの読み込み"""
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os
import cv2
import torchvision
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import clip



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)












cuda


In [5]:
"""学習の実装"""
from torchvision.datasets import CocoDetection
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch

from demo.data import CustomCompose
from demo.simmim import MaskGenerator
from demo.data import MyCocoDetection

# 検出ブランチのデータセットの読み込み
transform = CustomCompose([transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                                transforms.Resize((512, 512))])

detection_dataset = MyCocoDetection(root="dataset/detection", annFile="dataset/detection/train_quadrant_enumeration_fdi.json", transforms=transform)
detection_dataloader = DataLoader(detection_dataset, batch_size=16, shuffle=True, collate_fn = lambda x: tuple(zip(*x)))


# モデルの読み込み
from demo.image_encoder import ResnetEncoder
from demo.detection import FCOSDetector
from demo.reconstruction import Recostruction

encoder = ResnetEncoder()
detector = FCOSDetector()
recostruction = Recostruction(encoder_outchannels=2048)


# 損失関数
from demo.loss import ReconstructionLoss, TextureConsistencyLoss

# オプティマイザー
import torch.optim as optim
optimizer = optim.Adam(
    [
        {"params": encoder.parameters(), "lr": 1e-4},
        {"params": detector.parameters(), "lr": 1e-4},
        {"params": recostruction.parameters(), "lr": 1e-4},
    ]
)

# 学習ループ
NUM_EPOCHS = 1000
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# マスク生成器のインスタンス化
mask_generator = MaskGenerator(batch_size=16)

# モデルをデバイスに移動
encoder.to(device)
detector.to(device)
recostruction.to(device)

for epoch in range(NUM_EPOCHS):
    encoder.train()
    detector.train()
    recostruction.train()

    total_loss = 0
    
    for images, targets in detection_dataloader:
        batch_size = 16
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # imagesをB,C,H,Wのテンソルに変換
        images = torch.stack(images, dim=0)
        print(images.shape)

        # マスク生成器のインスタンス化
        mask_generator = MaskGenerator(batch_size=batch_size)

        # imageにマスクをかける
        masks = mask_generator()
        # 画像の形状に合わせてマスクをリシェイプ
        masks = masks.unsqueeze(1)
        masks = masks.to(device)

        masked_images = images * (1 - masks)



        



        # マスクをかけた画像をencoderに通す
        features = encoder.forward(masked_images)

        # 特徴量抽出された画像を復元
        reconstructed_image = recostruction.forward(features)

        # 再構築ブランチの損失関数を計算
        reconstruction_loss = ReconstructionLoss(images, reconstructed_image)
        texture_consistency_loss = TextureConsistencyLoss(images, reconstructed_image)

        # 検出ブランチの損失関数を計算
        detections = detector.forward(images)
        detection_loss = detector(images, targets)
        detection_loss = sum(loss for loss in detection_loss.values())

        # 総損失を計算  
        total_loss = reconstruction_loss + texture_consistency_loss + detection_loss

        # 勾配を計算
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # ロスを表示
        print(f"Epoch {epoch+1}, Loss: {total_loss.item()}")







    





loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
torch.Size([16, 3, 512, 512])


AssertionError: targets should not be none when in training mode