In [25]:

# Paths, given relative to the colab_frcnn-main project root.
bdd_xml = './mydatasets/xml'   # directory holding the XML annotation files
bdd_img = './mydatasets/img'   # directory holding the matching images
test_path = '../../input_with_line/20250918/C00452'

# Class names used by the dataset.
# NOTE(review): 'Track' may be a typo for 'Truck'. The value has to match the
# <name> entries in the XML files, so confirm against the data before changing.
dataset_class = [
    'Car',
    'Track',
    'Bus',
    'Ambulance',
    'Motorcycle',
]

# Drawing colour per label index.
# Caution: index 0 stands for the background class and must stay (0, 0, 0);
# the remaining entries may be chosen freely.
colors = (
    (0, 0, 0),
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (100, 100, 100),
    (50, 50, 50),
)

# Hyper-parameters.
epochs = 10
batch_size = 1
scale = 416  # image scale: the target image height



In [26]:
import numpy as np
import pandas as pd

from PIL import Image
from glob import glob
import xml.etree.ElementTree as ET
import cv2

import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import TensorDataset
import os


class xml2list(object):
    """Callable that extracts bounding boxes and labels from one XML annotation file.

    The file is expected to contain ``<object>`` elements, each with a
    ``<name>`` and a ``<bndbox>`` holding ``xmin`` / ``ymin`` / ``xmax`` /
    ``ymax`` children.

    Args:
        classes: sequence of class names to keep. The position of a name in
            this sequence becomes its integer label.
    """

    def __init__(self, classes):
        self.classes = classes

    def __call__(self, xml_path):
        """Parse ``xml_path`` and return the annotations of the known classes.

        Args:
            xml_path: path of the XML annotation file.

        Returns:
            A tuple ``(anno, num_objs)`` where ``anno`` is
            ``{'bboxes': [[xmin, ymin, xmax, ymax], ...], 'labels': [int, ...]}``
            and ``num_objs`` is the number of annotations that were kept
            (0 when the file has no object of a known class).
        """
        root = ET.parse(xml_path).getroot()

        boxes = []
        labels = []

        for obj in root.iter('object'):
            label = obj.find('name').text

            # Keep only the classes this converter was configured with.
            if label not in self.classes:
                continue

            bndbox = obj.find('bndbox')
            # Go through float() so coordinates written as "12.0" or "12.5"
            # are accepted; int() then truncates to a whole pixel.
            box = [
                int(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ]
            boxes.append(box)
            labels.append(self.classes.index(label))

        # Count what is actually returned, so the number always agrees with
        # len(anno['bboxes']) - including for files without any object.
        num_objs = len(boxes)

        anno = {'bboxes': boxes, 'labels': labels}

        return anno, num_objs


In [27]:
class MyDataset(torch.utils.data.Dataset):
    """Detection dataset pairing XML annotation files with ``.jpg`` images.

    One sample is produced per ``*.xml`` file found in ``xml_paths``; the
    image is looked up as ``<image_dir>/<xml file stem>.jpg``.

    Args:
        image_dir: directory containing the images.
        xml_paths: directory containing the XML annotation files.
        scale: target image height in pixels; the width is scaled by the same
            ratio so the aspect ratio is kept.
        classes: class names handed to ``xml2list``; labels in the returned
            target are shifted by +1 so that 0 is left for the background.
    """

    def __init__(self, image_dir, xml_paths, scale, classes):
        # Imported locally on purpose: other cells of this notebook run
        # `import glob`, which rebinds the global name `glob` from the
        # function to the module and would make a bare glob(...) call fail.
        from glob import glob as list_paths

        super().__init__()
        self.image_dir = image_dir
        self.xml_paths = xml_paths
        # Only annotation files define samples; stray files in the directory
        # (thumbnails, checkpoints, ...) are ignored.
        self.image_ids = sorted(list_paths('{}/*.xml'.format(xml_paths)))
        self.scale = scale
        self.classes = classes

    def __getitem__(self, index):
        """Return ``(image, target, image_id)`` for sample ``index``.

        ``image`` is a float tensor produced by ``transforms.ToTensor``,
        ``target`` is a dict with the keys ``boxes``, ``labels``,
        ``image_id``, ``area`` and ``iscrowd``, and ``image_id`` is the file
        stem shared by the annotation and the image.
        """
        xml_file = self.image_ids[index]
        image_id = os.path.splitext(os.path.basename(xml_file))[0]

        # Load the image. convert("RGB") guarantees three channels even for
        # grayscale / RGBA / palette files, and the context manager releases
        # the file handle right away.
        with Image.open(os.path.join(self.image_dir, image_id + ".jpg")) as raw:
            image = raw.convert("RGB")

        # Resize so the height equals self.scale, keeping the aspect ratio.
        orig_width, orig_height = image.size
        ratio = self.scale / orig_height
        new_width = int(orig_width * ratio)
        image = image.resize((new_width, self.scale))

        image = transforms.ToTensor()(image)

        annotations, _ = xml2list(self.classes)(xml_file)  # object count is not needed here

        # reshape(-1, 4) yields a (0, 4) tensor when there are no boxes, so
        # empty and non-empty annotations share one code path.
        boxes = torch.as_tensor(annotations['bboxes'], dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(annotations['labels'], dtype=torch.int64)

        # Bring the boxes to the resized image's coordinate system.
        boxes = boxes * ratio

        # Drop boxes without positive width and height: torchvision's
        # detection models reject such targets during training.
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes = boxes[keep]
        labels = labels[keep]

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # One flag per remaining object; nothing is marked as a crowd region.
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels + 1  # shift so that 0 stays the background
        target["image_id"] = torch.tensor([index])
        target["area"] = area
        target["iscrowd"] = iscrowd
        return image, target, image_id

    def __len__(self):
        """Number of annotation files, i.e. number of samples."""
        return len(self.image_ids)


In [28]:

def dataloader(data, dataset_class, batch_size, scale=720):
    """Build the shuffled training ``DataLoader``.

    Args:
        data: indexable whose item 0 is the XML annotation directory and
            whose item 1 is the image directory.
        dataset_class: class names forwarded to ``MyDataset``.
        batch_size: number of samples per batch.
        scale: target image height forwarded to ``MyDataset``.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``MyDataset`` whose batches are
        tuples of per-field tuples (one tuple per field a sample returns).
    """

    def _transpose_batch(samples):
        # Images differ in size, so keep them as tuples instead of stacking.
        return tuple(zip(*samples))

    annotation_dir, image_dir = data[0], data[1]
    dataset = MyDataset(image_dir, annotation_dir, scale, dataset_class)

    # Seed the global torch RNG before the loader is created.
    torch.manual_seed(2020)

    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=_transpose_batch,
    )


In [29]:

def model(num_classes=None):
    """Create a Faster R-CNN (ResNet-50 FPN) with a fresh box-predictor head.

    Args:
        num_classes: number of output classes including the background.
            When omitted it is derived from the module-level
            ``dataset_class`` as ``len(dataset_class) + 1``, read at call time.

    Returns:
        The torchvision detection model with pretrained weights and its
        ``roi_heads.box_predictor`` replaced by a new ``FastRCNNPredictor``.
    """
    if num_classes is None:
        num_classes = len(dataset_class) + 1

    # `weights="DEFAULT"` replaces the deprecated `pretrained=True` flag.
    net = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # Swap the classification/regression head for one sized to our classes.
    in_features = net.roi_heads.box_predictor.cls_score.in_features
    net.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return net



In [9]:
# Build the training loader from the annotation / image directories.
data_ALL = [bdd_xml, bdd_img]
train_dataloader = dataloader(data_ALL, dataset_class, batch_size, scale)

# NOTE(review): this rebinds the name `model` from the factory function to the
# network instance, so re-running this cell requires re-running the cell that
# defines `model()` first.
model = model()
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = epochs

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Switch to training mode.
model.train()

loss_list = []
for epoch in range(num_epochs):
    loss_epo = []

    for i, batch in enumerate(train_dataloader):
        # Each mini-batch carries the images, the targets and the image ids.
        images, targets, image_ids = batch

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model takes images plus ground-truth targets
        # and returns a dict of loss tensors (RPN and R-CNN losses).
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        loss_value = losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Remember this iteration's loss.
        loss_epo.append(loss_value)

        if (i + 1) % 10 == 0:
            print(f"epoch #{epoch+1} Iteration #{i+1} loss: {loss_value}")

    # Store the mean loss of the epoch and checkpoint the whole model.
    loss_list.append(np.mean(loss_epo))
    torch.save(model, './model.pth')




epoch #1 Iteration #10 loss: 0.1657692939043045
epoch #1 Iteration #20 loss: 0.30838632583618164
epoch #1 Iteration #30 loss: 0.31695228815078735
epoch #1 Iteration #40 loss: 0.3969959616661072
epoch #1 Iteration #50 loss: 0.1945684254169464
epoch #1 Iteration #60 loss: 0.18708427250385284
epoch #1 Iteration #70 loss: 0.2812070846557617
epoch #1 Iteration #80 loss: 0.19812512397766113
epoch #1 Iteration #90 loss: 0.3675290644168854
epoch #2 Iteration #10 loss: 0.29136836528778076
epoch #2 Iteration #20 loss: 0.25356727838516235
epoch #2 Iteration #30 loss: 0.22632044553756714
epoch #2 Iteration #40 loss: 0.5186583399772644
epoch #2 Iteration #50 loss: 0.20700426399707794
epoch #2 Iteration #60 loss: 0.13615567982196808
epoch #2 Iteration #70 loss: 0.1884901076555252
epoch #2 Iteration #80 loss: 0.21921901404857635
epoch #2 Iteration #90 loss: 0.308813214302063
epoch #3 Iteration #10 loss: 0.17595195770263672
epoch #3 Iteration #20 loss: 0.1432512253522873
epoch #3 Iteration #30 loss: 0

In [30]:
import cv2
import glob
import matplotlib.pyplot as plt

# Class names for inference, with index 0 reserved for the background.
# A new list is built instead of inserting into `dataset_class`: that config
# list is also read by model() and dataloader(), which expect it WITHOUT the
# background entry, and an in-place insert would add another entry every time
# this cell is re-run.
data_class = ["__background__", *dataset_class]
classes = tuple(data_class)

# Run inference with an already trained model.
import torch

# NOTE(security): weights_only=False unpickles arbitrary Python objects, so
# only load checkpoint files that come from a trusted source.
# NOTE(review): the training cell saves to './model.pth' while this cell loads
# './model_mydatasets.pth' - confirm this is the intended checkpoint.
model = torch.load(
    "./model_mydatasets.pth",
    map_location=torch.device("cpu"),
    weights_only=False
)
model.eval()


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [31]:
import os
import glob
import cv2
import torch
import torchvision

# Output directory for the annotated images.
save_dir = "output_mydatasets"
os.makedirs(save_dir, exist_ok=True)

# Detections at or below this confidence are not drawn.
score_threshold = 0.5

device = torch.device("cpu")
model.to(device)
model.eval()

for imgfile in sorted(glob.glob(test_path + '/*')):

    img = cv2.imread(imgfile)
    if img is None:
        # cv2.imread returns None for anything it cannot decode
        # (sub-directories, text files, broken images); skip instead of
        # crashing in cvtColor.
        print(f"skipped (not a readable image): {imgfile}")
        continue

    # The model expects RGB input, OpenCV loads BGR.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image_tensor = torchvision.transforms.functional.to_tensor(img)

    with torch.no_grad():
        prediction = model([image_tensor.to(device)])

    for i, box in enumerate(prediction[0]['boxes']):
        score = float(prediction[0]['scores'][i])
        if score > score_threshold:
            score = round(score, 2)
            cat = int(prediction[0]['labels'][i])
            txt = '{} {}'.format(classes[cat], str(score))
            font = cv2.FONT_HERSHEY_SIMPLEX
            cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
            c = colors[cat]
            box = box.cpu().numpy().astype('int')
            # Plain Python ints: some OpenCV builds reject numpy integer
            # scalars as point coordinates.
            x1, y1, x2, y2 = box.tolist()

            # Bounding box, filled label background, then the label text.
            cv2.rectangle(img, (x1, y1), (x2, y2), c, 2)
            cv2.rectangle(
                img,
                (x1, y1 - cat_size[1] - 2),
                (x1 + cat_size[0], y1 - 2),
                c,
                -1
            )
            cv2.putText(
                img,
                txt,
                (x1, y1 - 2),
                font,
                0.5,
                (0, 0, 0),
                thickness=1,
                lineType=cv2.LINE_AA
            )

    # Convert RGB back to BGR before saving with OpenCV.
    save_path = os.path.join(save_dir, os.path.basename(imgfile))
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(save_path, img_bgr):
        # imwrite reports failure through its return value only.
        print(f"failed to write: {save_path}")
