In [1]:

# Dataset locations (relative to the colab_frcnn-main project root).
bdd_xml = "./vehicledatasets/xml"
bdd_img = "./vehicledatasets/img"
test_path = "./vehicledatasets/test"

# Class names to train on; the position in this list is the class index.
# NOTE(review): 'Track' may be a typo for 'Truck' -- confirm against the <name>
# values used in the XML annotations, since labels not listed here are skipped.
dataset_class = [
    'Car',
    'Track',
    'Bus',
    'Ambulance',
    'Motorcycle',
]

# Colours used when drawing labels.
# Caution: the first entry stands for the background class and must be (0, 0, 0);
# the remaining entries can be chosen freely.
colors = (
    (0, 0, 0),
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (100, 100, 100),
    (50, 50, 50),
)

# Hyper-parameters.
epochs = 10
batch_size = 1
scale = 416  # image scale: the target height in pixels



In [20]:
import numpy as np
import pandas as pd

from PIL import Image
from glob import glob
import xml.etree.ElementTree as ET
import cv2

import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import TensorDataset
import os


class xml2list(object):
    """Callable that extracts bounding boxes and labels from an XML annotation.

    The annotation is expected to contain ``<object>`` elements, each with a
    ``<name>`` child and a ``<bndbox>`` child holding ``xmin``/``ymin``/
    ``xmax``/``ymax`` values.

    Args:
        classes: Sequence of class names to keep. The position of a name in
            this sequence is used as its integer label.
    """

    def __init__(self, classes):
        self.classes = classes

    def __call__(self, xml_path):
        """Parse one annotation file.

        Args:
            xml_path: Path (or open file object) of the XML annotation.

        Returns:
            A tuple ``(anno, num_objs)`` where ``anno`` is
            ``{'bboxes': [[xmin, ymin, xmax, ymax], ...], 'labels': [int, ...]}``
            restricted to the configured classes, and ``num_objs`` is the total
            number of ``<object>`` elements in the file (0 when there are none),
            including those whose class was skipped.
        """
        root = ET.parse(xml_path).getroot()

        boxes = []
        labels = []
        # Counted explicitly so that a file without objects reports 0, not 1.
        num_objs = 0

        for obj in root.iter('object'):
            num_objs += 1
            label = obj.find('name').text

            # Keep only the requested classes.
            if label not in self.classes:
                continue

            bndbox = obj.find('bndbox')
            # int(float(...)) also accepts coordinates written as "12.0".
            boxes.append([
                int(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ])
            labels.append(self.classes.index(label))

        anno = {'bboxes': boxes, 'labels': labels}

        return anno, num_objs


In [21]:
class MyDataset(torch.utils.data.Dataset):
    """Detection dataset pairing ``<id>.xml`` annotations with ``<id>.jpg`` images.

    Every image is resized so that its height equals ``scale`` (the width
    follows the aspect ratio) and the bounding boxes are rescaled to match.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        xml_paths: Directory containing the ``.xml`` annotation files.
        scale: Target image height in pixels.
        classes: Class names to keep; forwarded to ``xml2list``.
    """

    def __init__(self, image_dir, xml_paths, scale, classes):
        super().__init__()
        self.image_dir = image_dir
        self.xml_paths = xml_paths
        # Index annotation files only, so stray non-XML files in the folder
        # do not become samples.
        self.image_ids = sorted(glob('{}/*.xml'.format(xml_paths)))
        self.scale = scale
        self.classes = classes
        self._to_tensor = transforms.ToTensor()
        self._parse_xml = xml2list(classes)

    def __getitem__(self, index):
        """Return ``(image, target, image_id)`` for the sample at ``index``.

        ``image`` is the resized image converted by ``transforms.ToTensor``.
        ``target`` is a dict with ``boxes`` (float32, N x 4), ``labels``
        (int64, class index + 1 so that 0 stays free for the background),
        ``image_id``, ``area`` and ``iscrowd``. ``image_id`` is the file stem
        shared by the annotation and the image.
        """
        xml_file = self.image_ids[index]
        # splitext keeps dots inside the stem (e.g. "abc_jpg.rf.<hash>").
        image_id = os.path.splitext(os.path.basename(xml_file))[0]

        # Load the image, force three channels and resize to the target height.
        with Image.open(os.path.join(self.image_dir, image_id + ".jpg")) as raw:
            orig_w, orig_h = raw.size
            new_h = self.scale
            ratio = new_h / orig_h
            new_w = max(1, int(orig_w * ratio))
            # Grayscale / RGBA / CMYK files would otherwise yield a tensor
            # with a channel count other than 3.
            image = raw.convert("RGB").resize((new_w, new_h))

        image = self._to_tensor(image)

        annotations, _ = self._parse_xml(xml_file)  # object count is not needed here
        boxes_list = annotations['bboxes']
        labels_list = annotations['labels']

        # Explicit (N, 4) shape so that N == 0 yields an empty (0, 4) tensor.
        boxes = torch.as_tensor(boxes_list, dtype=torch.float32).reshape(len(boxes_list), 4)
        labels = torch.as_tensor(labels_list, dtype=torch.int64)

        # Rescale each axis by the factor actually applied to the image; the
        # width factor can differ slightly from `ratio` because of int() above.
        scale_x = new_w / orig_w
        scale_y = new_h / orig_h
        boxes = boxes * torch.tensor([scale_x, scale_y, scale_x, scale_y], dtype=torch.float32)

        # Drop boxes without positive width and height; torchvision detection
        # models reject such targets.
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes = boxes[keep]
        labels = labels[keep]

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # One flag per remaining object; none are marked as crowd.
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels + 1
        target["image_id"] = torch.tensor([index])
        target["area"] = area
        target["iscrowd"] = iscrowd
        return image, target, image_id

    def __len__(self):
        """Number of annotation files found in ``xml_paths``."""
        return len(self.image_ids)


In [22]:

def dataloader(data, dataset_class, batch_size, scale=720):
    """Build a shuffled DataLoader over a ``MyDataset``.

    Args:
        data: Sequence whose element 0 is the XML annotation directory and
            element 1 is the image directory.
        dataset_class: Class names forwarded to ``MyDataset``.
        batch_size: Number of samples per batch.
        scale: Target image height forwarded to ``MyDataset``.

    Returns:
        A ``torch.utils.data.DataLoader`` whose batches are tuples of
        per-field tuples (one tuple per element returned by the dataset).
    """
    annotation_dir, picture_dir = data[0], data[1]
    samples = MyDataset(picture_dir, annotation_dir, scale, dataset_class)

    # Seeds torch's global RNG -- presumably so the shuffle order is repeatable.
    torch.manual_seed(2020)

    def _transpose_batch(batch):
        # [(a1, b1, c1), (a2, b2, c2)] -> ((a1, a2), (b1, b2), (c1, c2))
        return tuple(zip(*batch))

    return torch.utils.data.DataLoader(
        samples,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=_transpose_batch,
    )


In [23]:

def model(num_classes=None):
    """Create a Faster R-CNN (ResNet-50 FPN) with a fresh box-predictor head.

    Args:
        num_classes: Number of output classes including the background class.
            Defaults to ``len(dataset_class) + 1`` using the notebook-level
            ``dataset_class`` list.

    Returns:
        The torchvision detection model, loaded with pretrained weights and
        with ``roi_heads.box_predictor`` replaced by a new ``FastRCNNPredictor``.
    """
    if num_classes is None:
        num_classes = len(dataset_class) + 1

    detection = torchvision.models.detection
    # Newer torchvision releases deprecate `pretrained=True` in favour of the
    # `weights` argument; fall back to the legacy flag when the weights enum
    # is not available.
    weights_enum = getattr(detection, "FasterRCNN_ResNet50_FPN_Weights", None)
    if weights_enum is not None:
        net = detection.fasterrcnn_resnet50_fpn(weights=weights_enum.DEFAULT)
    else:
        net = detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Swap the classification head for one sized to this dataset.
    in_features = net.roi_heads.box_predictor.cls_score.in_features
    net.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return net



In [None]:
data_ALL = [bdd_xml, bdd_img]
train_dataloader = dataloader(data_ALL, dataset_class, batch_size, scale)

# `model` names the factory function before this cell runs and the network
# afterwards. Keep a handle on the factory so that re-running this cell builds
# a fresh network instead of trying to call the already-built one.
if not isinstance(model, torch.nn.Module):
    model_factory = model
model = model_factory()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move the model first so the optimizer is built over the on-device parameters.
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = epochs

model.train()  # switch to training mode

loss_list = []  # mean loss of each epoch
for epoch in range(num_epochs):
    loss_epo = []  # per-iteration losses of the current epoch

    for i, batch in enumerate(train_dataloader):

        # Each batch holds the images, targets and image ids of one mini-batch.
        images, targets, image_ids = batch

        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model takes images and ground-truth targets and
        # returns a dict of loss tensors (both the RPN and the R-CNN losses).
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Record the loss of this iteration.
        loss_epo.append(loss_value)

        if (i+1) % 10 == 0:
            print(f"epoch #{epoch+1} Iteration #{i+1} loss: {loss_value}")

    # Record the mean loss of the epoch and checkpoint the whole model object.
    loss_list.append(np.mean(loss_epo))
    torch.save(model, './model.pth')




epoch #1 Iteration #10 loss: 0.23933260142803192
epoch #1 Iteration #20 loss: 0.9321686625480652
epoch #1 Iteration #30 loss: 0.0199703611433506
epoch #1 Iteration #40 loss: 0.3127322494983673
epoch #1 Iteration #50 loss: 0.17409281432628632
epoch #1 Iteration #60 loss: 0.19720213115215302
epoch #1 Iteration #70 loss: 0.2814926207065582
epoch #1 Iteration #80 loss: 0.22224026918411255
epoch #1 Iteration #90 loss: 0.42712846398353577
epoch #1 Iteration #100 loss: 0.3662208318710327
epoch #1 Iteration #110 loss: 0.024678116664290428
epoch #1 Iteration #120 loss: 0.5240346789360046
epoch #1 Iteration #130 loss: 0.5905418992042542
epoch #1 Iteration #140 loss: 0.21950994431972504
epoch #1 Iteration #150 loss: 0.32100754976272583
epoch #1 Iteration #160 loss: 0.5288848876953125
epoch #1 Iteration #170 loss: 0.6074554324150085
epoch #1 Iteration #180 loss: 0.04866940155625343
epoch #1 Iteration #190 loss: 0.37006229162216187
epoch #1 Iteration #200 loss: 0.04223785549402237
epoch #1 Iteratio

In [11]:
import os

# Print the full path of every file and sub-directory under the image directory.
print(f"Contents of {bdd_img}:")
for root, dirs, files in os.walk(bdd_img):
    # Files first, then directories, for each visited folder.
    for entry in [*files, *dirs]:
        print(os.path.join(root, entry))

Contents of /content/drive/My Drive/colab_frcnn-main/vehicledatasets/img:
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/011a8a0a921c2376_jpg.rf.tflPpvQDTaDj9gH1xuDJ.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/00e2d9121adc0c20_jpg.rf.rPP5CqB9mOv9wnRTTKR3.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/0133391d627e6df4_jpg.rf.xszyOWdyIGCiI2HmiGNr.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/00aaf0a0a9ee7e71_jpg.rf.808b1e59067887493dffad63561c2a9d.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/0158307c4ad94b38_jpg.rf.7c3ac0278eaf3a1773359994c4679aca.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/00e2d9121adc0c20_jpg.rf.50fa25618ccc9f0ebf59f2ffe64cf298.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/00aaf0a0a9ee7e71_jpg.rf.KBCzX2HtZviBuQY9JDTQ.jpg
/content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/013476982d77e380_jpg.rf.3abcf2c4c484148e9e88d3c8c99bc236.jp

In [10]:
# Report whether one specific image file is present in the image directory.
missing_file_path = os.path.join(bdd_img, '9c5cf4fed30054f2_jpg.jpg')
if not os.path.exists(missing_file_path):
    print(f"The file {missing_file_path} does NOT exist.")
else:
    print(f"The file {missing_file_path} exists.")

The file /content/drive/My Drive/colab_frcnn-main/vehicledatasets/img/9c5cf4fed30054f2_jpg.jpg does NOT exist.
