### Please run this notebook on Google Colab with a GPU runtime
<a href="https://colab.research.google.com/github/wakachii/SI-Org-chart/blob/main/pipeline/deeplearning_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
# The install runs only when detectron2 is missing, so "Restart & Run All" works on a
# fresh Colab runtime without paying the build cost again on every re-run.
import importlib
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("detectron2") is None:
    # Use the kernel's own interpreter so the package lands in the running environment.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install",
         "git+https://github.com/facebookresearch/detectron2.git"]
    )
    # Make the freshly installed package visible to the import system of this process.
    importlib.invalidate_caches()

# Alternative: install pre-built detectron2 that matches the pytorch version, if released.
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
#!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/{CUDA_VERSION}/{TORCH_VERSION}/index.html

# exit(0)  # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime

In [36]:
import cv2 as cv2
import json
import os
from google.colab import drive
drive.mount('/content/drive')

# import some common detectron2 utilities
import detectron2
from tqdm import tqdm
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import Visualizer, ColorMode

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [37]:
# ---- Paths (Google Drive must already be mounted at /content/drive) ----
path = "/content/drive/MyDrive/scan_org_charts/learning"
path_train = path + "/data/train"        # image root passed to register_coco_instances
path_coco = path + "/Org_chart-1.json"   # annotation JSON passed to register_coco_instances
path_data = "/content/drive/MyDrive/scan_org_charts/cropped"  # images used for inference later

# ---- Register the training set ----
# Registering a name that already exists raises an error, so guard the call to keep
# this cell re-runnable without restarting the kernel.
if "org_chart_train" not in DatasetCatalog.list():
    register_coco_instances("org_chart_train", {}, path_coco, path_train)

# ---- Model / solver configuration ----
# Start from the Mask R-CNN R50-FPN 3x baseline and fine-tune from its COCO checkpoint.
model_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()  # initialize with detectron2 defaults
cfg.merge_from_file(model_zoo.get_config_file(model_yaml))
cfg.DATASETS.TRAIN = ("org_chart_train",)
cfg.DATASETS.TEST = ()  # no evaluation set during training
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH = 1
cfg.SOLVER.BASE_LR = 0.0004
cfg.SOLVER.MAX_ITER = 500
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_yaml)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # a single foreground class

# ---- Train ----
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)  # checkpoints and logs are written here
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # always start from cfg.MODEL.WEIGHTS, never from a previous run
trainer.train()

[01/10 13:28:18 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[01/10 13:28:18 d2.engine.train_loop]: Starting training from iteration 0
[01/10 13:28:23 d2.utils.events]:  eta: 0:01:49  iter: 19  total_loss: 5.198  loss_cls: 0.719  loss_box_reg: 0.7316  loss_mask: 0.6913  loss_rpn_cls: 2.829  loss_rpn_loc: 0.3234    time: 0.2256  last_time: 0.2348  data_time: 0.0195  last_data_time: 0.0029   lr: 1.5585e-05  max_mem: 3673M
[01/10 13:28:28 d2.utils.events]:  eta: 0:01:47  iter: 39  total_loss: 2.444  loss_cls: 0.6446  loss_box_reg: 0.7936  loss_mask: 0.6362  loss_rpn_cls: 0.06763  loss_rpn_loc: 0.1968    time: 0.2390  last_time: 0.2025  data_time: 0.0067  last_data_time: 0.0056   lr: 3.1569e-05  max_mem: 3673M
[01/10 13:28:34 d2.utils.events]:  eta: 0:01:47  iter: 59  total_loss: 2.114  loss_cls: 0.5368  loss_box_reg: 0.8027  loss_mask: 0.5177  loss_rpn_cls: 0.03972  loss_rpn_loc: 0.2012    time: 0.2521  last_time: 0.2422  data_time: 0.0112  last_data_time: 0.0029   lr: 4.7553e-05  max_mem: 3673M
[01/10 13:28:38 d2.utils.events]:  eta: 0:01:39  iter

In [40]:
# Build the metadata records (one dict per image) for the inference data
def get_test_dicts(img_dir):
    """Return a list of per-image metadata dicts for the images in ``img_dir``.

    Only files whose names end in ``.jpg`` or ``.png`` are considered. Files are
    processed in sorted order so that ``image_id`` assignment is deterministic
    across runs (``os.listdir`` order is arbitrary).

    Args:
        img_dir: Directory containing the images.

    Returns:
        A list of dicts with the keys ``file_name`` (path joined with ``img_dir``),
        ``image_id`` (0-based running index), ``height`` and ``width`` (pixels).
        Files that ``cv2.imread`` cannot decode are skipped with a printed warning.
    """
    img_files = sorted(
        os.path.join(img_dir, f)
        for f in os.listdir(img_dir)
        if f.endswith((".jpg", ".png"))
    )
    dataset_dicts = []
    for img_file in img_files:
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals a decode failure by returning None instead of raising.
            print(f"[get_test_dicts] skipping unreadable image: {img_file}")
            continue
        height, width = img.shape[:2]
        dataset_dicts.append({
            "file_name": img_file,
            "image_id": len(dataset_dicts),  # stays contiguous even when files are skipped
            "height": height,
            "width": width,
        })
    return dataset_dicts

# Register the inference images in the dataset format detectron2 expects.
# Without this registration DatasetCatalog.get("org_chart_data") below fails on a fresh
# kernel. The guard keeps the cell re-runnable, since registering an existing name raises.
if "org_chart_data" not in DatasetCatalog.list():
    DatasetCatalog.register("org_chart_data", lambda: get_test_dicts(path_data))
MetadataCatalog.get("org_chart_data").set(thing_classes=["department"])

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # load trained weights
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6  # minimum score for a detection to be kept
cfg.DATASETS.TEST = ("org_chart_data", )  # set the test data to the model

# Build the predictor and materialize the list of images to run it on
predictor = DefaultPredictor(cfg)
metadata = MetadataCatalog.get("org_chart_data")
dataset_dicts = DatasetCatalog.get("org_chart_data")

[01/10 13:31:18 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


In [41]:
# Run the predictor on every registered image and save one JSON file per image.
# NOTE: the JSON name is derived from the image's base name without extension, so two
# images that differ only by extension (a.jpg / a.png) would write to the same file.
output_path = "/content/drive/MyDrive/scan_org_charts/learning/output"
os.makedirs(output_path, exist_ok=True)  # make sure the target folder exists before writing

for d in tqdm(dataset_dicts):
    # Load the image belonging to THIS record; the predictor must not reuse a stale `img`.
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread returns None when the file cannot be decoded.
        print(f"[inference] skipping unreadable image: {d['file_name']}")
        continue

    outputs = predictor(img)
    instances = outputs["instances"].to("cpu")  # move results off the GPU once
    json_output = {
        "file_name": d["file_name"],
        "pred_boxes": instances.pred_boxes.tensor.tolist(),
        "scores": instances.scores.tolist(),
        "pred_classes": instances.pred_classes.tolist(),
    }

    # save JSON
    base_name = os.path.basename(d["file_name"])
    json_name = os.path.splitext(base_name)[0] + ".json"
    json_path = os.path.join(output_path, json_name)
    with open(json_path, "w") as f:
        json.dump(json_output, f)

100%|██████████| 101/101 [00:28<00:00,  3.51it/s]
