### Please run this notebook on Google Colab with a capable GPU runtime
<a href="https://colab.research.google.com/github/Ichikawa-Satoshi/SI-Org-chart/blob/main/test_deeplearning/cross_valid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# Standard-library modules used by the cells below.
import os
import json
import random  # NOTE(review): not referenced in the cells visible here — confirm before removing
# Fold splitting for the cross-validation cell.
from sklearn.model_selection import KFold
# Colab-only: mount Google Drive at /content/drive; the dataset paths used later
# in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')
import numpy as np

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968

In [11]:
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data import build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

In [14]:
# K-fold cross validation of a COCO-pretrained Mask R-CNN (R50-FPN) on the
# org-chart dataset. For every fold the cell:
#   1. writes train/val COCO annotation files next to the source annotation file,
#   2. registers both splits with Detectron2,
#   3. fine-tunes the model on the train split,
#   4. evaluates on the val split and records the bbox AP / AP50 / AP75.
# The mean of each metric over all folds is printed at the end.

# path
path = "/content/drive/MyDrive/SI-Org-Chart/data/Org_chart/learning/train"
path_coco = "/content/drive/MyDrive/SI-Org-Chart/data/Org_chart/learning/Organization_annotation.json"

# setting for K-fold cross validation
K = 5  # num of fold
SEED = 42  # shared by the fold split and the trainer so repeated runs are comparable
MODEL_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"


def _subset_coco(coco, image_subset):
    """Return a COCO-style dict limited to ``image_subset`` and its annotations.

    Args:
        coco: Full COCO dict; its "annotations" and "categories" entries are read.
        image_subset: List of image records (each with an "id") to keep.

    Returns:
        A new dict with "images", "annotations" and "categories" keys. The
        categories list is shared with ``coco`` (not copied).
    """
    keep_ids = {img["id"] for img in image_subset}
    return {
        "images": image_subset,
        "annotations": [ann for ann in coco["annotations"] if ann["image_id"] in keep_ids],
        "categories": coco["categories"],
    }


def _write_json(obj, json_path):
    """Serialize ``obj`` to ``json_path``, overwriting any existing file."""
    # Explicit UTF-8: do not depend on the runtime's locale for file encoding.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(obj, f)


def _register_fresh(name, json_file, image_root):
    """Register a COCO dataset under ``name``, replacing any earlier registration.

    ``register_coco_instances`` refuses to register a name twice, which makes
    this cell crash when it is re-run in the same kernel. Dropping the stale
    entries first keeps the cell idempotent and picks up changed paths.
    """
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)
    register_coco_instances(name, {}, json_file, image_root)


def _build_fold_cfg(train_name, val_name, fold):
    """Build the Detectron2 config used to fine-tune and evaluate one fold.

    Args:
        train_name: Registered name of the fold's training split.
        val_name: Registered name of the fold's validation split.
        fold: Zero-based fold index; used to give the fold its own output folder.

    Returns:
        The configured ``CfgNode``.
    """
    fold_cfg = get_cfg()
    fold_cfg.merge_from_file(model_zoo.get_config_file(MODEL_CONFIG))
    fold_cfg.DATASETS.TRAIN = (train_name,)
    fold_cfg.DATASETS.TEST = (val_name,)
    fold_cfg.DATALOADER.NUM_WORKERS = 2
    fold_cfg.SOLVER.IMS_PER_BATCH = 1
    fold_cfg.SOLVER.BASE_LR = 0.0004
    fold_cfg.SOLVER.MAX_ITER = 500
    # Start every fold from the same COCO-pretrained checkpoint.
    fold_cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_CONFIG)
    fold_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    # NOTE(review): must equal the number of categories in the annotation file — confirm.
    fold_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    fold_cfg.SEED = SEED
    # One output folder per fold so checkpoints and evaluation artifacts of
    # different folds do not overwrite each other.
    fold_cfg.OUTPUT_DIR = os.path.join(fold_cfg.OUTPUT_DIR, f"fold{fold}")
    return fold_cfg


# load data
with open(path_coco, encoding="utf-8") as f:
    coco_data = json.load(f)

annotations = coco_data["annotations"]
images = coco_data["images"]

kf = KFold(n_splits=K, shuffle=True, random_state=SEED)

# Per-fold annotation files are written alongside the source annotation file.
fold_dir = os.path.dirname(path_coco)

# Cross validation
ap_scores = []
for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
    print(f"Fold {fold + 1} / {K}")

    # split data (train and validation)
    train_images = [images[i] for i in train_idx]
    val_images = [images[i] for i in val_idx]
    train_coco = _subset_coco(coco_data, train_images)
    val_coco = _subset_coco(coco_data, val_images)

    # annotation paths
    train_coco_path = os.path.join(fold_dir, f"train_fold{fold}.json")
    val_coco_path = os.path.join(fold_dir, f"val_fold{fold}.json")
    _write_json(train_coco, train_coco_path)
    _write_json(val_coco, val_coco_path)

    # Detectron2
    train_name = f"org_train_{fold}"
    val_name = f"org_val_{fold}"
    _register_fresh(train_name, train_coco_path, path)
    _register_fresh(val_name, val_coco_path, path)
    cfg = _build_fold_cfg(train_name, val_name, fold)

    # train
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)  # always start from cfg.MODEL.WEIGHTS
    trainer.train()

    # evaluation
    # Explicit keyword arguments replace the deprecated "pass cfg as tasks" form;
    # the evaluated tasks are then inferred from the model's predictions.
    evaluator = COCOEvaluator(val_name, distributed=False, output_dir=cfg.OUTPUT_DIR)
    val_loader = build_detection_test_loader(cfg, val_name)
    eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)

    # AP
    ap = eval_results["bbox"]["AP"]      # IoU 50-95: mAP
    ap50 = eval_results["bbox"]["AP50"]  # IoU 50: AP
    ap75 = eval_results["bbox"]["AP75"]  # IoU 75: AP

    print(f"Fold {fold + 1}: AP={ap:.2f}, AP50={ap50:.2f}, AP75={ap75:.2f}")
    ap_scores.append((ap, ap50, ap75))

# results
mean_ap, mean_ap50, mean_ap75 = (
    np.mean([score[metric_idx] for score in ap_scores]) for metric_idx in range(3)
)

print("\nFinal Cross-validation Results:")
print(f"Mean AP: {mean_ap:.2f}")
print(f"Mean AP50: {mean_ap50:.2f}")
print(f"Mean AP75: {mean_ap75:.2f}")


Fold 1 / 5
[03/10 04:39:03 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[03/10 04:39:03 d2.engine.train_loop]: Starting training from iteration 0
[03/10 04:39:08 d2.utils.events]:  eta: 0:01:36  iter: 19  total_loss: 6.121  loss_cls: 1.104  loss_box_reg: 0.5136  loss_mask: 0.6966  loss_rpn_cls: 3.183  loss_rpn_loc: 0.4252    time: 0.2030  last_time: 0.1853  data_time: 0.0201  last_data_time: 0.0024   lr: 1.5585e-05  max_mem: 4336M
[03/10 04:39:12 d2.utils.events]:  eta: 0:01:29  iter: 39  total_loss: 2.923  loss_cls: 0.912  loss_box_reg: 0.6584  loss_mask: 0.6433  loss_rpn_cls: 0.228  loss_rpn_loc: 0.2935    time: 0.2023  last_time: 0.1718  data_time: 0.0025  last_data_time: 0.0023   lr: 3.1569e-05  max_mem: 4336M
[03/10 04:39:16 d2.utils.events]:  eta: 0:01:26  iter: 59  total_loss: 2.38  loss_cls: 0.7298  loss_box_reg: 0.6191  loss_mask: 0.5748  loss_rpn_cls: 0.1384  loss_rpn_loc: 0.2458    time: 0.2025  last_time: 0.1865  data_time: 0.0026  last_data_time: 0.0024   lr: 4.7553e-05  max_mem: 4336M
[03/10 04:39:20 d2.utils.events]:  eta: 0:01:21  iter: 79 

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[03/10 04:41:02 d2.engine.train_loop]: Starting training from iteration 0
[03/10 04:41:07 d2.utils.events]:  eta: 0:01:42  iter: 19  total_loss: 6.174  loss_cls: 1.102  loss_box_reg: 0.5826  loss_mask: 0.697  loss_rpn_cls: 3.484  loss_rpn_loc: 0.4113    time: 0.2093  last_time: 0.2359  data_time: 0.0207  last_data_time: 0.0025   lr: 1.5585e-05  max_mem: 5817M
[03/10 04:41:11 d2.utils.events]:  eta: 0:01:34  iter: 39  total_loss: 2.766  loss_cls: 0.8916  loss_box_reg: 0.6568  loss_mask: 0.6452  loss_rpn_cls: 0.2249  loss_rpn_loc: 0.3206    time: 0.2058  last_time: 0.1838  data_time: 0.0025  last_data_time: 0.0026   lr: 3.1569e-05  max_mem: 5817M
[03/10 04:41:15 d2.utils.events]:  eta: 0:01:29  iter: 59  total_loss: 2.293  loss_cls: 0.6973  loss_box_reg: 0.6964  loss_mask: 0.5606  loss_rpn_cls: 0.1046  loss_rpn_loc: 0.2106    time: 0.2043  last_time: 0.2221  data_time: 0.0025  last_data_time: 0.0027   lr: 4.7553e-05  max_mem: 5817M
[03/10 04:41:19 d2.utils.events]:  eta: 0:01:24  iter: 7

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[03/10 04:43:01 d2.engine.train_loop]: Starting training from iteration 0
[03/10 04:43:05 d2.utils.events]:  eta: 0:01:32  iter: 19  total_loss: 6.296  loss_cls: 1.266  loss_box_reg: 0.5807  loss_mask: 0.6971  loss_rpn_cls: 3.362  loss_rpn_loc: 0.3538    time: 0.1967  last_time: 0.1485  data_time: 0.0192  last_data_time: 0.0031   lr: 1.5585e-05  max_mem: 5817M
[03/10 04:43:09 d2.utils.events]:  eta: 0:01:30  iter: 39  total_loss: 3.004  loss_cls: 0.9651  loss_box_reg: 0.7083  loss_mask: 0.6533  loss_rpn_cls: 0.3282  loss_rpn_loc: 0.3104    time: 0.1986  last_time: 0.1937  data_time: 0.0027  last_data_time: 0.0025   lr: 3.1569e-05  max_mem: 5817M
[03/10 04:43:13 d2.utils.events]:  eta: 0:01:26  iter: 59  total_loss: 2.257  loss_cls: 0.7095  loss_box_reg: 0.6477  loss_mask: 0.5792  loss_rpn_cls: 0.1232  loss_rpn_loc: 0.2163    time: 0.2009  last_time: 0.1781  data_time: 0.0027  last_data_time: 0.0024   lr: 4.7553e-05  max_mem: 5817M
[03/10 04:43:17 d2.utils.events]:  eta: 0:01:23  iter: 

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[03/10 04:44:58 d2.engine.train_loop]: Starting training from iteration 0
[03/10 04:45:03 d2.utils.events]:  eta: 0:01:33  iter: 19  total_loss: 5.609  loss_cls: 0.9219  loss_box_reg: 0.5159  loss_mask: 0.6798  loss_rpn_cls: 3.108  loss_rpn_loc: 0.4228    time: 0.1998  last_time: 0.2180  data_time: 0.0196  last_data_time: 0.0022   lr: 1.5585e-05  max_mem: 5817M
[03/10 04:45:07 d2.utils.events]:  eta: 0:01:30  iter: 39  total_loss: 2.773  loss_cls: 0.8464  loss_box_reg: 0.7041  loss_mask: 0.6425  loss_rpn_cls: 0.2341  loss_rpn_loc: 0.3187    time: 0.1984  last_time: 0.2163  data_time: 0.0027  last_data_time: 0.0027   lr: 3.1569e-05  max_mem: 5817M
[03/10 04:45:11 d2.utils.events]:  eta: 0:01:27  iter: 59  total_loss: 2.439  loss_cls: 0.6947  loss_box_reg: 0.6782  loss_mask: 0.5646  loss_rpn_cls: 0.1435  loss_rpn_loc: 0.2793    time: 0.1999  last_time: 0.2271  data_time: 0.0025  last_data_time: 0.0025   lr: 4.7553e-05  max_mem: 5817M
[03/10 04:45:15 d2.utils.events]:  eta: 0:01:24  iter:

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[03/10 04:46:59 d2.engine.train_loop]: Starting training from iteration 0
[03/10 04:47:03 d2.utils.events]:  eta: 0:01:30  iter: 19  total_loss: 6.46  loss_cls: 1.052  loss_box_reg: 0.5352  loss_mask: 0.6899  loss_rpn_cls: 3.586  loss_rpn_loc: 0.4818    time: 0.1919  last_time: 0.2394  data_time: 0.0178  last_data_time: 0.0023   lr: 1.5585e-05  max_mem: 5817M
[03/10 04:47:07 d2.utils.events]:  eta: 0:01:29  iter: 39  total_loss: 2.85  loss_cls: 0.9036  loss_box_reg: 0.7524  loss_mask: 0.6448  loss_rpn_cls: 0.2431  loss_rpn_loc: 0.3156    time: 0.1980  last_time: 0.1722  data_time: 0.0026  last_data_time: 0.0026   lr: 3.1569e-05  max_mem: 5817M
[03/10 04:47:11 d2.utils.events]:  eta: 0:01:26  iter: 59  total_loss: 2.289  loss_cls: 0.7138  loss_box_reg: 0.6204  loss_mask: 0.5659  loss_rpn_cls: 0.1166  loss_rpn_loc: 0.2409    time: 0.1988  last_time: 0.2185  data_time: 0.0025  last_data_time: 0.0023   lr: 4.7553e-05  max_mem: 5817M
[03/10 04:47:15 d2.utils.events]:  eta: 0:01:21  iter: 79