### Please run this notebook on Google Colab with a capable GPU runtime
<a href="https://colab.research.google.com/github/wakachii/SI-Org-chart/blob/main/test_deeplearning/crosss_valid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json
import random
from sklearn.model_selection import KFold
import numpy as np

In [None]:
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

In [None]:
# Input locations on the mounted Google Drive.
#   path      - directory handed to Detectron2 as the image root when the
#               per-fold datasets are registered.
#   path_coco - COCO-format annotation file covering the whole dataset.
path = "/content/drive/MyDrive/SI-Org-Chart/data/Org_chart/learning"
path_coco = "/content/drive/MyDrive/SI-Org-Chart/data/Org_chart/Organization_annotation.json"

# Load the full annotation file. The encoding is given explicitly so that
# decoding does not depend on the locale of the runtime.
with open(path_coco, encoding="utf-8") as f:
    coco_data = json.load(f)

annotations = coco_data["annotations"]
images = coco_data["images"]

# Settings for K-fold cross validation. The split is performed over the image
# list; shuffling uses a fixed seed so the folds are reproducible across runs.
K = 5  # number of folds
kf = KFold(n_splits=K, shuffle=True, random_state=42)

# Cross validation: train one Mask R-CNN per fold and evaluate it on the
# held-out images, collecting the bounding-box AP metrics of every fold.
from detectron2.data import DatasetCatalog, MetadataCatalog

# Model-zoo entry used for both the base config and the pretrained weights.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

# Per-fold annotation files are written next to the source annotation file.
fold_json_dir = os.path.dirname(path_coco)


def _register_fold_dataset(name, json_file, image_root):
    """Register a COCO dataset under `name`, replacing any earlier registration.

    Detectron2 refuses to register the same dataset name twice, which makes
    this cell fail when it is re-run in the same session. Dropping the stale
    catalog entries first keeps the cell re-runnable.
    """
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)
    register_coco_instances(name, {}, json_file, image_root)


ap_scores = []  # one (AP, AP50, AP75) tuple per fold
for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
    print(f"Fold {fold + 1} / {K}")

    # Split the images into training and validation subsets.
    train_images = [images[i] for i in train_idx]
    val_images = [images[i] for i in val_idx]

    # Keep only the annotations that belong to the images of each subset.
    train_ids = {img["id"] for img in train_images}
    train_annotations = [ann for ann in annotations if ann["image_id"] in train_ids]

    val_ids = {img["id"] for img in val_images}
    val_annotations = [ann for ann in annotations if ann["image_id"] in val_ids]

    # Both subsets share the full category list of the source file.
    train_coco = {"images": train_images, "annotations": train_annotations, "categories": coco_data["categories"]}
    val_coco = {"images": val_images, "annotations": val_annotations, "categories": coco_data["categories"]}

    # Write the per-fold annotation files so Detectron2 can load them by path.
    train_coco_path = os.path.join(fold_json_dir, f"train_fold{fold}.json")
    val_coco_path = os.path.join(fold_json_dir, f"val_fold{fold}.json")

    with open(train_coco_path, "w") as f:
        json.dump(train_coco, f)
    with open(val_coco_path, "w") as f:
        json.dump(val_coco, f)

    # Register the fold's datasets with Detectron2.
    train_name = f"org_chart_train_{fold}"
    val_name = f"org_chart_val_{fold}"
    _register_fold_dataset(train_name, train_coco_path, path)
    _register_fold_dataset(val_name, val_coco_path, path)

    # Build a fresh config for this fold, starting from the model-zoo defaults.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))
    cfg.DATASETS.TRAIN = (train_name,)
    cfg.DATASETS.TEST = (val_name,)
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.0004
    cfg.SOLVER.MAX_ITER = 500
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    # NOTE(review): presumably matches len(coco_data["categories"]) - confirm.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    # Give every fold its own output directory so checkpoints, logs and
    # evaluation dumps of one fold are not overwritten by the next.
    cfg.OUTPUT_DIR = os.path.join(cfg.OUTPUT_DIR, f"fold{fold}")

    # Train the model from the pretrained weights (no resume from checkpoint).
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluate the trained model on the held-out fold.
    evaluator = COCOEvaluator(val_name, cfg, False, output_dir=cfg.OUTPUT_DIR)
    val_loader = build_detection_test_loader(cfg, val_name)
    eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)

    # Record the bounding-box AP scores.
    ap = eval_results["bbox"]["AP"]  # mAP averaged over IoU 0.50-0.95
    ap50 = eval_results["bbox"]["AP50"]  # AP at IoU 0.50
    ap75 = eval_results["bbox"]["AP75"]  # AP at IoU 0.75

    print(f"Fold {fold + 1}: AP={ap:.2f}, AP50={ap50:.2f}, AP75={ap75:.2f}")
    ap_scores.append((ap, ap50, ap75))

# Aggregate the cross-validation results: every entry of ap_scores is an
# (AP, AP50, AP75) tuple, so average each position across the folds.
mean_ap, mean_ap50, mean_ap75 = (
    np.mean([fold_scores[column] for fold_scores in ap_scores])
    for column in range(3)
)

print("\nFinal Cross-validation Results:")
print(f"Mean AP: {mean_ap:.2f}")
print(f"Mean AP50: {mean_ap50:.2f}")
print(f"Mean AP75: {mean_ap75:.2f}")
