In [None]:
# Dependencies for the whole notebook. tflite_model_maker must already be
# installed in the kernel's environment for these imports to succeed.
# NOTE(review): numpy, pycocotools, model_spec and the tqdm alias are not
# referenced in the visible cells -- confirm whether they are still needed.
import numpy as np
import os
import pycocotools
import tqdm as notebook_tqdm
import tensorflow as tf
# Fail fast when the installed TensorFlow is not a 2.x release.
assert tf.__version__.startswith('2')

import tflite_model_maker

from tflite_model_maker.config import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import object_detector

# Restrict the TensorFlow and absl loggers to error-level messages.
tf.get_logger().setLevel('ERROR')
from absl import logging
logging.set_verbosity(logging.ERROR)

# Record the library versions in the cell output.
print(tf.__version__)
print(tflite_model_maker.__version__)


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): depends on the google.colab package, so this cell presumably
# only works when the notebook runs on Colab -- confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install tflite_model_maker

In [None]:
# Source directories handed to split_dataset(): the images and their
# annotation files, relative to the notebook's working directory.
images_in = './oimage/'
annotations_in = './xml/'

In [None]:

import os
import random
import shutil

def split_dataset(images_path, annotations_path, val_split, test_split, out_path):
    """Split a directory of images and VOC annotations into train/validation/test sets.

    The files are copied (not moved) into
    ``<out_path>/{train,validation,test}/{images,annotations}``. The file list
    is sorted and then shuffled with a fixed seed, so the same inputs always
    produce the same split.

    Args:
      images_path: Directory containing the images. Only files whose name ends
        in ``.jpg`` are used.
      annotations_path: Directory containing the VOC XML annotation files,
        named after the images (``foo.jpg`` -> ``foo.xml``). May be the same
        directory as ``images_path``.
      val_split: Fraction of the data (float between 0 and 1) reserved for
        validation.
      test_split: Fraction of the data (float between 0 and 1) reserved for
        testing.
      out_path: Directory under which the split directories are created.

    Returns:
      The paths of the split directories as ``(train_dir, val_dir, test_dir)``.

    Raises:
      FileNotFoundError: If ``images_path`` is not a directory, or if an image
        has no matching annotation file. Both are checked before any output
        directory is created or any file is copied.
    """
    if not os.path.isdir(images_path):
        raise FileNotFoundError(f'Image directory not found: {images_path}')

    image_suffix = '.jpg'
    annot_suffix = '.xml'

    # Collect the image files, filtered by file type, in a stable order.
    image_files = sorted(
        os.path.join(images_path, name)
        for name in os.listdir(images_path)
        if name.endswith(image_suffix)
    )

    # Shuffle the files deterministically (this seeds the global `random` state).
    random.seed(42)
    random.shuffle(image_files)

    # Pair every image with its annotation. Only the trailing extension is
    # swapped, so names that contain "jpg" elsewhere (e.g. "jpg_01.jpg") still
    # map to the right XML file.
    pairs = []
    missing = []
    for image_file in image_files:
        image_name = os.path.basename(image_file)
        annot_name = image_name[:-len(image_suffix)] + annot_suffix
        annot_file = os.path.join(annotations_path, annot_name)
        if not os.path.isfile(annot_file):
            missing.append(annot_file)
        pairs.append((image_file, annot_file))
    if missing:
        # Fail before copying anything so no half-written split is left behind.
        raise FileNotFoundError(
            f'{len(missing)} annotation file(s) not found, e.g. {missing[0]}')

    train_dir = os.path.join(out_path, 'train')
    val_dir = os.path.join(out_path, 'validation')
    test_dir = os.path.join(out_path, 'test')

    # Create <split>/images and <split>/annotations for every split.
    targets = {}
    for split_dir in (train_dir, val_dir, test_dir):
        images_dir = os.path.join(split_dir, 'images')
        annots_dir = os.path.join(split_dir, 'annotations')
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(annots_dir, exist_ok=True)
        targets[split_dir] = (images_dir, annots_dir)

    # Exact number of images for validation and test; the rest is for training.
    val_count = int(len(pairs) * val_split)
    test_count = int(len(pairs) * test_split)

    for i, (image_file, annot_file) in enumerate(pairs):
        if i < val_count:
            images_dir, annots_dir = targets[val_dir]
        elif i < val_count + test_count:
            images_dir, annots_dir = targets[test_dir]
        else:
            images_dir, annots_dir = targets[train_dir]
        shutil.copy(image_file, images_dir)
        shutil.copy(annot_file, annots_dir)

    return (train_dir, val_dir, test_dir)

In [None]:
# Copy the dataset into train/validation/test folders under
# 'new_split-dataset_2': 20% validation, 20% test, the remainder for training.
train_dir, val_dir, test_dir = split_dataset(
    images_in,
    annotations_in,
    val_split=0.2,
    test_split=0.2,
    out_path='new_split-dataset_2',
)

In [None]:
# Class labels passed to the data loaders as label_map.
label_map= ["M", "K", "S"]
# Image / annotation directories of each split.
# NOTE(review): these repeat the out_path given to split_dataset() instead of
# reusing train_dir / val_dir / test_dir, so the two must be kept in sync.
# The exact spelling of these strings (e.g. the trailing slash) may also
# influence auto-generated cache names -- verify before normalising them.
train_images_dir = './new_split-dataset_2/train/images/'
train_annotations_dir = './new_split-dataset_2/train/annotations'
val_images_dir = './new_split-dataset_2/validation/images/'
val_annotations_dir = './new_split-dataset_2/validation/annotations'
test_images_dir = './new_split-dataset_2/test/images/'
test_annotations_dir = './new_split-dataset_2/test/annotations'

In [None]:

# Build one data loader per split from the Pascal VOC images/annotations.
# Each split gets its own cache directory, a single shard, and an upper bound
# on the number of images.
train_data = object_detector.DataLoader.from_pascal_voc(
    train_images_dir,
    train_annotations_dir,
    label_map=label_map,
    cache_dir="./cache_data_ta/train",
    num_shards=1,
    max_num_images=3000,
)

validation_data = object_detector.DataLoader.from_pascal_voc(
    val_images_dir,
    val_annotations_dir,
    label_map=label_map,
    cache_dir="./cache_data_ta/validation",
    num_shards=1,
    max_num_images=1000,
)

test_data = object_detector.DataLoader.from_pascal_voc(
    test_images_dir,
    test_annotations_dir,
    label_map=label_map,
    cache_dir="./cache_data_ta/test",
    num_shards=1,
    max_num_images=1000,
)



In [None]:
# Load the three splits from cache files. This rebinds train_data,
# validation_data and test_data, replacing whatever an earlier cell assigned.
# NOTE(review): the paths sit under the cache_dir values used with
# from_pascal_voc, and the hash-like name is hard-coded -- presumably it was
# produced by a previous from_pascal_voc run; verify it still exists and
# matches the current dataset before relying on this cell.
train_data = object_detector.DataLoader.from_cache('./cache_data_ta/train/4bcdd345de559c097bc3f19ecc95ac13')
validation_data = object_detector.DataLoader.from_cache('./cache_data_ta/validation/4bcdd345de559c097bc3f19ecc95ac13')
test_data = object_detector.DataLoader.from_cache('./cache_data_ta/test/4bcdd345de559c097bc3f19ecc95ac13')




In [None]:
# Report the size (len) of each split as a quick sanity check.
for split_name, split_data in (
    ('train', train_data),
    ('validation', validation_data),
    ('test', test_data),
):
    print(f'{split_name} count: {len(split_data)}')



## Select the model spec

Model Maker supports the EfficientDet-Lite family of object detection models that are compatible with the Edge TPU. (EfficientDet-Lite is derived from [EfficientDet](https://ai.googleblog.com/2020/04/efficientdet-towards-scalable-and.html), which offers state-of-the-art accuracy in a small model size). There are several model sizes you can choose from:

| Model architecture | Size (MB)\* | Latency (ms)\*\* | Average Precision\*\*\* |
|--------------------|-------------|------------------|-------------------------|
| EfficientDet-Lite0 | 5.7         | 37.4             | 30.4%                   |
| EfficientDet-Lite1 | 7.6         | 56.3             | 34.3%                   |
| EfficientDet-Lite2 | 10.2        | 104.6            | 36.0%                   |
| EfficientDet-Lite3 | 14.4        | 107.6            | 39.4%                   |

<i>\* File size of the compiled Edge TPU models. <br/>\*\* Latency measured on a desktop CPU with a Coral USB Accelerator. <br/>\*\*\* Average Precision is the mAP (mean Average Precision) on the COCO 2017 validation dataset.</i>

Beware that the Lite2 and Lite3 models do not fit onto the Edge TPU's onboard memory, so you'll see even greater latency when using those, due to the cost of fetching data from the host system memory. Maybe this extra latency is okay for your application, but if it's not and you require the precision of the larger models, then you can [pipeline the model across multiple Edge TPUs](https://coral.ai/docs/edgetpu/pipeline/) (more about this when we compile the model below).

For this tutorial, we'll use Lite0:

In [None]:
  # Model spec for EfficientDet-Lite0, created with the constructor's defaults.
  spec = object_detector.EfficientDetLite0Spec()

The [`EfficientDetLite0Spec`](https://www.tensorflow.org/lite/api_docs/python/tflite_model_maker/object_detector/EfficientDetLite0Spec) constructor also supports several arguments that specify training options, such as the max number of detections (default is 25 for the TF Lite model) and whether to use Cloud TPUs for training. You can use the constructor to specify the number of training epochs and the batch size as well, or you can specify those in the next step.

## Create and train the model

Now we need to create our model according to the model spec, load our dataset into the model, specify training parameters, and begin training. 

Using Model Maker, we accomplish all of that with [`create()`](https://www.tensorflow.org/lite/api_docs/python/tflite_model_maker/object_detector/create):

In [None]:
# Create the detector from the spec and train it on the training split,
# passing the validation split as validation_data. train_whole_model=True is
# passed so that training is not limited to part of the model.
model = object_detector.create(
    train_data=train_data,
    model_spec=spec,
    validation_data=validation_data,
    epochs=200,
    batch_size=16,
    train_whole_model=True,
    do_train=True,
)

In [None]:
#model.model.save_weights('./weights/2000.tf')

## Evaluate the model

In [None]:
# Evaluate the model on the test split; as the last expression of the cell,
# the returned value is shown as the cell output.
model.evaluate(test_data)

## Export to TensorFlow Lite

In [None]:
# File / directory names passed to model.export() for each exported artifact.
TFLITE_FILENAME = 'efficientdet.tflite'
LABELS_FILENAME = 'labels.txt'
SAVED_FILENAME = 'model'

In [None]:
# Export the model to ./export in three formats: the TFLite model, the label
# file, and a SavedModel, using the file names defined above.
model.export(
    export_dir='./export',
    tflite_filename=TFLITE_FILENAME,
    label_filename=LABELS_FILENAME,
    saved_model_filename=SAVED_FILENAME,
    export_format=[
        ExportFormat.TFLITE,
        ExportFormat.LABEL,
        ExportFormat.SAVED_MODEL,
    ],
)