In [1]:
import tensorflow as tf
import numpy as np

## 1. Dynamic Range Quantization (동적 범위 양자화)

In [2]:
# Directory of the SavedModel to convert.
saved_model_dir = 'saved_model/epoch_01'

# Create the TFLite converter from the SavedModel.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)

# Dynamic range quantization: only the default optimization flag is set,
# with no representative dataset and no target-spec overrides.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Convert to a TFLite model.
tflite_model = converter.convert()

# Ensure the output directory exists; without this, open() raises
# FileNotFoundError when the notebook runs in a fresh checkout.
tf.io.gfile.makedirs('quantized_model')

# Save the converted model.
with open('quantized_model/yamnet_dynamic_quant.tflite', 'wb') as f:
    f.write(tflite_model)


2024-10-13 23:29:59.027526: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2024-10-13 23:29:59.027556: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2024-10-13 23:29:59.028647: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: saved_model/epoch_01
2024-10-13 23:29:59.038334: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2024-10-13 23:29:59.038345: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: saved_model/epoch_01
2024-10-13 23:29:59.062911: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2024-10-13 23:29:59.075440: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2024-10-13 23:29:59.095824: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-10-13 23:29:59.27

## 2. Full Integer Quantization (정수 양자화)

In [26]:
# Directory of the SavedModel used for the full-integer conversion.
saved_model_dir = 'saved_model/converting_test'

# Build the TFLite converter and turn on the default optimization set,
# which is the starting point for integer quantization.
converter = tf.lite.TFLiteConverter.from_saved_model(
    saved_model_dir,
)
converter.optimizations = [
    tf.lite.Optimize.DEFAULT,
]

# Representative dataset generator used to calibrate quantization ranges.
# NOTE(review): this yields synthetic uniform noise, not real samples; replace
# it with real, preprocessed inputs for meaningful calibration.
def representative_data_gen(num_samples=100, input_shape=(1, 224, 224, 3), seed=0):
    """Yield synthetic calibration samples for the TFLite converter.

    A seeded generator makes calibration (and therefore the converted model)
    reproducible across kernel restarts. Called with no arguments it yields
    the same number, shape and dtype of samples as before.

    Args:
        num_samples: Number of calibration samples to yield.
        input_shape: Shape of each sample. The default assumes the SavedModel
            takes a single (1, 224, 224, 3) input -- TODO confirm against the
            model's serving signature.
        seed: Seed for the random generator.

    Yields:
        A one-element list holding a float32 array of ``input_shape`` with
        values drawn uniformly from [0, 1).
    """
    rng = np.random.default_rng(seed)
    for _ in range(num_samples):
        yield [rng.random(input_shape, dtype=np.float32)]

# Register the representative dataset; the converter uses it to calibrate
# quantization ranges during conversion.
converter.representative_dataset = representative_data_gen

# Restrict the converter to the int8 builtin op set so the model is
# integer-only (see the TFLite post-training integer quantization guide).
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Make the model's input and output tensors uint8 instead of float32.
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

# Convert to a TFLite model.
tflite_model = converter.convert()

# Ensure the output directory exists; without this, open() raises
# FileNotFoundError when the notebook runs in a fresh checkout.
tf.io.gfile.makedirs('quantized_model')

# Save the converted model.
with open('quantized_model/yamnet_full_integer_quant.tflite', 'wb') as f:
    f.write(tflite_model)

## 3. Float16 Quantization (FP16 양자화)

In [21]:
# Directory of the SavedModel to convert.
saved_model_dir = 'saved_model/converting_test'

# Create the TFLite converter from the SavedModel.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)

# Float16 quantization: default optimizations plus float16 as the
# supported target type.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

# Convert to a TFLite model.
tflite_model = converter.convert()

# Ensure the output directory exists; without this, open() raises
# FileNotFoundError when the notebook runs in a fresh checkout.
tf.io.gfile.makedirs('quantized_model')

# Save the converted model.
with open('quantized_model/yamnet_fp16_quant.tflite', 'wb') as f:
    f.write(tflite_model)

2024-10-11 11:42:32.756577: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2024-10-11 11:42:32.756589: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2024-10-11 11:42:32.756679: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: saved_model/converting_test
2024-10-11 11:42:32.766821: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2024-10-11 11:42:32.766834: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: saved_model/converting_test
2024-10-11 11:42:32.795385: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2024-10-11 11:42:32.929755: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: saved_model/converting_test
2024-10-11 11:42:32.979947: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags { serve }; 