# **Setup Kaggle (upload kaggle.json)**



In [2]:
from google.colab import files
import os

print("")
uploaded = files.upload()  # pilih kaggle.json

# Pastikan kaggle.json ada
if "kaggle.json" not in uploaded:
    raise ValueError("File kaggle.json tidak ditemukan. Ulangi upload.")

# Simpan ke ~/.kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

print("✅ Kaggle API siap dipakai.")





Saving kaggle.json to kaggle.json
✅ Kaggle API siap dipakai.


# **(Opsional) Mount Google Drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

print("")


Mounted at /content/drive



# **Download & Ekstrak Dataset Kaggle**

In [4]:
DATASET_SLUG = "kritikseth/fruit-and-vegetable-image-recognition"

!mkdir -p /content/data
!kaggle datasets download -d {DATASET_SLUG} -p /content/data

import zipfile

zip_files = [f for f in os.listdir('/content/data') if f.endswith('.zip')]
print("File zip yang ditemukan:", zip_files)

os.makedirs('/content/dataset', exist_ok=True)

for z in zip_files:
    zip_path = os.path.join('/content/data', z)
    print("➡️ Ekstrak:", zip_path)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('/content/dataset')

print("✅ Ekstrak selesai. Struktur folder:")
!ls -R /content/dataset

Dataset URL: https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition
License(s): CC0-1.0
Downloading fruit-and-vegetable-image-recognition.zip to /content/data
 99% 1.97G/1.98G [00:13<00:00, 242MB/s]
100% 1.98G/1.98G [00:13<00:00, 162MB/s]
File zip yang ditemukan: ['fruit-and-vegetable-image-recognition.zip']
➡️ Ekstrak: /content/data/fruit-and-vegetable-image-recognition.zip
✅ Ekstrak selesai. Struktur folder:
/content/dataset:
test  train  validation

/content/dataset/test:
 apple	        carrot		 garlic     lettuce   peas	    spinach
 banana         cauliflower	 ginger     mango     pineapple     sweetcorn
 beetroot      'chilli pepper'	 grapes     onion     pomegranate   sweetpotato
'bell pepper'   corn		 jalepeno   orange    potato	    tomato
 cabbage        cucumber	 kiwi	    paprika   raddish	    turnip
 capsicum       eggplant	 lemon	    pear     'soy beans'    watermelon

/content/dataset/test/apple:
Image_10.jpg  Image_2.jpg  Image_4.jpg	Image_6.JPG  

# **Import Library Utama**

In [5]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

print("✅ TensorFlow version:", tf.__version__)


✅ TensorFlow version: 2.19.0


#**Siapkan Data Generator (Train / Val / Test)**

In [6]:
BASE_DIR = "/content/dataset"

train_dir = os.path.join(BASE_DIR, "train")
val_dir   = os.path.join(BASE_DIR, "validation")
test_dir  = os.path.join(BASE_DIR, "test")

print("Train dir :", train_dir)
print("Val dir   :", val_dir)
print("Test dir  :", test_dir)

# Cek apakah folder ada
for d in [train_dir, val_dir, test_dir]:
    if not os.path.isdir(d):
        raise FileNotFoundError(f"Folder tidak ditemukan: {d}")

IMG_SIZE = (128, 128)   # pastikan nanti sama dengan di app Streamlit kamu
BATCH_SIZE = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

val_datagen = ImageDataGenerator(
    rescale=1./255
)

test_datagen = ImageDataGenerator(
    rescale=1./255
)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_gen = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False  # penting untuk evaluasi
)

NUM_CLASSES = len(train_gen.class_indices)
print("Jumlah kelas:", NUM_CLASSES)
print("Mapping kelas (nama -> index):", train_gen.class_indices)

Train dir : /content/dataset/train
Val dir   : /content/dataset/validation
Test dir  : /content/dataset/test
Found 3115 images belonging to 36 classes.
Found 351 images belonging to 36 classes.
Found 359 images belonging to 36 classes.
Jumlah kelas: 36
Mapping kelas (nama -> index): {'apple': 0, 'banana': 1, 'beetroot': 2, 'bell pepper': 3, 'cabbage': 4, 'capsicum': 5, 'carrot': 6, 'cauliflower': 7, 'chilli pepper': 8, 'corn': 9, 'cucumber': 10, 'eggplant': 11, 'garlic': 12, 'ginger': 13, 'grapes': 14, 'jalepeno': 15, 'kiwi': 16, 'lemon': 17, 'lettuce': 18, 'mango': 19, 'onion': 20, 'orange': 21, 'paprika': 22, 'pear': 23, 'peas': 24, 'pineapple': 25, 'pomegranate': 26, 'potato': 27, 'raddish': 28, 'soy beans': 29, 'spinach': 30, 'sweetcorn': 31, 'sweetpotato': 32, 'tomato': 33, 'turnip': 34, 'watermelon': 35}


#**Bangun Model CNN**

In [8]:
model = models.Sequential([
    layers.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),

    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(256, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

#**Training Model + Simpan Best Model**

In [None]:
BEST_MODEL_PATH = "/content/cnn_model.h5"
print("📁 Best model path:", BEST_MODEL_PATH)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

checkpoint = ModelCheckpoint(
    BEST_MODEL_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

EPOCHS = 30

history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=[early_stop, checkpoint]
)

print("✅ Training selesai.")
print("📌 Model terbaik disimpan di:", BEST_MODEL_PATH)

📁 Best model path: /content/cnn_model.h5


  self._warn_if_super_not_called()


Epoch 1/30
[1m 1/98[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:26[0m 8s/step - accuracy: 0.0312 - loss: 3.5831



[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 901ms/step - accuracy: 0.0345 - loss: 3.5502
Epoch 1: val_accuracy improved from -inf to 0.05413, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - accuracy: 0.0347 - loss: 3.5494 - val_accuracy: 0.0541 - val_loss: 3.1793
Epoch 2/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 843ms/step - accuracy: 0.0802 - loss: 3.2217
Epoch 2: val_accuracy improved from 0.05413 to 0.14530, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 955ms/step - accuracy: 0.0804 - loss: 3.2208 - val_accuracy: 0.1453 - val_loss: 2.8496
Epoch 3/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 851ms/step - accuracy: 0.1608 - loss: 2.8378
Epoch 3: val_accuracy improved from 0.14530 to 0.34473, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 962ms/step - accuracy: 0.1609 - loss: 2.8377 - val_accuracy: 0.3447 - val_loss: 2.2875
Epoch 4/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 858ms/step - accuracy: 0.1928 - loss: 2.6560
Epoch 4: val_accuracy improved from 0.34473 to 0.35043, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 971ms/step - accuracy: 0.1929 - loss: 2.6554 - val_accuracy: 0.3504 - val_loss: 1.9576
Epoch 5/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 889ms/step - accuracy: 0.2410 - loss: 2.4545
Epoch 5: val_accuracy improved from 0.35043 to 0.36467, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 992ms/step - accuracy: 0.2410 - loss: 2.4543 - val_accuracy: 0.3647 - val_loss: 1.9623
Epoch 6/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 886ms/step - accuracy: 0.2604 - loss: 2.3861
Epoch 6: val_accuracy improved from 0.36467 to 0.48718, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 1s/step - accuracy: 0.2604 - loss: 2.3859 - val_accuracy: 0.4872 - val_loss: 1.6447
Epoch 7/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 877ms/step - accuracy: 0.3080 - loss: 2.2436
Epoch 7: val_accuracy improved from 0.48718 to 0.50712, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 1s/step - accuracy: 0.3079 - loss: 2.2438 - val_accuracy: 0.5071 - val_loss: 1.6851
Epoch 8/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 886ms/step - accuracy: 0.2951 - loss: 2.2369
Epoch 8: val_accuracy did not improve from 0.50712
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 994ms/step - accuracy: 0.2953 - loss: 2.2366 - val_accuracy: 0.4957 - val_loss: 1.6454
Epoch 9/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870ms/step - accuracy: 0.3215 - loss: 2.1633
Epoch 9: val_accuracy improved from 0.50712 to 0.54131, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 989ms/step - accuracy: 0.3216 - loss: 2.1632 - val_accuracy: 0.5413 - val_loss: 1.4921
Epoch 10/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870ms/step - accuracy: 0.3406 - loss: 2.0999
Epoch 10: val_accuracy improved from 0.54131 to 0.57265, saving model to /content/cnn_model.h5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 984ms/step - accuracy: 0.3406 - loss: 2.1000 - val_accuracy: 0.5726 - val_loss: 1.4322
Epoch 11/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 886ms/step - accuracy: 0.3753 - loss: 2.0571
Epoch 11: val_accuracy did not improve from 0.57265
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 998ms/step - accuracy: 0.3752 - loss: 2.0571 - val_accuracy: 0.5499 - val_loss: 1.3661
Epoch 12/30
[1m83/98[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m13s[0m 897ms/step - accuracy: 0.3626 - loss: 2.0330

#**Plot Akurasi & Loss**

In [None]:
acc = history.history.get('accuracy', [])
val_acc = history.history.get('val_accuracy', [])
loss = history.history.get('loss', [])
val_loss = history.history.get('val_loss', [])

epochs_range = range(1, len(acc) + 1)

plt.figure(figsize=(12,5))

plt.subplot(1,2,1)
plt.plot(epochs_range, acc, label='Train Acc')
plt.plot(epochs_range, val_acc, label='Val Acc')
plt.title('Akurasi')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1,2,2)
plt.plot(epochs_range, loss, label='Train Loss')
plt.plot(epochs_range, val_loss, label='Val Loss')
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

#**Evaluasi di Test Set**

In [None]:
test_loss, test_acc = model.evaluate(test_gen)
print(f"✅ Test Loss : {test_loss:.4f}")
print(f"✅ Test Acc  : {test_acc:.4f}")

**Kalau mau, bisa juga load ulang best model dari cnn_model.h5 dan evaluasi:**

In [None]:
# (opsional) pakai best model dari file
best_model = tf.keras.models.load_model(BEST_MODEL_PATH)
best_test_loss, best_test_acc = best_model.evaluate(test_gen)
print(f"⭐ BEST MODEL - Test Loss: {best_test_loss:.4f}")
print(f"⭐ BEST MODEL - Test Acc : {best_test_acc:.4f}")

#**Simpan classes.json + Download Model & Classes**

In [None]:
import json
from google.colab import files

# mapping index -> nama kelas
class_indices = train_gen.class_indices           # nama -> index
idx_to_class = {v: k for k, v in class_indices.items()}  # index -> nama

print("Mapping index -> kelas:")
print(idx_to_class)

# Simpan ke JSON
CLASSES_PATH = "/content/classes.json"
with open(CLASSES_PATH, "w") as f:
    json.dump(idx_to_class, f)

print("✅ classes.json disimpan di:", CLASSES_PATH)

# Download ke laptop (untuk di-upload ke GitHub / Streamlit project)
print("⬇️ Download cnn_model.h5 dan classes.json ke komputer kamu...")
files.download(BEST_MODEL_PATH)   # cnn_model.h5
files.download(CLASSES_PATH)     # classes.json