In [None]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import shutil
# Copy the model archive from Google Drive into the Colab working directory.
# 'sumber' (source) is the archive on Drive; 'tujuan' (destination) is the
# directory it is copied into. shutil.copy takes (source, destination).
sumber = '/content/drive/MyDrive/drive-download-20250607T161536Z-1-001.zip'
tujuan = '/content/'
shutil.copy(sumber, tujuan)

'/content/drive-download-20250607T161536Z-1-001.zip'

In [None]:
!unzip /content/drive-download-20250607T161536Z-1-001.zip

Archive:  /content/drive-download-20250607T161536Z-1-001.zip
  inflating: caption_tokenizer.pkl   
  inflating: symptom_tokenizer.pkl   
  inflating: encoder.h5              
  inflating: decoder.h5              


In [None]:
!pip install -q segmentation-models-pytorch albumentations gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.8/154.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m87.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import gradio as gr
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
from PIL import Image
import cv2
import torch
import torchvision.transforms as transforms
import segmentation_models_pytorch as smp

# Load Keras models and tokenizers.
# NOTE(review): inceptionv3_best_val_loss.h5 and unet_model_baru.pth are not
# among the four files extracted by the unzip cell above; they presumably have
# to be placed in /content/ separately before this cell runs — confirm.
inception_model = load_model('/content/inceptionv3_best_val_loss.h5')
encoder_model = load_model('/content/encoder.h5')
decoder_model = load_model('/content/decoder.h5')

# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# tokenizer files that come from a trusted source.
with open('/content/symptom_tokenizer.pkl', 'rb') as f:
    symptom_tokenizer = pickle.load(f)
with open('/content/caption_tokenizer.pkl', 'rb') as f:
    caption_tokenizer = pickle.load(f)

# Symptom names; predict_symptoms indexes this list by classifier output position.
symptom_labels = ['sehat', 'berkerak', 'cekung', 'bintik', 'hitam', 'cokelat']
# Upper bound on the number of words generate_caption emits.
max_caption_len = 20

# Load U-Net model (PyTorch)
device = torch.device("cpu")

# encoder_weights=None: the checkpoint loaded below overwrites every weight of
# the network (load_state_dict is strict by default), so downloading the
# ImageNet encoder weights first would only cost time and bandwidth.
unet_model = smp.Unet(
    encoder_name="vgg16_bn",
    encoder_weights=None,
    in_channels=3,
    classes=3,
)
unet_model.load_state_dict(torch.load('/content/unet_model_baru.pth', map_location=device))
unet_model.to(device)
# Inference mode: fixes batch-norm statistics and disables dropout.
unet_model.eval()

# Image preprocessing for InceptionV3
def preprocess_image(img):
    """Turn a PIL image into a single-item batch for the InceptionV3 classifier.

    The image is forced to 3-channel RGB first, so that grayscale, palette or
    RGBA inputs do not yield an array with the wrong number of channels. It is
    then resized to 299x299 and scaled with InceptionV3's ``preprocess_input``.

    Args:
        img: A PIL image.

    Returns:
        A NumPy array with a leading batch axis of size 1.
    """
    rgb_img = img.convert("RGB").resize((299, 299))
    img_array = tf.keras.preprocessing.image.img_to_array(rgb_img)
    img_array = tf.keras.applications.inception_v3.preprocess_input(img_array)
    return np.expand_dims(img_array, axis=0)

# PyTorch image transform for U-Net
def preprocess_for_unet(img):
    """Convert a PIL image into a one-item tensor batch on ``device`` for the U-Net.

    Args:
        img: A PIL image.

    Returns:
        A tensor with a leading batch axis of size 1, moved to ``device``.
    """
    to_unet_input = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),  # scales to [0, 1] and reorders to channels-first
    ])
    image_tensor = to_unet_input(img)
    # Indexing with None inserts the batch axis in front.
    batch = image_tensor[None]
    return batch.to(device)

# Run U-Net segmentation
def segment_image(img):
    """Outline the U-Net's class-1 region on a 256x256 copy of the image.

    Args:
        img: A PIL image.

    Returns:
        A 256x256 RGB PIL image on which the external contours of every region
        predicted as class 1 are drawn in green, one pixel thick.
    """
    # Resize and convert to RGB for display
    original_img = img.resize((256, 256)).convert("RGB")
    img_tensor = preprocess_for_unet(original_img)

    with torch.no_grad():
        output = unet_model(img_tensor)
    # Remove only the leading batch axis (a bare squeeze() would also collapse
    # any other size-1 axis), then take the highest-scoring class per pixel.
    mask = torch.argmax(output.squeeze(0), dim=0).cpu().numpy()

    # Binary mask: 1 where the predicted class is 1, 0 elsewhere.
    # NOTE(review): what class 1 represents is not visible here — confirm.
    class1_mask = (mask == 1).astype(np.uint8)

    # Find contours (external boundaries only)
    contours, _ = cv2.findContours(class1_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # np.array() gives a writable copy of the RGB pixels. Pure green is
    # (0, 255, 0) in both RGB and BGR channel order, so the contours can be
    # drawn directly on the RGB array without converting to BGR and back.
    overlay = np.array(original_img)
    cv2.drawContours(overlay, contours, -1, (0, 255, 0), thickness=1)

    return Image.fromarray(overlay)



# Predict symptoms
def predict_symptoms(img_array, threshold=0.5):
    """Return the symptom labels whose classifier score exceeds ``threshold``.

    Args:
        img_array: Image batch for ``inception_model``; only the prediction for
            the first item in the batch is used.
        threshold: Score a label must strictly exceed to be selected.
            Defaults to 0.5.

    Returns:
        A list of strings taken from ``symptom_labels`` (possibly empty).
    """
    # verbose=0 stops Keras from printing a progress bar on every request.
    probs = inception_model.predict(img_array, verbose=0)[0]
    return [label for label, p in zip(symptom_labels, probs) if p > threshold]

# Generate caption from symptoms
def generate_caption(symptoms):
    """Greedily decode a caption from a list of symptom labels.

    The labels are joined into one string, tokenised, padded to length 5 and
    passed through ``encoder_model``. ``decoder_model`` is then run one token
    at a time, starting from ``startseq`` and always taking the highest-scoring
    next token, until it emits ``endseq`` or ``max_caption_len`` decoding steps
    have been made.

    Args:
        symptoms: List of symptom label strings.

    Returns:
        The decoded words joined by single spaces (may be an empty string).

    Raises:
        KeyError: If ``startseq`` is missing from the caption tokenizer vocabulary.
    """
    sequence = symptom_tokenizer.texts_to_sequences([' '.join(symptoms)])
    sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=5, padding='post')

    # verbose=0 keeps Keras from printing one progress bar per predict call.
    states = encoder_model.predict(sequence, verbose=0)

    target_seq = np.array([[caption_tokenizer.word_index['startseq']]])
    decoded_sentence = []

    # A bounded loop caps the caption length without running the decoder one
    # extra time only to throw its output away.
    for _ in range(max_caption_len):
        output_tokens, h, c = decoder_model.predict([target_seq] + states, verbose=0)

        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = caption_tokenizer.index_word.get(sampled_token_index, '')

        if sampled_word == 'endseq':
            break
        # Indices missing from the vocabulary map to ''; skip them so the
        # caption does not contain empty words (doubled spaces).
        if sampled_word:
            decoded_sentence.append(sampled_word)

        # Feed the chosen token and the updated states into the next step.
        target_seq = np.array([[sampled_token_index]])
        states = [h, c]

    return ' '.join(decoded_sentence)

# Final function for Gradio
def process_image(image):
    """Gradio callback: caption an image and outline its segmented region.

    Args:
        image: The uploaded PIL image, or None when nothing was uploaded.

    Returns:
        A ``(caption, segmentation)`` tuple. ``caption`` is the generated text,
        or a fixed message when no symptom is detected or no image was given.
        ``segmentation`` is the PIL image returned by ``segment_image``, or
        None when no image was given.
    """
    # Gradio passes None when the form is submitted without an image; return a
    # readable message instead of failing on image.resize.
    if image is None:
        return "Please upload an image.", None

    img_array = preprocess_image(image)
    symptoms = predict_symptoms(img_array)
    if symptoms:
        caption = generate_caption(symptoms)
    else:
        caption = "No symptoms detected."
    segmentation = segment_image(image)
    return caption, segmentation

# Build the Gradio UI: one image input, and a caption textbox plus a
# segmentation image as outputs, all served by process_image.
caption_output = gr.Textbox(label="Caption")
segmentation_output = gr.Image(type="pil", label="Segmentasi")

interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[caption_output, segmentation_output],
    title="Image Captioning Penyakit Buah Jeruk",
)

# debug=True keeps the cell running so errors and logs stay visible in Colab.
interface.launch(debug=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/554M [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://add921ddf93c1f5d5c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 220ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 852ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0