Gradio interface

Install dependencies

In [None]:
!pip install ultralytics gradio opencv-python-headless transformers torch torchvision

Gradio interface

In [1]:
# Imports
import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image, ImageFilter
import torch
from torchvision import transforms
from transformers import ViTForImageClassification
import json

# Directory holding the trained artefacts; change this single line when the
# files are moved instead of editing every path below.
PROJECT_DIR = r"C:\Users\Lenovo\Desktop\external project"

# Load model for segmentation/blur task
model = YOLO(rf"{PROJECT_DIR}\best.pt")

# Load ViT model for classification.
# from_pretrained builds the backbone with a fresh 38-way classifier head (hence
# the "newly initialized" warning); load_state_dict then installs the fine-tuned
# weights from the local checkpoint.
vit_model_path = rf"{PROJECT_DIR}\vit_model.pth"
vit_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=38)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, so a tampered checkpoint cannot execute code.
vit_model.load_state_dict(
    torch.load(vit_model_path, map_location=torch.device("cpu"), weights_only=True)
)
vit_model.eval()

# Load class names (looked up by predicted class index in full_pipeline).
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open(rf"{PROJECT_DIR}\class_names.json", "r", encoding="utf-8") as f:
    class_names = json.load(f)

# Image Transform for ViT
# NOTE(review): presumably these must match the preprocessing used when the ViT
# was fine-tuned — confirm against the training notebook.
IMG_SIZE = (224, 224)
valid_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Rule-based chatbot
def rule_based_chat(user_input):
    """Return a canned plant-care answer chosen by keyword match.

    Args:
        user_input: The user's question. ``None`` or an empty string is
            treated as a question containing no recognised keyword.

    Returns:
        The reply attached to the first matching keyword (case-insensitive
        substring match), or a fallback hint when nothing matches.
    """
    treat_reply = "Use organic pesticides and remove infected leaves."
    symptom_reply = "Common symptoms include yellowing, spots, and wilting."
    prevent_reply = "Avoid overwatering and ensure good air circulation."
    fallback_reply = "I'm still learning! Try asking about symptoms, treatment, or prevention."

    # The text box may deliver None; treat it like an empty question.
    question = (user_input or "").lower()

    # The original exact phrases come first so their precedence is unchanged.
    # The shorter stems after them catch the wording the fallback hint itself
    # suggests ("treatment", "symptom", "prevent"), which previously fell through.
    rules = (
        ("how to treat", treat_reply),
        ("symptoms", symptom_reply),
        ("prevention", prevent_reply),
        ("treat", treat_reply),
        ("symptom", symptom_reply),
        ("prevent", prevent_reply),
    )
    for keyword, reply in rules:
        if keyword in question:
            return reply
    return fallback_reply

# Image processing with segmentation + ViT classification
def full_pipeline(img):
    """Blur the image except for the first detected region, and classify it.

    Args:
        img: A PIL image (what ``gr.Image(type="pil")`` passes in), an array
            accepted by ``Image.fromarray``, or ``None`` when there is no image.

    Returns:
        A ``(image, label)`` pair. ``image`` is a NumPy array of the
        Gaussian-blurred picture with the first YOLO box pasted back in sharp
        (fully blurred when nothing is detected). ``label`` is
        ``class_names[predicted index]`` from the ViT classifier. When ``img``
        is ``None`` the pair is ``(None, "No image provided.")``.
    """
    # Nothing to process: answer gracefully instead of failing inside the models.
    if img is None:
        return None, "No image provided."

    # Arrays are still accepted, as before. Forcing RGB keeps the 3-channel
    # Normalize step valid for grayscale or RGBA inputs.
    pil_img = img if isinstance(img, Image.Image) else Image.fromarray(np.array(img))
    pil_img = pil_img.convert("RGB")

    # Use for segmentation/blur task.
    # Ultralytics reads ndarrays as BGR but PIL images as RGB, so pass the PIL
    # image; an RGB ndarray would reach the detector with channels swapped.
    results = model(pil_img)

    # Apply blur everywhere
    blurred = pil_img.filter(ImageFilter.GaussianBlur(radius=10))

    # Even if detects something or not, we classify the full image using ViT
    input_tensor = valid_transform(pil_img).unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        outputs = vit_model(input_tensor)
    pred_idx = outputs.logits.argmax(dim=1).item()
    disease_name = class_names[pred_idx]

    boxes = results[0].boxes
    if len(boxes) > 0:
        # Use the first box for simplicity if YOLO detects a disease region
        x1, y1, x2, y2 = map(int, boxes[0].xyxy[0])

        # Paste the clear region from the original image onto the blurred image
        clear_patch = pil_img.crop((x1, y1, x2, y2))
        blurred.paste(clear_patch, (x1, y1))

    return np.array(blurred), disease_name

# Gradio interface
# Inputs: the photo (handed to the callback as a PIL image) and a free-text question.
image_input = gr.Image(type="pil")
text_input = gr.Textbox(label="Ask a question (e.g., 'how to treat it?')")

iface = gr.Interface(
    # full_pipeline returns (image, label); the chatbot reply is appended so the
    # three values line up with the three output components below.
    fn=lambda img, text: [*full_pipeline(img), rule_based_chat(text)],
    inputs=[image_input, text_input],
    outputs=[gr.Image(label="Segmented Image"), gr.Text(label="Predicted Disease"), gr.Text(label="Bot Answer")],
    title="🌿 Plant Disease Detector & Assistant",
    # The detector name was missing from this sentence ("using , then").
    description="Upload an image to detect disease using YOLO, then ask plant-care questions."
)

iface.launch(debug=True)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.



0: 640x640 (no detections), 417.6ms
Speed: 43.9ms preprocess, 417.6ms inference, 17.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 305.9ms
Speed: 10.9ms preprocess, 305.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 353.0ms
Speed: 13.6ms preprocess, 353.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Squash Powdery mildew leaf, 314.9ms
Speed: 17.5ms preprocess, 314.9ms inference, 40.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Apple Scab Leaf, 1 Squash Powdery mildew leaf, 293.5ms
Speed: 12.6ms preprocess, 293.5ms inference, 21.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 284.8ms
Speed: 18.4ms preprocess, 284.8ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Squash Powdery mildew leaf, 471.3ms
Speed: 14.9ms preprocess, 471.3ms inference, 2.4ms postprocess per image at shape (1, 



In [None]:
# Imports
import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image, ImageFilter
import torch
from torchvision import transforms
from transformers import ViTForImageClassification
import json

# Directory holding the trained artefacts; change this single line when the
# files are moved instead of editing every path below.
PROJECT_DIR = r"C:\Users\Lenovo\Desktop\external project"

# Load model for segmentation/blur task
model = YOLO(rf"{PROJECT_DIR}\best.pt")

# Load ViT model for classification.
# from_pretrained builds the backbone with a fresh 38-way classifier head (hence
# the "newly initialized" warning); load_state_dict then installs the fine-tuned
# weights from the local checkpoint.
vit_model_path = rf"{PROJECT_DIR}\vit_model.pth"
vit_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=38)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, so a tampered checkpoint cannot execute code.
vit_model.load_state_dict(
    torch.load(vit_model_path, map_location=torch.device("cpu"), weights_only=True)
)
vit_model.eval()

# Load class names (looked up by predicted class index in full_pipeline).
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open(rf"{PROJECT_DIR}\class_names.json", "r", encoding="utf-8") as f:
    class_names = json.load(f)

# Image Transform for ViT
# NOTE(review): presumably these must match the preprocessing used when the ViT
# was fine-tuned — confirm against the training notebook.
IMG_SIZE = (224, 224)
valid_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Rule-based chatbot
def rule_based_chat(user_input):
    """Return a canned plant-care answer chosen by keyword match.

    Args:
        user_input: The user's question. ``None`` or an empty string is
            treated as a question containing no recognised keyword.

    Returns:
        The reply attached to the first matching keyword (case-insensitive
        substring match), or a fallback hint when nothing matches.
    """
    treat_reply = "Use organic pesticides and remove infected leaves."
    symptom_reply = "Common symptoms include yellowing, spots, and wilting."
    prevent_reply = "Avoid overwatering and ensure good air circulation."
    fallback_reply = "I'm still learning! Try asking about symptoms, treatment, or prevention."

    # The text box may deliver None; treat it like an empty question.
    question = (user_input or "").lower()

    # The original exact phrases come first so their precedence is unchanged.
    # The shorter stems after them catch the wording the fallback hint itself
    # suggests ("treatment", "symptom", "prevent"), which previously fell through.
    rules = (
        ("how to treat", treat_reply),
        ("symptoms", symptom_reply),
        ("prevention", prevent_reply),
        ("treat", treat_reply),
        ("symptom", symptom_reply),
        ("prevent", prevent_reply),
    )
    for keyword, reply in rules:
        if keyword in question:
            return reply
    return fallback_reply

# Image processing with segmentation + ViT classification
def full_pipeline(img):
    """Resize to 224x224, blur all but the first detected region, and classify.

    Args:
        img: A PIL image (what ``gr.Image(type="pil")`` passes in), or ``None``
            when there is no image.

    Returns:
        A ``(image, label)`` pair. ``image`` is a NumPy array of the 224x224
        Gaussian-blurred picture with the first YOLO box pasted back in sharp
        (fully blurred when nothing usable is detected). ``label`` is
        ``class_names[predicted index]`` from the ViT classifier. When ``img``
        is ``None`` the pair is ``(None, "No image provided.")``.
    """
    # Nothing to process: answer gracefully instead of failing on img.resize.
    if img is None:
        return None, "No image provided."

    # Resize input to 224x224. Forcing RGB first keeps the 3-channel Normalize
    # step valid for grayscale or RGBA inputs.
    img_resized = img.convert("RGB").resize((224, 224))
    width, height = img_resized.size

    # Use for segmentation/blur task.
    # Ultralytics reads ndarrays as BGR but PIL images as RGB, so pass the PIL
    # image; an RGB ndarray would reach the detector with channels swapped.
    results = model(img_resized)

    # Apply blur everywhere
    blurred = img_resized.filter(ImageFilter.GaussianBlur(radius=10))

    # ViT classification
    input_tensor = valid_transform(img_resized).unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        outputs = vit_model(input_tensor)
    pred_idx = outputs.logits.argmax(dim=1).item()
    disease_name = class_names[pred_idx]

    boxes = results[0].boxes
    if len(boxes) > 0:
        # Use the first box detected by YOLO
        x1, y1, x2, y2 = map(int, boxes[0].xyxy[0])

        # Ensure box coordinates are within the resized image bounds
        x1 = max(0, min(width - 1, x1))
        y1 = max(0, min(height - 1, y1))
        x2 = max(0, min(width, x2))
        y2 = max(0, min(height, y2))

        # Clamping or integer truncation can collapse the box; only paste when
        # the region is non-empty, otherwise the crop is invalid.
        if x2 > x1 and y2 > y1:
            # Paste the clear patch from original resized image
            clear_patch = img_resized.crop((x1, y1, x2, y2))
            blurred.paste(clear_patch, (x1, y1))

    # Return the 224x224 processed image and disease prediction
    return np.array(blurred), disease_name

# Gradio interface
# Input widgets: the photo (handed to the callback as a PIL image) and a
# free-text question for the rule-based chatbot.
image_input = gr.Image(type="pil")
text_input = gr.Textbox(label="Ask a question (e.g., 'how to treat it?')")

# Output widgets, listed in the order the callback returns its values.
output_components = [
    gr.Image(label="Segmented Image"),
    gr.Text(label="Predicted Disease"),
    gr.Text(label="Bot Answer"),
]

iface = gr.Interface(
    # full_pipeline yields (image, label); the chatbot reply is appended as the
    # third value so the result matches the three output widgets.
    fn=lambda img, text: [*full_pipeline(img), rule_based_chat(text)],
    inputs=[image_input, text_input],
    outputs=output_components,
    title="🌿 Plant Disease Detector & Assistant",
    description="Upload an image to detect disease using YOLO, then ask plant-care questions.",
)

# Start the local server for the interface.
iface.launch(debug=True)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.



0: 640x640 (no detections), 420.1ms
Speed: 15.9ms preprocess, 420.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 Bell_pepper leaf spots, 2 Bell_pepper leafs, 501.0ms
Speed: 33.0ms preprocess, 501.0ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)
