<a href="https://colab.research.google.com/github/MoizAhmed2517/Calorie-App/blob/main/Converting_model_into_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Exposing a vision model as an API

In [None]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
from unsloth import FastVisionModel  # FastLanguageModel is the counterpart for LLMs
import torch

# Hugging Face repository id of the checkpoint to load.
FOOD_MODEL_REPO = "Moiz2517/Llama3.2-vision-FoodRecipieGenerator-LORA"

# `model` and `tokenizer` are notebook-level globals used by the inference
# cells and by the API handler further down.
model, tokenizer = FastVisionModel.from_pretrained(
    FOOD_MODEL_REPO,
    load_in_4bit=True,  # 4-bit to reduce memory use; False for 16-bit LoRA
)

# Last expression of the cell, so its return value is what the cell displays.
FastVisionModel.for_inference(model)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.19: Fast Mllama patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/375k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/5.15k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/210M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MllamaForConditionalGeneration(
      (vision_model): MllamaVisionModel(
        (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), padding=valid, bias=False)
        (gated_positional_embedding): MllamaPrecomputedPositionEmbedding(
          (tile_embedding): Embedding(9, 8197120)
        )
        (pre_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (post_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (layernorm_pre): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (layernorm_post): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (transformer): MllamaVisionEncoder(
          (layers): ModuleList(
            (0-12): 13 x MllamaVisionEncoderLayer(
              (self_attn): MllamaVisionSdpaAttention(
               

### Sample inference

In [None]:
# Prompt sent together with the image in the sample-inference cell.
# It is a plain (non-f) string: there is nothing to interpolate, so the JSON
# skeleton can use ordinary single braces instead of escaped `{{ }}` pairs.
# The resulting text is identical to what the previous f-string produced.
instruction = """
    You are an expert chef.
    The current picture you are observing is an image of a continental dish.
    Your task is to understand the food image and provide the breakdown of the major ingredients with average quantity for 1 person.
    The unit you need to use is in grams.
    For ingredient you need to use simple names. For example, if mozzarella or cheddar cheese is used, call it 'cheese'. If flour is of different type, call it 'flour'.

    In your response only return JSON.
    output = {
        "food_name": "XXXXXXXXXX",
        "food_components": [
            {
                "ingredient": "XXXXXX",
                "quantity": "XXXXXXX",
                "unit": "XXXXXXX"
            },
            ...
        ]
    }
"""

In [None]:
from PIL import Image

# Sample picture used by the inference cell below; the path is relative to the
# notebook's working directory.
SAMPLE_IMAGE_PATH = 'food.jpg'
image = Image.open(SAMPLE_IMAGE_PATH)

In [None]:
from transformers import TextStreamer

# One user turn: an image placeholder followed by the text prompt.
user_content = [
    {"type": "image"},
    {"type": "text", "text": instruction},
]
messages = [{"role": "user", "content": user_content}]

# Render the chat template to text, then tokenize it together with the image
# and move the resulting tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,
    return_tensors="pt",
).to("cuda")

# Stream tokens to the cell output as they are generated, skipping the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
generation_kwargs = dict(
    streamer=text_streamer,
    max_new_tokens=1000,
    use_cache=True,
    temperature=1.5,
    min_p=0.1,
)
# The return value is discarded; the streamer has already printed the reply.
_ = model.generate(**inputs, **generation_kwargs)

{'food_components': [{'ingredient': 'paneer', 'quantity': '120', 'unit': 'grams'}, {'ingredient': 'tomato puree', 'quantity': '60', 'unit': 'grams'}, {'ingredient': 'curry paste', 'quantity': '15', 'unit': 'grams'}, {'ingredient': 'butter', 'quantity': '30', 'unit': 'grams'}, {'ingredient': 'cream', 'quantity': '30', 'unit': 'grams'}, {'ingredient': 'water', 'quantity': '100', 'unit': 'grams'}, {'ingredient': 'garam masala', 'quantity': '2', 'unit': 'grams'}, {'ingredient': salt', 'quantity': '1', 'unit': 'gram'}, {'ingredient': 'ginger', 'quantity': '5', 'unit': 'grams'}, {'ingredient': 'garlic', 'quantity': '5', 'unit': 'grams'}], 'food_name': 'butter_chicken'}<|eot_id|>


### Exposing the model as an API for inference

In [None]:
!pip -q install flask uvicorn pyngrok pillow python-multipart

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/62.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from google.colab import userdata

# The ngrok auth token is read from the Colab secret named 'ngrok' instead of
# being hardcoded in the notebook.
NGROK_SECRET_NAME = 'ngrok'
ngrok_auth_token = userdata.get(NGROK_SECRET_NAME)

In [None]:
from flask import Flask, request, jsonify
from PIL import Image
import torch
from io import BytesIO
from transformers import TextStreamer
from google.colab import userdata
from pyngrok import ngrok
import json  # safer than eval
import ast
import re

# Assume model and tokenizer are preloaded globally
# model = ...
# tokenizer = ...

app = Flask(__name__)

def _parse_model_reply(decoded):
    """Extract the assistant's dict-like reply from the decoded model text.

    Args:
        decoded: Full decoded generation (prompt followed by the assistant
            turn), as produced by ``tokenizer.decode``.

    Returns:
        The parsed reply as a ``dict``.

    Raises:
        ValueError: If no ``{...}`` block follows the ``assistant`` marker, or
            if that block cannot be parsed into a dict.
    """
    match = re.search(r'assistant\s*\n\s*(\{.*\})', decoded, re.DOTALL)
    if match is None:
        # Previously this case fell through to an unbound local (NameError).
        raise ValueError("Model output did not contain a JSON object.")
    dict_like_str = match.group(1)

    # Try the strictest parser first and relax step by step:
    #   1. real JSON;
    #   2. a Python literal (the logged replies use single quotes) - parsed
    #      with ast.literal_eval, which only accepts literals and copes with
    #      apostrophes inside values;
    #   3. the original quote-swapping heuristic, kept so that every reply
    #      that used to parse still does.
    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    last_error = None
    for parse in parsers:
        try:
            parsed = parse(dict_like_str)
        except (ValueError, SyntaxError) as e:  # JSONDecodeError is a ValueError
            last_error = e
            continue
        if isinstance(parsed, dict):
            return parsed
        last_error = ValueError("Model output is not a JSON object.")

    print("❌ JSON parsing failed:", last_error)
    raise ValueError("Failed to parse model output into valid JSON.") from last_error


@app.route("/analyze_food", methods=["POST"])
def analyze_food():
    """Estimate the ingredients of a dish from an uploaded image.

    Expects a multipart POST with the picture in the ``image`` file field.
    Relies on the notebook-level ``model`` and ``tokenizer`` globals.

    Returns:
        * 200 with the parsed model reply as JSON on success.
        * 400 with ``{"error": ...}`` if the ``image`` field is missing or the
          upload cannot be decoded as an image.
        * 500 with ``{"error": ...}`` if generation fails or the model reply
          cannot be parsed.
    """
    print("✅ Received request on /analyze_food")
    if "image" not in request.files:
        print("❌ No image provided")
        return jsonify({"error": "No image file provided"}), 400

    image_file = request.files["image"]
    print(f"📷 Received image: {image_file.filename}")
    try:
        image = Image.open(image_file).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as e:
        # A bad upload is the client's fault, not a server error.
        print("❌ Could not decode image:", e)
        return jsonify({"error": "Uploaded file is not a valid image"}), 400
    print("📦 Image converted to RGB")

    try:
        instruction = """
        You are an expert chef.
        The current picture you are observing is an image of a continental dish.
        Your task is to understand the food image and provide the breakdown of the major ingredients with average quantity for 1 person.
        Quantity unit MUST BE in 'grams'.
        For ingredient you need to use simple names. For example, if mozzarella or cheddar cheese is used, call it 'cheese'. If flour is of different type, call it 'flour'.

        In your response only return JSON.
        output = {
            "food_name": "XXXXXXXXXX",
            "food_components": [
                {
                    "ingredient": "XXXXXX",
                    "quantity": "XXXXXXX",
                    "unit": "XXXXXXX"
                },
                ...
            ]
        }
        """

        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction}
            ]}
        ]

        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to("cuda")
        print("✅ Message is tokenized")

        # The streamer echoes tokens to the cell output while generating, so
        # progress is visible live in the blocking server cell.
        text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        output = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=1000,
            use_cache=True,
            temperature=1.5,
            min_p=0.1
        )

        decoded = tokenizer.decode(output[0], skip_special_tokens=True)
        print("📝 Raw model output:", decoded)

        response_data = _parse_model_reply(decoded)
        print(json.dumps(response_data, indent=2))
        return jsonify(response_data)
    except Exception as e:
        # Request boundary: report every failure as a JSON 500 instead of
        # letting it escape as Flask's default HTML error page. The previous
        # `except json.JSONDecodeError` never matched what was actually raised.
        print("❌ Error during processing:", e)
        return jsonify({"error": str(e)}), 500

# Port shared by the ngrok tunnel and the Flask server.
API_PORT = 8020

# Authenticate with the token stored in the Colab secret 'ngrok', open a
# tunnel to the local port and print its public URL.
ngrok.set_auth_token(userdata.get('ngrok'))
ngrok_tunnel = ngrok.connect(API_PORT)
print("Public URL:", ngrok_tunnel.public_url)

# 🚨 Runs in the foreground: this call blocks the cell, and all request output
# is shown live underneath it.
app.run(host="0.0.0.0", port=API_PORT)

Public URL: https://dd20-34-83-180-104.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8020
 * Running on http://172.28.0.12:8020
INFO:werkzeug:[33mPress CTRL+C to quit[0m


✅ Received request on /analyze_food
📷 Received image: food.jpg
📦 Image converted to RGB
✅ Message is tokenized
{'food_components': [{'ingredient': 'butter', 'quantity': '30', 'unit': 'grams'}, {'ingredient': 'onion', 'quantity': '100', 'unit': 'grams'}, {'ingredient': 'tomato', 'quantity': '150', 'unit': 'grams'}, {'ingredient': 'ginger', 'quantity': '10', 'unit': 'grams'}, {'ingredient': 'garlic', 'quantity': '5', 'unit': 'grams'}, {'ingredient': 'chicken', 'quantity': '100', 'unit': 'grams'}, {'ingredient': 'cashew', 'quantity': '20', 'unit': 'grams'}, {'ingredient': 'cream', 'quantity': '20', 'unit': 'grams'}, {'ingredient': 'cinnamon', 'quantity': '1', 'unit': 'grams'}, {'ingredient': 'coriander', 'quantity': '1', 'unit': 'grams'}, {'ingredient': 'cumin', 'quantity': '1', 'unit': 'grams'}, {'ingredient': 'turmeric', 'quantity': '1', 'unit': 'grams'}, {'ingredient': red chilli', 'quantity': '5', 'unit': 'grams'}, {'ingredient': 'cinnamon', 'quantity': '1', 'unit': 'grams'}], 'food_n

INFO:werkzeug:127.0.0.1 - - [13/Apr/2025 09:15:39] "POST /analyze_food HTTP/1.1" 200 -


📝 Raw model output: user


        You are an expert chef.
        The current picture you are observing is an image of a continental dish.
        Your task is to understand the food image and provide the breakdown of the major ingredients with average quantity for 1 person.
        Quantity unit MUST BE in 'grams'.
        For ingredient you need to use simple names. For example, if mozzarella or cheddar cheese is used, call it 'cheese'. If flour is of different type, call it 'flour'.

        In your response only return JSON.
        output = {
            "food_name": "XXXXXXXXXX",
            "food_components": [
                {
                    "ingredient": "XXXXXX",
                    "quantity": "XXXXXXX",
                    "unit": "XXXXXXX"
                },
               ...
            ]
        }
        assistant

{'food_components': [{'ingredient': 'butter', 'quantity': '30', 'unit': 'grams'}, {'ingredient': 'onion', 'quantity': '100', 'unit': 'grams'}, {'ingr

In [None]:
# Ask pyngrok for its current tunnels and print them; `tunnels` is reused by
# the disconnect loop in the next cell.
tunnels = ngrok.get_tunnels()
print("Active tunnels:", tunnels)

Active tunnels: []


In [None]:
# Disconnect every tunnel captured in `tunnels`, printing one confirmation per
# tunnel (nothing is printed when the list is empty).
for tunnel in tunnels:
    ngrok.disconnect(tunnel.public_url)
    print("Ngrok tunnel disconnected.")