Select GPU

In [1]:
import os
# Expose only GPU index 0 to this process. This cell runs before the cell
# that imports torch.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

Import Libraries

In [None]:
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration
from peft import PeftModel

Model Paths and Fine-Tuned Adapter

In [None]:
# All artefacts live under one project directory; derive each path from it.
_PROJECT_ROOT = "/storage/home/sriramk/BTP_sriramk"

# Local snapshot of the base Pixtral-12B checkpoint.
base_model_path = (
    f"{_PROJECT_ROOT}/pixtral/models--mistral-community--pixtral-12b"
    "/snapshots/c2756cbbb9422eba9f6c5c439a214b0392dfc998"
)
# Directory holding the fine-tuned LoRA adapter.
adapter_path = f"{_PROJECT_ROOT}/trained/pixtral-lora-finetuned-2"
# Image used for the smoke test below; replace with any test image.
test_image_path = f"{_PROJECT_ROOT}/1D_networks/rates/network_107.png"

Load Model and Processor

In [None]:
# Load the base model in bfloat16.
# The progress message matches the "Loading base model..." line in the
# notebook's recorded output, which the original cell never printed.
print("Loading base model...")
model = LlavaForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto"  # Automatically splits across GPUs if needed, or puts on GPU 0
)

# Attach the LoRA adapter on top of the base model.
# NOTE: the adapter is only wrapped around the model here, it is NOT merged
# into the base weights (no merge call is made in this cell).
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(model, adapter_path)

# Load the processor from the fine-tuned path so its config matches the
# adapter; this assumes the processor files were saved alongside the adapter.
print("Loading processor...")
processor = AutoProcessor.from_pretrained(adapter_path)

Define Inference Function

In [None]:
def run_inference(image_path, prompt_text, max_new_tokens=1000):
    """Run the vision-language model on one image and return its text reply.

    Uses the module-level ``model`` and ``processor`` loaded in an earlier cell.

    Args:
        image_path: Path to the image file to analyse.
        prompt_text: Instruction text sent to the model together with the image.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 1000, the value that was previously hard-coded.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    # Open inside a context manager so the file handle is closed as soon as
    # the pixel data has been copied; convert() returns an independent image.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image", "image": image}
            ]
        }
    ]

    # Render the conversation into the model's prompt format, ending with the
    # marker that tells the model it is its turn to answer.
    text = processor.apply_chat_template(messages, add_generation_prompt=True)

    # Tokenise/preprocess, move everything to the model's device, and cast
    # the image tensor to the model's dtype.
    inputs = processor(text=text, images=image, return_tensors="pt").to(model.device)
    inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)

    print("Generating response...")
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,        # Greedy decoding: deterministic output for JSON/code
            # temperature=0.1,      # Only needed if do_sample=True
            repetition_penalty=1.2,
            pad_token_id=processor.tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs.input_ids.shape[1]
    output_text = processor.batch_decode(
        generated_ids[:, prompt_length:], skip_special_tokens=True
    )[0]

    return output_text

Output

In [None]:
# Smoke test: send the sample diagram through the model and show the reply.
prompt = "Analyze the diagram and extract nodes, edges, and generic symbolic ODEs. Also specify if the network is valid or not."
response = run_inference(test_image_path, prompt)

# Frame the reply between two 30-character rules for readability.
separator = "-" * 30
print("\nResult:", separator, response, separator, sep="\n")

Loading base model...


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Loading LoRA adapter...
Loading processor...
Generating response...

Result:
------------------------------
{"nodes": [{"id": "A"}], "edges": [{"source": "A", "target": "φ", "type": "inhibition", "label": "k1"}, {"source": "A", "target": "A", "type": "inhibition", "label": "k2"}, {"source": "A", "target": "φ", "type": "inhibition", "label": "k3"}, {"source": "φ", "target": "A", "type": "activation", "label": "k4"}], "valid": false}

The diagram contains one node A with various connections:

- There are inhibitory interactions (represented by red lines) from A to φ (labeled k1 and k3).
- There's an inhibitory interaction from A back to itself (self-inhibition labeled k2).
- There’s an activation interaction (represented by a black line) from φ to A (labeled k4).

Given these observations:
- The presence of self-inhibitory loop (k2) indicates that A inhibits its own production.
- Inhibitory regulation from A to both φ (k1 and k3), combined with activation feedback from φ to A (k4), sugge