In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm
import torch

def generate_responses(prompt, model_name="LiquidAI/LFM2-350M", max_new_tokens=100, temperature=0.3, repetition_penalty=1.05):
    """Generate text for ``prompt`` with a Hugging Face causal language model.

    The tokenizer and model are loaded with ``from_pretrained`` on every
    call, moved to CUDA when it is available (CPU otherwise, with a printed
    warning), and used for a single ``model.generate`` call.

    Args:
        prompt: Text handed to the tokenizer.
        model_name: Model identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.
            A numeric value <= 0 selects greedy decoding instead of
            sampling; ``None`` is forwarded unchanged with sampling enabled.
        repetition_penalty: Forwarded to ``model.generate``.

    Returns:
        The first generated sequence decoded to a string with special
        tokens skipped.
        NOTE(review): for decoder-only models the output sequence normally
        starts with the prompt tokens, so the returned text presumably
        includes the prompt -- confirm this is what callers expect.
    """
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == "cpu":
        print("Warning: Running on CPU may be slow.")

    model.to(device)

    # Sampling requires a strictly positive temperature; a caller asking for
    # temperature <= 0 wants deterministic output, so decode greedily rather
    # than letting generate() reject the value.
    if temperature is not None and temperature <= 0:
        decoding_kwargs = {"do_sample": False}
    else:
        decoding_kwargs = {"do_sample": True, "temperature": temperature}

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        **decoding_kwargs,
    )
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    return generated_text

In [None]:
# Demo: run one generation with the default model and decoding settings.
# Left as the cell's final expression so the notebook displays the result.
generate_responses(prompt="What is C. elegans?")