In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Model and tokenizer: pick the device first, then load both from the Hub
# checkpoint and place the model on that device in inference mode.
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
model.eval()

# Initial conversation messages
def format_messages(user_prompt):
    """Build a single-turn ChatML-style prompt around *user_prompt*.

    The prompt consists of an empty system turn, the user turn, and an
    opened (unterminated) assistant turn for the model to continue.

    Args:
        user_prompt: Text placed inside the user turn.

    Returns:
        The concatenated prompt string.
    """
    segments = (
        "<|im_start|>system\n<|im_end|>\n",
        f"<|im_start|>user\n{user_prompt}<|im_end|>\n",
        "<|im_start|>assistant\n",
    )
    return "".join(segments)

# Chat function
def chat(user_input):
    """Generate the assistant's reply to a single user message.

    Args:
        user_input: Text of the user's message.

    Returns:
        The newly generated reply text with surrounding whitespace removed.
        The prompt (system and user turns) is not included.
    """
    prompt = format_messages(user_input)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns the prompt tokens followed by the continuation, so
    # drop the prompt by length before decoding. Splitting the decoded text on
    # "<|im_start|>assistant" cannot work here: skip_special_tokens=True
    # removes those markers, which left the system/user text in the reply.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]

    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Gradio graphical interface: one multi-line text input wired to chat(),
# with the reply shown as plain text.
interface = gr.Interface(
    title="💬 SmolLM2 Chatbot",
    description="مدل سبک و مکالمه‌محور SmolLM2 از Hugging Face با فرمت قالب رسمی",
    fn=chat,
    inputs=gr.Textbox(placeholder="پیام خود را وارد کنید...", lines=3),
    outputs="text",
)

# Start serving the app.
interface.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


