In [18]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint identifier, shared by the tokenizer and the model so the two
# always come from the same repository.
MODEL_ID = "stabilityai/stablelm-2-zephyr-1_6b"

# Load the tokenizer, then the chat model under test in half precision;
# device_map="auto" leaves device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [23]:
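# This model expects a specific chat format (see the prompt built in stream_chat).
# No explicit model.eval() call is needed: from_pretrained() already returns the model in evaluation mode.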
def stream_chat(message, max_tokens=200, temperature=0.6):
    """Generate a single-turn reply token by token, printing it live.

    The message is wrapped in the Zephyr-style prompt layout, tokenized with
    the module-level ``tokenizer`` and fed to the module-level ``model``.
    Text is echoed to stdout as it is produced (special tokens included, so
    the end-of-text marker is visible), followed by a final newline.

    Args:
        message: User message to send to the model.
        max_tokens: Upper bound on the number of tokens to generate. Values
            <= 0 generate nothing.
        temperature: Sampling temperature. ``0`` selects the most likely
            token at every step (greedy decoding); larger values flatten the
            distribution before sampling.

    Returns:
        The generated reply decoded with special tokens stripped.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be >= 0, got {temperature}")

    # Format prompt for Zephyr
    prompt = f"<|user|>\n{message}<|endoftext|>\n<|assistant|>\n"

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = inputs['input_ids']

    # Print the prompt
    print(f"User: {message}")
    print("Assistant: ", end="", flush=True)

    generated_tokens = []
    printed_chars = 0  # how much of the decoded reply is already on screen
    cache = None       # key/value cache returned by the previous forward pass

    with torch.no_grad():
        for _ in range(max_tokens):
            # With a cache only the newest token has to be processed; without
            # one (first step, or a model that returns no cache) the whole
            # sequence is fed again.
            step_input = input_ids if cache is None else input_ids[:, -1:]
            outputs = model(step_input, past_key_values=cache, use_cache=True)
            cache = getattr(outputs, "past_key_values", None)

            # Sample in float32: half-precision softmax/multinomial is not
            # available on every device.
            logits = outputs.logits[:, -1, :].float()

            if temperature == 0:
                # Dividing by zero would produce NaNs; greedy decoding is the
                # limit of temperature -> 0.
                next_token = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                probs = torch.softmax(logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1)

            token_id = next_token[0].item()
            generated_tokens.append(token_id)
            input_ids = torch.cat([input_ids, next_token], dim=1)

            # Decode everything generated so far and print only the new
            # suffix. Decoding tokens one at a time can split a multi-byte
            # character across tokens; a trailing U+FFFD means the character
            # is still incomplete, so hold it back until it resolves.
            raw_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
            if not raw_text.endswith("\ufffd"):
                print(raw_text[printed_chars:], end="", flush=True)
                printed_chars = len(raw_text)

            # Check for end token
            token_text = tokenizer.decode([token_id], skip_special_tokens=False)
            if token_id == tokenizer.eos_token_id or '<|endoftext|>' in token_text:
                break

    # Flush anything that was held back when generation stopped.
    if generated_tokens:
        raw_text = tokenizer.decode(generated_tokens, skip_special_tokens=False)
        print(raw_text[printed_chars:], end="", flush=True)

    print()  # New line at end
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Sanity check with a benign prompt: the model is expected to answer normally, not refuse.
# stream_chat wraps the message as "<|user|>\n{message}<|endoftext|>\n<|assistant|>\n".
_ = stream_chat("What is a good book to give to children when they are trying to learn how to read?")  # `_ =` discards the returned text; the reply is already printed while streaming

User: What is a good book to give to children when they are trying to learn how to read?
Assistant: "Teach Your Monster to Read" by Amanda Pigman and illustrated by Jim Paolucci is a great book to give to children when they are trying to learn how to read. The book uses a fun, playful approach to teach the alphabet, numbers, and basic sight words in a way that children can easily understand and remember. The colorful illustrations and engaging story make learning to read enjoyable and engaging for young readers.<|endoftext|>
