In [None]:
# Import required libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import torch

## Download the model

In [None]:
# ----------------------------------------------------------
# Step 1: Authenticate with Hugging Face
# ----------------------------------------------------------
#   1. Create an account at https://huggingface.co/
#   2. Generate a Personal Access Token (PAT) from: Settings → Access Tokens
#   3. Use that token to log in and enable model downloads.
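# Prefer calling login() with no arguments (interactive prompt) so the token is never saved in the notebook.
# login(token="PasteYourTokenHere")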

In [None]:
# ----------------------------------------------------------
# Step 2: Select and download the Stable Code 3B model
# ----------------------------------------------------------

# Local directory where the downloaded model files will be saved
local_dir = "./stable-code-3b"
model_id = "stabilityai/stable-code-3b"

# Load tokenizer.
# `token=True` reuses the credentials stored by a previous Hugging Face login;
# it replaces the deprecated `use_auth_token` keyword.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=True
)

# Load model weights in full precision.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,   # Use float16 if running on GPU
    token=True
)

# ----------------------------------------------------------
# Step 3: Save the downloaded model locally
# ----------------------------------------------------------
# Tokenizer and weights go to the same directory so that it can later be
# passed to `from_pretrained` as a self-contained local checkpoint.
tokenizer.save_pretrained(local_dir)
model.save_pretrained(local_dir)

print(f"Model saved locally at: {local_dir}")

## Load the model from the local directory

In [1]:
# Import required libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory holding the locally saved tokenizer and model files
local_model_dir = "./stable-code-3b"

# Tokenizer: `local_files_only=True` restricts loading to files already on disk
tokenizer = AutoTokenizer.from_pretrained(local_model_dir, local_files_only=True)

# Quantization settings: 4-bit NF4 weights, double quantization enabled,
# float16 as the compute dtype
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Model: quantized on load, with device placement delegated to `device_map="auto"`
model = AutoModelForCausalLM.from_pretrained(
    local_model_dir,
    local_files_only=True,
    device_map="auto",
    quantization_config=bnb_config,
)

# Report success and the device of the first model parameter
print("✅ Stable-Code-3B loaded in 4-bit")
print(f"Model device: {next(model.parameters()).device}")


Loading checkpoint shards: 100%|██████████| 3/3 [00:38<00:00, 12.97s/it]


✅ Stable-Code-3B loaded in 4-bit
Model device: cuda:0


In [None]:
# Provide a prompt for text generation
prompt = '''Write a Python function that checks if a number is prime or not.'''

# Tokenize the prompt and move the input tensors to the model's device
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling is stochastic: seed the RNG so that re-running the notebook on the
# same setup yields the same completion.
torch.manual_seed(42)

# Generate output (inference mode disables autograd bookkeeping)
with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,   # upper bound on the number of generated tokens
        temperature=0.2,      # lower = more deterministic
        do_sample=True,       # sample from the distribution instead of greedy decoding
        # Pass the pad token explicitly; otherwise generate() falls back to the
        # EOS token itself and logs a "Setting `pad_token_id`..." warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode and print the generated text (the decoded sequence includes the prompt)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a Python function that checks if a number is prime or not.

# In[2]:


def is_prime(number):
    if number == 1:
        return False
    for i in range(2, number):
        if number % i == 0:
            return False
    return True


# In[3]:


is_prime(2)


# In[4]:


is_prime(3)


# In[5]:


is_prime(4)


# In[6]:


is_prime(5)


# In[


## Test code

In [30]:
def is_prime(n: int) -> bool:
    """Return True if ``n`` is a prime number, False otherwise.

    Args:
        n: Integer to test.

    Returns:
        True when ``n`` is greater than 1 and has no divisors other than
        1 and itself. 0, 1 and all negative integers are not prime.
    """
    # Primes are defined only for integers greater than 1.
    if n < 2:
        return False
    # 2 and 3 are prime; handling them here keeps the loop below simple.
    if n < 4:
        return True
    # Any other even number is composite.
    if n % 2 == 0:
        return False
    # A composite n has a divisor no larger than sqrt(n), so only odd
    # candidates up to that bound need to be checked.
    divisor = 3
    while divisor * divisor <= n:
        if n % divisor == 0:
            return False
        divisor += 2
    return True

is_prime(13)

True