# GPT-OSS Weight Adjustment
Run Cell 0 first to free disk space, then run the remaining cells in order. Restart the runtime after Cell 1 before continuing.

In [None]:
# Cell 0: Clear disk space
!rm -rf ~/.cache/huggingface
!rm -rf /content/gpt-oss-20b
!rm -rf /content/hf_cache
!df -h

In [2]:
# Cell 1: Install (RESTART RUNTIME AFTER THIS)
!pip install --upgrade -qqq uv
try: import numpy; get_numpy = f"numpy=={numpy.__version__}"
except: get_numpy = "numpy"
!uv pip install -qqq \
    "torch>=2.8.0" "triton>=3.4.0" {get_numpy} torchvision bitsandbytes "transformers==4.56.2" \
    "unsloth_zoo[base] @ git+https://github.com/unslothai/unsloth-zoo" \
    "unsloth[base] @ git+https://github.com/unslothai/unsloth" \
    git+https://github.com/triton-lang/triton.git@05b2c186c1b6c9a08375389d5efe9cb4c401c075#subdirectory=python/triton_kernels
!uv pip install --upgrade --no-deps transformers==4.56.2 tokenizers
print("\n*** RESTART RUNTIME NOW ***")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.3/23.3 MB[0m [31m119.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m2 packages[0m [2min 15ms[0m[0m                                          [0m
[2mAudited [1m2 packages[0m [2min 0.22ms[0m[0m

*** RESTART RUNTIME NOW ***


In [None]:
# Cell 2: Check GPU + disk
# Baseline readings taken before the model is loaded:
# nvidia-smi shows the GPU and its current memory use, df -h shows disk usage per filesystem.
!nvidia-smi
!df -h

In [None]:
# Cell 3: Load model
# Load gpt-oss-20b and its tokenizer through Unsloth with 4-bit loading enabled.
from unsloth import FastLanguageModel

# All loader options in one place so they are easy to tweak.
# NOTE(review): offload_embedding presumably keeps the embedding weights off the
# GPU; confirm against the Unsloth documentation.
load_options = dict(
    model_name="unsloth/gpt-oss-20b",
    max_seq_length=512,
    load_in_4bit=True,
    offload_embedding=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)
print("Model loaded.")

In [None]:
# Cell 4: Check GPU after load
# Same nvidia-smi report as Cell 2; compare the memory figures to see what the
# loaded model occupies on the GPU.
!nvidia-smi

In [None]:
# Cell 5: See weight names
# Print one line per model parameter: qualified name, shape and dtype.
for weight_name, weight in model.named_parameters():
    print(f"{weight_name} {weight.shape} {weight.dtype}")

In [None]:
# Cell 6: Adjust weights (norms are not quantized)
# Scales every norm parameter of one decoder layer in place.
# WARNING: the update is in place, so re-running this cell scales the same
# weights again (0.9 -> 0.81 -> ...). Reload the model (Cell 3) to reset.
import torch

layer = 0    # index of the decoder layer to modify
scale = 0.9  # multiplicative factor applied to that layer's norm weights

# The trailing dot matters: without it, "layers.1" would also match
# "layers.10" ... "layers.19" and silently adjust the wrong layers.
layer_prefix = f"layers.{layer}."

with torch.no_grad():
    for name, param in model.named_parameters():
        if layer_prefix in name and "norm" in name:
            # In-place multiply; under no_grad this needs no .data access.
            param.mul_(scale)
            print(f"Adjusted: {name}")
print("Done.")

Adjusted: model.layers.0.input_layernorm.weight
Adjusted: model.layers.0.post_attention_layernorm.weight
Done.


In [None]:
# Cell 7: Test inference
# Smoke test: tokenize a short prompt, generate up to 50 new tokens and print
# the decoded sequence.
prompt = "Hello, how are you?"

# Tokenize to PyTorch tensors, then move them to the GPU.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to("cuda")

outputs = model.generate(max_new_tokens=50, **inputs)

# generate() returns a batch; only the first (and only) sequence is decoded.
first_sequence = outputs[0]
print(tokenizer.decode(first_sequence))