In [1]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer # convert to Harmony response format
from transformers import AutoModelForCausalLM, Mxfp4Config, TrainingArguments
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

In [2]:
dataset = load_dataset("json", data_files="/app/Files/training_set.jsonl", split="train")

In [3]:

dataset[0]

{'messages': [{'role': 'system',
   'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
  {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
  {'role': 'assistant',
   'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}]}

In [4]:
dataset[0]['messages']

[{'role': 'system',
  'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
 {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
 {'role': 'assistant',
  'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}]

In [5]:
tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")

In [6]:
messages = dataset[0]['messages']
convo = tokenizer.apply_chat_template(messages, tokenize=False)

In [7]:
convo

'<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\nKnowledge cutoff: 2024-06\nCurrent date: 2025-08-24\n\nReasoning: medium\n\n# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>developer<|message|># Instructions\n\nYou are a mathematician who is specialized in linear algebra and also statistics.\n\n<|end|><|start|>user<|message|>What is a vector space in linear algebra?<|end|><|start|>assistant<|channel|>final<|message|>A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.<|return|>'

In [8]:
quantization_config = Mxfp4Config(dequantize=True)

model_kwargs = dict(
    attn_implementation='eager', 
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    use_cache=False, 
    device_map='auto')

model = AutoModelForCausalLM.from_pretrained('openai/gpt-oss-20b',**model_kwargs)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
messages = [{'role':'user', 'content':'what is a vector space?'}]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True,return_tensors='pt').to(model.device)

out = model.generate(prompt, max_new_tokens=512)
response = tokenizer.batch_decode(out)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [10]:
response

['<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\nKnowledge cutoff: 2024-06\nCurrent date: 2025-08-24\n\nReasoning: medium\n\n# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>what is a vector space?<|end|><|start|>assistant<|channel|>analysis<|message|>The user asks: "what is a vector space?" They want a definition, perhaps with explanation. The context: maybe some background: vector space over a field. Provide definition, examples, properties, axioms. Might want simple explanation with example of R^n. Provide formal definition and discussion of key axioms. Also mention linear transformations, subspaces, basis, dimension. The user didn\'t specify any level or application. Likely a basic explanation is suitable. But should we anticipate advanced interest? Provide accessible explanation, with formal definition and intuitive explanation. And mention that vector spaces can be over 

In [11]:
peft_config = LoraConfig(
    r=8,                 
    lora_alpha=16,
    target_modules='all-linear', 
    target_parameters=[
        "7.mlp.experts.gate_up_proj",
        "7.mlp.experts.down_proj",
        "15.mlp.experts.gate_up_proj",
        "15.mlp.experts.down_proj",
        "23.mlp.experts.gate_up_proj",
        "23.mlp.experts.down_proj"
    ]
)

peft_model = get_peft_model(model, peft_config)



In [12]:
filepath = '/app/Files/OSS_Training_Output/'

training_args = TrainingArguments(    
    output_dir=os.path.join(filepath,"gpt-oss-20b-fine-tuning"),
    save_strategy="steps",
    save_steps=50,
    fp16=True,  
    learning_rate=2e-4,
    gradient_checkpointing=True,
    num_train_epochs=1,
    logging_steps=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4, 
    warmup_ratio=0.03,
    lr_scheduler_type="cosine_with_min_lr",
    lr_scheduler_kwargs={"min_lr_rate":0.1},
    report_to="none",
)

In [13]:
trainer = SFTTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer
)

In [None]:
trainer.train()

Step,Training Loss
1,4.0187
2,3.9725
3,3.5964
4,3.0796
5,2.6557
6,2.267
7,1.8935
8,1.621
9,1.4731
10,1.2935
