In [1]:
import sys
print(sys.executable)
print(sys.version)


/app/cloned_repo/LLM-World/.venv/bin/python
3.11.14 (main, Dec 17 2025, 21:07:37) [Clang 21.1.4 ]


In [2]:
!nvidia-smi

Fri Jan  9 16:05:15 2026       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.08             Driver Version: 535.161.08   CUDA Version: 12.6     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  | 00000000:07:00.0 Off |                    0 |
| N/A   28C    P0              59W / 400W |  40025MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-40GB          On  | 00000000:0F:00.0 Off |  

In [3]:
from transformers import BitsAndBytesConfig
import os
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


# Data

In [4]:
!pwd

/app/cloned_repo/LLM-World/Notebooks


In [5]:
trn_data = %pwd
trn_data = trn_data.replace('Notebooks','Files/training_set.jsonl')
val_data = trn_data.replace('training_set','validation_set')
print(trn_data)      
print(val_data)      
print(os.path.exists(trn_data))
print(os.path.exists(val_data))


/app/cloned_repo/LLM-World/Files/training_set.jsonl
/app/cloned_repo/LLM-World/Files/validation_set.jsonl
True
True


In [6]:
## Convert to huggingface dataset

data = load_dataset("json", data_files={
    "train":trn_data,
    "valid":val_data
})
data
                    

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 409
    })
    valid: Dataset({
        features: ['messages'],
        num_rows: 20
    })
})

In [7]:
data['train'][:5]

{'messages': [[{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
   {'role': 'assistant',
    'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}],
  [{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user', 'content': 'Can you explain eigenvalues in simple terms?'},
   {'role': 'assistant',
    'content': 'Eigenvalues are special numbers that show how a matrix stretches or shrinks vectors along certain directions.'}],
  [{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user',
    'content': 'What is the difference between variance and standard deviation?'},
   {'role': 'assistant',
    

In [8]:
data['valid'][:5]

{'messages': [[{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user',
    'content': 'What is the difference between matrix multiplication and element-wise multiplication?'},
   {'role': 'assistant',
    'content': 'Matrix multiplication involves dot products between rows and columns, while element-wise multiplication multiplies corresponding entries directly.'}],
  [{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user', 'content': 'What is an orthogonal matrix?'},
   {'role': 'assistant',
    'content': 'An orthogonal matrix is a square matrix whose rows and columns are orthonormal vectors. Its inverse is the same as its transpose.'}],
  [{'role': 'system',
    'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
   {'role': 'user', 'content': 'What is covariance in statisti

In [9]:
data['train'][0]['messages']

[{'role': 'system',
  'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
 {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
 {'role': 'assistant',
  'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}]

# Model

In [10]:
# https://huggingface.co/ibm-granite/granite-4.0-h-1b

In [11]:
from huggingface_hub import login
from dotenv import load_dotenv

In [12]:
load_dotenv('/app/cloned_repo/LLM-World/.env')

True

In [13]:
login(token=os.getenv('HF_TOKEN'))

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [14]:
file_dir = %pwd
outp_dir = file_dir.replace('Notebooks','Files/sm_output')
outp_dir

'/app/cloned_repo/LLM-World/Files/sm_output'

In [15]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType

In [16]:
model_id = "ibm-granite/granite-4.0-h-1b"

tokenizer = AutoTokenizer.from_pretrained(model_id) # Load Tokenizer
model = AutoModelForCausalLM.from_pretrained(model_id) # Load base model

The fast path is not available because one of `(selective_state_update, causal_conv1d_fn, causal_conv1d_update)` is None. Falling back to the naive implementation. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d


In [17]:
## Serialization (dict -> str) for ability for model to read
def serialize_message(input_example):
    chat_str = ""
    for message in input_example['messages']:
        role = message['role']
        content = message['content']
        if role == "system":
            chat_str += f"System: {content}\n"
        elif role == "user":
            chat_str += f"User: {content}\n"
        elif role == "assistant":
            chat_str += f"Assistant: {content}\n"
    input_example["text"] = chat_str
    return input_example
        

In [18]:
## Tokenize the data
def tokenize_text(example):
    return tokenizer(
        example['text'],
        padding='max_length',
        truncation=True,
        max_length=512)

In [19]:
data = data.map(serialize_message)
data

DatasetDict({
    train: Dataset({
        features: ['messages', 'text'],
        num_rows: 409
    })
    valid: Dataset({
        features: ['messages', 'text'],
        num_rows: 20
    })
})

In [20]:
data['train'][0]

{'messages': [{'role': 'system',
   'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
  {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
  {'role': 'assistant',
   'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}],
 'text': 'System: You are a mathematician who is specialized in linear algebra and also statistics.\nUser: What is a vector space in linear algebra?\nAssistant: A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.\n'}

In [21]:
data = data.map(tokenize_text,batched=True)
data

DatasetDict({
    train: Dataset({
        features: ['messages', 'text', 'input_ids', 'attention_mask'],
        num_rows: 409
    })
    valid: Dataset({
        features: ['messages', 'text', 'input_ids', 'attention_mask'],
        num_rows: 20
    })
})

In [22]:
data['train'][0]

{'messages': [{'role': 'system',
   'content': 'You are a mathematician who is specialized in linear algebra and also statistics.'},
  {'role': 'user', 'content': 'What is a vector space in linear algebra?'},
  {'role': 'assistant',
   'content': 'A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.'}],
 'text': 'System: You are a mathematician who is specialized in linear algebra and also statistics.\nUser: What is a vector space in linear algebra?\nAssistant: A vector space is a collection of vectors where you can add them together and multiply them by scalars, following specific rules.\n',
 'input_ids': [100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,
  100256,


In [23]:
# HugingFace Trainer (Basic setup)
from transformers import Trainer, TrainingArguments

train_args = TrainingArguments(
    output_dir=outp_dir,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    eval_strategy="steps",
    save_steps=500,
    logging_steps=100,
    num_train_epochs=3,
    learning_rate=2e-5,
    # bf16=True,
    no_cuda=True, # dont use gpu when setting up variables If not using LoRA
    # use_cpu=True, # dont use gpu when setting up variables If not using LoRA
    fp16=True,
    push_to_hub=False,
    hub_model_id=None,
    hub_token=None
)

# better for training 1B+ OOM error possible without
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj","v_proj"]
)

model = get_peft_model(model,lora_config)

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=data['train'],
    eval_dataset=data['valid'],
    processing_class=tokenizer
)



In [24]:
save_dir = file_dir.replace('Notebooks','Files/sm_artifacts')
save_dir

'/app/cloned_repo/LLM-World/Files/sm_artifacts'

In [None]:
trainer.train()
trainer.save_model(save_dir)
trainer.tokenizer.save_pretrained(save_dir)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

  3


[34m[1mwandb[0m: You chose "Don't visualize my results"


[34m[1mwandb[0m: Detected [huggingface_hub.inference, mcp] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


* Trackio project initialized: huggingface
* Trackio metrics will be synced to Hugging Face Dataset: marfok/trackio-dataset
* Creating new space: https://huggingface.co/spaces/marfok/trackio
* View dashboard by going to: https://marfok-trackio.hf.space/


* Created new run: marfok-1767974757


GraniteMoeHybrid requires an initialized `HybridMambaAttentionDynamicCache` to return a cache. Because one was not provided, no cache will be returned.
