# install the needed libaries

In [2]:
##### %%capture

# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
!pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0+cu121 -f https://download.pytorch.org/whl/torch_stable.html

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[kaggle-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-osumxkue/unsloth_449c26e5080f47b78e9667d430b7dae8
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-osumxkue/unsloth_449c26e5080f47b78e9667d430b7dae8
  Resolved https://github.com/unslothai/unsloth.git to commit f26d4e739ed507de7a9088da53d10fd02f58d160
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting bitsandbytes>=0.43.3 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[kaggle-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting unsloth-zoo>=2024.11.1 (from unsloth@ git+http

In [8]:
%%capture
!pip install mlflow pyngrok

# import the needed libaries

In [9]:
from unsloth import FastLanguageModel
import torch

In [10]:
from unsloth import is_bfloat16_supported


In [None]:
import mlflow
import mlflow.pytorch
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from transformers import TrainerCallback
import os
from accelerate import Accelerator
import re
from pyngrok import ngrok
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
import time
from trl import  DataCollatorForCompletionOnlyLM

# Data Preparation

In [None]:
train_prompt = """Below is a description for a time series data. Write a response that gives the name of the best fitting machine learning algorithm in one word without explanation.
The best algorithm name should be one of this search space algorithms: AdaboostRegressor, ElasticNetRegressor,  ExtraTreesRegressor,  LassoRegressor,  LightgbmRegressor, SVR, GaussianProcessRegressor, RandomForestRegressor or  XGBoostRegressor.

### DESCRIPTION:
{}

### RESPONSE:
{}"""




def formatting_prompts_func(examples,EOS_TOKEN):

    inputs       = examples["series_description"]

    outputs      = examples["algorithm"]

    texts = []

    for input, output in zip( inputs, outputs):

        # Must add EOS_TOKEN, otherwise your generation will go on forever!

        text = train_prompt.format( input, output) + EOS_TOKEN

        texts.append(text)

    return { "text" : texts}

In [13]:
dataset = load_dataset('csv', data_files="/kaggle/input/regression-univariate-train/Regression_Univariate_train.csv")

dataset

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters'],
        num_rows: 828
    })
})

In [14]:
train_data = dataset['train']

# Step 1: Split into 80% train and 20% remaining (val + test)
train_valid_test_split = train_data.train_test_split(test_size=0.2, seed=42)
train_data = train_valid_test_split['train']
remaining_data = train_valid_test_split['test']

# Step 2: Split the remaining data into 50% validation and 50% test
valid_test_split = remaining_data.train_test_split(test_size=0.5, seed=42)
valid_data = valid_test_split['train']
test_data = valid_test_split['test']



# define and train the model

In [15]:

max_seq_length = 2048# Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2b",
    max_seq_length # Add LoRA adapters so we only need to update 1 to 10% of all parameters!
= max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.11.7: Fast Gemma patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.01G [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

In [16]:
tokenizer.padding_side = 'right'
tokenizer.add_eos_token = True
tokenizer.pad_token = tokenizer.eos_token
tokenizer_config = {
            "Tokenizer": tokenizer.__class__.__name__,
            "padding_side": tokenizer.padding_side,
            "add_eos_token": tokenizer.add_eos_token,
            "pad_token": tokenizer.pad_token
        }

In [17]:
instruction_template="DESCRIPTION:"

# Set MLflow's tracking URI and experiment
mlflow.set_tracking_uri("https://5bb1-156-204-128-49.ngrok-free.app")
mlflow.set_experiment("best model gamma-2b")
mlflow.pytorch.autolog(disable=True)



In [18]:
class MLFlowLoggingCallback(TrainerCallback):
    def __init__(self):
        self.training_loss = []
        self.eval_loss = []

    def on_train_begin(self, args, state, control, **kwargs):
        # Initialize logging at the start of training
        print("Training started.")

    def on_log(self, args, state, control, logs=None, **kwargs):
        # Log training and evaluation losses
        if 'loss' in logs:
            self.training_loss.append(logs['loss'])
            mlflow.log_metric("training_loss", logs['loss'], step=state.global_step)

        if 'eval_loss' in logs:
            self.eval_loss.append(logs['eval_loss'])
            mlflow.log_metric("validation_loss", logs['eval_loss'], step=state.global_step)

        if 'eval_f1' in logs:
            mlflow.log_metric("validation_f1", logs['eval_f1'], step=state.global_step)

    def on_train_end(self, args, state, control, **kwargs):
        # Log final results at the end of training
        print("Training completed.")

In [19]:
def configure_and_train_model(
    r=128,
    lora_alpha=16,
    lora_dropout=0.1,
    use_gradient_checkpointing="none",
    random_state=2048,
    use_rslora=False,
    loftq_config=None,
    train_dataset=None,
    valid_dataset=None,
    batch_size=2,
    grad_accum_steps=16,
    warmup_steps=5,
    max_steps=-1,
    learning_rate=2e-3,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    output_dir="outputs"
):
    global model
    # Load model and tokenizer with specified settings
    lora_config = {
        "r": r,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
        "use_gradient_checkpointing": use_gradient_checkpointing,
        "random_state": random_state,
        "use_rslora": use_rslora,
        "loftq_config": loftq_config,
    }
    with mlflow.start_run():
      mlflow.set_tag("model_name", "gamma-2b")
      # Format datasets with EOS token
      train_dataset = train_dataset.map(
            lambda batch: formatting_prompts_func(batch, EOS_TOKEN=tokenizer.eos_token),
            batched=True
        )
      valid_dataset = valid_dataset.map(
            lambda batch: formatting_prompts_func(batch, EOS_TOKEN=tokenizer.eos_token),
            batched=True
        )

      # Configure PEFT model
      model = FastLanguageModel.get_peft_model(
          model,
          r=r,
          target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
          lora_alpha=lora_alpha,
          lora_dropout=lora_dropout,
          bias="none",
          use_gradient_checkpointing=use_gradient_checkpointing,
          random_state=random_state,
          use_rslora=use_rslora,
          loftq_config=loftq_config
      )
    

      # Training arguments
      training_args = TrainingArguments(
          per_device_train_batch_size=batch_size,
          gradient_accumulation_steps=grad_accum_steps,
          warmup_steps=warmup_steps,
          max_steps=max_steps,
          learning_rate=learning_rate,
          fp16=not is_bfloat16_supported(),
          bf16=is_bfloat16_supported(),
          logging_steps=1,
          optim="adamw_8bit",
          weight_decay=weight_decay,
          lr_scheduler_type=lr_scheduler_type,
          seed=random_state,
          output_dir=output_dir,
          eval_strategy="steps",
          save_strategy="epoch"
      )
      mlflow.log_params(vars(training_args))

      mlflow.log_params(tokenizer_config)
      mlflow.log_params(lora_config)
      # Trainer setup
      trainer = SFTTrainer(
          model=model,
          tokenizer=tokenizer,
          train_dataset=train_dataset,
          eval_dataset=valid_dataset,
          dataset_text_field="text",
          max_seq_length=2048,
          dataset_num_proc=2,
          packing=False,
          args=training_args
          )
      # Initialize the callback
      mlflow_callback = MLFlowLoggingCallback()

      # Train the model with the callback
      trainer.add_callback(mlflow_callback)
      # Train the model
      trainer.train()
      return model

In [20]:

#@title Show current memory stats

gpu_stats = torch.cuda.get_device_properties(0)

start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)

max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")

print(f"{start_gpu_memory} GB of memory reserved.")


GPU = Tesla T4. Max memory = 14.741 GB.
2.305 GB of memory reserved.


In [21]:
# Configure and train the model with specified parameters
model = configure_and_train_model(
    r=128,                 # LoRA rank parameter
    lora_alpha=16,        # Scaling factor for LoRA layers
    lora_dropout=0.1,       # Set dropout to 0 (optimized)
    use_gradient_checkpointing="none",  # Memory optimization
    random_state=3407,    # Seed for reproducibility
    use_rslora=False,     # Disable rank-stabilized LoRA
    loftq_config=None,    # Leave as None for default
    train_dataset=train_data,
    valid_dataset=valid_data,
    batch_size=2,         # Set batch size for training
    grad_accum_steps=16,   # Gradient accumulation steps
    warmup_steps=10,       # Warmup steps for learning rate scheduler
    max_steps=150,         # Total steps for quick testing
    learning_rate=2e-3,   # Learning rate
    weight_decay=0.01,    # Weight decay for optimizer
    lr_scheduler_type="linear",  # Learning rate scheduler type
    output_dir="outputs"  # Directory for saving results
)

Map:   0%|          | 0/662 [00:00<?, ? examples/s]

Map:   0%|          | 0/83 [00:00<?, ? examples/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.11.7 patched 18 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Map (num_proc=2):   0%|          | 0/662 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/83 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 662 | Num Epochs = 8
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 16
\        /    Total batch size = 32 | Total steps = 150
 "-____-"     Number of trainable parameters = 156,893,184
2024/11/16 22:18:54 ERROR mlflow.utils.async_logging.async_logging_queue: Run Id b735ae6d403e4f39bbbfc67860b77b58: Failed to log run data: Exception: INVALID_PARAMETER_VALUE: Changing param values is not allowed. Params were already logged='[{'key': 'logging_strategy', 'old_value': 'IntervalStrategy.STEPS', 'new_value': 'steps'}, {'key': 'save_strategy', 'old_value': 'IntervalStrategy.EPOCH', 'new_value': 'epoch'}, {'key': 'accelerator_config', 'old_value': 'AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=No

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113925900003273, max=1.0…

Training started.


Step,Training Loss,Validation Loss
1,2.4728,2.485648
2,2.4705,2.388825
3,2.3825,2.17122
4,2.1516,1.922029
5,1.9125,1.614556
6,1.6207,1.273228
7,1.2831,1.013885
8,1.0174,0.914271
9,0.9232,0.856998
10,0.8705,0.815456


Training completed.


2024/11/16 23:39:33 INFO mlflow.tracking._tracking_service.client: 🏃 View run stylish-cod-396 at: https://5bb1-156-204-128-49.ngrok-free.app/#/experiments/1/runs/b735ae6d403e4f39bbbfc67860b77b58.
2024/11/16 23:39:33 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://5bb1-156-204-128-49.ngrok-free.app/#/experiments/1.


In [22]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

Peak reserved memory = 11.752 GB.
Peak reserved memory for training = 9.447 GB.
Peak reserved memory % of max memory = 79.723 %.
Peak reserved memory for training % of max memory = 64.087 %.


# inference

In [23]:
test_prompt = """Below is a description for a time series data. Write a response that gives the name of the best fitting machine learning algorithm in one word without explanation.
The best algorithm name should be one of this search space algorithms: AdaboostRegressor, ElasticNetRegressor,  ExtraTreesRegressor,  LassoRegressor,  LightgbmRegressor, SVR, GaussianProcessRegressor, RandomForestRegressor or  XGBoostRegressor.

### DESCRIPTION:
{}

### RESPONSE:"""


def formatting_test_prompts_func(examples):
    global tokenizer

    inputs = examples["series_description"]
    texts = []
    for input in  inputs:
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = test_prompt.format( input)
        texts.append(text)
    return { "text" : texts }

In [24]:
test_dataset = test_data.map(formatting_test_prompts_func, batched = True)
test_dataset

Map:   0%|          | 0/83 [00:00<?, ? examples/s]

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters', 'text'],
    num_rows: 83
})

In [25]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[test_dataset['text'][0]], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<bos>Below is a description for a time series data. Write a response that gives the name of the best fitting machine learning algorithm in one word without explanation.\nThe best algorithm name should be one of this search space algorithms: AdaboostRegressor, ElasticNetRegressor,  ExtraTreesRegressor,  LassoRegressor,  LightgbmRegressor, SVR, GaussianProcessRegressor, RandomForestRegressor or  XGBoostRegressor.\n\n### DESCRIPTION:\nA univariate time-series dataset  consists of 48 samples with a missing values percentage of 0.0% imputed using FBProphet model and 0.0% detected outliers. The target series has a sampling rate of 1440 minutes, minimum value of -1.0577518085465023, maximum value of 0.5039540640878073, median value of -0.5925601507310199, mean value of -0.49076897775897593, and average standard deviation of 0.21373626312059368 for the 10 percentiles. The series is detected as non-stationary using dickey fuller testand it turns into a stationary series using first order diff

In [27]:
inputs = tokenizer(
[test_dataset['text'][1]], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 5, use_cache = True)
tokenizer.batch_decode(outputs)

['<bos>Below is a description for a time series data. Write a response that gives the name of the best fitting machine learning algorithm in one word without explanation.\nThe best algorithm name should be one of this search space algorithms: AdaboostRegressor, ElasticNetRegressor,  ExtraTreesRegressor,  LassoRegressor,  LightgbmRegressor, SVR, GaussianProcessRegressor, RandomForestRegressor or  XGBoostRegressor.\n\n### DESCRIPTION:\nA univariate time-series dataset  consists of 81 samples with a missing values percentage of 0.0% imputed using FBProphet model and 0.0% detected outliers. The target series has a sampling rate of 44640 minutes, minimum value of 3523.548387096774, maximum value of 6434.0, median value of 4978.387096774193, mean value of 4995.464271746457, and average standard deviation of 0.06947531163920204 for the 10 percentiles. The series is detected as stationary using dickey fuller test.The series has 7 significant lags observed using the partial autocorrelation func

In [28]:
test_responses=[]
# get all test data inference result
for test_prompt in test_dataset['text']:
  inputs= tokenizer(
  [test_prompt], return_tensors = "pt").to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = 5, use_cache = True)
  test_responses.append(tokenizer.batch_decode(outputs))


In [29]:
# Convert the dataset to a Pandas DataFrame
df = test_dataset.to_pandas()
df['model_responses']= test_responses
df.to_csv('test_model_result_unsloth.csv', index=False)

In [38]:
predictions = []
for response in test_responses:
    result = response[0].split('\n\n### RESPONSE:')[1].split('</s>')[0].strip()
    result = result.replace('\n', '').replace('.', '')  # Remove \n and .
    if result == '<eos>LIGHTgbmRegressor':
        result = '<eos>LightgbmRegressor'
    elif result == '<eos>ADABoostRegressor':
        result = '<eos>AdaboostRegressor'
        
    predictions.append(result)

predictions


['<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>LightgbmRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>LightgbmRegressor',
 '<eos>XGBoostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>XGBoostRegressor',
 '<eos>AdaboostRegressor',
 '<eos>XGBoost

In [39]:
import re

def extract_model_name(prediction):
    """
    Extract the model name from a prediction string using regex.
    Corrects 'ADABoostRegressor' to 'AdaBoostRegressor'.
    """
    # Adjust regex to match model names more precisely
    match = re.search(r"\b[A-Za-z]+(?:[A-Za-z0-9]*)\b", prediction)
    if match:
        model_name = match.group()
        return model_name
    return None  # Return None if no match is found

# Apply the extraction to all predictions
extracted_predictions = [extract_model_name(pred.split('<eos>')[-1].strip()) for pred in predictions]

# Print the extracted model names
print(extracted_predictions)


['XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'LightgbmRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'LightgbmRegressor', 'XGBoostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'XGBoostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'XGBoostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'XGBoostRegressor', 'LightgbmRegressor', 'LightgbmRegressor', 'XGBoostRegressor', 'AdaboostRegressor', 'AdaboostRegressor', 'ElasticNetRegressor', 'LightgbmRegressor', 'LightgbmRegressor', 'Adab

In [40]:
len(extracted_predictions)

83

In [41]:
actual_data= df['algorithm']
len(actual_data)

83

In [42]:
actual_data

0     GaussianProcessRegressor
1     GaussianProcessRegressor
2     GaussianProcessRegressor
3          ElasticNetRegressor
4             XGBoostRegressor
                ...           
78         ElasticNetRegressor
79            XGBoostRegressor
80           AdaboostRegressor
81           AdaboostRegressor
82              LassoRegressor
Name: algorithm, Length: 83, dtype: object

In [43]:
from sklearn.metrics import f1_score

# Assuming `predictions` and `actual_data` are lists or arrays of labels
f1 = f1_score(actual_data, extracted_predictions, average='micro')  # Use 'macro' or 'micro' as needed


print("F1 Score:", f1)


F1 Score: 0.1566265060240964


In [44]:
from sklearn.metrics import accuracy_score
# Calculate the accuracy
accuracy = accuracy_score(actual_data, extracted_predictions)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.1566265060240964


# save tuned model

To save the final model as LoRA adapters

In [45]:
# Local saving

model.save_pretrained("lora_model")

tokenizer.save_pretrained("lora_model")


('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.model',
 'lora_model/added_tokens.json',
 'lora_model/tokenizer.json')

In [46]:
# Online saving on HF
from huggingface_hub import login

new_model_adabtor= "RanaHossny213/gamma_tuned-2b"
login(token="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")  # Use your Hugging Face token

# Push the model and tokenizer to the Hugging Face hub
model.push_to_hub(new_model_adabtor)
tokenizer.push_to_hub(new_model_adabtor)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


README.md:   0%|          | 0.00/576 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/628M [00:00<?, ?B/s]

Saved model to https://huggingface.co/RanaHossny213/gamma_tuned-2b


No files have been modified since last commit. Skipping to prevent empty commit.


In [47]:
# Save and Merge to 4bit

model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit_forced",token = "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")

model.push_to_hub_merged("model", tokenizer, save_method = "merged_4bit_forced", token = "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")

Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...




Done.
Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.
Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...
Done.
Unsloth: Saving 4bit Bitsandbytes model. Please wait...


README.md:   0%|          | 0.00/582 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/2.07G [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved merged_4bit model to https://huggingface.co/model


In [48]:
# Save just LoRA adapters

model.save_pretrained_merged("model", tokenizer, save_method = "lora",token = "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")

model.push_to_hub_merged("model", tokenizer, save_method = "lora", token = "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")

Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... Done.
Unsloth: Saving LoRA adapters. Please wait...


No files have been modified since last commit. Skipping to prevent empty commit.


Saved lora model to https://huggingface.co/model


# i couldnot store gguf becouse the storage of kaggle but i was stored old version of the model(not the best):
https://huggingface.co/RanaHossny213/gamma-ft-gguf