In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%%capture
# Install Unsloth and the libraries it needs; %%capture keeps the installer output out of the notebook.
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    # No environment variable name contains "COLAB_": a plain install is enough.
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # v is the leading run of digits/dots of the installed torch version (any suffix is dropped).
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    # The xformers pin depends on the torch version: one build for 2.8.0, another for everything else.
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    # --no-deps: install exactly these packages without letting pip pull in their dependencies.
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# These two pins are applied in both branches, after the environment-specific installs above.
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2

In [None]:
import torch
# Report, in order: whether CUDA is usable, the device name, and its compute capability.
for probe in (
    torch.cuda.is_available,
    torch.cuda.get_device_name,
    torch.cuda.get_device_capability,
):
    print(probe())

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options for the base checkpoint.
# max_seq_length is reused further down when the trainer is built.
# dtype = None: presumably lets Unsloth choose the dtype for the GPU -- confirm in the Unsloth docs.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# Load the pre-quantized Llama-3 8B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    load_in_4bit = load_in_4bit,
    dtype = dtype,
    max_seq_length = max_seq_length,
)


In [None]:
# Parameter-Efficient Fine-Tuning (PEFT) with LoRA (Low-Rank Adaptation):
# wrap the pre-trained model so that only the low-rank adapter weights are trained.
model = FastLanguageModel.get_peft_model(
    model,    # pre-trained model
    r = 16,

    # The low-rank matrices (A.B) are multiplied by lora_alpha / r to control
    # how strongly they influence the original weights.
    lora_alpha = 32,

    target_modules = [
        # q_proj, k_proj, v_proj: layers producing Query, Key and Value in the attention mechanism.
        "q_proj",
        "k_proj",
        "v_proj",
        # o_proj: output projection of the attention block.
        "o_proj",
        # gate_proj, up_proj, down_proj: layers of the feed-forward network (FFN).
        "gate_proj",
        "up_proj",
        "down_proj",
    ],

    # Dropout is a regularization technique that randomly drops a fraction of units to avoid overfitting.
    lora_dropout = 0,

    # Controls how bias terms are handled in the LoRA layers.
    bias = "none",

    # RS-LoRA is a LoRA variant that improves stability at high rank.
    # If you increase r and run into stability problems, try setting this to True.
    use_rslora = False,

    # LoftQ combines quantization with LoRA to compress the model further.
    loftq_config = None,

    # Enable gradient checkpointing to save memory during training.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

In [None]:
# Prompt template with three positional slots, filled in this order:
# instruction, input (the question), response (the answer).
chat_prompt = (
    "### Instruction\n"
    "{}\n"
    "\n"
    "### Input\n"
    "{}\n"
    "\n"
    "### Response\n"
    "{}"
)

In [None]:
# End-of-sequence marker taken from the tokenizer; formatting_prompts_func appends it
# to every rendered training example.
EOS_TOKEN = tokenizer.eos_token    # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    """Render a batch of question/answer rows into training prompts.

    Args:
        examples: Batch mapping with parallel sequences under the keys
            "question" and "answer".

    Returns:
        A dict with a single key "text" holding one rendered prompt per
        question/answer pair. Each prompt is chat_prompt filled with an empty
        instruction, the question and the answer, followed by EOS_TOKEN.
    """
    blank_instruction = ""
    qa_pairs = zip(examples["question"], examples["answer"])
    return {
        "text": [
            chat_prompt.format(blank_instruction, question, answer) + EOS_TOKEN
            for question, answer in qa_pairs
        ],
    }

In [None]:
from datasets import load_dataset

# Load the training split and map formatting_prompts_func over it in batches;
# that function returns the rendered prompts under the "text" key.
dataset_train = load_dataset(
    "tyanfarm/hotels-questions-answers-mixed",
    split = "train",
).map(formatting_prompts_func, batched = True)

# Validation split (currently disabled):
# dataset_val = load_dataset("tyanfarm/hotels-questions-answers-mixed", split = "validation")
# dataset_val = dataset_val.map(formatting_prompts_func, batched = True,)

In [None]:
# dataset_train[len(dataset_train)-5:len(dataset_train)]  # peek at the last five formatted rows

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning of the LoRA-wrapped model on the "text" column of dataset_train.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    # eval_dataset = dataset_val,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,   # 2 x 4 = 8 sequences per optimizer step on each device
        warmup_steps = 5,
        # max_steps = 120,
        num_train_epochs = 4,
        learning_rate = 2e-4,
        # Always select exactly one mixed-precision mode: bf16 where the GPU supports it,
        # fp16 otherwise. Leaving fp16 unset meant neither flag was enabled on GPUs
        # without bf16 support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Wrap the trainer with the markers that delimit the prompt and the answer.
# instruction_part is exactly what chat_prompt renders up to the question when the
# instruction slot is empty (hence the three consecutive newlines); response_part is
# the header that precedes the answer.
trainer = train_on_responses_only(
    trainer,
    response_part = "### Response\n",
    instruction_part = "### Instruction\n\n\n### Input\n",
)

In [None]:
import wandb
# Read the Weights & Biases API key from the environment rather than hard-coding it
# in the notebook (export WANDB_API_KEY, e.g. from a Kaggle secret, before this cell).
# When the variable is unset or empty, None is passed so wandb uses its own
# credential lookup instead of receiving an empty key.
import os

wandb.login(key = os.environ.get("WANDB_API_KEY") or None)

In [None]:
# Run the fine-tuning loop; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
FastLanguageModel.for_inference(model) # For faster Inference

# Build the prompt with the same template used for training, leaving the response
# slot empty so the model generates the answer.
prompt = chat_prompt.format(
    "", # instruction - leave this blank!
    "Norfolk Saigon Hotel có phòng Superior không?", # input
    "", # output - leave this blank!
)
inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens = 512,
    # temperature / top_k / top_p only take effect when sampling is enabled, so request
    # it explicitly instead of depending on the checkpoint's default generation config.
    do_sample = True,
    temperature = 0.1,
    top_k = 10,
    top_p = 0.95,
)

# Decode once and reuse the result: decoded_output is the first (only) completion,
# and the full list is the cell's displayed value.
decoded_outputs = tokenizer.batch_decode(outputs)
decoded_output = decoded_outputs[0]
decoded_outputs

In [None]:
from huggingface_hub import login

# Read the Hugging Face access token from the environment rather than hard-coding it
# in the notebook (export HF_TOKEN, e.g. from a Kaggle secret, before this cell).
# When the variable is unset or empty, None is passed so huggingface_hub falls back
# to its own login flow instead of receiving an empty token.
import os

login(token = os.environ.get("HF_TOKEN") or None)

In [None]:
from huggingface_hub import HfApi

# Hub repository that receives the fine-tuned weights.
model_name = "tyanfarm/llama3-8b-hotels-information-mixed-finetuned"

# exist_ok=True makes this cell safe to re-run once the repository exists.
api = HfApi()
api.create_repo(
    repo_id = model_name,
    exist_ok = True,
    repo_type = "model",
)

In [None]:
# Upload the fine-tuned model and its tokenizer to the Hub repository named by model_name.
# NOTE(review): model is the PEFT-wrapped model, so this presumably uploads only the
# LoRA adapter weights rather than a merged checkpoint -- confirm.
model.push_to_hub(model_name)
tokenizer.push_to_hub(model_name)

In [None]:
# Extra packages for the notification step.
# NOTE(review): the notification code visible in this notebook only uses `requests`;
# confirm flask and python-telegram-bot are still needed, and pin versions if they are.
!pip install flask python-telegram-bot

In [None]:
import requests

# Telegram bot credentials come from the environment so they are never stored in the
# notebook. Both fall back to "" (the previous placeholder value) when the variable is unset.
import os

TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "")


def send_message(text: str, timeout: float = 10.0):
    """Send a MarkdownV2-formatted message to the configured Telegram chat.

    Args:
        text: Message body. It is sent with parse_mode "MarkdownV2", so it must
            already be escaped accordingly.
        timeout: Seconds to wait for the Telegram API before giving up. Without
            a timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The decoded JSON body of the Telegram API response.
    """
    url = f"https://api.telegram.org/bot{TOKEN}/sendMessage"
    payload = {
        "chat_id": CHAT_ID,
        "text": text,
        "parse_mode": "MarkdownV2"  # optional: 'MarkdownV2' or 'HTML'
    }
    response = requests.post(url, json=payload, timeout=timeout)
    return response.json()

# Notification body in Telegram MarkdownV2. "!" must be escaped with a backslash there;
# the backslash is written as "\\" because "\!" is not a valid Python escape sequence.
# The runtime text is unchanged.
text = """
Training done \\!
```kaggle-finetuning
tyanfarm/llama3-8b-hotels-information-mixed-finetuned
```
"""

# Send the completion notice; the JSON reply returned by send_message is the cell's output.
send_message(text)