In [1]:
import torch
from datasets import Dataset, load_dataset
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,AutoConfig,AutoModel
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import Dataset, DatasetDict
import pandas as pd
from datasets import load_dataset

data = pd.read_csv("open/train.csv",encoding='utf-8')
selected_columns = data.loc[:, ['Question', 'Answer']]
tds = Dataset.from_pandas(selected_columns)
ds = DatasetDict()
ds['train'] = tds

In [3]:
def create_text_column(example):
    # 'text' 컬럼 생성
    text = f"### Instruction:\n{example['Question']}\n\n### Response:\n{example['Answer']}"
    example["text"] = text
    return example

# 'text' 컬럼 생성
datasethk = ds.map(create_text_column)

Map: 100%|██████████| 496/496 [00:00<00:00, 15993.04 examples/s]


In [4]:
datasethk['train']['text'][:2]

['### Instruction:\n2024년 중앙정부 재정체계는 어떻게 구성되어 있나요?\n\n### Response:\n2024년 중앙정부 재정체계는 예산(일반·특별회계)과 기금으로 구분되며, 2024년 기준으로 일반회계 1개, 특별회계 21개, 기금 68개로 구성되어 있습니다.',
 '### Instruction:\n2024년 중앙정부의 예산 지출은 어떻게 구성되어 있나요?\n\n### Response:\n2024년 중앙정부의 예산 지출은 일반회계 356.5조원, 21개 특별회계 81.7조원으로 구성되어 있습니다.']

In [5]:
## set base model
base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#new_model = "/content/gdrive/MyDrive/Colab Notebooks/llama/02. Fine Tuning/llama3_meta_hkcode_0602"

### Load basemodel
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
baseModel = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=bnb_config
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:25<00:00,  6.37s/it]


In [6]:
### Load basemodel's tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Must add EOS_TOKEN at response last line
tokenizer.pad_token = tokenizer.eos_token
EOS_TOKEN = tokenizer.eos_token
def prompt_eos(sample):
    sample['text'] = sample['text']+EOS_TOKEN
    return sample
datasethk = datasethk.map(prompt_eos)


Map: 100%|██████████| 496/496 [00:00<00:00, 14217.59 examples/s]


In [7]:
lora_config = LoraConfig(
    r=16,
    lora_alpha = 32,
    lora_dropout = 0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",], 
    task_type="CAUSAL_LM",
)

trainer = SFTTrainer(
    model=base_model,
    train_dataset=datasethk['train'],
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    packing=False,
    peft_config=lora_config,
    args=TrainingArguments(
        output_dir="outputs",
        num_train_epochs = 2,
        max_steps=1000,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        optim="paged_adamw_8bit",
        warmup_ratio=0.03,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=100,
    )
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.52s/it]
Map: 100%|██████████| 496/496 [00:00<00:00, 17656.93 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs


In [8]:
trainer.train()


  attn_output = torch.nn.functional.scaled_dot_product_attention(
  3%|▎         | 33/1000 [10:04<6:13:21, 23.17s/it]

KeyboardInterrupt: 

In [9]:
ADAPTER_MODEL = "llama3_lora"

trainer.model.save_pretrained(ADAPTER_MODEL)

new_basemodel  = AutoModelForCausalLM.from_pretrained(base_model, device_map='auto', torch_dtype=torch.float16)
loramodel  = PeftModel.from_pretrained(new_basemodel, ADAPTER_MODEL, device_map='auto', torch_dtype=torch.float16)
mergedModel = loramodel.merge_and_unload()

Loading checkpoint shards: 100%|██████████| 4/4 [00:31<00:00,  7.84s/it]


In [10]:
import huggingface_hub
huggingface_hub.login("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")
mergedModel.push_to_hub("chan1121/dacon4000")
tokenizer.push_to_hub("chan1121/dacon4000", use_temp_dir=True)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\HANMAC\.cache\huggingface\token
Login successful


model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]
[A

[A[A


[A[A[A
[A


model-00001-of-00004.safetensors:   0%|          | 1.90M/4.98G [00:00<04:49, 17.2MB/s]


model-00001-of-00004.safetensors:   0%|          | 4.69M/4.98G [00:00<04:19, 19.2MB/s]


[A[A[A
model-00001-of-00004.safetensors:   0%|          | 6.59M/4.98G [00:00<15:16, 5.42MB/s]
model-00001-of-00004.safetensors:   0%|          | 8.29M/4.98G [00:01<12:01, 6.89MB/s]


model-00001-of-00004.safetensors:   0%|          | 11.5M/4.98G [00:01<07:37, 10.8MB/s]
model-00001-of-00004.safetensors:   0%|          | 14.5M/4.98G [00:01<05:49, 14.2MB/s]
[A


model-00001-of-00004.safetensors:   0%|          | 16.7M/4.98G [00:01<10:02, 8.23MB/s]
[A


model-00001-of-00004.safetensors:   0%|          | 19.1M/4.98G [00:01<08:09, 10.1MB/s]
[A


model-00001-of-00004.safetensors:   0%|          | 21.9M/4.98G [00:02<06:42, 12.3MB/s]
model-00001-of-00004.safetensors:   0%|          | 23.8M/4.98G [00:02<06:08

CommitInfo(commit_url='https://huggingface.co/chan1121/dacon4000/commit/d9d86b3e5a594a86d1ab38bd70065eb60e2cc6b7', commit_message='Upload tokenizer', commit_description='', oid='d9d86b3e5a594a86d1ab38bd70065eb60e2cc6b7', pr_url=None, pr_revision=None, pr_num=None)

In [3]:
from langchain.llms import HuggingFacePipeline

def setup_llm_pipeline():
    # 4비트 양자화 설정
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    # 모델 ID  chan1121/dacon-9b rtzr/ko-gemma-2-9b-it
    tok = "chan1121/dacon4000"
    model_id = "chan1121/dacon4000"  
    # 토크나이저 로드 및 설정
    tokenizer = AutoTokenizer.from_pretrained(tok)
    tokenizer.use_default_system_prompt = False
    pad_token_id = tokenizer.eos_token_id
    # 모델 로드 및 양자화 설정 적용
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        do_sample=True)

    # HuggingFacePipeline 객체 생성
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        temperature=0.2,
        return_full_text=False,
        max_new_tokens=400,
        pad_token_id=tokenizer.eos_token_id,
    )

    hf = HuggingFacePipeline(pipeline=text_generation_pipeline)

    return hf

# LLM 파이프라인
llm = setup_llm_pipeline()

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.54s/it]
  warn_deprecated(


In [2]:
from langchain.llms import HuggingFacePipeline

bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    # 모델 ID  chan1121/dacon-9b rtzr/ko-gemma-2-9b-it
tok = "chan1121/dacon4000"
model_id = "chan1121/dacon4000"  
    # 토크나이저 로드 및 설정
tokenizer = AutoTokenizer.from_pretrained(tok)
tokenizer.use_default_system_prompt = False
pad_token_id = tokenizer.eos_token_id
terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    # 모델 로드 및 양자화 설정 적용
model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        do_sample=True)

    # HuggingFacePipeline 객체 생성
text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        temperature=0.2,
        return_full_text=False,
        max_new_tokens=400,
        #pad_token_id=tokenizer.eos_token_id,
        eos_token_id = terminators,
    )

hf = HuggingFacePipeline(pipeline=text_generation_pipeline)


Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.53s/it]
  warn_deprecated(


In [12]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate

template = """
    다음 정보를 바탕으로 질문에 답하세요:
    {context}

    ### 질문:
    {question}
    
    질문의 핵심만 파악하여 간결하게 1-2문장으로 답변하고, 불필요한 설명은 피하며 요구된 정보만 제공하세요.
    
    ### 답변:

    <|eot_id|>
    """
    
prompt = PromptTemplate.from_template(template) 
hf.invoke(template)

' 한국의 수도는 서울입니다.'