In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, GenerationConfig
import sys
import os
import torch
sys.path.append(os.path.join(os.getcwd(), "peft/src/"))
from peft import PeftModel, PeftConfig

def get_huggingface_token(config_file_path:str='config.yaml'):
    """Read the Hugging Face access token from a YAML configuration file.

    Args:
        config_file_path: Path of the YAML file to read. The file must hold a
            ``huggingface`` mapping that contains a ``token`` entry.

    Returns:
        The value stored at ``config["huggingface"]["token"]``.
    """
    # Imported inside the function, so PyYAML is only needed when this is called.
    import yaml

    with open(config_file_path, "r") as handle:
        settings = yaml.safe_load(handle)
    return settings["huggingface"]["token"]

# Base checkpoint / LoRA adapter pair to load. Alternatives kept for reference:
#model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'
#lora_weights = "yspkm/Meta-Llama-3-8B-Instruct-lora-math"
#model_name = 'google/gemma-2-9b-it'
#model_name = 'mistralai/Mistral-7B-Instruct-v0.3'
#lora_weights = "yspkm/Mistral-7B-Instruct-v0.3-lora-math"
model_name = 'google/gemma-2b-it'
lora_weights = "yspkm/gemma-2b-it-lora-math"
#lora_weights = 'trained_models/mistral/math'

# Read the access token once and reuse it for every Hub download in this cell.
HF_TOKEN = get_huggingface_token()

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #load_in_8bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    token=HF_TOKEN,
)
# The tokenizer is fetched from the same repository as the weights, so it is
# given the same token (previously only the model load was authenticated).
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
# Wrap the base model with the LoRA adapter weights.
peft_model = PeftModel.from_pretrained(
    model,
    lora_weights,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
#peft_model = torch.compile(peft_model)

#base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3", device_map='auto')

#config = PeftConfig.from_pretrained("yspkm/Mistral-7B-Instruct-v0.3-lora-math")
#model = PeftModel.from_pretrained(base_model, config)
#model = PeftModel.from_pretrained(base_model, "yspkm/Mistral-7B-Instruct-v0.3-lora-math")

# Disabled sampling configuration (was a bare string literal, i.e. a no-op):
#generation_config = GenerationConfig(
#    temperature=0.1,
#    top_p=0.75,
#    top_k=40,
#    do_sample=True,
#    num_beams=5)
#text_generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer, max_new_tokens=256, device_map='auto', config=generation_config)

# Text-generation pipeline over the adapter-wrapped model; at most 256 new tokens per call.
text_generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer, max_new_tokens=256, device_map='auto')

In [None]:
# Print PEFT's trainable-parameter summary for the adapter-wrapped model.
peft_model.print_trainable_parameters()

In [None]:
# Sample inputs; exactly one `instruction` assignment is active at a time.
#prompt = "Please choose the correct solution to the question: dresser\n\nSolution1: replace drawer with bobby pin \n\nSolution2: finish, woodgrain with  bobby pin \n\nAnswer format: solution1/solution2"
#instruction = "Jack received 9 emails in the morning, 10 emails in the afternoon and 7 emails in the evening. How many more emails did Jack receive in the morning than in the evening?"
instruction = "Zach wants to ride the Ferris wheel , the roller coaster , and the log ride . The Ferris wheel costs 2 tickets , the roller coaster costs 7 tickets and the log ride costs 1 ticket . Zach has 1 ticket . How many more tickets should Zach buy ?"
#instruction = "Paige had 27 files on her computer. She deleted 9 of them and put the rest into folders with 6 files in each one. How many folders did Paige end up with?"

# Instruction/response template. The two trailing spaces after "request." and
# the 16-space indentation of the following lines are part of the prompt text.
prompt = (
    "Below is an instruction that describes a task. Write a response that appropriately completes the request.  \n"
    "\n"
    "                ### Instruction:\n"
    f"                {instruction}\n"
    "                \n"
    "                ### Response:"
)

# The pipeline result is indexed as a list of dicts; keep the first entry's text.
first_output = text_generator(prompt)[0]
generated_text = first_output['generated_text']
print(generated_text)

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

import sys
import os
sys.path.append(os.path.join(os.getcwd(), "peft/src/"))
from peft import PeftModel, PeftConfig

# Base checkpoint and LoRA adapter repository used by the loads later in this cell.
# These assignments replace the values set in the first cell.
model_name = 'google/gemma-2-9b-it'
lora_weights = "yspkm/gemma-2-9b-it-lora-math"
# NOTE(review): same definition as in the first cell; running this cell re-binds the name.
def get_huggingface_token(config_file_path:str='config.yaml'):
    """Return the Hugging Face access token stored in a YAML config file.

    Args:
        config_file_path: Path of the YAML file to read. It must contain a
            ``huggingface`` mapping with a ``token`` entry.

    Returns:
        The value found at ``config["huggingface"]["token"]``.
    """
    # Function-scope import: PyYAML is only needed when the token is read.
    import yaml

    with open(config_file_path, "r") as stream:
        parsed = yaml.safe_load(stream)
    return parsed["huggingface"]["token"]

# Read the access token once; it is reused for the model and tokenizer downloads.
HF_TOKEN = get_huggingface_token()

import re

# Model name is set above (the original note gave bert-base-uncased as an example).

# Keyword arguments shared by the base-model load and the adapter load.
# NOTE(review): the first cell loads in bfloat16 while this cell uses float16 —
# confirm the difference is intentional.
shared_load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load the Hugging Face base model.
#   (disabled option: load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_name, token=HF_TOKEN, **shared_load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)

# Wrap the base model with the LoRA adapter weights.
#   (disabled option: merge_weights=True)
peft_model = PeftModel.from_pretrained(model, lora_weights, **shared_load_kwargs)
#peft_model.eval()
#peft_model = peft_model.compile()
#text_generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer, max_new_tokens=256, device_map='auto')
#text_generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer, max_new_tokens=256, device_map='auto')
def get_prompt(instruction:str):
    """Wrap `instruction` in the instruction/response prompt template.

    Args:
        instruction: Task description placed under the "### Instruction:" header.

    Returns:
        The prompt string, ending with "### Response:" and no trailing newline.
    """
    # The two trailing spaces after "request." and the 16-space indentation of
    # the following lines are part of the template text and are kept verbatim.
    return (
        "Below is an instruction that describes a task. Write a response that appropriately completes the request.  \n"
        "\n"
        "                ### Instruction:\n"
        f"                {instruction}\n"
        "                \n"
        "                ### Response:"
    )

def generate_text(instruction: str, text_generator):
    """Build a prompt for `instruction`, run the generator on it, and print both.

    Args:
        instruction: Task text handed to `get_prompt`.
        text_generator: Callable invoked with the prompt string; its result is
            read as ``result[0]['generated_text']``.

    Returns:
        A ``(prompt, generated_text)`` tuple.
    """
    full_prompt = get_prompt(instruction)
    print(f"Prompt: {full_prompt}\n\n")

    first_result = text_generator(full_prompt)[0]
    completion = first_result['generated_text']
    print(completion)

    return full_prompt, completion

In [None]:
# Merge the LoRA adapter into the base model and keep the merged model under the same name.
# The object returned by the merge no longer offers `merge_and_unload`, so the
# guard lets this cell be re-run without raising AttributeError.
if hasattr(peft_model, "merge_and_unload"):
    peft_model = peft_model.merge_and_unload()

In [None]:
# torchinfo is imported in this cell, next to the call that uses it.
from torchinfo import summary
# Display a module summary of whatever `peft_model` currently refers to;
# depth=100 is passed so that deeply nested submodules are listed too.
summary(peft_model, depth=100)

In [None]:
instruction = "Zach wants to ride the Ferris wheel , the roller coaster , and the log ride . The Ferris wheel costs 2 tickets , the roller coaster costs 7 tickets and the log ride costs 1 ticket . Zach has 1 ticket . How many more tickets should Zach buy ?"
%time generate_text(instruction, text_generator)