In [None]:
!pip install bitsandbytes==0.43.1
!pip install accelerate==0.30.1
!pip install transformers==4.42.3
!pip install gradio==4.29.0

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)

# Hugging Face Hub identifier of the pre-trained checkpoint used in this notebook
model_id = "microsoft/Phi-3-mini-4k-instruct"

# The tokenizer is fetched from the same repository as the model weights
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Options for loading the causal language model
model_load_kwargs = {
    "torch_dtype": torch.bfloat16,   # load the weights in bfloat16
    "device_map": "cuda:0",          # place the model on the first CUDA device
    "trust_remote_code": True,       # NOTE(review): allows code shipped in the model repo to run
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_kwargs)

# Multi-Turn Prompt Engineering
- 여러 차례의 대화 턴(turn)을 통해 LLM에 정보를 제공하고,
이를 바탕으로 더욱 정교한 응답을 생성하도록 하는 Prompt Engineering 기법
- 자연스러운 대화 생성 가능 : Chatbot에서 대부분 지원되어야 함.
- 문맥 이해 강화 : 한 번의 턴 만을 고려하는 대신, 여러 턴의 대화를 고려하면 모델이 문맥을 더 잘 이해함
- 예시
    ```
    Input
    질문 : 미국 수도가 어디야??
    대답 : 미국의 수도는 워싱턴 D.C. 입니다.
    질문 : 그럼 한국은?

    Output
    대답 : 한국의 수도는 서울입니다.
    ```

In [None]:
# Conversation history for the multi-turn prompt; the last user turn is the one
# the model is expected to answer
conversation = [
    ("user", "HI What's your name?"),
    ("assistant", "My name is joonhyung kim"),
    ("user", "Would you like to say it again? What's your name?"),
]

# Build the chat messages, dropping newline characters from each turn's text
messages = [
    {"role": speaker, "content": text.replace("\n", "")}
    for speaker, text in conversation
]

# Render the chat template (with the generation prompt appended) into token IDs
# and move them to the device the model lives on
prompt_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
input_ids = prompt_tensor.to(model.device)


In [None]:
# Generate the model's reply to the multi-turn conversation
outputs = model.generate(
    input_ids,               # token IDs of the chat-formatted conversation
    max_new_tokens=1024,     # upper bound on the number of newly generated tokens
    do_sample=False,         # deterministic decoding (no sampling)
)

# Decode only the newly generated tokens (everything after the prompt) and show
# the reply; without this step the multi-turn result is never displayed because
# `outputs` is overwritten by the next section.
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))


# Zero-Shot Prompt Engineering

In [None]:
# Zero-shot: a single user question with no examples or prior turns
zero_shot_question = "Who do you like more, mom or dad?"
messages = [
    {"role": "user", "content": zero_shot_question.replace("\n", "")}
]

# Render the chat template (with the generation prompt appended) into token IDs
# and move them to the model's device
prompt_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
input_ids = prompt_tensor.to(model.device)


In [None]:
outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=False,
    # temperature=0.3,
    # top_p=0.9,
)


In [None]:
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated tokens are decoded and printed
prompt_length = input_ids.shape[-1]
generated_sequence = outputs[0]
response = generated_sequence[prompt_length:]
answer_text = tokenizer.decode(response, skip_special_tokens=True)
print(answer_text)

# Generated Knowledge Prompting Engineering
- LLM으로부터 먼저 지식을 생성하게 하도록 지시하고, 생성된 지식을 Prompt에 포함하여 답변을 생성하도록 하는 기법


## Step 1

In [None]:
# Few-shot demonstrations: each "Question : ..." turn is followed by an assistant
# "Knowledge : ..." turn; the last user question is left for the model to answer
few_shot_turns = [
    ("user", "Question : Is Greece larger than mexico?"),
    ("assistant", "Knowledge : Greece is approximately 131,957 sq km, while Mexico is approximately 1,964,375 sq km, making Mexico 1,389% larger than Greece."),
    ("user", "Question : Is the Eiffel Tower taller than the Leaning Tower of Pisa?"),
    ("assistant", "Knowledge : The Eiffel Tower is approximately 330 meters tall, while the Leaning Tower of Pisa is approximately 56 meters tall, making the Eiffel Tower considerably taller."),
    ("user", "Question : Is the population of Canada greater than Australia?"),
    ("assistant", "Knowledge : As of current data, the population of Canada is approximately 37 million, while the population of Australia is approximately 25 million, making Canada's population greater than Australia's."),
    ("user", "Question : Can you explain about Mother?"),
]

# Build the chat messages, dropping newline characters from each turn's text
messages = [
    {"role": speaker, "content": text.replace("\n", "")}
    for speaker, text in few_shot_turns
]

# Render the chat template (with the generation prompt appended) into token IDs
# and move them to the model's device
prompt_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
input_ids = prompt_tensor.to(model.device)


In [None]:
outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=False,
    # temperature=0.3,
    # top_p=0.9,
)


In [None]:
# Drop the prompt tokens from the generated sequence and keep the decoded text
# as the first piece of generated knowledge
prompt_length = input_ids.shape[-1]
generated_sequence = outputs[0]
response = generated_sequence[prompt_length:]
Knowledge_1 = tokenizer.decode(response, skip_special_tokens=True)
print(Knowledge_1)

In [None]:
# Same few-shot demonstrations as the previous knowledge prompt; only the final
# user question differs
few_shot_turns = [
    ("user", "Question : Is Greece larger than mexico?"),
    ("assistant", "Knowledge : Greece is approximately 131,957 sq km, while Mexico is approximately 1,964,375 sq km, making Mexico 1,389% larger than Greece."),
    ("user", "Question : Is the Eiffel Tower taller than the Leaning Tower of Pisa?"),
    ("assistant", "Knowledge : The Eiffel Tower is approximately 330 meters tall, while the Leaning Tower of Pisa is approximately 56 meters tall, making the Eiffel Tower considerably taller."),
    ("user", "Question : Is the population of Canada greater than Australia?"),
    ("assistant", "Knowledge : As of current data, the population of Canada is approximately 37 million, while the population of Australia is approximately 25 million, making Canada's population greater than Australia's."),
    ("user", "Question : Can you explain about Father?"),
]

# Build the chat messages, dropping newline characters from each turn's text
messages = [
    {"role": speaker, "content": text.replace("\n", "")}
    for speaker, text in few_shot_turns
]

# Render the chat template (with the generation prompt appended) into token IDs
# and move them to the model's device
prompt_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
input_ids = prompt_tensor.to(model.device)


In [None]:
outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=False,
    # temperature=0.3,
    # top_p=0.9,
)


In [None]:
# Drop the prompt tokens from the generated sequence and keep the decoded text
# as the second piece of generated knowledge
prompt_length = input_ids.shape[-1]
generated_sequence = outputs[0]
response = generated_sequence[prompt_length:]
Knowledge_2 = tokenizer.decode(response, skip_special_tokens=True)
print(Knowledge_2)

## Step 2
- Step1 응답 결과를 프롬프트에 다시 넣음

In [None]:
# Combine the knowledge generated in Step 1 with the final question.
# The pieces are joined with spaces and every run of whitespace (including the
# line breaks inside the generated knowledge) is collapsed to a single space.
# Simply deleting "\n" would glue words together across line breaks
# ("...end.\nNext" -> "...end.Next") and run Knowledge_1 straight into
# Knowledge_2 with no separator.
prompt_parts = [
    Knowledge_1,
    Knowledge_2,
    "Question : Given the Knowledge, Who do you like more, mom or dad?",
]
messages = [
    {"role": "user", "content": " ".join(" ".join(prompt_parts).split())}
]

# Render the chat template (with the generation prompt appended) into token IDs
# and move them to the model's device
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)


In [None]:
outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=False,
    # temperature=0.3,
    # top_p=0.9,
)


In [None]:
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated tokens are decoded and printed
prompt_length = input_ids.shape[-1]
generated_sequence = outputs[0]
response = generated_sequence[prompt_length:]
answer_text = tokenizer.decode(response, skip_special_tokens=True)
print(answer_text)