# Set up

In [1]:
!pip install accelerate==0.29.3 bitsandbytes==0.43.1





In [3]:
import os

import accelerate
import bitsandbytes
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# GPU memory before loading the model, reported in decimal gigabytes (bytes / 10**9).
free_bytes, total_bytes = torch.cuda.mem_get_info()
print("free(Gb):", free_bytes/10**9, "total(Gb):", total_bytes/10**9)


free(Gb): 10.708058112 total(Gb): 11.81089792


# Load Model and Tokenizer
- We can write a wrapper function to load and generate text
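- `load_in_8bit = True` quantizes the weights to 8 bits so the whole model fits in the GPU memory available here (~12 GB); on this hardware that speeds up inference significantly!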

In [6]:

# Never hardcode the Hugging Face access token in the notebook: it is a secret
# and would be leaked whenever the notebook is shared or committed.
# Export it before starting Jupyter (e.g. `export HF_TOKEN=hf_...`).
# When HF_TOKEN is unset, `token` is None; the hub client is then expected to
# fall back to credentials stored by `huggingface-cli login` (verify for your version).
token = os.environ.get("HF_TOKEN")
# Model repository to load.
llama = "meta-llama/Llama-2-7b-hf"
# Load the weights in 8-bit precision.
load_in_8bit = True

In [7]:
# Load the tokenizer; `token` authenticates the download
# (`token=` replaces the deprecated `use_auth_token=` argument).
tokenizer = AutoTokenizer.from_pretrained(llama, token=token)

# Passing `load_in_8bit` directly to from_pretrained is deprecated; the
# quantization settings are supplied through a BitsAndBytesConfig instead.
# When 8-bit loading is disabled, no quantization config is passed at all.
quantization_config = BitsAndBytesConfig(load_in_8bit=True) if load_in_8bit else None

model = AutoModelForCausalLM.from_pretrained(
    llama,
    token=token,
    device_map='auto',  # let the library decide where to place the weights
    quantization_config=quantization_config,
)



The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# GPU memory after loading the model, reported in decimal gigabytes (bytes / 10**9).
free_bytes, total_bytes = torch.cuda.mem_get_info()
print("free(Gb):", free_bytes/10**9, "total(Gb):", total_bytes/10**9)



free(Gb): 3.1981568 total(Gb): 11.81089792


# Prompt Overview

- A prompt contains any of the following elements:

    - Instruction - a specific task or instruction you want the model to perform

    - Context - external information or additional context that can steer the model to better responses

    - Input Data - the input or question that we are interested to find a response for

    - Output Indicator - the type or format of the output.



In [32]:
# A bare question with no explicit instruction or output indicator:
# this prompt does NOT follow the prompt elements listed above.
prompt = """What is the sentiment of:
Hi Amit, thanks for the thoughtful birthday card!"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens such as <s> are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> What is the sentiment of:
Hi Amit, thanks for the thoughtful birthday card!
Hi Amit, thanks for the birthday card!
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit, thanks for the birthday card.
Hi Amit


In [31]:
# Instruction + input data + output indicator ("Sentiment:"):
# this prompt follows the prompt elements.
prompt = """Classify the text into neutral, negative or positive:
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> Classify the text into neutral, negative or positive:
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: Neutral
Text: This is the worst birthday card ever!
Sentiment: Negative
Text: Happy birthday! I hope you have a great day!
Text: I hope you have a great birthday!
Text: Happy birthday!
Text: I hope you have a great birthday!
Text: Happy birthday! I hope you have a great day!
Text: Happy birthday! I hope you have a great day!
Text


In [33]:
# Same structured prompt as before, but the output indicator ends with a
# trailing space ("Sentiment: ") to show how that changes the completion.
prompt = """Classify the text into neutral, negative or positive:
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: """

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> Classify the text into neutral, negative or positive:
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: 0.00%
Sentiment: 0.00%
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: 0.00%
Sentiment: 0.00%
Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: 0.00%
Sentiment: 0.00%
Text: Hi Amit, thanks


### One-shot Prompting


In [None]:
# One-shot prompting: the instruction is followed by exactly ONE solved
# demonstration, then the text we actually want classified.
# (Without the demonstration this would just be the zero-shot prompt again.)
prompt = """Classify the text into neutral, negative or positive:
Text: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Text: Hi Amit, thanks for the thoughtful birthday card!
Sentiment:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

### Few-shot Prompting
- 0x0A = new line token

In [22]:
# Few-shot prompting: two labelled demonstrations, then the message to classify.
# The prompt has no leading or trailing newline.
prompt = """Message: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Message: Can't wait to order pizza for dinner tonight
Sentiment: Positive

Message: Hi Amit, thanks for the thoughtful birthday card!
Sentiment:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> Message: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Message: Can't wait to order pizza for dinner tonight
Sentiment: Positive

Message: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: Positive

Message: I'm going to the beach for my vacation this weekend!
Sentiment: Positive

Message: I'm feeling a little sick today.
Sentiment: Negative

Message: I'm going to the beach for my vacation this weekend!
Sentiment: Positive

Message: I'm feeling a little sick today.
Sentiment: Negative

Message: I'


In [23]:
# Inspect how the current prompt is split into tokens (newlines show up as <0x0A>).
tokens = tokenizer.tokenize(prompt)
print(tokens)

['▁Message', ':', '▁Hi', '▁D', 'ad', ',', '▁you', "'", 're', '▁', '2', '0', '▁minutes', '▁late', '▁to', '▁my', '▁piano', '▁rec', 'ital', '!', '<0x0A>', 'S', 'ent', 'iment', ':', '▁Neg', 'ative', '<0x0A>', '<0x0A>', 'Message', ':', '▁Can', "'", 't', '▁wait', '▁to', '▁order', '▁p', 'izza', '▁for', '▁dinner', '▁ton', 'ight', '<0x0A>', 'S', 'ent', 'iment', ':', '▁Pos', 'itive', '<0x0A>', '<0x0A>', 'Message', ':', '▁Hi', '▁Am', 'it', ',', '▁thanks', '▁for', '▁the', '▁thought', 'ful', '▁birth', 'day', '▁card', '!', '<0x0A>', 'S', 'ent', 'iment', ':']


### Notice, a new line at both the beginning and the end does not work

In [None]:
# Few-shot prompt that deliberately starts with " \n" and ends with "\n",
# to show that surrounding whitespace breaks the demonstrated format.
prompt = """ 
Message: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Message: Can't wait to order pizza for dinner tonight
Sentiment: Positive

Message: Hi Amit, thanks for the thoughtful birthday card!
Sentiment:
"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(input_ids=inputs["input_ids"],
                         attention_mask=inputs["attention_mask"],
                         max_new_tokens=100)
# Decode and print on separate lines: having both statements on one line
# was a SyntaxError, so this cell could not run.
response = tokenizer.decode(outputs[0])
print(response)

### Notice, there should be no new line at the end (at the beginning is OK)
### This is because we want to keep the same format as the previous two demonstrations

In [28]:
# Few-shot prompt with a leading newline only; it still ends right after
# "Sentiment:" so the last entry matches the format of the demonstrations.
prompt = """
Message: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Message: Can't wait to order pizza for dinner tonight
Sentiment: Positive

Message: Hi Amit, thanks for the thoughtful birthday card!
Sentiment:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Message: Hi Dad, you're 20 minutes late to my piano recital!
Sentiment: Negative

Message: Can't wait to order pizza for dinner tonight
Sentiment: Positive

Message: Hi Amit, thanks for the thoughtful birthday card!
Sentiment: Positive

Message: You are my hero!
Sentiment: Positive

Message: I'm so proud of you!
Sentiment: Positive

Message: I'm so sorry about your grandmother's passing.
Sentiment: Negative

Message: I'm so sorry about your grandmother's passing.
Sentiment: Negative

Message: I'm so sorry about your grandm


In [30]:
# Tokenize the current prompt to see how its leading newline is encoded.
tokens = tokenizer.tokenize(prompt)
print(tokens)


['▁', '<0x0A>', 'Message', ':', '▁Hi', '▁D', 'ad', ',', '▁you', "'", 're', '▁', '2', '0', '▁minutes', '▁late', '▁to', '▁my', '▁piano', '▁rec', 'ital', '!', '<0x0A>', 'S', 'ent', 'iment', ':', '▁Neg', 'ative', '<0x0A>', '<0x0A>', 'Message', ':', '▁Can', "'", 't', '▁wait', '▁to', '▁order', '▁p', 'izza', '▁for', '▁dinner', '▁ton', 'ight', '<0x0A>', 'S', 'ent', 'iment', ':', '▁Pos', 'itive', '<0x0A>', '<0x0A>', 'Message', ':', '▁Hi', '▁Am', 'it', ',', '▁thanks', '▁for', '▁the', '▁thought', 'ful', '▁birth', 'day', '▁card', '!', '<0x0A>', 'S', 'ent', 'iment', ':']


## Chain of Thought (CoT)

Introduced in Wei et al. (2022), chain-of-thought (CoT) prompting enables complex reasoning capabilities through intermediate reasoning steps. You can combine it with few-shot prompting to get better results on more complex tasks that require reasoning before responding.

![image.png](https://www.promptingguide.ai/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcot.1933d9fe.png&w=1920&q=75)


In [43]:
# Chain-of-thought demonstration: one worked example whose answer spells out
# the intermediate reasoning, followed by a new question to answer.
# (A missing space after "tennis balls." in the first question is fixed here.)
prompt = """
Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now? 
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.

Q: The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more, how many apples do they have?
A:"""

### Few Shot CoT

#### NOTICE! The reasoning is incorrect!

In [40]:
# Few-shot chain of thought: four worked odd-number examples, then a fifth
# group for the model to reason about.
prompt = """
The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1.
A: Adding all the odd numbers (9, 15, 1) gives 25. The answer is False.

The odd numbers in this group add up to an even number: 17,  10, 19, 4, 8, 12, 24.
A: Adding all the odd numbers (17, 19) gives 36. The answer is True.

The odd numbers in this group add up to an even number: 16,  11, 14, 4, 8, 13, 24.
A: Adding all the odd numbers (11, 13) gives 24. The answer is True.

The odd numbers in this group add up to an even number: 17,  9, 10, 12, 13, 4, 2.
A: Adding all the odd numbers (17, 9, 13) gives 39. The answer is False.

The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. 
A:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1.
A: Adding all the odd numbers (9, 15, 1) gives 25. The answer is False.

The odd numbers in this group add up to an even number: 17,  10, 19, 4, 8, 12, 24.
A: Adding all the odd numbers (17, 19) gives 36. The answer is True.

The odd numbers in this group add up to an even number: 16,  11, 14, 4, 8, 13, 24.
A: Adding all the odd numbers (11, 13) gives 24. The answer is True.

The odd numbers in this group add up to an even number: 17,  9, 10, 12, 13, 4, 2.
A: Adding all the odd numbers (17, 9, 13) gives 39. The answer is False.

The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. 
A: Adding all the odd numbers (15, 32) gives 47. The answer is True.

The odd numbers in this group add up to an even number: 1, 3, 5, 7, 9, 11, 13, 15, 17.
A: Adding all the odd numbers (1, 3, 5, 7, 9, 11, 13,


### One Shot CoT
- Notice, in the original paper, even one-shot CoT worked!
    - Keep in mind that the authors claim that this is an **emergent** ability that arises with **sufficiently large language models**.
- Notice, the reasoning is incorrect
	- it seems like the model ==cannot tell the difference between even and odd numbers==
		- it missed some odd numbers
		- it incorrectly identified 32 as an odd number
		- ==the addition is correct==: the sum of 15, 32, 5, 13, 82, 7, 1 is indeed 155

In [41]:
# One-shot chain of thought: a single worked example, then the target group.
prompt = """
The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1.
A: Adding all the odd numbers (9, 15, 1) gives 25. The answer is False.

The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. 
A:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1.
A: Adding all the odd numbers (9, 15, 1) gives 25. The answer is False.

The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. 
A: Adding all the odd numbers (15, 32, 5, 13, 82, 7, 1) gives 155. The answer is True.

The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. 
A: Adding all the odd numbers (15, 32, 5


#### Example from Wei et al. 2022

In [50]:
# One worked tennis-ball example with explicit reasoning, followed by the
# cafeteria question for the model to answer in the same style.
prompt = """
Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now? 
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
Q: The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more. How many apples do they have?
A:"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now? 
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
Q: The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more. How many apples do they have?
A: 23 apples - 20 apples = 3 apples. 23 - 20 = 3. The answer is 3.
Q: 60000000000000000000000000000000000000000000000000000000000000


### Zero Shot CoT

A more recent idea is zero-shot CoT (Kojima et al. 2022), which essentially involves adding **"Let's think step by step"** to the original prompt. Let's try a simple problem and see how the model performs:

![image.png](https://www.promptingguide.ai/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fzero-cot.79793bee.png&w=1920&q=75)

#### Without CoT

In [52]:
# Baseline: the apples word problem asked directly, with no reasoning cue.
prompt = """
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1.
How many apples did I remain with?"""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1.
How many apples did I remain with?

### Solution

The solution to the puzzle is 10.

## Proof

We note that the puzzle asks for the number of apples we remain with.

Let $x$ be the number of apples we started with.

Let $y$ be the number of apples we have left with.

Let $z$ be the number of apples we have given away.

We note that:

$\


#### With CoT


In [53]:
# Zero-shot CoT: the same word problem with "Let's think step by step." appended.
prompt = """
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
Let's think step by step.
 
**Step 1:** I bought 10 apples.
 
**Step 2:** I gave 2 apples to the neighbor and 2 to the repairman.
 
**Step 3:** I then went and bought 5 more apples and ate 1.

So, I remained with 5 apples.


### Note:

This question is asked in the interview of some companies like


In [54]:
# Zero-shot CoT in Q/A form: the reasoning cue is placed after "A:".
prompt = """
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
A: Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
A: Let's think step by step.

- 10 apples
- 2 apples to the neighbor and 2 to the repairman
- 5 more apples and ate 1.

10 - 2 - 2 = 6

6 - 5 - 1 = 4

4 - 1 = 3

3 - 1 = 2

2 - 1 = 1

1 - 1 = 0




In [58]:
# Simplified problem (the "ate 1" step is removed), still in Q/A zero-shot CoT form.
prompt = """
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step.

**Step 1.** 10 apples - 2 apples = 8 apples

**Step 2.** 8 apples - 2 apples = 6 apples

**Step 3.** 6 apples - 2 apples = 4 apples

**Step 4.** 4 apples - 2 apples = 2 apples

**Step 5.** 2 app


In [55]:
# The simplified apples problem in Q/A zero-shot CoT form, run again.
prompt = """
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens (<s>, </s>) are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman.I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step.

1. I bought 10 apples.
2. I gave 2 apples to the neighbor and 2 to the repairman.
3. I then went and bought 5 more apples.

The total number of apples I bought is 10+5=15.

The number of apples I have now is 15-2-2=11.




</s>


In [61]:
# One more run of the simplified apples problem with the zero-shot CoT cue.
prompt = """
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step.

1. I bought 10 apples.
2. I gave 2 apples to the neighbor and 2 to the repairman.
3. I then went and bought 5 more apples.

I have 5 apples left.

# 숫자 감소

# 문제 출처

[네이버 �������


In [59]:
# Zero-shot CoT preceded by an explicit instruction to explain every
# intermediate step before giving the final answer.
prompt = """
Explain each intermediate step.Only when you are done with all your steps, provide the answer based on your intermediate steps.
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step."""

# Tokenize the prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Let the model continue the prompt for at most 100 new tokens.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
)
# Decode the first (only) returned sequence; special tokens are kept.
response = tokenizer.decode(outputs[0])
print(response)

<s> 
Explain each intermediate step.Only when you are done with all your steps, provide the answer based on your intermediate steps.
Q: I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples. How many apples did I remain with?
A: Let's think step by step.
I bought 10 apples.
I gave 2 apples to the neighbor and 2 to the repairman.
I then went and bought 5 more apples.
So now I have 10 + 2 + 2 + 5 = 19 apples.
Now, I have 19 apples and I gave 2 apples to the neighbor and 2 to the repairman.
So now I have 19 -
