In [1]:
!pip install groq python-dotenv numpy tqdm datasets
!pip install ipywidgets




In [2]:
from groq import Groq
from dotenv import load_dotenv
from datasets import load_dataset

import os
from tqdm import tqdm
import re
import random
import pprint

from typing import List, Dict, Any

load_dotenv()
random.seed(0)

client = Groq()
gsm8k_dataset = load_dataset("gsm8k", "main")

gsm8k_train = gsm8k_dataset["train"]
gsm8k_test  = gsm8k_dataset["test"]



In [5]:
def generate_response_using_Llama(
        prompt: str,
        model: str = "llama-3.1-8b-instant"
    ):
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that solves math problems."
                },
                {
                    "role": "user", 
                    "content": prompt
                }
            ],
            model=model,
            temperature=0.3, ### 수정해도 됩니다!
            stream=False
        )
        return chat_completion.choices[0].message.content
    
    except Exception as e:
        print(f"API call error: {str(e)}")
        return None

#### 응답 잘 나오는지 확인해보기

In [6]:
response = generate_response_using_Llama(
    prompt="Hello world!",
)
print(response)

Hello world! I'm here to help with any math problems you might have. What's on your mind? Do you have a specific problem you'd like me to solve, or would you like some help with a particular math concept?


#### GSM8K 데이터셋 확인해보기

In [7]:
print("[Question]")
for l in gsm8k_test['question'][0].split("."):
    print(l)
print("="*100)
print("[Answer]")
print(gsm8k_test['answer'][0])

[Question]
Janet’s ducks lay 16 eggs per day
 She eats three for breakfast every morning and bakes muffins for her friends every day with four
 She sells the remainder at the farmers' market daily for $2 per fresh duck egg
 How much in dollars does she make every day at the farmers' market?
[Answer]
Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18


#### Util 함수들
- extract_final_answer: LLM의 응답을 parse하여 최종 결과만 추출 (정답과 비교하기 위해)
- run_benchmark_test: 벤치마크 테스트
- save_final_result: 결과물 제출을 위한 함수

In [8]:
### 수정해도 됩니다!
def extract_final_answer(response: str):
    regex = r"(?:Answer:|Model response:)\s*\$?([0-9,]+)\b|([0-9,]+)\s*(meters|cups|miles|minutes)"
    matches = re.finditer(regex, response, re.MULTILINE)
    results = [match.group(1) if match.group(1) else match.group(2).replace(",", "") for match in matches]

    if len(results) == 0:
        additional_regex = r"\$?([0-9,]+)"
        additional_matches = re.finditer(additional_regex, response, re.MULTILINE)
        results.extend([match.group(1).replace(",", "") for match in additional_matches])

    return results[-1] if results else None

In [26]:
### 수정해도 됩니다!
def run_benchmark_test(
        dataset,
        prompt: str,
        model: str = "llama-3.1-8b-instant",
        num_samples: int = 50,
        VERBOSE: bool = False
    ):
    correct = 0
    total   = 0
    results = []

    for i in tqdm(range(min(num_samples, len(dataset)))):
        question = dataset[i]["question"]
        correct_answer = float(re.findall(r'\d+(?:\.\d+)?', dataset[i]["answer"].split('####')[-1])[0])

        response = generate_response_using_Llama(
            prompt=prompt.format(question=question),
            model=model
        )

        if response:
            if VERBOSE:
                print("="*50)
                print(response)
                print("="*50)
            predicted_answer_str = extract_final_answer(response)

            # 수정됨
            if predicted_answer_str:
                try:
                    predicted_answer = float(predicted_answer_str.replace(",", ""))
                except ValueError:
                    if VERBOSE:
                        print(f"Warning: Could not parse answer '{predicted_answer}'")
                    predicted_answer = None

            is_correct = False
            if predicted_answer is not None:
                diff = abs(predicted_answer - correct_answer)
                is_correct = diff < 1e-5

            if is_correct:
                correct += 1
            total += 1
            
            results.append({
                'question': question,
                'correct_answer': correct_answer,
                'predicted_answer': predicted_answer,
                'response': response,
                'correct': is_correct
            })

            if (i + 1) % 5 == 0:
                current_acc = correct/total if total > 0 else 0
                print(f"Progress: [{i+1}/{num_samples}]")
                print(f"Current Acc.: [{current_acc:.2%}]")

    return results, correct/total if total > 0 else 0

In [14]:
def save_final_result(results: List[Dict[str, Any]], accuracy: float, filename: str) -> None:
    result_str = f"====== ACCURACY: {accuracy} ======\n\n"
    result_str += f"[Details]\n"
    
    for idx, result in enumerate(results):
        result_str += f"Question {idx+1}: {result['question']}\n"
        result_str += f"Correct Answer: {result['correct_answer']}\n"
        result_str += f"Predicted Answer: {result['predicted_answer']}\n"
        result_str += f"Correct: {result['correct']}\n\n"
    
    with open(filename, "w", encoding="utf-8") as f:
        f.write(result_str)

#### Direct prompting with few-shot example

In [15]:
def construct_direct_prompt(num_examples: int = 3) -> str:
    train_dataset = gsm8k_train

    sampled_indices = random.sample(
        [i for i in range(len(train_dataset['question']))],
        num_examples
    )

    prompt = "Instruction:\nSolve the following mathematical question and generate ONLY the answer after a tag, 'Answer:' without any rationale.\n"

    for i in range(num_examples):
        cur_question = train_dataset['question'][i]
        cur_answer = train_dataset['answer'][i].split("####")[-1].strip()

        prompt += f"\n[Example {i+1}]\n"
        prompt += f"Question:\n{cur_question}\n"
        prompt += f"Answer:{cur_answer}\n"

    prompt += "\nQuestion:\n{question}\nAnswer:"

    return prompt

In [16]:
### 어떤 방식으로 저장되는지 확인해보세요!
PROMPT = construct_direct_prompt(3)
VERBOSE = False

results, accuracy = run_benchmark_test(
    dataset=gsm8k_test,
    prompt=PROMPT,
    VERBOSE=VERBOSE,
    num_samples=10
)
save_final_result(results, accuracy, "example.txt")

 50%|█████     | 5/10 [00:02<00:02,  2.15it/s]

Progress: [5/10]
Current Acc.: [80.00%]


100%|██████████| 10/10 [00:04<00:00,  2.13it/s]

Progress: [10/10]
Current Acc.: [60.00%]





In [27]:
# TODO: 0 shot, 3 shot, 5 shot direct prompting을 통해 벤치마크 테스트를 한 후, 각각 direct_prompting_{shot: int}.txt로 저장해주세요!
# 예시: shot이 5인 경우 direct_prompting_5.txt
# 항상 num_samples=50 입니다!

shots = [0, 3, 5]
    
print("\n--- Starting Direct Prompting Benchmarks ---")
for shot in shots:
    filename = f"direct_prompting_{shot}.txt"
    print(f"Processing Direct Prompting {shot}-shot...")
    results, acc = run_benchmark_test(
        dataset=gsm8k_test,
        prompt=construct_direct_prompt(shot),
        VERBOSE=False,
        num_samples=50
    )
    save_final_result(results, acc, filename)


--- Starting Direct Prompting Benchmarks ---
Processing Direct Prompting 0-shot...


 10%|█         | 5/50 [00:01<00:15,  2.82it/s]

Progress: [5/50]
Current Acc.: [80.00%]


 20%|██        | 10/50 [00:03<00:14,  2.67it/s]

Progress: [10/50]
Current Acc.: [70.00%]


 30%|███       | 15/50 [00:05<00:12,  2.70it/s]

Progress: [15/50]
Current Acc.: [73.33%]


 40%|████      | 20/50 [00:07<00:10,  2.76it/s]

Progress: [20/50]
Current Acc.: [80.00%]


 50%|█████     | 25/50 [00:09<00:09,  2.68it/s]

Progress: [25/50]
Current Acc.: [84.00%]


 60%|██████    | 30/50 [00:13<00:25,  1.25s/it]

Progress: [30/50]
Current Acc.: [83.33%]


 70%|███████   | 35/50 [00:24<00:31,  2.13s/it]

Progress: [35/50]
Current Acc.: [85.71%]


 80%|████████  | 40/50 [00:43<00:34,  3.44s/it]

Progress: [40/50]
Current Acc.: [85.00%]


 90%|█████████ | 45/50 [00:53<00:11,  2.29s/it]

Progress: [45/50]
Current Acc.: [84.44%]


100%|██████████| 50/50 [01:06<00:00,  1.33s/it]


Progress: [50/50]
Current Acc.: [86.00%]
Processing Direct Prompting 3-shot...


 10%|█         | 5/50 [00:17<02:58,  3.97s/it]

Progress: [5/50]
Current Acc.: [100.00%]


 20%|██        | 10/50 [00:36<02:35,  3.89s/it]

Progress: [10/50]
Current Acc.: [70.00%]


 30%|███       | 15/50 [00:55<02:10,  3.72s/it]

Progress: [15/50]
Current Acc.: [80.00%]


 40%|████      | 20/50 [01:13<01:48,  3.62s/it]

Progress: [20/50]
Current Acc.: [80.00%]


 50%|█████     | 25/50 [01:31<01:30,  3.62s/it]

Progress: [25/50]
Current Acc.: [80.00%]


 60%|██████    | 30/50 [01:49<01:11,  3.59s/it]

Progress: [30/50]
Current Acc.: [83.33%]


 70%|███████   | 35/50 [02:07<00:53,  3.54s/it]

Progress: [35/50]
Current Acc.: [85.71%]


 80%|████████  | 40/50 [02:25<00:36,  3.67s/it]

Progress: [40/50]
Current Acc.: [87.50%]


 90%|█████████ | 45/50 [02:45<00:19,  3.87s/it]

Progress: [45/50]
Current Acc.: [84.44%]


100%|██████████| 50/50 [03:06<00:00,  3.73s/it]


Progress: [50/50]
Current Acc.: [84.00%]
Processing Direct Prompting 5-shot...


 10%|█         | 5/50 [00:30<04:38,  6.19s/it]

Progress: [5/50]
Current Acc.: [80.00%]


 20%|██        | 10/50 [00:54<03:26,  5.16s/it]

Progress: [10/50]
Current Acc.: [70.00%]


 30%|███       | 15/50 [01:27<03:37,  6.21s/it]

Progress: [15/50]
Current Acc.: [66.67%]


 40%|████      | 20/50 [01:58<03:32,  7.09s/it]

Progress: [20/50]
Current Acc.: [70.00%]


 50%|█████     | 25/50 [02:37<02:40,  6.41s/it]

Progress: [25/50]
Current Acc.: [68.00%]


 60%|██████    | 30/50 [03:00<01:37,  4.89s/it]

Progress: [30/50]
Current Acc.: [70.00%]


 70%|███████   | 35/50 [03:22<01:09,  4.61s/it]

Progress: [35/50]
Current Acc.: [74.29%]


 80%|████████  | 40/50 [03:53<01:07,  6.79s/it]

Progress: [40/50]
Current Acc.: [77.50%]


 90%|█████████ | 45/50 [04:17<00:25,  5.16s/it]

Progress: [45/50]
Current Acc.: [75.56%]


100%|██████████| 50/50 [04:42<00:00,  5.66s/it]

Progress: [50/50]
Current Acc.: [76.00%]





### Chain-of-Thought prompting with few-shot example
```text
[Question]
Janet’s ducks lay 16 eggs per day
 She eats three for breakfast every morning and bakes muffins for her friends every day with four
 She sells the remainder at the farmers' market daily for $2 per fresh duck egg
 How much in dollars does she make every day at the farmers' market?
====================================================================================================
[Answer]
Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18
```

[Answer] 아래의 정답을 도출하는 과정을 예시로 달아주면 CoT의 few shot이 되겠죠?

In [28]:
def construct_CoT_prompt(num_examples: int = 3) -> str:
    train_dataset = gsm8k_train

    sampled_indices = random.sample(
        [i for i in range(len(train_dataset['question']))],
        num_examples
    )
    # CoT prompt instruction
    prompt = "Instruction:\nSolve the following mathematical problems step by step. Show your reasoning and put the final answer after '####'.\n"

    for i, idx in enumerate(sampled_indices):
        cur_question = train_dataset['question'][idx]
        cur_answer = train_dataset['answer'][idx] # Take the FULL answer including reasoning

        prompt += f"\n[Example {i+1}]\n"
        prompt += f"Question:\n{cur_question}\n"
        prompt += f"Answer:\n{cur_answer}\n"

    prompt += "\nQuestion:\n{question}\nAnswer:"

    return prompt

In [29]:
# TODO: 0 shot, 3 shot, 5 shot CoT prompting을 통해 벤치마크 테스트를 한 후, 각각 CoT_prompting_{shot: int}.txt로 저장해주세요!
# 예시: shot이 5인 경우 CoT_prompting_5.txt
# 항상 num_samples=50 입니다!

print("\n--- Starting CoT Prompting Benchmarks ---")
for shot in shots:
    filename = f"CoT_prompting_{shot}.txt"
    print(f"Processing CoT Prompting {shot}-shot...")
    results, acc = run_benchmark_test(
        dataset=gsm8k_test,
        prompt=construct_CoT_prompt(shot),
        VERBOSE=False,
        num_samples=50
    )
    save_final_result(results, acc, filename)


--- Starting CoT Prompting Benchmarks ---
Processing CoT Prompting 0-shot...


 10%|█         | 5/50 [00:02<00:21,  2.13it/s]

Progress: [5/50]
Current Acc.: [100.00%]


 20%|██        | 10/50 [00:04<00:20,  1.92it/s]

Progress: [10/50]
Current Acc.: [80.00%]


 30%|███       | 15/50 [00:07<00:17,  1.99it/s]

Progress: [15/50]
Current Acc.: [73.33%]


 40%|████      | 20/50 [00:13<00:44,  1.49s/it]

Progress: [20/50]
Current Acc.: [70.00%]


 50%|█████     | 25/50 [00:31<00:58,  2.34s/it]

Progress: [25/50]
Current Acc.: [68.00%]


 60%|██████    | 30/50 [00:43<00:47,  2.35s/it]

Progress: [30/50]
Current Acc.: [70.00%]


 70%|███████   | 35/50 [00:52<00:27,  1.86s/it]

Progress: [35/50]
Current Acc.: [74.29%]


 80%|████████  | 40/50 [01:03<00:22,  2.26s/it]

Progress: [40/50]
Current Acc.: [70.00%]


 90%|█████████ | 45/50 [01:18<00:16,  3.22s/it]

Progress: [45/50]
Current Acc.: [68.89%]


100%|██████████| 50/50 [01:29<00:00,  1.79s/it]


Progress: [50/50]
Current Acc.: [72.00%]
Processing CoT Prompting 3-shot...


 10%|█         | 5/50 [00:38<05:37,  7.50s/it]

Progress: [5/50]
Current Acc.: [80.00%]


 20%|██        | 10/50 [01:17<05:12,  7.82s/it]

Progress: [10/50]
Current Acc.: [80.00%]


 30%|███       | 15/50 [01:56<04:32,  7.78s/it]

Progress: [15/50]
Current Acc.: [80.00%]


 40%|████      | 20/50 [02:29<03:31,  7.07s/it]

Progress: [20/50]
Current Acc.: [80.00%]


 50%|█████     | 25/50 [03:07<03:05,  7.40s/it]

Progress: [25/50]
Current Acc.: [76.00%]


 60%|██████    | 30/50 [03:40<02:01,  6.10s/it]

Progress: [30/50]
Current Acc.: [76.67%]


 70%|███████   | 35/50 [04:14<01:43,  6.92s/it]

Progress: [35/50]
Current Acc.: [80.00%]


 80%|████████  | 40/50 [04:55<01:14,  7.43s/it]

Progress: [40/50]
Current Acc.: [77.50%]


 90%|█████████ | 45/50 [05:30<00:37,  7.52s/it]

Progress: [45/50]
Current Acc.: [77.78%]


100%|██████████| 50/50 [06:09<00:00,  7.39s/it]


Progress: [50/50]
Current Acc.: [80.00%]
Processing CoT Prompting 5-shot...


 10%|█         | 5/50 [00:49<07:38, 10.18s/it]

Progress: [5/50]
Current Acc.: [100.00%]


 20%|██        | 10/50 [01:49<08:05, 12.13s/it]

Progress: [10/50]
Current Acc.: [80.00%]


 30%|███       | 15/50 [02:35<05:54, 10.14s/it]

Progress: [15/50]
Current Acc.: [80.00%]


 40%|████      | 20/50 [03:27<05:09, 10.32s/it]

Progress: [20/50]
Current Acc.: [75.00%]


 50%|█████     | 25/50 [04:17<04:07,  9.88s/it]

Progress: [25/50]
Current Acc.: [76.00%]


 60%|██████    | 30/50 [05:09<03:26, 10.34s/it]

Progress: [30/50]
Current Acc.: [76.67%]


 70%|███████   | 35/50 [05:59<02:28,  9.90s/it]

Progress: [35/50]
Current Acc.: [80.00%]


 80%|████████  | 40/50 [06:50<01:43, 10.31s/it]

Progress: [40/50]
Current Acc.: [75.00%]


 90%|█████████ | 45/50 [07:43<00:53, 10.63s/it]

Progress: [45/50]
Current Acc.: [75.56%]


100%|██████████| 50/50 [08:34<00:00, 10.28s/it]

Progress: [50/50]
Current Acc.: [76.00%]





### Construct your prompt!!

목표: 본인만의 프롬프트를 통해 정답률을 더 끌어올려보기!
- gsm8k의 train 데이터셋에서 예시를 가져온 다음 (자유롭게!)
- 그 예시들에 대한 풀이 과정을 만들어주세요!
- 모든 것들이 자유입니다! Direct Prompting, CoT Prompting을 한 결과보다 정답률만 높으면 돼요.

In [None]:
### 자유롭게 수정해도 됩니다! 완전히 새로 함수를 만들어도 돼요.
def construct_my_prompt(num_examples: int = 3) -> str:
    """
    [구조적 추론 (Structured Reasoning) 프롬프트 생성 함수]
    
    전략의 의도:
    1. 목표 정의 (Goal Definition): 연산을 시작하기 전에 문제의 핵심 요구사항을 먼저 파악하게 하여, 엉뚱한 방향으로 풀이가 진행되는 것을 방지합니다.
    2. 구조화된 출력 (Structured Output): [Goal] -> [Solution] -> [Answer]라는 명확한 헤더를 제시하여, 모델이 사고 과정을 체계적으로 정리하고, 답변 파싱(Parsing)의 정확도를 높입니다.
    3. 논리적 일관성 (Consistency): 계획과 실행 단계를(혹은 목표와 풀이를) 분리함으로써, 초기 단계의 잘못된 가정에 매몰되지 않고 논리적인 흐름을 유지할 가능성을 높입니다.
    """
    train_dataset = gsm8k_train
    
    # Use a fixed seed or consistent sampling if needed
    indices = list(range(len(train_dataset['question'])))
    sampled_indices = random.sample(indices, num_examples)

    # Improved Instruction: clear separation of Logic (Plan) vs Math (Work)
    prompt = (
        "Instruction:\n"
        "You are a math expert. Solve the problem by following this strict format:\n"
        "1. [Plan]: Break down the problem into logical steps without calculating yet.\n"
        "2. [Work]: Perform the calculations step-by-step based on your plan.\n"
        "3. [Check]: Quickly verify if your answer makes sense (e.g., estimation or reverse check).\n"
        "4. [Answer]: State the final number after '####'.\n"
    )

    for i, idx in enumerate(sampled_indices):
        cur_question = train_dataset['question'][idx]
        cur_answer = train_dataset['answer'][idx] 
        
        prompt += f"\n[Example {i+1}]\n"
        prompt += f"Question:\n{cur_question}\n"
        prompt += f"[Plan]: Identify the operations needed to solve for the target value.\n"
        prompt += f"[Work]:\n{cur_answer}\n"
        prompt += f"[Check]: The steps follow logically.\n"

    prompt += "\nQuestion:\n{question}\n"
    prompt += "[Plan]:" # Trigger the model to start planning

    return prompt



In [33]:
# TODO: 만든 0 shot, 3 shot, 5 shot example과 프롬프트를 통해 벤치마크 테스트를 한 후, 각각 My_prompting_{shot: int}.txt로 저장해주세요!
# 예시: shot이 5인 경우 My_prompting_5.txt
# 항상 num_samples=50 입니다!

print("\n--- Starting My Custom Prompting Benchmarks ---")
for shot in shots:
    filename = f"My_prompting_new_{shot}.txt"
    print(f"Processing My Custom Prompting {shot}-shot...")
    results, acc = run_benchmark_test(
        dataset=gsm8k_test,
        prompt=construct_my_prompt(shot),
        VERBOSE=False,
        num_samples=50
    )
    save_final_result(results, acc, filename)


--- Starting My Custom Prompting Benchmarks ---
Processing My Custom Prompting 0-shot...


 10%|█         | 5/50 [00:03<00:30,  1.46it/s]

Progress: [5/50]
Current Acc.: [60.00%]


 20%|██        | 10/50 [00:06<00:25,  1.54it/s]

Progress: [10/50]
Current Acc.: [60.00%]


 30%|███       | 15/50 [00:09<00:17,  2.02it/s]

Progress: [15/50]
Current Acc.: [46.67%]


 40%|████      | 20/50 [00:23<01:14,  2.47s/it]

Progress: [20/50]
Current Acc.: [50.00%]


 50%|█████     | 25/50 [00:36<01:05,  2.61s/it]

Progress: [25/50]
Current Acc.: [48.00%]


 60%|██████    | 30/50 [00:49<00:51,  2.59s/it]

Progress: [30/50]
Current Acc.: [40.00%]


 70%|███████   | 35/50 [01:02<00:39,  2.63s/it]

Progress: [35/50]
Current Acc.: [34.29%]


 80%|████████  | 40/50 [01:16<00:28,  2.81s/it]

Progress: [40/50]
Current Acc.: [32.50%]


 90%|█████████ | 45/50 [01:31<00:14,  2.91s/it]

Progress: [45/50]
Current Acc.: [33.33%]


100%|██████████| 50/50 [01:46<00:00,  2.14s/it]


Progress: [50/50]
Current Acc.: [34.00%]
Processing My Custom Prompting 3-shot...


 10%|█         | 5/50 [00:44<06:52,  9.16s/it]

Progress: [5/50]
Current Acc.: [80.00%]


 20%|██        | 10/50 [01:31<06:15,  9.39s/it]

Progress: [10/50]
Current Acc.: [70.00%]


 30%|███       | 15/50 [02:18<05:21,  9.17s/it]

Progress: [15/50]
Current Acc.: [66.67%]


 40%|████      | 20/50 [03:04<04:36,  9.21s/it]

Progress: [20/50]
Current Acc.: [70.00%]


 50%|█████     | 25/50 [03:49<03:42,  8.89s/it]

Progress: [25/50]
Current Acc.: [64.00%]


 60%|██████    | 30/50 [04:35<03:04,  9.21s/it]

Progress: [30/50]
Current Acc.: [60.00%]


 70%|███████   | 35/50 [05:10<01:41,  6.76s/it]

Progress: [35/50]
Current Acc.: [65.71%]


 80%|████████  | 40/50 [05:50<01:24,  8.43s/it]

Progress: [40/50]
Current Acc.: [60.00%]


 90%|█████████ | 45/50 [06:34<00:45,  9.06s/it]

Progress: [45/50]
Current Acc.: [60.00%]


100%|██████████| 50/50 [07:12<00:00,  8.65s/it]


Progress: [50/50]
Current Acc.: [60.00%]
Processing My Custom Prompting 5-shot...


 10%|█         | 5/50 [00:51<07:44, 10.32s/it]

Progress: [5/50]
Current Acc.: [80.00%]


 20%|██        | 10/50 [01:40<06:40, 10.01s/it]

Progress: [10/50]
Current Acc.: [70.00%]


 32%|███▏      | 16/50 [02:30<03:35,  6.34s/it]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499999, Requested 920. Please try again in 2m38.8032s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499998, Requested 961. Please try again in 2m45.7152s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 34%|███▍      | 17/50 [02:31<02:28,  4.49s/it]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499997, Requested 921. Please try again in 2m38.6304s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 38%|███▊      | 19/50 [02:31<01:10,  2.28s/it]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499996, Requested 924. Please try again in 2m38.976s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499995, Requested 900. Please try again in 2m34.656s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 40%|████      | 20/50 [02:31<00:49,  1.64s/it]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499994, Requested 935. Please try again in 2m40.5312s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 44%|████▍     | 22/50 [02:32<00:25,  1.11it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499993, Requested 931. Please try again in 2m39.6672s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499992, Requested 915. Please try again in 2m36.7296s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 46%|████▌     | 23/50 [02:32<00:18,  1.48it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499991, Requested 926. Please try again in 2m38.4576s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 50%|█████     | 25/50 [02:32<00:10,  2.38it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499990, Requested 909. Please try again in 2m35.3472s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499989, Requested 907. Please try again in 2m34.8288s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 54%|█████▍    | 27/50 [02:32<00:06,  3.34it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499988, Requested 935. Please try again in 2m39.494399999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499987, Requested 935. Please try again in 2m39.3216s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 58%|█████▊    | 29/50 [02:33<00:04,  4.51it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499986, Requested 927. Please try again in 2m37.7664s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499985, Requested 918. Please try again in 2m36.0384s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 60%|██████    | 30/50 [02:33<00:03,  5.07it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499984, Requested 939. Please try again in 2m39.494399999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 64%|██████▍   | 32/50 [02:33<00:03,  4.78it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499983, Requested 906. Please try again in 2m33.6192s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499982, Requested 931. Please try again in 2m37.7664s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 66%|██████▌   | 33/50 [02:33<00:03,  5.29it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499981, Requested 911. Please try again in 2m34.1376s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 70%|███████   | 35/50 [02:34<00:02,  5.42it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499980, Requested 899. Please try again in 2m31.8912s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499979, Requested 911. Please try again in 2m33.792s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 74%|███████▍  | 37/50 [02:34<00:02,  6.05it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499978, Requested 919. Please try again in 2m35.0016s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499977, Requested 913. Please try again in 2m33.792s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 76%|███████▌  | 38/50 [02:34<00:01,  6.22it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499976, Requested 931. Please try again in 2m36.7296s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 80%|████████  | 40/50 [02:35<00:01,  5.82it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499975, Requested 914. Please try again in 2m33.6192s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499974, Requested 944. Please try again in 2m38.6304s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 82%|████████▏ | 41/50 [02:35<00:01,  5.08it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499973, Requested 915. Please try again in 2m33.4464s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 84%|████████▍ | 42/50 [02:35<00:02,  3.85it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499970, Requested 995. Please try again in 2m46.752s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 88%|████████▊ | 44/50 [02:36<00:01,  4.51it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499969, Requested 952. Please try again in 2m39.1488s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499968, Requested 924. Please try again in 2m34.1376s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 92%|█████████▏| 46/50 [02:36<00:00,  5.49it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499967, Requested 943. Please try again in 2m37.248s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499966, Requested 965. Please try again in 2m40.8768s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


 96%|█████████▌| 48/50 [02:36<00:00,  5.47it/s]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499965, Requested 961. Please try again in 2m40.0128s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499964, Requested 917. Please try again in 2m32.2368s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


100%|██████████| 50/50 [02:37<00:00,  3.15s/it]

API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499963, Requested 908. Please try again in 2m30.5088s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
API call error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01kges0kwrf5s8hjs9tz93s5w8` service tier `on_demand` on tokens per day (TPD): Limit 500000, Used 499962, Requested 913. Please try again in 2m31.2s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}





### 보고서 작성하기
#### 아래의 내용이 포함되면 됩니다!

1. Direct Prompting, CoT Prompting, My Prompting을 0 shot, 3 shot, 5 shot 정답률을 표로 보여주세요!
2. CoT Prompting이 Direct Prompting에 비해 왜 좋을 수 있는지에 대해서 서술해주세요!
3. 본인이 작성한 프롬프트 기법이 CoT에 비해서 왜 더 좋을 수 있는지에 대해서 설명해주세요!
4. 최종적으로, `PROMPTING.md`에 보고서를 작성해주세요!