In [None]:
# The anthropic package must be installed before it can be imported.
%pip install anthropic

# import 
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic
from IPython.display import Markdown, display

In [None]:
# Always needed: load variables from the .env file; override=True is passed so that
# values from the file take precedence over variables that are already set.
load_dotenv(override=True)

In [None]:
# Read every provider's API key from the environment, then print only a short
# prefix of each one as a sanity check that the expected key was picked up.
openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
google_api_key = os.environ.get('GOOGLE_API_KEY')
deepseek_api_key = os.environ.get('DEEPSEEK_API_KEY')
groq_api_key = os.environ.get('GROQ_API_KEY')

# Each conditional expression slices the key only when it is set and non-empty.
print(
    f"OpenAI API key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API key is not set."
)

print(
    f"Anthropic API key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API key is not set. This is optional."
)

print(
    f"Google API key exists and begins {google_api_key[:2]}"
    if google_api_key
    else "Google API key is not set. This is optional."
)

print(
    f"Deepseek API key exists and begins {deepseek_api_key[:3]}"
    if deepseek_api_key
    else "Deepseek API key is not set. This is optional."
)

print(
    f"Groq API key exists and begins {groq_api_key[:4]}"
    if groq_api_key
    else "Groq API key is not set. This is optional."
)

In [None]:
# Prompt asking a model to invent the question that will be posed to every competitor.
# Adjacent string literals are used instead of backslash continuations inside a single
# literal: the continuations embedded each following line's leading indentation in the
# prompt text. The spelling of "intelligence" is also corrected.
request = (
    "please come up with a challenging, nuanced question that I can ask a number of LLMs "
    "to evaluate their intelligence. "
    "Answer only with the question, no other explanations."
)
# Chat-style message list holding the prompt as a single user turn.
message = [{"role": "user", "content": request}]

In [None]:
# Inspect the message list that was just built (a bare expression, so the notebook echoes it)
message

In [None]:
# Have gpt-4.1-mini write the question that the competitors will answer.
openai = OpenAI()
response = openai.chat.completions.create(messages=message, model="gpt-4.1-mini")

# Keep the text of the first choice as the question and render it as Markdown.
question = response.choices[0].message.content
display(Markdown(question))

In [None]:
# Start with empty, parallel result lists and wrap the generated question as a
# single user message to send to every competitor.
competitors, answers = [], []
message = [dict(role="user", content=question)]

In [None]:
# First competitor: OpenAI.
model_name = "gpt-4.1-mini"
response = openai.chat.completions.create(messages=message, model=model_name)
answer = response.choices[0].message.content
display(Markdown(answer))

# Record the model name and its answer at the same position in the two lists.
competitors += [model_name]
answers += [answer]

In [None]:
# Second competitor: Anthropic.
model_name = "claude-3-7-sonnet-latest"

# Anthropic is called through its own client and messages API rather than the
# OpenAI client; this call also passes max_tokens.
claude = Anthropic()
response = claude.messages.create(max_tokens=1000, messages=message, model=model_name)
answer = response.content[0].text
display(Markdown(answer))

# Record the model name and its answer at the same position in the two lists.
competitors += [model_name]
answers += [answer]

In [None]:
# Third competitor: Google Gemini.
model_name = "gemini-2.0-flash"

# Gemini is called with the OpenAI client library pointed at this base URL.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
# The client gets its own name instead of rebinding `openai`: other cells call
# `openai.chat.completions.create` with OpenAI model names, and re-running them
# after this cell would otherwise send those requests to the Gemini endpoint.
gemini = OpenAI(api_key=google_api_key, base_url=GEMINI_BASE_URL)

response = gemini.chat.completions.create(
    model=model_name,
    messages=message
)
answer = response.choices[0].message.content
display(Markdown(answer))

# Record the model name and its answer at the same position in the two lists.
competitors.append(model_name)
answers.append(answer)

In [None]:
# Fourth competitor: DeepSeek.
model_name = "deepseek-chat"

# DeepSeek is called with the OpenAI client library pointed at this base URL.
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
# The client gets its own name instead of rebinding `openai`: other cells call
# `openai.chat.completions.create` with OpenAI model names, and re-running them
# after this cell would otherwise send those requests to the DeepSeek endpoint.
deepseek = OpenAI(api_key=deepseek_api_key, base_url=DEEPSEEK_BASE_URL)

# Send the shared question.
response = deepseek.chat.completions.create(
    model=model_name,
    messages=message
)

# Extract the answer text and render it as Markdown.
answer = response.choices[0].message.content
display(Markdown(answer))

# Record the model name and its answer at the same position in the two lists.
competitors.append(model_name)
answers.append(answer)

In [None]:
# Last competitor: a Llama model served by Groq.
model_name = "llama-3.3-70b-versatile"

# Groq is called with the OpenAI client library pointed at this base URL.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
# The client gets its own name instead of rebinding `openai`: other cells call
# `openai.chat.completions.create` with OpenAI model names, and re-running them
# after this cell would otherwise send those requests to the Groq endpoint.
groq = OpenAI(api_key=groq_api_key, base_url=GROQ_BASE_URL)

# Send the shared question.
response = groq.chat.completions.create(
    model=model_name,
    messages=message
)
answer = response.choices[0].message.content
display(Markdown(answer))

# Record the model name and its answer at the same position in the two lists.
competitors.append(model_name)
answers.append(answer)

In [None]:
# Ollama will be added later.

In [None]:
# Dump the two collected lists as-is, one per line.
print(competitors, answers, sep="\n")

In [None]:
# Walk both lists in lockstep with zip and print each model next to its answer.
for name, reply in zip(competitors, answers):
    # sep/end reproduce the blank lines between the header, the answer and the next entry.
    print(f"Competitor : {name}", reply, sep="\n\n", end="\n\n\n")

In [None]:
# Concatenate all answers into one text, each under a heading numbered from 1
# by its position in `answers`.
together = "".join(
    f"# Response from competitor {position}\n\n" + reply + "\n\n"
    for position, reply in enumerate(answers, start=1)
)

print(together)

In [None]:
# Time to compare the answers: build the prompt for the judging model.
# The question text itself (`question`) is interpolated; interpolating `message`
# would insert the repr of the whole message list (brackets, 'role'/'content' keys)
# into the prompt. Wording slips in the instructions ("from clarity",
# "best of worst", "seconde") are corrected as well.
# Doubled braces in the JSON example are literal braces inside the f-string.
judge = f"""You are judging a competition between {len(competitors)} competitors.
Each model has been given this question.

{question}

Your job is to evaluate each response for clarity and strength of argument, 
and rank them in order of best to worst.
Respond with JSON, and only JSON, with the following format:
{{"result": ["best competitor number", "second best competitor number", ...]}}

Here are the responses from each competitor:

{together}

Now respond with the JSON with the rank order of the competitors, nothing else.
Do not include markdown formatting or code blocks.
"""

print(judge)

In [20]:
# Ask the judging model for the ranking.
judge_message = [dict(role="user", content=judge)]

# A fresh default OpenAI client is created for the judging call.
openai = OpenAI()
response = openai.chat.completions.create(messages=judge_message, model="o3-mini")
results = response.choices[0].message.content
print(results)

{"result": ["2", "1"]}


In [22]:
# Show the ranking in a readable form: parse the judge's JSON reply and map each
# ranked entry back to the name of the model it refers to.
result_dict = json.loads(results)
ranks = result_dict["result"]
for place, entry in enumerate(ranks, start=1):
    # Entries are 1-based competitor numbers, so shift by one to index the list.
    competitor = competitors[int(entry) - 1]
    print(f"Rank {place} : {competitor}")

Rank 1 : gemini-2.0-flash
Rank 2 : gpt-4.1-mini
