In [3]:
import os
import openai

# Read the API key from the environment so it never appears in the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Send a short multi-turn history: a system instruction, one completed
# user/assistant exchange, and a follow-up question that only makes sense
# given the earlier turns.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
)

# Show the whole response object (choices, model, token usage).
print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The 2020 World Series was played at Globe Life Field in Arlington, Texas.",
        "role": "assistant"
      }
    }
  ],
  "created": 1682651514,
  "id": "chatcmpl-7A8oUy0JXdEqufn4rduD8jEAe4uAY",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 17,
    "prompt_tokens": 57,
    "total_tokens": 74
  }
}


In [4]:
# Drill into the first choice to get just the assistant's reply text.
response["choices"][0]["message"]["content"]

'The 2020 World Series was played at Globe Life Field in Arlington, Texas.'

In [5]:
# Single-turn request: the system prompt (Chinese for "You are a translator
# who helps the user translate Chinese into English") sets the task, and the
# user message is the text to translate.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "您是个翻译员，帮助用户把中文翻译成英文。"},
        {"role": "user", "content": "你好！"},
    ],
)

# Print only the reply text of the first choice.
# NOTE(review): the recorded output for this cell shows the full response
# object rather than just the text -- it looks stale; re-run to refresh.
print(response["choices"][0]["message"]["content"])

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Hello!",
        "role": "assistant"
      }
    }
  ],
  "created": 1682668292,
  "id": "chatcmpl-7ADB6cQVKVO1hbwyG2oaxQ1iTP1RD",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 2,
    "prompt_tokens": 44,
    "total_tokens": 46
  }
}


# 中文翻译英文的机器人

In [13]:
class Conversation:
    """A multi-turn chat session that keeps its own message history.

    The history starts with a single system message built from ``prompt``.
    Every successful call to :meth:`ask` appends the user question and the
    assistant reply, so later turns are sent with the full context.
    """

    def __init__(self, prompt):
        """Start a conversation whose system message is ``prompt``."""
        self.prompt = prompt
        self.messages = [{"role": "system", "content": self.prompt}]

    def ask(self, question):
        """Send ``question`` with the accumulated history and return the reply.

        Args:
            question: Text of the user's message.

        Returns:
            The assistant's reply text on success. If the API call raises,
            the exception object is returned instead of being raised
            (best-effort behaviour kept for existing callers), and the
            history is left exactly as it was before the call.
        """
        self.messages.append({"role": "user", "content": question})

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=self.messages
            )
        except Exception as e:
            # Undo the append above: otherwise the failed question would stay
            # in the history without an assistant reply, and every later turn
            # (including a retry) would be sent with a dangling user message.
            self.messages.pop()
            return e

        assistant_message = response["choices"][0]["message"]["content"]
        # Record the reply so the next question is asked with full context.
        self.messages.append({"role": "assistant", "content": assistant_message})

        return assistant_message

In [17]:
def translate(conv, input):
    """Ask ``conv`` to handle ``input`` and print the exchange.

    Calls ``conv.ask(input)`` and prints a "User:" line, an "Assistant:" line
    and a trailing blank line. Returns nothing.
    """
    reply = conv.ask(input)
    print(f"User: {input}\nAssistant: {reply}\n")

# One conversation whose system prompt (in Chinese) reads: "You are a
# translator who helps the user translate Chinese into English."
conv = Conversation("您是个翻译员，帮助用户把中文翻译成英文。")

# Three turns on the same conversation object, so each request is sent with
# the history of the previous ones.
translate(conv, "你好！")
translate(conv, "你在哪里？")
translate(conv, "你在干什么？")

User: 你好！
Assistant: Hello!

User: 你在哪里？
Assistant: Where are you located?

User: 你在干什么？
Assistant: What are you doing?



# 计算 Chat API 调用的 Token

In [76]:
import tiktoken

def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
    """Estimate the prompt-token count of a chat ``messages`` list.

    Args:
        messages: Iterable of message dicts; every value in each dict is
            encoded and counted.
        model: Model name. "gpt-3.5-turbo" and "gpt-4" are counted as their
            dated snapshots (a warning is printed); only
            "gpt-3.5-turbo-0301" and "gpt-4-0314" have known overheads.

    Returns:
        Encoded length of all message values, plus a fixed per-message
        overhead, a per-"name" adjustment, and 3 tokens that prime the reply.

    Raises:
        NotImplementedError: for any other model name.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    # Floating aliases: alias -> (snapshot to count as, warning to print).
    floating_aliases = {
        "gpt-3.5-turbo": (
            "gpt-3.5-turbo-0301",
            "Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.",
        ),
        "gpt-4": (
            "gpt-4-0314",
            "Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.",
        ),
    }
    if model in floating_aliases:
        snapshot, warning = floating_aliases[model]
        print(warning)
        return num_tokens_from_messages(messages, model=snapshot)

    # snapshot -> (tokens_per_message, tokens_per_name)
    overhead_by_model = {
        # every message follows <|start|>{role/name}\n{content}<|end|>\n;
        # if there's a name, the role is omitted (hence -1)
        "gpt-3.5-turbo-0301": (4, -1),
        "gpt-4-0314": (3, 1),
    }
    if model not in overhead_by_model:
        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
    tokens_per_message, tokens_per_name = overhead_by_model[model]

    def _message_cost(message):
        # Fixed overhead plus the encoded length of every field value.
        cost = tokens_per_message
        for field, text in message.items():
            cost += len(encoding.encode(text))
            if field == "name":
                cost += tokens_per_name
        return cost

    reply_priming = 3  # every reply is primed with <|start|>assistant<|message|>
    return sum(map(_message_cost, messages)) + reply_priming

In [77]:
# Few-shot prompt: one system instruction, two worked examples supplied as
# system messages tagged with a "name" (example_user / example_assistant),
# and the real user request last.
messages = [
  {"role": "system", "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English."},
  {"role": "system", "name":"example_user", "content": "New synergies will help drive top-line growth."},
  {"role": "system", "name": "example_assistant", "content": "Things working well together will increase revenue."},
  {"role": "system", "name":"example_user", "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage."},
  {"role": "system", "name": "example_assistant", "content": "Let's talk later when we're less busy about how to do better."},
  {"role": "user", "content": "This late pivot means we don't have time to boil the ocean for the client deliverable."}
]

model = "gpt-3.5-turbo"

# Local estimate of the prompt size; num_tokens_from_messages counts the
# floating "gpt-3.5-turbo" alias as gpt-3.5-turbo-0301.
print(f"{num_tokens_from_messages(messages, model)} prompt tokens counted.")
# This counts prompt tokens only (not total_tokens); the recorded run printed 127.

127 prompt tokens counted.


In [78]:
# Ask for a single completion token just to read back the API's own
# prompt-token count and compare it with the local estimate.
openai.ChatCompletion.create(
    model=model,
    messages=messages,
    temperature=0,
    max_tokens=1,
)["usage"]

<OpenAIObject at 0x13ab8e0c0> JSON: {
  "completion_tokens": 1,
  "prompt_tokens": 127,
  "total_tokens": 128
}

In [83]:
class Conversation:
    """A multi-turn chat session that also reports prompt-token counts.

    The history starts with a single system message built from ``prompt``.
    Every successful call to :meth:`ask` appends the user question and the
    assistant reply, so later turns are sent with the full context.
    """

    def __init__(self, prompt):
        """Start a conversation whose system message is ``prompt``."""
        self.model = "gpt-3.5-turbo"
        self.prompt = prompt
        self.messages = [{"role": "system", "content": self.prompt}]

    def ask(self, question, pprint=True):
        """Send ``question`` with the accumulated history and return the reply.

        Args:
            question: Text of the user's message.
            pprint: When true, print the locally estimated prompt-token count
                next to the usage block reported by the API.

        Returns:
            The assistant's reply text on success. If the API call raises,
            the exception object is returned instead of being raised
            (best-effort behaviour kept for existing callers), and the
            history is left exactly as it was before the call.
        """
        self.messages.append({"role": "user", "content": question})

        try:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=self.messages
            )
        except Exception as e:
            # Undo the append above: otherwise the failed question would stay
            # in the history without an assistant reply, and every later turn
            # (including a retry) would be sent with a dangling user message.
            self.messages.pop()
            return e

        if pprint:
            # Counted before the reply is appended, so the estimate covers
            # exactly the messages that were sent as the prompt.
            print(f"tiktoken: {self.num_tokens(self.messages, self.model)}\ntokens: {response['usage']}")

        assistant_message = response["choices"][0]["message"]["content"]
        # Record the reply so the next question is asked with full context.
        self.messages.append({"role": "assistant", "content": assistant_message})

        return assistant_message

    def num_tokens(self, messages, model="gpt-3.5-turbo-0301"):
        """Returns the number of tokens used by a list of messages.

        "gpt-3.5-turbo" and "gpt-4" are counted as their dated snapshots
        (a warning is printed). Any model other than those two aliases,
        "gpt-3.5-turbo-0301" and "gpt-4-0314" raises NotImplementedError.
        """
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            print("Warning: model not found. Using cl100k_base encoding.")
            encoding = tiktoken.get_encoding("cl100k_base")
        if model == "gpt-3.5-turbo":
            print("Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.")
            return self.num_tokens(messages, model="gpt-3.5-turbo-0301")
        elif model == "gpt-4":
            print("Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.")
            return self.num_tokens(messages, model="gpt-4-0314")
        elif model == "gpt-3.5-turbo-0301":
            tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
            tokens_per_name = -1  # if there's a name, the role is omitted
        elif model == "gpt-4-0314":
            tokens_per_message = 3
            tokens_per_name = 1
        else:
            raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            # Every field value (role, content, optional name) is encoded and counted.
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name
        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens

In [84]:
# Fresh conversation with the same Chinese-to-English translator system prompt.
# Conversation.ask defaults to pprint=True, so it also prints the local
# tiktoken estimate alongside the API-reported usage.
conv = Conversation("您是个翻译员，帮助用户把中文翻译成英文。")
translate(conv, "你好！")

tiktoken: 44
tokens: {
  "completion_tokens": 2,
  "prompt_tokens": 44,
  "total_tokens": 46
}
User: 你好！
Assistant: Hello!



In [85]:
# Second turn on the same conversation: the earlier exchange is re-sent as
# context, so the prompt token count is larger than for the first turn.
translate(conv, "你在哪里？")

tiktoken: 62
tokens: {
  "completion_tokens": 5,
  "prompt_tokens": 62,
  "total_tokens": 67
}
User: 你在哪里？
Assistant: Where are you located?



In [86]:
# Third turn: the prompt keeps growing because the whole history is sent each time.
translate(conv, "你在干什么？")

tiktoken: 85
tokens: {
  "completion_tokens": 5,
  "prompt_tokens": 85,
  "total_tokens": 90
}
User: 你在干什么？
Assistant: What are you doing?



# 插入文本

In [99]:
# Baseline: the opening and closing lines of the poem are concatenated into a
# single prompt, so the model is asked to continue after the closing line
# rather than to fill the gap between the two.
prefix = "床前明月光，\n"
suffix = "\n低头思故乡。"
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=f"{prefix}{suffix}",
    max_tokens=64
)
# Print only the generated text of the first choice.
print(response["choices"][0]["text"])



疑是地上霜，

一夜凝碧万里层。


In [101]:
# Insert mode: reuses `prefix` and `suffix` from the previous cell, but passes
# the closing line through the `suffix` parameter instead of the prompt.
# Per the Completions API, `suffix` is the text that follows the inserted
# completion, so the model is asked to fill the gap between the two.
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prefix,
    suffix=suffix,
    max_tokens=64
)
print(response["choices"][0]["text"])


疑是地上霜。

举头望明月，

低头思故乡。

七言律诗

Chuáng qián míng y


In [102]:
# Same concatenated-prompt baseline, but `suffix` no longer starts with a
# newline, so the prompt is the two lines separated by a single newline.
prefix = "床前明月光，\n"
suffix = "低头思故乡。"
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=f"{prefix}{suffix}",
    max_tokens=64
)
print(response["choices"][0]["text"])



床前明月光，
疑是地上霜。
举头望明月，
低头思故乡。


In [103]:
# Insert mode with the same strings as the previous cell: `prefix` ends with a
# newline, `suffix` has no leading newline.
prefix = "床前明月光，\n"
suffix = "低头思故乡。"
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prefix,
    suffix=suffix,
    max_tokens=64
)
print(response["choices"][0]["text"])


疑是地上霜；

举头望明月，

低头思故乡。

Chuángqián míngyuè guāng,

Yí sh


In [104]:
# Insert mode again, this time with no newline in either `prefix` or `suffix`.
prefix = "床前明月光，"
suffix = "低头思故乡。"
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prefix,
    suffix=suffix,
    max_tokens=64
)
print(response["choices"][0]["text"])


疑是地上霜。
举头望明月，
低头思故乡。

Before my bed, the bright moonlight,
I think it is frost on the ground.
I looked up at


In [105]:
# Same insert-mode request as the previous cell, but the whole response object
# is printed so finish_reason and token usage can be inspected.
prefix = "床前明月光，"
suffix = "低头思故乡。"
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prefix,
    suffix=suffix,
    max_tokens=64
)
# In the recorded run finish_reason is "length" with 64 completion tokens,
# i.e. the generation was cut off by max_tokens.
print(response)

{
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "\n\u7591\u662f\u5730\u4e0a\u971c\u3002\n\u4e3e\u5934\u671b\u660e\u6708\uff0c\n\u4f4e\u5934\u601d\u6545\u4e61\u3002\n\nChu\u0101ng qi\u00e1n m\u00edngyu\u00e8 gu\u0101ng,\nG\u01d0 sh\u00ec d"
    }
  ],
  "created": 1682826755,
  "id": "cmpl-7AsOxY1iuCqFIJeh1AE174k1dGWrk",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 64,
    "prompt_tokens": 23,
    "total_tokens": 87
  }
}
