refactor and support plugins for OpenAIBot #621

Merged · 5 commits · Mar 26, 2023
170 changes: 14 additions & 156 deletions bot/chatgpt/chat_gpt_bot.py
@@ -1,6 +1,9 @@
# encoding:utf-8

from bot.bot import Bot
from bot.chatgpt.chat_gpt_session import ChatGPTSession
from bot.openai.open_ai_image import OpenAIImage
from bot.session_manager import Session, SessionManager
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from config import conf, load_config
@@ -10,21 +13,20 @@
import openai
import time


# OpenAI chat model API (available)
class ChatGPTBot(Bot):
class ChatGPTBot(Bot,OpenAIImage):
def __init__(self):
super().__init__()
openai.api_key = conf().get('open_ai_api_key')
if conf().get('open_ai_api_base'):
openai.api_base = conf().get('open_ai_api_base')
proxy = conf().get('proxy')
self.sessions = SessionManager(model= conf().get("model") or "gpt-3.5-turbo")
if proxy:
openai.proxy = proxy
if conf().get('rate_limit_chatgpt'):
self.tb4chatgpt = TokenBucket(conf().get('rate_limit_chatgpt', 20))
if conf().get('rate_limit_dalle'):
self.tb4dalle = TokenBucket(conf().get('rate_limit_dalle', 50))

self.sessions = SessionManager(ChatGPTSession, model= conf().get("model") or "gpt-3.5-turbo")

def reply(self, query, context=None):
# acquire reply content
@@ -45,19 +47,19 @@ def reply(self, query, context=None):
reply = Reply(ReplyType.INFO, '配置已更新')
if reply:
return reply
session = self.sessions.build_session_query(query, session_id)
logger.debug("[OPEN_AI] session query={}".format(session))
session = self.sessions.session_query(query, session_id)
logger.debug("[OPEN_AI] session query={}".format(session.messages))

# if context.get('stream'):
# # reply in stream
# return self.reply_text_stream(query, new_query, session_id)

reply_content = self.reply_text(session, session_id, 0)
logger.debug("[OPEN_AI] new_query={}, session_id={}, reply_cont={}, completion_tokens={}".format(session, session_id, reply_content["content"], reply_content["completion_tokens"]))
logger.debug("[OPEN_AI] new_query={}, session_id={}, reply_cont={}, completion_tokens={}".format(session.messages, session_id, reply_content["content"], reply_content["completion_tokens"]))
if reply_content['completion_tokens'] == 0 and len(reply_content['content']) > 0:
reply = Reply(ReplyType.ERROR, reply_content['content'])
elif reply_content["completion_tokens"] > 0:
self.sessions.save_session(reply_content["content"], session_id, reply_content["total_tokens"])
self.sessions.session_reply(reply_content["content"], session_id, reply_content["total_tokens"])
reply = Reply(ReplyType.TEXT, reply_content["content"])
else:
reply = Reply(ReplyType.ERROR, reply_content['content'])
@@ -86,7 +88,7 @@ def compose_args(self):
"presence_penalty":conf().get('presence_penalty', 0.0), # [-2,2]之间,该值越大则更倾向于产生不同的内容
}

def reply_text(self, session, session_id, retry_count=0) -> dict:
def reply_text(self, session:ChatGPTSession, session_id, retry_count=0) -> dict:
'''
call openai's ChatCompletion to get the answer
:param session: a conversation session
@@ -98,7 +100,7 @@ def reply_text(self, session, session_id, retry_count=0) -> dict:
if conf().get('rate_limit_chatgpt') and not self.tb4chatgpt.get_token():
return {"completion_tokens": 0, "content": "提问太快啦,请休息一下再问我吧"}
response = openai.ChatCompletion.create(
messages=session, **self.compose_args()
messages=session.messages, **self.compose_args()
)
# logger.info("[ChatGPT] reply={}, total_tokens={}".format(response.choices[0]['message']['content'], response["usage"]["total_tokens"]))
return {"total_tokens": response["usage"]["total_tokens"],
@@ -128,31 +130,6 @@ def reply_text(self, session, session_id, retry_count=0) -> dict:
self.sessions.clear_session(session_id)
return {"completion_tokens": 0, "content": "请再问我一次吧"}

def create_img(self, query, retry_count=0):
try:
if conf().get('rate_limit_dalle') and not self.tb4dalle.get_token():
return False, "请求太快了,请休息一下再问我吧"
logger.info("[OPEN_AI] image_query={}".format(query))
response = openai.Image.create(
prompt=query, # image description
n=1, # number of images generated per request
size="256x256" # image size; one of 256x256, 512x512, 1024x1024
)
image_url = response['data'][0]['url']
logger.info("[OPEN_AI] image_url={}".format(image_url))
return True, image_url
except openai.error.RateLimitError as e:
logger.warn(e)
if retry_count < 1:
time.sleep(5)
logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count+1))
return self.create_img(query, retry_count+1)
else:
return False, "提问太快啦,请休息一下再问我吧"
except Exception as e:
logger.exception(e)
return False, str(e)


class AzureChatGPTBot(ChatGPTBot):
def __init__(self):
@@ -164,123 +141,4 @@ def compose_args(self):
args = super().compose_args()
args["engine"] = args["model"]
del(args["model"])
return args

class SessionManager(object):
def __init__(self, model = "gpt-3.5-turbo-0301"):
if conf().get('expires_in_seconds'):
sessions = ExpiredDict(conf().get('expires_in_seconds'))
else:
sessions = dict()
self.sessions = sessions
self.model = model

def build_session(self, session_id, system_prompt=None):
session = self.sessions.get(session_id, [])
if len(session) == 0:
if system_prompt is None:
system_prompt = conf().get("character_desc", "")
system_item = {'role': 'system', 'content': system_prompt}
session.append(system_item)
self.sessions[session_id] = session
return session

def build_session_query(self, query, session_id):
'''
build query with conversation history
e.g. [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Who won the world series in 2020?"},
{"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
{"role": "user", "content": "Where was it played?"}
]
:param query: query content
:param session_id: session id
:return: query content with conversaction
'''
session = self.build_session(session_id)
user_item = {'role': 'user', 'content': query}
session.append(user_item)
try:
total_tokens = num_tokens_from_messages(session, self.model)
max_tokens = conf().get("conversation_max_tokens", 1000)
total_tokens = self.discard_exceed_conversation(session, max_tokens, total_tokens)
logger.debug("prompt tokens used={}".format(total_tokens))
except Exception as e:
logger.debug("Exception when counting tokens precisely for prompt: {}".format(str(e)))

return session

def save_session(self, answer, session_id, total_tokens):
max_tokens = conf().get("conversation_max_tokens", 1000)
session = self.sessions.get(session_id)
if session:
# append conversation
gpt_item = {'role': 'assistant', 'content': answer}
session.append(gpt_item)

# discard exceed limit conversation
tokens_cnt = self.discard_exceed_conversation(session, max_tokens, total_tokens)
logger.debug("raw total_tokens={}, savesession tokens={}".format(total_tokens, tokens_cnt))

def discard_exceed_conversation(self, session, max_tokens, total_tokens):
dec_tokens = int(total_tokens)
# logger.info("prompt tokens used={},max_tokens={}".format(used_tokens,max_tokens))
while dec_tokens > max_tokens:
# pop first conversation
if len(session) > 2:
session.pop(1)
elif len(session) == 2 and session[1]["role"] == "assistant":
session.pop(1)
break
elif len(session) == 2 and session[1]["role"] == "user":
logger.warn("user message exceed max_tokens. total_tokens={}".format(dec_tokens))
break
else:
logger.debug("max_tokens={}, total_tokens={}, len(sessions)={}".format(max_tokens, dec_tokens, len(session)))
break
try:
cur_tokens = num_tokens_from_messages(session, self.model)
dec_tokens = cur_tokens
except Exception as e:
logger.debug("Exception when counting tokens precisely for query: {}".format(e))
dec_tokens = dec_tokens - max_tokens
return dec_tokens

def clear_session(self, session_id):
self.sessions[session_id] = []

def clear_all_session(self):
self.sessions.clear()

# refer to https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_messages(messages, model):
"""Returns the number of tokens used by a list of messages."""
import tiktoken
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
logger.debug("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
if model == "gpt-3.5-turbo":
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
elif model == "gpt-4":
return num_tokens_from_messages(messages, model="gpt-4-0314")
elif model == "gpt-3.5-turbo-0301":
tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
tokens_per_name = -1 # if there's a name, the role is omitted
elif model == "gpt-4-0314":
tokens_per_message = 3
tokens_per_name = 1
else:
logger.warn(f"num_tokens_from_messages() is not implemented for model {model}. Returning num tokens assuming gpt-3.5-turbo-0301.")
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
num_tokens = 0
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
return num_tokens
return args
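
The heart of this refactor is visible in the rewritten constructor lines above: `ChatGPTBot` now inherits `create_img` from the `OpenAIImage` mixin instead of defining it inline, and `SessionManager` takes the session class (`ChatGPTSession`) as its first argument instead of hard-coding message handling. Below is a minimal, self-contained sketch of that shape; the class names mirror the diff, but every body is an illustrative stub, not the repository's actual implementation.

```python
class Bot:
    """Stand-in for bot/bot.py."""
    def reply(self, query, context=None):
        raise NotImplementedError


class OpenAIImage:
    """Stand-in for bot/openai/open_ai_image.py, the image API extracted by this PR."""
    def create_img(self, query, retry_count=0):
        # The real mixin calls openai.Image.create; stubbed here.
        return True, "https://example.invalid/generated.png"


class ChatGPTSession:
    """Stand-in for the real ChatGPTSession added in this diff."""
    def __init__(self, session_id, system_prompt="You are a helpful assistant.",
                 model="gpt-3.5-turbo"):
        self.session_id = session_id
        self.model = model
        self.messages = [{"role": "system", "content": system_prompt}]

    def add_query(self, query):
        self.messages.append({"role": "user", "content": query})


class SessionManager:
    """Stand-in for bot/session_manager.py: session handling is now generic."""
    def __init__(self, sessioncls, model="gpt-3.5-turbo"):
        self.sessioncls = sessioncls  # injected session class, not hard-coded
        self.model = model
        self.sessions = {}

    def session_query(self, query, session_id):
        session = self.sessions.setdefault(
            session_id, self.sessioncls(session_id, model=self.model))
        session.add_query(query)
        return session


class ChatGPTBot(Bot, OpenAIImage):
    # Image generation arrives via the mixin; the bot only wires up sessions.
    def __init__(self):
        super().__init__()
        self.sessions = SessionManager(ChatGPTSession, model="gpt-3.5-turbo")


bot = ChatGPTBot()
session = bot.sessions.session_query("Where was it played?", "wx_user_1")
print(session.messages[-1])           # the user turn just recorded
print(bot.create_img("a red panda"))  # mixin method on the same bot object
```

Parameterizing `SessionManager` with a session class is what lets other bots (and, per the PR title, plugins) reuse the same manager with their own message formats; it also keeps `AzureChatGPTBot` a thin subclass whose only job is renaming `model` to `engine` in `compose_args`.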
92 changes: 92 additions & 0 deletions bot/chatgpt/chat_gpt_session.py
@@ -0,0 +1,92 @@
from bot.session_manager import Session
from common.log import logger
'''
e.g. [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Who won the world series in 2020?"},
{"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
{"role": "user", "content": "Where was it played?"}
]
'''
class ChatGPTSession(Session):
def __init__(self, session_id, system_prompt=None, model= "gpt-3.5-turbo"):
super().__init__(session_id, system_prompt)
self.messages = []
self.model = model
self.reset()

def reset(self):
system_item = {'role': 'system', 'content': self.system_prompt}
self.messages = [system_item]

def add_query(self, query):
user_item = {'role': 'user', 'content': query}
self.messages.append(user_item)

def add_reply(self, reply):
assistant_item = {'role': 'assistant', 'content': reply}
self.messages.append(assistant_item)

def discard_exceeding(self, max_tokens, cur_tokens= None):
precise = True
try:
cur_tokens = num_tokens_from_messages(self.messages, self.model)
except Exception as e:
precise = False
if cur_tokens is None:
raise e
logger.debug("Exception when counting tokens precisely for query: {}".format(e))
while cur_tokens > max_tokens:
if len(self.messages) > 2:
self.messages.pop(1)
elif len(self.messages) == 2 and self.messages[1]["role"] == "assistant":
self.messages.pop(1)
if precise:
cur_tokens = num_tokens_from_messages(self.messages, self.model)
else:
cur_tokens = cur_tokens - max_tokens
break
elif len(self.messages) == 2 and self.messages[1]["role"] == "user":
logger.warn("user message exceed max_tokens. total_tokens={}".format(cur_tokens))
break
else:
logger.debug("max_tokens={}, total_tokens={}, len(messages)={}".format(max_tokens, cur_tokens, len(self.messages)))
break
if precise:
cur_tokens = num_tokens_from_messages(self.messages, self.model)
else:
cur_tokens = cur_tokens - max_tokens
return cur_tokens


# refer to https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_messages(messages, model):
"""Returns the number of tokens used by a list of messages."""
import tiktoken
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
logger.debug("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
if model == "gpt-3.5-turbo":
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
elif model == "gpt-4":
return num_tokens_from_messages(messages, model="gpt-4-0314")
elif model == "gpt-3.5-turbo-0301":
tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
tokens_per_name = -1 # if there's a name, the role is omitted
elif model == "gpt-4-0314":
tokens_per_message = 3
tokens_per_name = 1
else:
logger.warn(f"num_tokens_from_messages() is not implemented for model {model}. Returning num tokens assuming gpt-3.5-turbo-0301.")
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
num_tokens = 0
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
return num_tokens
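
The new `ChatGPTSession` and its token accounting make up the bulk of this file, so a short usage sketch follows. It assumes the PR's module layout (`bot/chatgpt/chat_gpt_session.py` importable from the repo root), an installed `tiktoken`, and that the `Session` base class stores the `system_prompt` passed to it; treat it as illustrative rather than tested repository code.

```python
from bot.chatgpt.chat_gpt_session import ChatGPTSession, num_tokens_from_messages

session = ChatGPTSession("wx_user_1", system_prompt="You are a helpful assistant.")
session.add_query("Who won the world series in 2020?")
session.add_reply("The Los Angeles Dodgers won the World Series in 2020.")
session.add_query("Where was it played?")

# Per the gpt-3.5-turbo-0301 accounting above: 4 framing tokens per message,
# plus the encoded content, plus 3 tokens priming the assistant's reply.
print(num_tokens_from_messages(session.messages, session.model))

# Trim the oldest turns (never the system prompt) until under budget;
# returns the token count after trimming.
remaining = session.discard_exceeding(max_tokens=1000)
print(remaining, len(session.messages))
```

Note that when precise counting fails and no `cur_tokens` fallback is supplied, `discard_exceeding` re-raises, so callers such as `session_reply` are presumably expected to pass the API-reported `total_tokens` through.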