Skip to content

Commit

Permalink
1. Add feature: support modifying the language.
Browse files Browse the repository at this point in the history
2. Add python package pdfminer.six

3. Decouple API and plugins.
  • Loading branch information
yym68686 committed Nov 29, 2023
1 parent 43bed96 commit 08bee25
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 156 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Join the [Telegram Group](https://t.me/+_01cz9tAkUc1YzZl) chat to share your use

## ✨ Features

✅ Supports GPT3.5 and GPT4/GPT4 Turbo API, DALLE 3
✅ Supports GPT3.5, GPT4/GPT4 Turbo and Claude2.1 API, DALLE 3

✅ Supports online search using duckduckgo and Google🔍. DuckDuckGo search is provided by default, and the official API for Google search needs to be applied by the user. It can provide real-time information that GPT could not answer before, such as Weibo hot search today, weather in a certain place today, and the progress of a certain person or news.

Expand Down
64 changes: 48 additions & 16 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import claudebot
from telegram.constants import ChatAction
from utils.agent import docQA, get_doc_from_local
from utils.agent import docQA, get_doc_from_local, claudeQA
from telegram import BotCommand, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import CommandHandler, MessageHandler, ApplicationBuilder, filters, CallbackQueryHandler, Application, AIORateLimiter
from config import WEB_HOOK, PORT, BOT_TOKEN
Expand All @@ -32,7 +32,7 @@
translator_prompt = "You are a translation engine, you can only translate text and cannot interpret it, and do not explain. Translate the text to {}, please do not explain any sentences, just translate or leave them as they are. this is the content you need to translate: "
@decorators.Authorization
async def command_bot(update, context, language=None, prompt=translator_prompt, title="", robot=None, has_command=True):
if update.message.reply_to_message is None or update.message.reply_to_message.text:
if update.message.reply_to_message is None or update.message.reply_to_message.text or update.message.reply_to_message.document is None:
if has_command == False or len(context.args) > 0:
message = update.message.text if config.NICK is None else update.message.text[botNicKLength:].strip() if update.message.text[:botNicKLength].lower() == botNick else None
if has_command:
Expand All @@ -52,15 +52,15 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
reply_to_message_id=update.message.message_id,
)
else:
if update.message.reply_to_message.document is None:
message = (
f"格式错误哦~,需要回复一个文件,我才知道你要针对哪个文件提问,注意命令与问题之间的空格\n\n"
f"请输入 `要问的问题`\n\n"
f"例如已经上传某文档 ,问题是 蘑菇怎么分类?\n\n"
f"先左滑文档进入回复模式,在聊天框里面输入 `蘑菇怎么分类?`\n\n"
)
await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
return
# if update.message.reply_to_message.document is None:
# message = (
# f"格式错误哦~,需要回复一个文件,我才知道你要针对哪个文件提问,注意命令与问题之间的空格\n\n"
# f"请输入 `要问的问题`\n\n"
# f"例如已经上传某文档 ,问题是 蘑菇怎么分类?\n\n"
# f"先左滑文档进入回复模式,在聊天框里面输入 `蘑菇怎么分类?`\n\n"
# )
# await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
# return
print("\033[32m", update.effective_user.username, update.effective_user.id, update.message.text, "\033[0m")
await context.bot.send_chat_action(chat_id=update.message.chat_id, action=ChatAction.TYPING)
pdf_file = update.message.reply_to_message.document
Expand All @@ -74,7 +74,10 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,

file_name = pdf_file.file_name
docpath = os.getcwd() + "/" + file_name
result = await pdfQA(file_url, docpath, question)
if "claude" in config.GPT_ENGINE:
result = await claudeQA(file_url, question)
else:
result = await pdfQA(file_url, docpath, question)
print(result)
await context.bot.send_message(chat_id=update.message.chat_id, text=escape(result), parse_mode='MarkdownV2', disable_web_page_preview=True)

Expand Down Expand Up @@ -306,6 +309,9 @@ async def delete_message(update, context, messageid, delay=10):
InlineKeyboardButton("搜索已打开", callback_data="搜索"),
InlineKeyboardButton("联网解析PDF已打开", callback_data="pdf"),
],
[
InlineKeyboardButton("🇨🇳 中文", callback_data="language"),
],
[
InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free"),
],
Expand All @@ -330,7 +336,6 @@ async def button_press(update, context):
callback_query = update.callback_query
await callback_query.answer()
data = callback_query.data
print(data)
if "gpt-" in data or "claude" in data:
config.GPT_ENGINE = data
if config.API and "gpt-" in data:
Expand Down Expand Up @@ -437,6 +442,33 @@ async def button_press(update, context):
else:
first_buttons[2][1] = InlineKeyboardButton("联网解析PDF已打开", callback_data="pdf")

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
f"**Default engine:** `{config.GPT_ENGINE}`\n"
f"**temperature:** `{config.temperature}`\n"
f"**API_URL:** `{config.API_URL}`\n\n"
f"**API:** `{replace_with_asterisk(config.API)}`\n\n"
f"**WEB_HOOK:** `{config.WEB_HOOK}`\n\n"
)
message = await callback_query.edit_message_text(
text=escape(info_message),
reply_markup=InlineKeyboardMarkup(first_buttons),
parse_mode='MarkdownV2'
)
elif "language" in data:
if config.LANGUAGE == "Simplified Chinese":
first_buttons[3][0] = InlineKeyboardButton("🇺🇸 English", callback_data="language")
config.LANGUAGE = "English"
else:
first_buttons[3][0] = InlineKeyboardButton("🇨🇳 中文", callback_data="language")
config.LANGUAGE = "Simplified Chinese"
config.systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {config.LANGUAGE}. Knowledge cutoff: 2021-09. Current date: [ {config.Current_Date} ]"
if config.API:
config.ChatGPTbot = GPT(api_key=f"{config.API}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
config.ChatGPTbot.reset(convo_id=str(update.effective_chat.id), system_prompt=config.systemprompt)
if config.ClaudeAPI:
config.ChatGPTbot = claudebot(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
f"**Default engine:** `{config.GPT_ENGINE}`\n"
Expand All @@ -453,9 +485,9 @@ async def button_press(update, context):
elif "gpt4free" in data:
config.USE_G4F = not config.USE_G4F
if config.USE_G4F == False:
first_buttons[3][0] = InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free")
first_buttons[4][0] = InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free")
else:
first_buttons[3][0] = InlineKeyboardButton("gpt4free已打开", callback_data="gpt4free")
first_buttons[4][0] = InlineKeyboardButton("gpt4free已打开", callback_data="gpt4free")

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
Expand Down Expand Up @@ -594,7 +626,7 @@ async def post_init(application: Application) -> None:
application.add_handler(CommandHandler("search", lambda update, context: search(update, context, title=f"`🤖️ {config.GPT_ENGINE}`\n\n", robot=config.ChatGPTbot)))
application.add_handler(CallbackQueryHandler(button_press))
application.add_handler(CommandHandler("reset", reset_chat))
application.add_handler(CommandHandler("en2zh", lambda update, context: command_bot(update, context, "simplified chinese", robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("en2zh", lambda update, context: command_bot(update, context, config.LANGUAGE, robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("zh2en", lambda update, context: command_bot(update, context, "english", robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("info", info))
application.add_handler(CommandHandler("qa", qa))
Expand Down
3 changes: 2 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
SEARCH_USE_GPT = (os.environ.get('SEARCH_USE_GPT', "True") == "False") == False
API_URL = os.environ.get('API_URL', 'https://api.openai.com/v1/chat/completions')
PDF_EMBEDDING = (os.environ.get('PDF_EMBEDDING', "True") == "False") == False
LANGUAGE = os.environ.get('LANGUAGE', 'Simplified Chinese')

from datetime import datetime
current_date = datetime.now()
Current_Date = current_date.strftime("%Y-%m-%d")
systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Knowledge cutoff: 2021-09. Current date: [ {Current_Date} ]"
systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {LANGUAGE}. Knowledge cutoff: 2021-09. Current date: [ {Current_Date} ]"

from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import Imagebot, claudebot
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ duckduckgo-search==3.9.6
# duckduckgo-search==3.8.5
langchain==0.0.271
oauth2client==3.0.0
pdfminer.six
g4f==0.1.8.8
20 changes: 20 additions & 0 deletions test/test_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Manual smoke test for PDF text extraction with pdfminer.six.

Run directly: ``python test_pdf.py [path/to/file.pdf]``.
Falls back to the author's local sample document when no path is given.

For HTML output instead of plain text, see
``pdfminer.high_level.extract_text_to_fp`` with ``output_type='html'``.
"""
import sys

from pdfminer.high_level import extract_text

# Author's local sample file; override by passing a path on the command line.
DEFAULT_PDF = '/Users/yanyuming/Library/Mobile Documents/iCloud~QReader~MarginStudy/Documents/论文/VersatileGait- A Large-Scale Synthetic Gait Dataset with Fine-Grained Attributes and Complicated Scenarios.pdf'


def main() -> None:
    """Extract and print the text of the PDF given on the CLI (or the default)."""
    path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PDF
    print(extract_text(path))


if __name__ == "__main__":
    main()
36 changes: 34 additions & 2 deletions utils/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,17 @@ async def pdfQA(docurl, docpath, query_message, model="gpt-3.5-turbo"):
vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)
qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
result = qa({"query": query_message})
print(2)
return result['result']

async def claudeQA(docurl, query_message):
    """Download the document at *docurl* and return its extracted plain text.

    Used for Claude models, which are given the whole document inline
    instead of going through the embedding/retrieval pipeline of ``pdfQA``.

    :param docurl: URL of the PDF document to fetch.
    :param query_message: the user's question. Currently unused here — the
        raw text is returned and the caller is responsible for building the
        final prompt around it.
    :return: the text extracted from the downloaded PDF.
    """
    # Local import: pdfminer.six is only needed on this code path.
    from pdfminer.high_level import extract_text
    filename = get_doc_from_url(docurl)
    docpath = os.getcwd() + "/" + filename
    return extract_text(docpath)

def pdf_search(docurl, query_message, model="gpt-3.5-turbo"):
chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
Expand Down Expand Up @@ -309,7 +317,7 @@ def gptsearch(result, llm):
return response


def get_google_search_results(prompt: str, context_max_tokens: int):
def get_search_results(prompt: str, context_max_tokens: int):
start_time = record_time.time()

urls_set = []
Expand Down Expand Up @@ -417,6 +425,29 @@ def get_google_search_results(prompt: str, context_max_tokens: int):
print("text len", text_len)
return useful_source_text

def search_web_and_summary(
    prompt: str,
    engine: str = "gpt-3.5-turbo",
    context_max_tokens: int = 4096,
):
    """Search the web for *prompt* and stream an LLM-written summary.

    Collects search-result text (up to *context_max_tokens*), runs a
    summarization chain over it in a background thread, and yields the
    model's tokens as they arrive via the streaming callback handler.

    :param prompt: the user's question.
    :param engine: OpenAI model name (ignored when ``config.USE_G4F`` is set).
    :param context_max_tokens: budget for the collected search-result text.
    :yield: response tokens, streamed as they are generated.
    """
    chainStreamHandler = ChainStreamHandler()
    if config.USE_G4F:
        chatllm = EducationalLLM(callback_manager=CallbackManager([chainStreamHandler]))
    else:
        chatllm = ChatOpenAI(streaming=True, callback_manager=CallbackManager([chainStreamHandler]), temperature=config.temperature, openai_api_base=config.bot_api_url.v1_url, model_name=engine, openai_api_key=config.API)
    useful_source_text = get_search_results(prompt, context_max_tokens)
    # "language" must be declared: the template references {language} and
    # chain.run() supplies it; langchain validates this list at construction.
    summary_prompt = PromptTemplate(
        input_variables=["web_summary", "question", "language"],
        template=(
            "You need to response the following question: {question}. Search results: {web_summary}. Your task is to think about the question step by step and then answer the above question in {language} based on the Search results provided. Please response in {language} and adopt a style that is logical, in-depth, and detailed. Note: In order to make the answer appear highly professional, you should be an expert in textual analysis, aiming to make the answer precise and comprehensive. Directly response markdown format, without using markdown code blocks"
        ),
    )
    chain = LLMChain(llm=chatllm, prompt=summary_prompt)
    # Run the chain in a thread so we can consume the streaming handler's
    # token queue concurrently; the generator ends when the chain finishes.
    chain_thread = threading.Thread(target=chain.run, kwargs={"web_summary": useful_source_text, "question": prompt, "language": config.LANGUAGE})
    chain_thread.start()
    yield from chainStreamHandler.generate_tokens()
if __name__ == "__main__":
os.system("clear")

Expand All @@ -426,6 +457,7 @@ def get_google_search_results(prompt: str, context_max_tokens: int):
# # 搜索

# # for i in search_summary("今天的微博热搜有哪些?"):
# # for i in search_summary("macos 13.6 有什么新功能"):
# # for i in search_summary("用python写个网络爬虫给我"):
# # for i in search_summary("消失的她主要讲了什么?"):
# # for i in search_summary("奥巴马的全名是什么?"):
Expand Down
Loading

0 comments on commit 08bee25

Please sign in to comment.