Skip to content

Commit

Permalink
1. Add feature: support modifying the language.
Browse files Browse the repository at this point in the history
2. Add python package pdfminer.six

3. Decouple API and plugins.
  • Loading branch information
yym68686 committed Nov 29, 2023
1 parent 43bed96 commit 08bee25
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 156 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Join the [Telegram Group](https://t.me/+_01cz9tAkUc1YzZl) chat to share your use

## ✨ Features

✅ Supports GPT3.5 and GPT4/GPT4 Turbo API, DALLE 3
✅ Supports GPT3.5, GPT4/GPT4 Turbo and Claude2.1 API, DALLE 3

✅ Supports online search using duckduckgo and Google🔍. DuckDuckGo search is provided by default, and the official API for Google search needs to be applied by the user. It can provide real-time information that GPT could not answer before, such as Weibo hot search today, weather in a certain place today, and the progress of a certain person or news.

Expand Down
64 changes: 48 additions & 16 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import claudebot
from telegram.constants import ChatAction
from utils.agent import docQA, get_doc_from_local
from utils.agent import docQA, get_doc_from_local, claudeQA
from telegram import BotCommand, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import CommandHandler, MessageHandler, ApplicationBuilder, filters, CallbackQueryHandler, Application, AIORateLimiter
from config import WEB_HOOK, PORT, BOT_TOKEN
Expand All @@ -32,7 +32,7 @@
translator_prompt = "You are a translation engine, you can only translate text and cannot interpret it, and do not explain. Translate the text to {}, please do not explain any sentences, just translate or leave them as they are. this is the content you need to translate: "
@decorators.Authorization
async def command_bot(update, context, language=None, prompt=translator_prompt, title="", robot=None, has_command=True):
if update.message.reply_to_message is None or update.message.reply_to_message.text:
if update.message.reply_to_message is None or update.message.reply_to_message.text or update.message.reply_to_message.document is None:
if has_command == False or len(context.args) > 0:
message = update.message.text if config.NICK is None else update.message.text[botNicKLength:].strip() if update.message.text[:botNicKLength].lower() == botNick else None
if has_command:
Expand All @@ -52,15 +52,15 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
reply_to_message_id=update.message.message_id,
)
else:
if update.message.reply_to_message.document is None:
message = (
f"格式错误哦~,需要回复一个文件,我才知道你要针对哪个文件提问,注意命令与问题之间的空格\n\n"
f"请输入 `要问的问题`\n\n"
f"例如已经上传某文档 ,问题是 蘑菇怎么分类?\n\n"
f"先左滑文档进入回复模式,在聊天框里面输入 `蘑菇怎么分类?`\n\n"
)
await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
return
# if update.message.reply_to_message.document is None:
# message = (
# f"格式错误哦~,需要回复一个文件,我才知道你要针对哪个文件提问,注意命令与问题之间的空格\n\n"
# f"请输入 `要问的问题`\n\n"
# f"例如已经上传某文档 ,问题是 蘑菇怎么分类?\n\n"
# f"先左滑文档进入回复模式,在聊天框里面输入 `蘑菇怎么分类?`\n\n"
# )
# await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
# return
print("\033[32m", update.effective_user.username, update.effective_user.id, update.message.text, "\033[0m")
await context.bot.send_chat_action(chat_id=update.message.chat_id, action=ChatAction.TYPING)
pdf_file = update.message.reply_to_message.document
Expand All @@ -74,7 +74,10 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,

file_name = pdf_file.file_name
docpath = os.getcwd() + "/" + file_name
result = await pdfQA(file_url, docpath, question)
if "claude" in config.GPT_ENGINE:
result = await claudeQA(file_url, question)
else:
result = await pdfQA(file_url, docpath, question)
print(result)
await context.bot.send_message(chat_id=update.message.chat_id, text=escape(result), parse_mode='MarkdownV2', disable_web_page_preview=True)

Expand Down Expand Up @@ -306,6 +309,9 @@ async def delete_message(update, context, messageid, delay=10):
InlineKeyboardButton("搜索已打开", callback_data="搜索"),
InlineKeyboardButton("联网解析PDF已打开", callback_data="pdf"),
],
[
InlineKeyboardButton("🇨🇳 中文", callback_data="language"),
],
[
InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free"),
],
Expand All @@ -330,7 +336,6 @@ async def button_press(update, context):
callback_query = update.callback_query
await callback_query.answer()
data = callback_query.data
print(data)
if "gpt-" in data or "claude" in data:
config.GPT_ENGINE = data
if config.API and "gpt-" in data:
Expand Down Expand Up @@ -437,6 +442,33 @@ async def button_press(update, context):
else:
first_buttons[2][1] = InlineKeyboardButton("联网解析PDF已打开", callback_data="pdf")

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
f"**Default engine:** `{config.GPT_ENGINE}`\n"
f"**temperature:** `{config.temperature}`\n"
f"**API_URL:** `{config.API_URL}`\n\n"
f"**API:** `{replace_with_asterisk(config.API)}`\n\n"
f"**WEB_HOOK:** `{config.WEB_HOOK}`\n\n"
)
message = await callback_query.edit_message_text(
text=escape(info_message),
reply_markup=InlineKeyboardMarkup(first_buttons),
parse_mode='MarkdownV2'
)
elif "language" in data:
if config.LANGUAGE == "Simplified Chinese":
first_buttons[3][0] = InlineKeyboardButton("🇺🇸 English", callback_data="language")
config.LANGUAGE = "English"
else:
first_buttons[3][0] = InlineKeyboardButton("🇨🇳 中文", callback_data="language")
config.LANGUAGE = "Simplified Chinese"
config.systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {config.LANGUAGE}. Knowledge cutoff: 2021-09. Current date: [ {config.Current_Date} ]"
if config.API:
config.ChatGPTbot = GPT(api_key=f"{config.API}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
config.ChatGPTbot.reset(convo_id=str(update.effective_chat.id), system_prompt=config.systemprompt)
if config.ClaudeAPI:
config.ChatGPTbot = claudebot(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
f"**Default engine:** `{config.GPT_ENGINE}`\n"
Expand All @@ -453,9 +485,9 @@ async def button_press(update, context):
elif "gpt4free" in data:
config.USE_G4F = not config.USE_G4F
if config.USE_G4F == False:
first_buttons[3][0] = InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free")
first_buttons[4][0] = InlineKeyboardButton("gpt4free已关闭", callback_data="gpt4free")
else:
first_buttons[3][0] = InlineKeyboardButton("gpt4free已打开", callback_data="gpt4free")
first_buttons[4][0] = InlineKeyboardButton("gpt4free已打开", callback_data="gpt4free")

info_message = (
f"`Hi, {update.effective_user.username}!`\n\n"
Expand Down Expand Up @@ -594,7 +626,7 @@ async def post_init(application: Application) -> None:
application.add_handler(CommandHandler("search", lambda update, context: search(update, context, title=f"`🤖️ {config.GPT_ENGINE}`\n\n", robot=config.ChatGPTbot)))
application.add_handler(CallbackQueryHandler(button_press))
application.add_handler(CommandHandler("reset", reset_chat))
application.add_handler(CommandHandler("en2zh", lambda update, context: command_bot(update, context, "simplified chinese", robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("en2zh", lambda update, context: command_bot(update, context, config.LANGUAGE, robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("zh2en", lambda update, context: command_bot(update, context, "english", robot=config.ChatGPTbot)))
application.add_handler(CommandHandler("info", info))
application.add_handler(CommandHandler("qa", qa))
Expand Down
3 changes: 2 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
SEARCH_USE_GPT = (os.environ.get('SEARCH_USE_GPT', "True") == "False") == False
API_URL = os.environ.get('API_URL', 'https://api.openai.com/v1/chat/completions')
PDF_EMBEDDING = (os.environ.get('PDF_EMBEDDING', "True") == "False") == False
LANGUAGE = os.environ.get('LANGUAGE', 'Simplified Chinese')

from datetime import datetime
current_date = datetime.now()
Current_Date = current_date.strftime("%Y-%m-%d")
systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Knowledge cutoff: 2021-09. Current date: [ {Current_Date} ]"
systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {LANGUAGE}. Knowledge cutoff: 2021-09. Current date: [ {Current_Date} ]"

from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import Imagebot, claudebot
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ duckduckgo-search==3.9.6
# duckduckgo-search==3.8.5
langchain==0.0.271
oauth2client==3.0.0
pdfminer.six
g4f==0.1.8.8
20 changes: 20 additions & 0 deletions test/test_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Manual smoke test for PDF text extraction with pdfminer.six.

Run directly: ``python test_pdf.py [path/to/file.pdf]``.
Falls back to the author's local sample document when no path is given.

For HTML output instead of plain text, see
``pdfminer.high_level.extract_text_to_fp`` with ``output_type='html'``.
"""
import sys

from pdfminer.high_level import extract_text

# Author's local sample file; override by passing a path on the command line.
DEFAULT_PDF = '/Users/yanyuming/Library/Mobile Documents/iCloud~QReader~MarginStudy/Documents/论文/VersatileGait- A Large-Scale Synthetic Gait Dataset with Fine-Grained Attributes and Complicated Scenarios.pdf'


def main() -> None:
    """Extract and print the text of the PDF given on the CLI (or the default)."""
    path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PDF
    print(extract_text(path))


if __name__ == "__main__":
    main()
36 changes: 34 additions & 2 deletions utils/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,17 @@ async def pdfQA(docurl, docpath, query_message, model="gpt-3.5-turbo"):
vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)
qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
result = qa({"query": query_message})
print(2)
return result['result']

async def claudeQA(docurl, query_message):
    """Download the document at *docurl* and return its extracted plain text.

    Used for Claude models, which are given the whole document inline
    instead of going through the embedding/retrieval pipeline of ``pdfQA``.

    :param docurl: URL of the PDF document to fetch.
    :param query_message: the user's question. Currently unused here — the
        raw text is returned and the caller is responsible for building the
        final prompt around it.
    :return: the text extracted from the downloaded PDF.
    """
    # Local import: pdfminer.six is only needed on this code path.
    from pdfminer.high_level import extract_text
    filename = get_doc_from_url(docurl)
    docpath = os.getcwd() + "/" + filename
    return extract_text(docpath)

def pdf_search(docurl, query_message, model="gpt-3.5-turbo"):
chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
Expand Down Expand Up @@ -309,7 +317,7 @@ def gptsearch(result, llm):
return response


def get_google_search_results(prompt: str, context_max_tokens: int):
def get_search_results(prompt: str, context_max_tokens: int):
start_time = record_time.time()

urls_set = []
Expand Down Expand Up @@ -417,6 +425,29 @@ def get_google_search_results(prompt: str, context_max_tokens: int):
print("text len", text_len)
return useful_source_text

def search_web_and_summary(
    prompt: str,
    engine: str = "gpt-3.5-turbo",
    context_max_tokens: int = 4096,
):
    """Search the web for *prompt* and stream an LLM-written summary.

    Collects search-result text (up to *context_max_tokens*), runs a
    summarization chain over it in a background thread, and yields the
    model's tokens as they arrive via the streaming callback handler.

    :param prompt: the user's question.
    :param engine: OpenAI model name (ignored when ``config.USE_G4F`` is set).
    :param context_max_tokens: budget for the collected search-result text.
    :yield: response tokens, streamed as they are generated.
    """
    chainStreamHandler = ChainStreamHandler()
    if config.USE_G4F:
        chatllm = EducationalLLM(callback_manager=CallbackManager([chainStreamHandler]))
    else:
        chatllm = ChatOpenAI(streaming=True, callback_manager=CallbackManager([chainStreamHandler]), temperature=config.temperature, openai_api_base=config.bot_api_url.v1_url, model_name=engine, openai_api_key=config.API)
    useful_source_text = get_search_results(prompt, context_max_tokens)
    # "language" must be declared: the template references {language} and
    # chain.run() supplies it; langchain validates this list at construction.
    summary_prompt = PromptTemplate(
        input_variables=["web_summary", "question", "language"],
        template=(
            "You need to response the following question: {question}. Search results: {web_summary}. Your task is to think about the question step by step and then answer the above question in {language} based on the Search results provided. Please response in {language} and adopt a style that is logical, in-depth, and detailed. Note: In order to make the answer appear highly professional, you should be an expert in textual analysis, aiming to make the answer precise and comprehensive. Directly response markdown format, without using markdown code blocks"
        ),
    )
    chain = LLMChain(llm=chatllm, prompt=summary_prompt)
    # Run the chain in a thread so we can consume the streaming handler's
    # token queue concurrently; the generator ends when the chain finishes.
    chain_thread = threading.Thread(target=chain.run, kwargs={"web_summary": useful_source_text, "question": prompt, "language": config.LANGUAGE})
    chain_thread.start()
    yield from chainStreamHandler.generate_tokens()
if __name__ == "__main__":
os.system("clear")

Expand All @@ -426,6 +457,7 @@ def get_google_search_results(prompt: str, context_max_tokens: int):
# # 搜索

# # for i in search_summary("今天的微博热搜有哪些?"):
# # for i in search_summary("macos 13.6 有什么新功能"):
# # for i in search_summary("用python写个网络爬虫给我"):
# # for i in search_summary("消失的她主要讲了什么?"):
# # for i in search_summary("奥巴马的全名是什么?"):
Expand Down
Loading

0 comments on commit 08bee25

Please sign in to comment.