Commit
fixed bug: search command is None
yym68686 committed Nov 26, 2023
1 parent 2663bf0 commit 09faed6
Showing 3 changed files with 70 additions and 1 deletion.
10 changes: 10 additions & 0 deletions bot.py
@@ -143,6 +143,16 @@ async def getChatGPT(update, context, title, robot, message, use_search=config.S

async def search(update, context, title, robot):
    message = update.message.text if config.NICK is None else update.message.text[botNicKLength:].strip() if update.message.text[:botNicKLength].lower() == botNick else None
    print("\033[32m", update.effective_user.username, update.effective_user.id, update.message.text, "\033[0m")
    if (len(context.args) == 0):
        message = (
            f"Wrong format~ For example:\n\n"
            f"`/search What are today's trending Weibo topics?`\n\n"
            f"👆 Tap the command above to copy the format\n\n"
        )
        await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
        return
    message = ' '.join(context.args)
    result = title
    text = message
    modifytime = 0
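For context: `context.args` is only populated when the function runs as a registered command handler, which is why a bare `/search` previously left `message` as None. A minimal sketch of the kind of registration that feeds `context.args`, using python-telegram-bot; the wrapper name, title value, and token placeholder are assumptions for illustration, not code from this repository:

from telegram.ext import ApplicationBuilder, CommandHandler

# Hypothetical wiring for illustration; bot.py does its own registration.
# For "/search foo bar", python-telegram-bot splits the text after the
# command so that context.args == ["foo", "bar"]; a bare "/search" yields [].
async def search_command(update, context):
    await search(update, context, title="", robot=None)

application = ApplicationBuilder().token("YOUR_BOT_TOKEN").build()
application.add_handler(CommandHandler("search", search_command))
application.run_polling()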
11 changes: 10 additions & 1 deletion test/test.py
@@ -48,4 +48,13 @@ def __init__(


a = openaiAPI()
print(a.v1_url)

def getddgsearchurl(result, numresults=3):
    # print("ddg-search", result)
    search = DuckDuckGoSearchResults(num_results=numresults)
    webresult = search.run(result)
    # print("ddgwebresult", webresult)
    urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
    # print("duckduckgo urls", urls)
    return urls
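The trailing `\]` in the pattern leans on how LangChain's DuckDuckGoSearchResults renders each hit as a bracketed record whose last field is the link, so every URL is immediately followed by a closing bracket. A self-contained check against a sample string of that shape (the snippets, titles, and URLs below are invented for illustration):

import re

# Sample shaped like DuckDuckGoSearchResults.run() output; contents invented.
webresult = (
    "[snippet: First hit., title: Example One, link: https://example.com/a], "
    "[snippet: Second hit., title: Example Two, link: https://example.org/b]"
)
urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
print(urls)  # ['https://example.com/a', 'https://example.org/b']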
50 changes: 50 additions & 0 deletions test/test_ddg.py
@@ -0,0 +1,50 @@
import re
import time
import requests
import os
from bs4 import BeautifulSoup
from langchain.tools import DuckDuckGoSearchResults
def getddgsearchurl(result, numresults=3):
    search = DuckDuckGoSearchResults(num_results=numresults)
    webresult = search.run(result)
    urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
    return urls

urls = getddgsearchurl("Do you know today's hot news?")
print(urls)

def Web_crawler(url: str) -> str:
    """Return the body text of the page at url; url must be a valid web address."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    result = ''
    try:
        requests.packages.urllib3.disable_warnings()
        # stream=True defers the body download, so Content-Length can be
        # checked from the headers alone and large files skipped cheaply.
        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
        if response.status_code == 404:
            print("Page not found:", url)
            return ""
        content_length = int(response.headers.get('Content-Length', 0))
        if content_length > 5000000:
            print("Skipping large file:", url)
            return result
        # Parse the raw bytes and let BeautifulSoup detect the encoding;
        # encoding response.text with response.encoding crashes when the
        # server sends no charset (response.encoding is None).
        soup = BeautifulSoup(response.content, 'lxml')
        body = soup.find('body')
        result = "".join(body.get_text().split('\n')) if body else ""
    except Exception as e:
        print('\033[31m')
        print("error url", url)
        print("error", e)
        print('\033[0m')
    return result

start_time = time.time()

for url in urls:
    print(Web_crawler(url))
    print('-----------------------------')
end_time = time.time()
run_time = end_time - start_time
# print the elapsed time
print(f"Program run time: {run_time} seconds")
