In [None]:
import gradio as gr

def greet(name, intensity):
    return "Hello, " + name + "!" * int(intensity)

demo = gr.Interface(
    fn=greet,
    inputs=["text", "slider"],
    outputs=["text"],
)

demo.launch(share=True)


In [4]:
# 起名大师
# 导入相关包
import os
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY")
from langchain_community.llms import Tongyi
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import BaseOutputParser

# 自定义类
class CommaSeparatedListOutputParser(BaseOutputParser):
    def parse(self, text: str):
        # return text.strip().split(", ")
        return [item.strip() for item in text.strip().split(",")]

llm = Tongyi(
    temperature=0,
    openai_api_key=DASHSCOPE_API_KEY
)

template = '''
你是一个起名大师,请模仿示例起3个{county}名字,比如男孩经常被叫做{boy},女孩经常被叫做{girl},请返回以逗号分隔的列表形式。仅返回逗号分隔的列表，不要返回其他内容。
'''
prompt = PromptTemplate(
    template=template,
    input_variables=["county", "boy", "girl"]
)

# 设置解析器
parser = CommaSeparatedListOutputParser()

# 将 LLM 与 Prompt 和解析器连接起来
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    output_parser=parser
)

# 用户输入的问题
county = "中国特色的"
boy = "狗蛋"
girl = "翠花"

# 格式化消息
message = prompt.format(county=county, boy=boy, girl=girl)

# 运行并打印结果
res = chain.invoke({"county": county, "boy": boy, "girl": girl})
print(res)  # 应该输出一个列表

# 尝试打印message
print(message)

# 直接调用llm的预测
strs = llm.predict(message)
parsed_output = parser.parse(strs)
print(parsed_output)


{'county': '中国特色的', 'boy': '狗蛋', 'girl': '翠花', 'text': ['铁柱', '小芳', '大明', '']}

你是一个起名大师,请模仿示例起3个中国特色的名字,比如男孩经常被叫做狗蛋,女孩经常被叫做翠花,请返回以逗号分隔的列表形式。仅返回逗号分隔的列表，不要返回其他内容。



  warn_deprecated(


['铁柱', '小芳', '大明', '']


In [7]:
# 导入必要的包
from langchain.document_loaders import (
    Docx2txtLoader,
    UnstructuredExcelLoader,
    PyPDFLoader,
    TextLoader
)
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatTongyi

import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())
api_key = os.getenv("DASHSCOPE_API_KEY")

# 定义ChatDoc类
class ChatDoc():
    def __init__(self):
        self.doc = None
        self.splitText = []  # 分割后的文本
        self.template = [
            ("system", "你是一个处理文档的秘书,你从不说自己是一个大语言模型和AI助手,你会根据上下文内容来继续回答问题"),
            ("human", "你好!"),
            ("ai", "你好"),
            ("human", "{context}\n\n{question}")
        ]
        self.prompt = ChatPromptTemplate.from_messages(self.template)

    def getFile(self):
        doc = self.doc
        loaders = {
            "docx": Docx2txtLoader,
            "xlsx": UnstructuredExcelLoader,
            "pdf": PyPDFLoader,
            "txt": TextLoader
        }
        file_extension = doc.split(".")[-1]
        loader_class = loaders.get(file_extension)
        if loader_class:
            try:
                loader = loader_class(doc)
                text = loader.load()
                return text
            except Exception as e:
                print(f"Error loading document: {e}")
        else:
            print(f"Unsupported file extension: {file_extension}")
            return None

    # 处理文档的函数
    def splitSentences(self):
        full_text = self.getFile()
        if full_text is not None:
            # 对文档进行分割
            text_split = CharacterTextSplitter(
                chunk_size=150,
                chunk_overlap=20
            )
            texts = text_split.split_documents(full_text)
            self.splitText = texts

    # 向量化与向量化存储
    def embeddingAndVectorDB(self):
        embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en")  # 确保模型维度正确
        db = Chroma.from_documents(
            documents=self.splitText,
            embedding=embeddings
        )
        return db
    
    def askAndFindFiles(self, question):
        db = self.embeddingAndVectorDB()
        retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold":0.1, "k":1})
        return retriever.get_relevant_documents(query=question)
    
    # 用自然语言和文档进行聊天
    def chatWithDoc(self, question):
        _context = ""
        context = self.askAndFindFiles(question)
        for i in context:
            _context += i.page_content

        messages = self.prompt.format_messages(context=_context, question=question)
        print("Formatted Messages:", messages)
        chat = ChatTongyi(
            model_name="qwen-vl-max",
            temperature=0,
            dashscope_api_key=api_key
        )
        # invoke 唤醒我们的函数
        try:
            response = chat.invoke(messages)
            print("API Response:", response)
            return response
        except KeyError as e:
            print(f"KeyError: {e}, Response: {response}")
            return None

# 创建ChatDoc实例
chat_doc = ChatDoc()
chat_doc.doc = "../myTeleBot/BE狂魔求生系统.txt"
chat_doc.splitSentences()

response = chat_doc.chatWithDoc("根据文档txt找到夏知许和许其琛十年后的故事总结?")
print(response)

Created a chunk of size 208, which is longer than the specified 150
Created a chunk of size 167, which is longer than the specified 150
Created a chunk of size 225, which is longer than the specified 150
Created a chunk of size 3502, which is longer than the specified 150
Created a chunk of size 5283, which is longer than the specified 150
Created a chunk of size 2517, which is longer than the specified 150
Created a chunk of size 3331, which is longer than the specified 150
Created a chunk of size 2376, which is longer than the specified 150
Created a chunk of size 2473, which is longer than the specified 150
Created a chunk of size 3443, which is longer than the specified 150
Created a chunk of size 3308, which is longer than the specified 150
Created a chunk of size 3381, which is longer than the specified 150
Created a chunk of size 3518, which is longer than the specified 150
Created a chunk of size 3617, which is longer than the specified 150
Created a chunk of size 3875, which i

Formatted Messages: [SystemMessage(content='你是一个处理文档的秘书,你从不说自己是一个大语言模型和AI助手,你会根据上下文内容来继续回答问题'), HumanMessage(content='你好!'), AIMessage(content='你好'), HumanMessage(content='第88章 你是我迟到十年的初恋（二）\n\u3000\u3000“在到底是怎么一回事……”许其琛坐在客厅沙发上, 小声地质问夏知许, “你怎么都不跟我说一下啊……”\n\u3000\u3000夏妈妈从厨房端来了一盘切好的橙子，“你们先吃点水果，吃了早饭没，我给你们去下点面？”\n\u3000\u3000许其琛怪不好意思的，夏妈妈一走近他就下意识站了起来。\n\u3000\u3000“你坐啊, 坐。”夏妈妈笑得一脸和蔼，将他又按回了沙发上。\n\u3000\u3000夏爸爸也从卧室出来，“你给他们下馄饨吧, 昨天不是包了新鲜的？”他说着，走到了另一个沙发上坐下, “你阿姨包的三鲜小馄饨很香的, 等会儿多吃点。”\n\u3000\u3000许其琛连忙笑着点头, 他也不知道现在究竟是什么情况。看着夏知许的父母这么亲切，他心里隐隐觉得抱歉。\n\u3000\u3000可能他们以为自己儿子是带了朋友回家玩吧。\n\u3000\u3000要是被他们知道事实，会不会大发雷霆？\n\u3000\u3000他的脑子里开始出现一大堆奇奇怪怪的画面，诸如夏知许痛哭流涕地跪在地上，夏妈妈晕倒在地, 夏爸爸大喊着“我打死你这个孽障”之类的话拳打脚踢……\n\u3000\u3000“你想什么呢？”夏知许撞了撞他的胳膊，“是不是还犯困呢？”\n\u3000\u3000许其琛连忙摇头, 一句话也不说。\n\u3000\u3000“吃橙子吗？”\n\u3000\u3000许其琛摇头。\n\u3000\u3000“喝茶吗？”\n\u3000\u3000许其琛还是摇头。\n\u3000\u3000夏知许乐了, 凑到他耳边小声说道, “许小朋友, 你就这么紧张啊。”\n\u3000\u3000许其琛瞪了他一眼, 避嫌似的往边上挪了挪，眼睛望了一眼夏爸爸，对方正按着遥控器，打开了早间新闻。\n\u3000\u3000“其琛啊。”夏爸爸忽然开口，这个称呼吓了许其琛一跳，他

In [9]:
from langchain_community.chat_models import ChatTongyi


chat = ChatTongyi(
    model_name="qwen-vl-max",
    temperature=0,
    dashscope_api_key=api_key
)

chunks = [];
for chunk in chat.stream("请介绍一下两棵by绿山这本小说讲的是什么"):
    chunks.append(chunk)
    print(chunk,end='',flush=True)
    

content=[{'text': '抱歉'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '，'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '我'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '无法找到与"两'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '棵by绿山"相关的信息。'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '可能是输入有误或是我尚未了解'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '的书籍。建议您再次确认一下'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '书名。如果这是你想要找'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '的书，请提供正确的标题，我会'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[{'text': '很乐意帮您介绍这本书的内容。'}] id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'content=[] response_metadata={'finish_reason': 'stop', 'request_id': '1bf02c4c-96c2-927b-9512-b80613ecdb82', 'token_usage': {'input_tokens': 32, 'output_tokens': 57}} id='run-cd867cfe-ff7e-4568-b640-5ca2cb51ee43'