# set api & llm model

In [None]:
import yaml
# Load API keys and settings from the local developer config file.
with open('.dev.yaml','r') as f:
    CONFIG = yaml.safe_load(f)
import os
# 'openai_api_key' is indexed here as a sequence of keys; export the first one as the default.
os.environ["OPENAI_API_KEY"] = CONFIG['openai_api_key'][0]
os.environ["SERPAPI_API_KEY"] = CONFIG['serpapi_api_key']
os.environ["PINECONE_API_KEY"] = CONFIG['pinecone_api_key']  # find at app.pinecone.io
# Spelled PINECONE_ENVIRONMENT so the value is exported under the conventional variable name.
os.environ["PINECONE_ENVIRONMENT"] = CONFIG['pinecone_environment']  # next to api key in console
#print(CONFIG)
#print(os.environ)

# prompt

In [None]:
from langchain.prompts import PromptTemplate
# Two-slot template: {name} and {what} are filled in by format() below.
promptObj = PromptTemplate(
    template="{name}{what}有多少？",
    input_variables=["name", "what"],
)
final_prompt = promptObj.format(**{"name": "香港", "what": "面积"})
print(final_prompt)

# llm

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
import random
def get_llm(**kwargs):
    """Build an OpenAI completion or chat client from keyword options.

    Keyword Args:
        name: "openai" selects the ``OpenAI`` wrapper; any other value selects
            ``ChatOpenAI``. Defaults to "openai".
        is_stream: when true, enable streaming and attach a
            ``StreamingStdOutCallbackHandler``. Defaults to True.
        openai_api_key: key to use. When omitted or None, one entry is picked
            at random from ``CONFIG["openai_api_key"]``.
        model_name: model to request. Defaults to "gpt-3.5-turbo".
        temperature: sampling temperature. Defaults to 0, the value this
            helper has always sent when none is given.

    Returns:
        The constructed ``OpenAI`` or ``ChatOpenAI`` instance.
    """
    name = kwargs.get('name', 'openai')
    is_stream = kwargs.get('is_stream', True)
    openai_api_key = kwargs.get('openai_api_key')
    model_name = kwargs.get('model_name', 'gpt-3.5-turbo')
    temperature = kwargs.get('temperature', 0)
    if openai_api_key is None:
        openai_api_key = random.choice(CONFIG["openai_api_key"])

    # Both wrappers take the same options, so pick the class first and
    # assemble the arguments once.
    llm_class = OpenAI if name == "openai" else ChatOpenAI
    options = {
        "openai_api_key": openai_api_key,
        "model_name": model_name,
        "temperature": temperature,
    }
    if is_stream:
        options["streaming"] = True
        options["callbacks"] = [StreamingStdOutCallbackHandler()]
    return llm_class(**options)
# One streaming and one non-streaming client per backend. is_stream is passed
# explicitly because get_llm defaults it to True.
stream_chat = get_llm(name="chat",is_stream=True)
chat = get_llm(name="chat",is_stream=False)
stream_openai = get_llm(name="openai",is_stream=True)
openai=get_llm(name="openai",is_stream=False)
# Quick smoke test of the non-streaming completion client.
openai("hello")

In [None]:
# Send the same question through all four clients built earlier in the notebook.
#openai(prompt=prompt.format(name="美国"))
promptObj = PromptTemplate(input_variables=["name","what"],template="{name}{what}有多少？")
# Chat clients take a message list: one system instruction plus the formatted question.
messages = [
    SystemMessage(content="用中文和英文两种语言回答"),
    HumanMessage(content=promptObj.format(name="上海",what="天气"))
]
# Completion clients take the formatted prompt string directly.
result1 = stream_openai(promptObj.format(name="上海",what="天气"))
print("\n stream_openai \n",result1)
# generate() takes a list of prompts.
result2 = openai.generate([promptObj.format(name="上海",what="天气")])
print("\n openai \n",result2)
result3 = stream_chat(messages)
print("\n stream_chat \n",result3)
# For chat models generate() takes a list of message lists.
result4 = chat.generate([messages])
print("\n chat \n",result4)

# chain

In [None]:
from langchain.prompts import PromptTemplate
promptObj = PromptTemplate(input_variables=["name","what"],template="{name}{what}有多少？")
from langchain.chains import LLMChain
# Bind the template to the streaming completion client; `chain` is reused by the save/load cell.
chain = LLMChain(llm=stream_openai , prompt=promptObj)
# run() receives the template variables as a dict.
chain.run({"name":"香港","what":"人口"})

In [None]:
from langchain.chains import load_chain
# Round-trip the existing `chain` through a JSON file and run the reloaded copy.
chain.save('test_chain.json')
chain1 = load_chain('test_chain.json')
chain1.run({"name":"全世界","what":"专利数"})
# This fails because load_chain currently does not support chatgpt, only openai

# SequentialChain
只有一个input，一个output可以使用SimpleSequentialChain。更通用的情况使用SequentialChain

In [None]:
# chainA: write a poem from {title} and {name}; its output is stored under "poem".
template = """根据给定title和name，写一首关于name的诗歌

Title: {title}
Name: {name}
Playwright: 以下是诗歌:"""
prompt_template = PromptTemplate(input_variables=["title", 'name'], template=template)
chainA = LLMChain(llm=stream_openai, prompt=prompt_template, output_key="poem")

# chainB: review the poem produced by chainA; its output is stored under "review".
template = """根据给定的poem，自己评价自己写的好不好
Poem:
{poem}
对这个诗歌的评价:"""
prompt_template = PromptTemplate(input_variables=["poem"], template=template)
chainB = LLMChain(llm=stream_openai, prompt=prompt_template, output_key="review")

# chainC: rewrite the poem using {name} and {other}; no output_key is given here.
chainC = LLMChain.from_string(llm=stream_openai, template="根据{poem},把{name}替换为{other}")

# The overall chain runs chainA, chainB and chainC in sequence.
from langchain.chains import SequentialChain
all_chain = SequentialChain(
    chains=[chainA, chainB,chainC],
    input_variables=["title", "name","other"],
    # Only "poem" and "review" are requested as outputs; chainC's result is not listed.
    output_variables=["poem", "review"],
    verbose=True)

result = all_chain({"title":"我家的小狗", "name": "狗","other":"猫"})
print(result)

# RouteChain

# index

In [None]:
from langchain.document_loaders import PyPDFLoader
# Load the PDF and split it; `documents` is consumed by the FAISS indexing cell.
loader = PyPDFLoader("./nianbao/ylz2022.pdf")
documents = loader.load_and_split()

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import faiss
import pickle
# Embed `documents` and persist the store as two files: the FAISS index and a pickle.
index_name='ylz_nianbao'
embeddings = OpenAIEmbeddings()
store = FAISS.from_documents(documents, embeddings)
faiss.write_index(store.index, index_name)
# The index was written separately above, so detach it before pickling the store.
store.index = None
with open("ylz_nianbao.pkl","wb") as f:
    pickle.dump(store, f)
    

In [None]:
from langchain.llms import OpenAI
# NOTE(review): this rebinds the kernel-global `openai`, replacing the client built by get_llm.
openai = OpenAI(model_name="gpt-3.5-turbo")
from langchain.chains.question_answering import load_qa_chain
# Restore the store: read the FAISS index file, unpickle the store, re-attach the index.
index = faiss.read_index("ylz_nianbao")
# NOTE(review): pickle.load runs arbitrary code from the file; only load a pickle this notebook wrote.
with open("ylz_nianbao.pkl","rb") as f:
  docsearch = pickle.load(f)
  docsearch.index = index
query="公司全年亏损多少？什么原因？"
docs = docsearch.similarity_search(query, include_metadata=True)
print(len(docs))
# Answer from the retrieved documents with a "refine" question-answering chain.
chain = load_qa_chain(openai, chain_type="refine")
r = chain.run(input_documents=docs,question=f"以下用中文回答\n\n{query}")
# NOTE(review): the encode/decode round trip looks like a no-op for a valid str — confirm it can go.
print(r.encode('utf-8').decode('utf-8'))

In [None]:
from langchain.document_loaders import UnstructuredURLLoader
# Pages to fetch; the loaded result is kept in `data`.
urls = [
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023"
]
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

# agent

In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
# The tools are loaded with `openai`, while the agent itself is driven by `chat`.
tools = load_tools(["serpapi","llm-math"],llm=openai)
#agent = initialize_agent(tools, openai, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent = initialize_agent(tools, chat, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run("易联众300096昨天收盘多少钱?")

# memory

In [None]:
from langchain import ConversationChain
# No memory argument is passed, so the chain uses whatever memory ConversationChain provides by default.
conversation = ConversationChain(llm=stream_openai, verbose=True)
output = conversation.predict(input="Hi there!")
print(output)
# Second turn on the same chain object, asking about the first turn.
output = conversation.predict(input="what do I say just now?")
print(output)


In [None]:
from langchain.chains import ConversationChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.schema import messages_from_dict, messages_to_dict
# Seed a buffer memory with one user/AI exchange.
memory = ConversationBufferMemory(return_messages=True)
memory.chat_memory.add_user_message("你好")
memory.chat_memory.add_ai_message("你好，很高兴见到你")
print(memory.load_memory_variables({}))

# Serialize the stored messages to plain dicts.
dicts = messages_to_dict(memory.load_memory_variables({})["history"])
print(dicts)

# messages_from_dict gives back a list of messages, not a memory object, so
# wrap the list in a chat history and build a fresh memory around it before
# handing it to the chain.
restored_messages = messages_from_dict(dicts)
new_memory = ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=restored_messages))
conversation = ConversationChain(llm=stream_openai,memory=new_memory)
conversation.predict(input="介绍你自己")
# messages=[SystemMessage(content="随机增加几个emoji字符"),HumanMessage(content="如何理解langchain")]
# chat(messages)



In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
# We set a very low max_token_limit for the purposes of testing.
memory = ConversationSummaryBufferMemory(llm=openai, max_token_limit=40)
# The memory uses `openai`, while the conversation replies come from `stream_openai`.
conversation_with_summary = ConversationChain(
    llm=stream_openai, 
    memory=memory,
    verbose=True
)
conversation_with_summary.predict(input="猫和狗都是动物吗")
conversation_with_summary.predict(input="鸭子也是吗")
conversation_with_summary.predict(input="我刚才说了几种动物")
# Show what the memory holds after the three turns.
print(memory.load_memory_variables({}))


# 摘要

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import langchain.text_splitter
from langchain.chains.summarize import load_summarize_chain
from langchain.callbacks import get_openai_callback
def summarize_docs(docs,doc_url):
    """Summarize *docs* with a map_reduce summarize chain and return the text.

    Args:
        docs: documents to summarize; each must expose ``page_content``.
        doc_url: label for the source, used only in the progress messages.

    Returns:
        The summary produced by the chain, or "" when *docs* is empty.
    """
    print(f'you have {len(docs)} documents in your {doc_url} data')
    if not docs:
        # Nothing to summarize, and docs[0] below would raise IndexError.
        return ""
    print(f'there are {len(docs[0].page_content)} characters in your document')
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    split_docs = text_splitter.split_documents(docs)
    print(f'you have {len(split_docs)} split documents')
    llm = ChatOpenAI()
    chain = load_summarize_chain(llm,chain_type="map_reduce",verbose=True)
    # The callback object is printed after the run to report what it recorded.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=split_docs)
        print(cb)
    return response


In [None]:
from langchain.document_loaders import UnstructuredURLLoader
# Load one web page and pass its documents to summarize_docs.
url = "https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-docker"
summarize_docs(UnstructuredURLLoader(urls=[url]).load(),url)

# DB

In [None]:
from langchain import OpenAI,SQLDatabase,SQLDatabaseChain
# Connect to the local SQLite file and print its table info.
db = SQLDatabase.from_uri("sqlite:///address.db")
print(db.table_info)
# `openai` here is the client object bound in an earlier cell, not the OpenAI class imported above.
db_chain = SQLDatabaseChain(llm=openai,database=db,verbose=True)
db_chain.run("平均年龄是多少")

# QA with pinecone

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader,DirectoryLoader

# Load every HTML file under the working directory and split it into chunks.
#loader = TextLoader('langchainDoc')
loader = DirectoryLoader('./', glob="**/*.html", show_progress=True)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()

import pinecone
# initialize pinecone from the settings loaded into CONFIG by the first cell
pinecone.init(
    api_key=CONFIG['pinecone_api_key'],  # find at app.pinecone.io
    environment=CONFIG['pinecone_environment']  # next to api key in console
)
index_name = "langchain-demo"
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)
# if you already have an index, you can load it like this
# docsearch = Pinecone.from_existing_index(index_name, embeddings)
query = "What did the president say about Ketanji Brown Jackson"
docs = docsearch.similarity_search(query)
print(docs)

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from tqdm.autonotebook import tqdm
embeddings = OpenAIEmbeddings()

import pinecone
# initialize pinecone from the settings loaded into CONFIG by the first cell
pinecone.init(
    api_key=CONFIG['pinecone_api_key'],  # find at app.pinecone.io
    environment=CONFIG['pinecone_environment']  # next to api key in console
)
# Attach to the existing index rather than re-uploading documents.
index_name = "langchain-demo"
docsearch = Pinecone.from_existing_index(index_name, embeddings)
promptTemplate = PromptTemplate(input_variables=["query"], template="请用中文回答以下问题:\n{query}")
query = "load_qa_chain使用的是什么技术"
# Retrieval uses the raw query; the wrapped prompt goes only to the QA chain.
docs = docsearch.similarity_search(query)
#print(docs)

from langchain.chains.question_answering import load_qa_chain
chain = load_qa_chain(openai, chain_type="refine")
chain.run(input_documents=docs, question=promptTemplate.format(query=query))

# QA with FAISS

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader,DirectoryLoader
import os
import faiss
import pickle

from langchain.document_loaders import PyPDFDirectoryLoader


def load_train(name, dir="./", type="html"):
    """Return a FAISS vector store for *name*, building and caching it if needed.

    The cache consists of two files in the working directory: the FAISS index
    written to ``name`` and the pickled store, with its index detached,
    written to ``{name}.pkl``.

    Args:
        name: base name of the cache files.
        dir: folder scanned for source documents when the cache is built.
        type: "pdf" loads with PyPDFDirectoryLoader; any other value is used
            as the file extension in a recursive DirectoryLoader glob.

    Returns:
        The unpickled store with the FAISS index re-attached.
    """
    file = f"{name}.pkl"
    # Both files are needed to restore the store, so rebuild when either one
    # is missing rather than failing later in faiss.read_index.
    if not (os.path.isfile(file) and os.path.isfile(name)):
        if type == 'pdf':
            loader = PyPDFDirectoryLoader(dir)
        else:
            loader = DirectoryLoader(dir, glob=f"**/*.{type}", show_progress=True)
        documents = loader.load()
        print(f"find {len(documents)} documents")
        text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        docs = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        store = FAISS.from_documents(docs, embeddings)
        faiss.write_index(store.index, name)
        # The index is saved separately above; detach it before pickling.
        store.index = None
        with open(file, "wb") as f:
            pickle.dump(store, f)

    index = faiss.read_index(name)
    # NOTE: pickle.load executes code embedded in the file; only load cache
    # files that this notebook wrote itself.
    with open(file, "rb") as f:
        docsearch = pickle.load(f)
    docsearch.index = index
    return docsearch

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
# Named doc_dir/doc_type so the builtins dir() and type() are not rebound for
# the rest of the kernel session.
name="langchainDoc"
doc_dir="./python.langchain.com/"
doc_type="html"
docsearch = load_train(name,doc_dir,doc_type)
print(f"we have docsearch named:{name}")
query="who to use few shot？"
promptTemplate = PromptTemplate(input_variables=["query"],template="以下请用中文回答，尽量采用表格或列表回答\n{query}")
# Retrieval uses the raw query; the wrapped prompt goes only to the QA chain.
find_docs = docsearch.similarity_search(query, include_metadata=True)
chain = load_qa_chain(stream_openai, chain_type="refine")
r = chain.run(input_documents=find_docs,question=promptTemplate.format(query=query))
print(r)

In [None]:
# Pass the loader options by keyword: positionally, "pdf" lands in `dir` and
# leaves `type` at its "html" default.
# NOTE(review): "./nianbao/" is the folder another cell reads ylz2022.pdf from —
# confirm it is the intended source directory.
docsearch = load_train("ylz-nianbao", dir="./nianbao/", type="pdf")
#query="langchain有哪些模块，如何自定义tools，给出代码"
query="公司亏损多少，是什么原因？"
promptTemplate = PromptTemplate(input_variables=["query"],template="以下请用中文回答，尽量采用有条理的回答，尽量提供具体代码\n{query}")
docs = docsearch.similarity_search(query, include_metadata=True)
# `chain` is the QA chain left in the kernel by a previously executed cell.
r = chain.run(input_documents=docs,question=promptTemplate.format(query=query))
print(r)

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
# Named doc_dir/doc_type so the builtins dir() and type() are not rebound for
# the rest of the kernel session.
name="zhuanli"
doc_dir="./深度学习与卷积神经网络相关/"
doc_type="pdf"
docsearch = load_train(name,doc_dir,doc_type)
print(docsearch)
query="对这些专利做一个分类？"
promptTemplate = PromptTemplate(input_variables=["query"],template="以下请用中文回答，尽量采用表格或列表回答\n{query}")
# Retrieval uses the raw query; the wrapped prompt goes only to the QA chain.
find_docs = docsearch.similarity_search(query, include_metadata=True)
chain = load_qa_chain(stream_openai, chain_type="refine")
r = chain.run(input_documents=find_docs,question=promptTemplate.format(query=query))
print(r)

# Question Answering

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator

# Read the whole text file and split it into chunks.
with open("../../state_of_the_union.txt") as f:
    state_of_the_union = f.read()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

embeddings = OpenAIEmbeddings()

# Each chunk's metadata records its position as "source".
# NOTE(review): `docsearch` is bound to the retriever here, not the vector store — cells that call
# docsearch.as_retriever() or similarity_search() afterwards may need a different object; confirm.
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()

# API Chain

In [None]:
from langchain.chains.api.prompt import API_RESPONSE_PROMPT
from langchain.chains import APIChain
from langchain.prompts.prompt import PromptTemplate
from langchain.llms import OpenAI
# NOTE(review): `llm` is created here but the chain below is built with `chat` — confirm which is intended.
llm = OpenAI(temperature=0,model_name="gpt-3.5-turbo")

from langchain.chains.api import open_meteo_docs
# Build an API chain from the bundled Open-Meteo API docs text.
chain_new = APIChain.from_llm_and_api_docs(chat, open_meteo_docs.OPEN_METEO_DOCS, verbose=True)
chain_new.run('How is the weather in Xiamen，chain today?')

# streaming llm

In [None]:
from langchain.llms import OpenAI
from langchain.schema import (
    HumanMessage,
)
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# NOTE(review): this rebinds the kernel-global `chat` to an OpenAI completion wrapper,
# replacing the client that get_llm(name="chat") bound earlier — confirm that is intended.
chat = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], model_name="gpt-3.5-turbo", temperature=0.8)
# The callback handler receives the streamed output; the full text is also kept in `resp`.
resp = chat("写一首狗和猫的诗")

# LLMRequestsChain

In [None]:
from urllib.parse import quote_plus

from langchain.chains import LLMRequestsChain,LLMChain
from langchain.prompts import PromptTemplate

# The fetched page text is substituted for {requests_result} and the question for {query}.
template = """Between >>> and <<< are the raw search result text.
Extract the answer to the question '{query}' or say "not found" if the information is not contained.
Use the format
Extracted:<answer or "not found">
>>> {requests_result} <<<
Extracted:"""

promptObj = PromptTemplate(
    input_variables=["query", "requests_result"],
    template=template,
)
chain = LLMRequestsChain(llm_chain = LLMChain(llm=stream_openai, prompt=promptObj))
#question = "What are the Three (3) biggest countries, and their respective sizes?"
question = "langchain是什么，给一个python代码"
#url = "https://www.baidu.com/s?wd="
url = "https://www.google.com/search?q="
inputs = {
    "query": question,
    # quote_plus turns spaces into "+" and percent-encodes everything else
    # that is unsafe in a query string (&, #, ?, non-ASCII text).
    "url":  url + quote_plus(question)
}
chain(inputs)

# langchainHub （不建议，因为默认配置采用text-davinci-003，贵）

In [None]:
from langchain.chains import load_chain
# The "lc://" path names a chain definition to fetch instead of a local file.
chain = load_chain("lc://chains/llm-math/chain.json")
chain.run("5的立方根")

# output_parser
 'CommaSeparatedListOutputParser',
 'GuardrailsOutputParser',
 'ListOutputParser',
 'OutputFixingParser',
 'PydanticOutputParser',
 'RegexDictParser',
 'RegexParser',
 'ResponseSchema',
 'RetryOutputParser',
 'RetryWithErrorOutputParser',
 'StructuredOutputParser'

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser

# Attach the parser to the prompt so the chain can parse its own output.
output_parser = CommaSeparatedListOutputParser()
template = """列出所有彩虹的颜色"""
prompt = PromptTemplate(template=template, input_variables=[], output_parser=output_parser)
llm_chain = LLMChain(prompt=prompt, llm=openai)

# Print the plain prediction first, then the prediction passed through the prompt's parser.
print(llm_chain.predict())
print(llm_chain.predict_and_parse())

# multi pdf

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
# NOTE(review): relies on the kernel-global `docsearch` being a vector store; the query is about
# patents, so presumably the store built with name="zhuanli" — confirm which cell must run first.
qa = RetrievalQA.from_chain_type(llm=get_llm(name="openai"), chain_type="refine", retriever=docsearch.as_retriever())
query = "以下请用中文回答\n\n这些专利中哪些专利是最接近的?"
qa.run(query)

# agent for pandas

In [None]:
from langchain.agents import create_pandas_dataframe_agent
import pandas as pd
# Candidate data files:
#600029_order.csv
#600029_tick.csv
#600029_trade.csv
df = pd.read_csv('600029_order.csv')
# The agent is given `df` itself, so the instruction below refers to its columns.
agent = create_pandas_dataframe_agent(openai, df, verbose=True)
agent.run("增加一个列标记volume超过平均值")

In [None]:
df

In [None]:
print('hello')

# gradio

In [None]:
#!pip3 install gradio
import gradio as gr
import os

def upload_image(image_file):
    """Copy an uploaded file into the local ``uploads`` folder.

    Args:
        image_file: object whose ``name`` attribute is the path of the
            uploaded file on disk (presumably the temp-file object passed by
            Gradio's "file" input — confirm against the installed version).

    Returns:
        A fixed success message.
    """
    import shutil

    os.makedirs("uploads", exist_ok=True)
    # `name` may be a full path, so keep only the base name for the target.
    target = os.path.join("uploads", os.path.basename(image_file.name))
    shutil.copy(image_file.name, target)
    return "上传成功！"

# Close every Gradio site created so far to free the port
gr.close_all()
# Define the interface; the output is text because upload_image returns a
# status message, not an image.
iface = gr.Interface(upload_image,
                     inputs="file",
                     outputs="text",
                     title="上传图片网站",
                     description="上传图片并保存到本地uploads文件夹中。")
# Launch
iface.launch(share=True,server_port=8080)

In [31]:
gr.Audio??

In [19]:
import gradio as gr
import os
import numpy as np
from PIL import Image
def new_message(name, is_stream=False, tempeture=0):
    """Format a one-line summary of the given settings.

    Args:
        name: text placed at the start of the message.
        is_stream: truthy selects the word "stream", falsy "no stream".
            Optional so the function can be wired to a single input.
        tempeture: value appended at the end of the message. Optional.

    Returns:
        The string "<name> with <stream|no stream> and <tempeture>".
    """
    stream = "stream" if is_stream else "no stream"
    return f"{name} with {stream} and {tempeture}"
def change_image(image_array):
    """Convert an image array to grayscale.

    Args:
        image_array: array accepted by ``Image.fromarray``, or None.

    Returns:
        The array of the image converted to mode 'L', or None when
        *image_array* is None.
    """
    if image_array is None:
        # No input to convert; Image.fromarray would fail on None.
        return None
    bw_image = Image.fromarray(image_array).convert('L')
    return np.array(bw_image)
    #return np.fliplr(image_array)
# Close every Gradio site created so far to free the port
#gr.close_all()
# Define the interface: three tabs (form demo, Q&A demo, media demo)
with gr.Blocks() as block:
    with gr.Tab("计算"):
        name = gr.Textbox(label="姓名",placeholder="输入姓名")
        with gr.Row():
            isStream = gr.Checkbox(label="Is Stream?")
            tempeture = gr.Slider(label="温度",minimum=0,maximum=1,step=0.1)
        msg = gr.Textbox(label="输出的消息")
        with gr.Row() as row:
            gr.Button("计算").click(fn=new_message,inputs=[name,isStream,tempeture],outputs=[msg])
            gr.Button("关闭").click(fn=lambda : block.close())
    with gr.Tab("问答"):
        with gr.Row():
            gr.Markdown("# MarkDown \n ## Step1 提问 \n *划重点* \n ## Step2 回答 \n `hello world`")
            # The initial text is passed as `value`.
            question=gr.TextArea(label="文字编辑",value="hello\nworld")
            m1=gr.Markdown()
            gr.Button("生成").click(fn=new_message,inputs=[question],outputs=[m1])
    with gr.Tab("媒体"):
        with gr.Accordion(label="地址"):
            gr.HTML("<p style='color:red'>包括图片、声音、视频</p>")
            with gr.Column():
                with gr.Row():
                    image=gr.Image()
                    bw_image = gr.Image()
                gr.Button("change").click(fn=change_image,inputs=[image],outputs=[bw_image])
            with gr.Column():
                vedio=gr.Video()
                gr.Button("change").click(fn=lambda:print("ok"),inputs=[],outputs=[])
            audio=gr.Audio()

# Launch
block.launch(share=True,server_port=8080)

  super().__init__(


Running on local URL:  http://127.0.0.1:8080
Running on public URL: https://3762992fdac25f879e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




In [None]:
import os

# Fail fast when the key is missing instead of erroring inside the first LLM call.
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY must be set")

from langchain.agents import initialize_agent
from langchain.llms import OpenAI
from gradio_tools import (StableDiffusionTool, ImageCaptioningTool, StableDiffusionPromptGeneratorTool,
                          TextToVideoTool)

from langchain.memory import ConversationBufferMemory

llm = OpenAI(temperature=0)
# The conversation history is stored under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history")
# Each gradio_tools tool is passed to the agent through its `.langchain` attribute.
tools = [StableDiffusionTool().langchain, ImageCaptioningTool().langchain,
         StableDiffusionPromptGeneratorTool().langchain, TextToVideoTool().langchain]


agent = initialize_agent(tools, llm, memory=memory, agent="conversational-react-description", verbose=True)
# One request that asks for prompt improvement, image generation, captioning and a video.
output = agent.run(input=("Please create a photo of a dog riding a skateboard "
                          "but improve my prompt prior to using an image generator."
                          "Please caption the generated image and create a video for it using the improved prompt."))
