# LangChain

In [None]:
# pip install openai
# pip install langchain

In [1]:
import openai
import os
import getpass

In [2]:
# Set up the OpenAI API key
os.environ["OPENAI_API_KEY"] = getpass.getpass()

 ········


In [42]:
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI(model="gpt-4o")
output = chat_model.invoke("What is LangChain?")
print(output.content)

LangChain is a versatile framework designed to facilitate the development of applications that leverage large language models (LLMs). It offers a comprehensive toolkit for various components such as prompt management, memory, and interaction with external data sources, making it particularly useful for complex language tasks. LangChain also supports agents capable of making decisions and taking actions, enhancing the functionality of LLMs in diverse applications like chatbots, content generation, and data analysis. The framework is modular and can be integrated with other technologies, providing a flexible and powerful solution for developers working with language models.


In [43]:
from langchain_core.prompts import ChatPromptTemplate

In [44]:
# Create a new prompt template
prompt = ChatPromptTemplate.from_template(
    """List 3 benefits of using this technology: {technology}"""
)

# Format the prompt with a specific technology
formatted_prompt = prompt.format(technology="blockchain")

# Get the response from the LLM
response = llm(formatted_prompt)

# Print out the response
print(response.content)


  warn_deprecated(


1. Transparency and security: Blockchain technology provides a secure and transparent way to record transactions, making it difficult for data to be altered or tampered with. This can help prevent fraud and increase trust among users.

2. Decentralization: Blockchain technology operates on a decentralized network, meaning that there is no central authority controlling the data. This can lead to increased efficiency, reduced costs, and greater accessibility for users.

3. Improved traceability and accountability: The immutable nature of blockchain technology allows for a clear and auditable record of transactions. This can be particularly useful in industries such as supply chain management, where tracking the origin and movement of goods is crucial.


In [45]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [46]:
# Initialize the ChatOpenAI model
model = ChatOpenAI(temperature=0)

# Define the desired data structure for the product review
class ProductReview(BaseModel):
    product_name: str = Field(description="The name of the product being reviewed")
    rating: int = Field(description="The rating given to the product, out of 5")
    review_text: str = Field(description="The text of the review")
    pros: str = Field(description="The positive aspects mentioned in the review")
    cons: str = Field(description="The negative aspects mentioned in the review")

# Set up the JSON output parser with the custom data structure
parser = JsonOutputParser(pydantic_object=ProductReview)

# Create a prompt template with instructions for the LLM
prompt = PromptTemplate(
    template="Provide a detailed product review based on the user's input.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Define the query for the LLM to generate a product review
review_query = "Review the latest smartphone model XYZ."

# Chain the prompt, model, and parser together
chain = prompt | model | parser

# Invoke the chain with the query to generate and parse the output
parsed_review = chain.invoke({"query": review_query})

# Print the parsed review in the structured JSON format
print(parsed_review)



{'product_name': 'XYZ Smartphone', 'rating': 4, 'review_text': 'I recently purchased the XYZ Smartphone and I am very impressed with its performance and features. The camera quality is excellent and the battery life is impressive. The design is sleek and modern, making it a stylish choice. However, I did notice that the fingerprint sensor can be a bit slow at times.', 'pros': 'Excellent camera quality, impressive battery life, sleek design', 'cons': 'Fingerprint sensor can be slow at times'}


In [7]:
# pip install tabulate

In [16]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from tabulate import tabulate

# Load the CSV file, using the 'Book Title' column as the document source
loader = CSVLoader(file_path='books_output.csv', source_column="Book Title")

data = loader.load()

# Prepare data for tabulate
table_data = []
headers = ["Book Title"]

for document in data:
    # Use the 'Book Title' as the content to display
    table_data.append([document.metadata['source']])

# Print the loaded documents as a table
print(tabulate(table_data, headers=headers, tablefmt="grid"))



+-----------------------+
| Book Title            |
| The Hobbit            |
+-----------------------+
| 1984                  |
+-----------------------+
| To Kill a Mockingbird |
+-----------------------+
| The Great Gatsby      |
+-----------------------+
| Moby-Dick             |
+-----------------------+


In [14]:
# pip install lxml

In [18]:
from langchain_text_splitters import HTMLHeaderTextSplitter

# Read the HTML content from a file
with open('sample_document.html', 'r', encoding='utf-8') as file:
    html_string = file.read()

# Define headers to split on
headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
]

# Initialize the HTMLHeaderTextSplitter with the headers
html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# Split the HTML content based on the defined headers
html_header_splits = html_splitter.split_text(html_string)

# Output the splits
print(html_header_splits)

[Document(page_content='Introduction to Web Development'), Document(metadata={'Header 1': 'Introduction to Web Development'}, page_content='Welcome to the basics of web development. This section covers fundamental concepts.  \nHTML Basics Tags and Elements Attributes'), Document(metadata={'Header 1': 'Introduction to Web Development', 'Header 2': 'HTML Basics'}, page_content='HTML stands for HyperText Markup Language and is the standard language for creating web pages.'), Document(metadata={'Header 1': 'Introduction to Web Development', 'Header 2': 'HTML Basics', 'Header 3': 'Tags and Elements'}, page_content='HTML documents are made up of elements defined by tags, such as <h1> for headers.'), Document(metadata={'Header 1': 'Introduction to Web Development', 'Header 2': 'HTML Basics', 'Header 3': 'Attributes'}, page_content='Attributes provide additional information about HTML elements, like class or id.'), Document(metadata={'Header 1': 'Introduction to Web Development'}, page_content

In [19]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain


In [20]:
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content="You are a career coach chatbot, helping users with their career goals."
        ),  # The persistent system prompt
        MessagesPlaceholder(
            variable_name="chat_history"
        ),  # Where the memory will be stored
        HumanMessagePromptTemplate.from_template(
            "{human_input}"
        ),  # Where the human input will be injected
    ]
)



In [21]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

llm = ChatOpenAI()


In [22]:
chat_llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True,
    memory=memory,
)


  warn_deprecated(


In [23]:
response = chat_llm_chain.predict(human_input="I'm thinking about switching careers to data science.")
print(response)

response = chat_llm_chain.predict(human_input="What skills do I need to develop for a data science role?")
print(response)

response = chat_llm_chain.predict(human_input="I have started learning Python. What should I focus on next?")

print(response)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a career coach chatbot, helping users with their career goals.
Human: I'm thinking about switching careers to data science.[0m

[1m> Finished chain.[0m
That's great to hear! Data science is a rapidly growing field with a lot of opportunities. What is your current background and experience, and what specifically attracts you to data science?


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a career coach chatbot, helping users with their career goals.
Human: I'm thinking about switching careers to data science.
AI: That's great to hear! Data science is a rapidly growing field with a lot of opportunities. What is your current background and experience, and what specifically attracts you to data science?
Human: What skills do I need to develop for a data science role?[0m

[1m> Finished chain.[0m
To succeed in a data science role, you will n

In [3]:
from langchain import PromptTemplate
from langchain.tools import StructuredTool
from langchain_openai import ChatOpenAI
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import DuckDuckGoSearchResults
from pydantic import BaseModel

In [4]:

# Define the Task Status Retriever Tool
class TaskStatusRetriever:
    def __init__(self, task_database):
        self.task_database = task_database

    def get_status(self, task_id: str) -> str:
        return self.task_database.get(task_id, "Task not found")

task_database = {
    "task1": "In Progress",
    "task2": "Completed",
    "task3": "Not Started",
}

task_status_retriever = TaskStatusRetriever(task_database)

def task_status_lookup(task_id: str) -> str:
    return task_status_retriever.get_status(task_id)


In [5]:

# Define the input schema for TaskStatusLookup
class TaskStatusInput(BaseModel):
    task_id: str

task_status_tool = StructuredTool.from_function(
    name="TaskStatusLookup",
    description="Look up the status of a task by its ID.",
    func=task_status_lookup,
    args_schema=TaskStatusInput
)

In [6]:
# Define the Online Documentation Search Tool
duckduckgo_search = DuckDuckGoSearchResults()

def search_docs(query: str) -> str:
    return duckduckgo_search.run(query)

In [7]:


# Define the input schema for DocsSearch
class DocsSearchInput(BaseModel):
    query: str

docs_search_tool = StructuredTool.from_function(
    name="DocsSearch",
    description="Search for relevant documentation online.",
    func=search_docs,
    args_schema=DocsSearchInput
)

In [8]:
# Create the Tools List
tools = [task_status_tool, docs_search_tool]

In [9]:
# Define the LLM and Prompt
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

prompt_template = """
You are a project assistant. You can look up the status of tasks in a project management system and search for relevant documentation online. 
Respond based on the user's input using the appropriate tools.

User's input: {input}

{agent_scratchpad}
"""
prompt = PromptTemplate.from_template(prompt_template)

In [10]:
# Create the Agent
agent = create_tool_calling_agent(llm, tools, prompt)

# Create the Agent Executor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Test the Agent with Example Inputs
response = agent_executor.invoke({"input": "What's the status of task1?"})
print(response['output'])

response = agent_executor.invoke({"input": "Find documentation on project management best practices"})
print(response['output'])





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `TaskStatusLookup` with `{'task_id': 'task1'}`


[0m[36;1m[1;3mIn Progress[0m[32;1m[1;3mThe status of task1 is "In Progress".[0m

[1m> Finished chain.[0m
The status of task1 is "In Progress".


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DocsSearch` with `{'query': 'project management best practices'}`


[0m[33;1m[1;3m[snippet: Learn about the top project management techniques, such as WBS, CPM, Waterfall, Scrum, Kanban, Gantt and PERT. Compare their advantages, disadvantages and applications for different types of projects., title: 7 Essential Project Management Techniques In 2024 - Forbes, link: https://www.forbes.com/advisor/business/project-management-techniques/], [snippet: Learn how to plan, organize, manage, and lead projects effectively with this guide. It covers topics such as business case, kickoff meeting, project blueprint, scope, quality, communication, feedback, 