In [3]:
import os

In [4]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): the recorded output of this cell is False, which presumably
# means no .env values were loaded — confirm the file exists and is found.
load_dotenv()

False

In [1]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-1.0.1-py3-none-any.whl.metadata (2.1 kB)
Collecting langchain-core<2.0.0,>=1.0.3 (from langchain_huggingface)
  Downloading langchain_core-1.0.3-py3-none-any.whl.metadata (3.5 kB)
Downloading langchain_huggingface-1.0.1-py3-none-any.whl (27 kB)
Downloading langchain_core-1.0.3-py3-none-any.whl (469 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.9/469.9 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-core, langchain_huggingface
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.79
    Uninstalling langchain-core-0.3.79:
      Successfully uninstalled langchain-core-0.3.79
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you

With StructuredOutputParser we can define a schema that the LLM is instructed to follow in its response.



In [6]:
import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema


# Read the Hugging Face token from the environment instead of hard-coding it in
# the notebook; a missing variable raises KeyError here, before any request.
llm = HuggingFaceEndpoint(
    repo_id='google/gemma-2-2b-it',
    task='text-generation',
    huggingfacehub_api_token=os.environ['HUGGINGFACEHUB_API_TOKEN'],
)

model = ChatHuggingFace(llm=llm)

# One ResponseSchema per key expected in the parsed output.
schema = [
    ResponseSchema(name='fact_1', description='Fact 1 about the topic'),
    ResponseSchema(name='fact_2', description='Fact 2 about the topic'),
    ResponseSchema(name='fact_3', description='Fact 3 about the topic'),
]

parser = StructuredOutputParser.from_response_schemas(schema)

# `format_instruction` is pre-filled from the parser, so only `topic` has to be
# supplied when the chain is invoked.
template = PromptTemplate(
    template='Give 3 fact about {topic} \n {format_instruction}',
    input_variables=['topic'],
    partial_variables={'format_instruction':parser.get_format_instructions()}
)


# LCEL pipeline: prompt -> chat model -> parser.
chain = template | model | parser

result = chain.invoke({'topic':'Bermuda Triangle'})
print(result)

{'fact_1': 'The Bermuda Triangle is a loosely defined area in the western part of the North Atlantic ocean, roughly bounded by the southeastern United States, Bermuda, and Puerto Rico.', 'fact_2': 'A common misconception is that ships and aircraft mysteriously disappear within the Triangle.  However, the Bureau of Safety and Environmental Enforcement states that the disappearances within this region are no greater than rates in other areas of the ocean.', 'fact_3': 'The Bermuda Triangle is often associated with ghost stories, UFO sightings, and unexplained events. These myths have captivated the public imagination for decades.'}


- Each ResponseSchema defines a field you want in the output. In this case, we are asking for three facts: fact_1, fact_2, fact_3.

- StructuredOutputParser.from_response_schemas(schema) tells LangChain how to parse the model's response into the structured format (JSON with 3 keys: fact_1, fact_2, fact_3). parser.get_format_instructions() automatically generates the prompt text that tells the LLM how to structure its response.

- template: The text that will be sent to the LLM.
{topic}: Will be replaced by the topic you provide.
{format_instruction}: Will be replaced by parser instructions (to ensure JSON format output).
So the final prompt looks roughly like:
Give 3 fact about Bermuda Triangle
The output should be in JSON format with keys: fact_1, fact_2, fact_3.

- LangChain Expression Language (LCEL) pipeline:
template - formats the input prompt.
model - sends the prompt to the Hugging Face LLM.
parser - parses the raw output into structured data (dictionary).

- chain.invoke
Replaces {topic} with "Bermuda Triangle".
Sends the prompt → receives the model response → parses it into a Python dictionary.





---



---



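StrOutputParser takes the output from the LLM and returns it as a plain string. The cell below does the same extraction by hand, reading `.content` from each model response.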

In [11]:
import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate


# Read the Hugging Face token from the environment instead of hard-coding it in
# the notebook; a missing variable raises KeyError here, before any request.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
    huggingfacehub_api_token=os.environ['HUGGINGFACEHUB_API_TOKEN']
)

model = ChatHuggingFace(llm=llm)

# 1st prompt -> detailed report
template1 = PromptTemplate(
    template='Write a detailed report on {topic}',
    input_variables=['topic']
)

# 2nd prompt -> summary
# `\n` puts the report on its own line after the instruction ('/n' would be
# sent to the model as two literal characters).
template2 = PromptTemplate(
    template='Write a 5 line summary on the following text. \n {text}',
    input_variables=['text']
)

# Step 1: generate the detailed report.
prompt1 = template1.invoke({'topic':'United Kingdom'})

result = model.invoke(prompt1)

# Step 2: feed the report text (`.content` of the first response) into the
# summary prompt.
prompt2 = template2.invoke({'text':result.content})

result1 = model.invoke(prompt2)

print(result1.content)


## The United Kingdom: A Summary

1. **Powerhouse Nation:** The UK is a global power with a rich history, diverse cultures, and influential role in international affairs, spanning geographically across the British Isles.
2. **Past Glory and Innovation:** Its history boasts Roman and Anglo-Saxon influences, with robust periods in medieval and industrial history, emerging as a leader in education and technology.
3. **Modern Economy:** A thriving economy driven largely by the service sector, with London as a global financial hub, boasting strengths in fintech and technology.
4. **Diverse Demographics and Society:** Featuring diverse populations and multicultural influences, the UK prides itself on  a multi-faith, vibrant society.
5. **Challenges and Contentious Future:** Navigating post-Brexit impacts, facing economic uncertainties, and addressing social inequalities are challenges facing the UK moving forward. 







---



---



PydanticOutputParser converts the LLM response into well-structured, validated Python objects based on a defined schema.

In [15]:
import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field



# Define the model
# Read the Hugging Face token from the environment instead of hard-coding it in
# the notebook; a missing variable raises KeyError here, before any request.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
    huggingfacehub_api_token=os.environ['HUGGINGFACEHUB_API_TOKEN'])


model = ChatHuggingFace(llm=llm)

# Schema the parsed response must satisfy. Documented with comments rather than
# a class docstring so the schema text handed to the parser stays unchanged.
class Person(BaseModel):

    name:  str = Field(description = 'Name of the person')
    # gt=18 rejects any age that is not strictly greater than 18.
    age :  int = Field(gt=18, description = 'Age of the person')
    city:  str = Field(description = 'Name of the city the person belongs to')

parser = PydanticOutputParser(pydantic_object=Person)

# `format_instruction` is pre-filled from the parser, so only `place` has to be
# supplied when the chain is invoked.
template = PromptTemplate(
    template='Generate the name, age and city of a fictional {place} person \n {format_instruction}',
    input_variables=['place'],
    partial_variables={'format_instruction':parser.get_format_instructions()}
)

# LCEL pipeline: prompt -> chat model -> parser.
chain = template | model | parser

final_result = chain.invoke({'place':'Australia'})

print(final_result)


name='Thomas Sutherland' age=32 city='Perth'


- creating a structured data model called Person using Pydantic.
Each field has:
A type (str, int)
A description (used by LangChain to instruct the LLM)
Optional validation rules (like gt=18 ensures age > 18)
So this schema expects:
{
  "name": "Alice",
  "age": 25,
  "city": "Paris"
}

- PydanticOutputParser(pydantic_object=Person) tells LangChain that whatever the LLM outputs must fit this Person structure.
PydanticOutputParser also automatically generates formatting instructions that tell the model how to respond in the correct format.



---



---



JsonOutputParser converts the LLM response into a JSON object (returned in Python as a dict).

In [16]:
import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser


# Define the model
# Read the Hugging Face token from the environment instead of hard-coding it in
# the notebook; a missing variable raises KeyError here, before any request.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
    huggingfacehub_api_token=os.environ['HUGGINGFACEHUB_API_TOKEN']
)

model = ChatHuggingFace(llm=llm)

# No schema is passed to the parser, so the key names in the parsed result are
# whatever the model chooses to return.
parser = JsonOutputParser()

template = PromptTemplate(
    template='Give me 5 facts about {topic} \n {format_instruction}',
    input_variables=['topic'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
)

# LCEL pipeline: prompt -> chat model -> parser.
chain = template | model | parser

result = chain.invoke({'topic':'Rohit Sharma'})

print(result)



{'facts': ['Rohit Sharma is an Indian cricketer and former captain of the Indian national team.', 'He is renowned for his exceptional batting skills, particularly known for his aggressive strokeplay and ability to hit boundaries.', 'He holds the record for the fastest century in ODI cricket, achieving it in just 35 balls.', 'He is a three-time IPL (Indian Premier League) champion, winning with Mumbai Indians.', 'Named the best batsman in the world for the ICC World XI in 2019.']}


In [18]:
# Put each entry stored under the 'facts' key on its own line.
fact_lines = result['facts']
paragraph = '\n'.join(fact_lines)
print(paragraph)


Rohit Sharma is an Indian cricketer and former captain of the Indian national team.
He is renowned for his exceptional batting skills, particularly known for his aggressive strokeplay and ability to hit boundaries.
He holds the record for the fastest century in ODI cricket, achieving it in just 35 balls.
He is a three-time IPL (Indian Premier League) champion, winning with Mumbai Indians.
Named the best batsman in the world for the ICC World XI in 2019.
