## Expanding the content of your blog posts automatically with LangChain and Google Search

In [81]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate,HumanMessagePromptTemplate

##### `text_all` stores the full blog post, and `text_to_change` holds the passage we would like to expand upon

In [82]:
# Headline of the blog post being enriched.
title = "Importance of sleep in young kids"

# Complete body of the post, assembled from adjacent string literals.
text_all = (
    "Sleep is a very important part of your child’s mental and physical health "
    "because it allows your child’s mind and body to rest and recover. "
    "Your child’s brain needs sleep to restore resources that were used up during the day. "
    "A well-rested brain can solve problems, learn new information and enjoy the day "
    "a lot more than a tired brain. "
    "The recommended amount of sleep is influenced by your child’s age."
)

# The sentence (it is the closing sentence of text_all) that should be expanded.
text_to_change = "The recommended amount of sleep is influenced by your child’s age."

##### Create a prompt template for LangChain

In [86]:
# Prompt text for the enrichment step. It contains six placeholders that must be
# supplied when the prompt is formatted: {title}, {text_all}, {text_to_change},
# and the three retrieved passages {doc_1}, {doc_2}, {doc_3}.
template = """You are an exceptional copywriter and content creator.

You're reading an article with the following title:
----------------
{title}
----------------

You've just read the following piece of text from that article.
----------------
{text_all}
----------------

Inside that text, there's the following TEXT TO CONSIDER that you want to enrich with new details.
----------------
{text_to_change}
----------------

Searching around the web, you've found this ADDITIONAL INFORMATION from distinct articles.
----------------
{doc_1}
----------------
{doc_2}
----------------
{doc_3}
----------------

Modify the previous TEXT TO CONSIDER by enriching it with information from the previous ADDITIONAL INFORMATION.
"""

##### Create a human message prompt from the template, then build a chat prompt from that human message prompt

In [87]:
# Wrap the raw template text in a PromptTemplate that declares every placeholder
# appearing in `template`.
enrichment_prompt = PromptTemplate(
    template=template,
    input_variables=["text_to_change", "text_all", "title", "doc_1", "doc_2", "doc_3"],
)

# The whole prompt is sent as a single human message.
human_message_prompt = HumanMessagePromptTemplate(prompt=enrichment_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])

In [85]:
# Chat model that backs the chain; the sampling temperature is set to 1.
chatmodel = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=1,
)

# Chain that formats `chat_prompt_template` and sends it to `chatmodel`.
chain = LLMChain(
    prompt=chat_prompt_template,
    llm=chatmodel,
)

##### Fetch response that will contain the 3 search queries

In [68]:
import re

# NOTE(review): the prompt built earlier in this notebook declares doc_1/doc_2/doc_3
# as input variables, but only three inputs are supplied here. The recorded output
# (three search queries) suggests this cell was originally executed against a
# different, query-generating prompt -- confirm and restore that prompt so the
# notebook survives Restart & Run All.
response = chain.run({
    "text_to_change": text_to_change,
    "text_all": text_all,
    "title": title
})

# One query per non-empty response line. A leading list marker ("1.", "2)", "-",
# "*") is removed when present; a fixed two-character slice would instead mangle
# lines with no marker, a longer marker such as "10.", or "1. " (leading space
# kept), and would turn blank lines into empty queries.
list_marker = re.compile(r"^\s*(?:\d+[.)]|[-*])\s*")
cleaned_lines = (list_marker.sub("", line).strip() for line in response.split("\n"))
queries = [query for query in cleaned_lines if query]
print(queries)

['"Recommended amount of sleep for children by age"', '"Effects of sleep deprivation in young kids"', '"Tips for establishing a consistent sleep routine for children"']


##### To execute the queries, LangChain provides the `Tool` class and the `GoogleSearchAPIWrapper` utility

In [69]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper
from time import sleep 

# The wrapper needs the "GOOGLE_CSE_ID" and "GOOGLE_API_KEY" environment
# variables to be set before this cell is run.
search = GoogleSearchAPIWrapper()

# Number of results requested per query.
TOP_N_RESULTS = 5


def top_n_results(query):
    """Run `query` through the Google search wrapper, requesting TOP_N_RESULTS results."""
    hits = search.results(query, num_results=TOP_N_RESULTS)
    return hits


# Expose the search function as a LangChain tool.
tool = Tool(
    func=top_n_results,
    name="Google Search",
    description="Search Google for recent results.",
)

# Flat list of search hits (dicts with at least a "link") across all queries.
all_results = []

for position, query in enumerate(queries):
    # Pause only between consecutive requests; sleeping after the final query
    # just delays the notebook.
    if position > 0:
        sleep(5)

    print(f"querying..{query}")
    results = tool.run(query)
    print(results)

    # When a query has no hits the wrapper returns a placeholder entry without a
    # "link" key; keep only entries that can actually be downloaded later.
    all_results += [hit for hit in results if "link" in hit]

print(all_results)

querying.."Recommended amount of sleep for children by age"
[{'title': 'Parent Corner: How Much Sleep Do Kids Need? - Door County Pulse', 'link': 'https://doorcountypulse.com/parent-corner-how-much-sleep-do-kids-need/', 'snippet': 'Apr 5, 2023 ... ... recommended amount of sleep for children by age: • Infants younger than 1 year: 12-16 hours • Children 1-2 years old: 11-14 hours • Children\xa0...'}]
querying.."Effects of sleep deprivation in young kids"
[{'title': 'How Much Sleep Does a Toddler Need? | Mom.com', 'link': 'https://mom.com/toddler/how-much-sleep-does-a-toddler-need', 'snippet': 'Jul 22, 2019 ... The AAP also cites hypertension, obesity and depression as just some of the long-term negative effects of sleep deprivation in young kids.'}]
querying.."Tips for establishing a consistent sleep routine for children"
[{'title': 'How to establish a sleep routine for children with ASD? – MM Foam', 'link': 'https://mmfoam.com/blogs/news/how-to-establish-a-sleep-routine-for-children-wi

### Find the Most Relevant Results before sending articles to langchain to reduce token use

##### First, let's download the articles and store their content in memory

In [70]:
import newspaper

# Downloaded articles as {"url": ..., "text": ...} dicts; pages that fail to
# download/parse or that yield no text are left out.
pages_content = []

for result in all_results:
    url = result.get("link")
    if not url:
        # A search entry without a URL cannot be downloaded.
        continue

    try:
        article = newspaper.Article(url)
        article.download()
        article.parse()
    except Exception as exc:
        # Best-effort download: skip the page but say why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through,
        # so the cell can still be interrupted.
        print(f"Skipping {url}: {exc}")
        continue

    if len(article.text) > 0:
        pages_content.append({"url": url, "text": article.text})

print("Number of pages: ", len(pages_content))

Number of pages:  2


##### Split the saved contents into smaller chunks

In [71]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Splitter configured for chunks of 3000 characters with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=100)

# One Document per chunk, each tagged with the URL of the page it came from.
docs = [
    Document(page_content=piece, metadata={"source": page["url"]})
    for page in pages_content
    for piece in text_splitter.split_text(page["text"])
]

print("Number of chunks: ", len(docs))

Number of chunks:  5


##### Create embeddings to convert the texts into vector space 

In [72]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Embed the text of every chunk, passed in `docs` order.
chunk_texts = [chunk.page_content for chunk in docs]
docs_embeddings = embeddings.embed_documents(chunk_texts)

# Embed the sentence we want to enrich; it serves as the query vector.
query_embedding = embeddings.embed_query(text_to_change)

##### Cosine similarity metric between document embeddings and the desired text that needs to be expanded

In [77]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_top_k_indices(list_of_doc_vectors, query_vector, top_k):
    """Return the indices of the `top_k` document vectors most similar to the query.

    Similarity is the cosine similarity between `query_vector` and each row of
    `list_of_doc_vectors`.

    Args:
        list_of_doc_vectors: sequence of equal-length numeric vectors, one per document.
        query_vector: numeric vector with the same length as each document vector.
        top_k: maximum number of indices to return.

    Returns:
        A 1-D numpy integer array of document indices ordered from most to least
        similar, with at most `top_k` entries. The array is empty when there are
        no document vectors or when `top_k` is not positive.
    """
    # convert the lists of vectors to numpy arrays
    doc_matrix = np.array(list_of_doc_vectors)
    query_arr = np.array(query_vector)

    # Nothing to rank: avoid an opaque shape error from the similarity routine on
    # empty input, and avoid negative-slice surprises for a non-positive top_k.
    if doc_matrix.size == 0 or top_k <= 0:
        return np.array([], dtype=np.intp)

    # compute cosine similarities (the query becomes a single-row matrix)
    similarities = cosine_similarity(query_arr.reshape(1, -1), doc_matrix).flatten()

    # argsort is ascending, so reverse it to get most-similar-first
    sorted_indices = np.argsort(similarities)[::-1]

    # retrieve the top K indices from the sorted list
    return sorted_indices[:top_k]

top_k = 3
best_indexes = get_top_k_indices(docs_embeddings, query_embedding, top_k)

# Index `docs` in the order returned by get_top_k_indices so the ranking is kept:
# best_k_documents[0] is the most similar chunk. Filtering enumerate(docs) by
# membership would return the chunks in their original document order instead.
best_k_documents = [docs[i] for i in best_indexes]

print(best_k_documents[0].page_content)

Art by Andrew Kleidon

by KAREN COREKIN-DeLaMER, Education and Community Relations Coordinator, Northern Door Children’s Center

Dear Karen,

I have three children. My oldest is a 14-year-old girl; the middle one is a 7-year-old boy; and my youngest is a 4-year-old boy. They are great kids, and I don’t have too many complaints.

Our biggest issue lately is sleep. We let our oldest stay up later because she has more homework to do. She usually goes to bed about 9:30 pm on school nights. We try to get the younger two in bed by 8 pm, although it normally ends up closer to 8:30 pm most nights. We don’t have any scheduled bedtime for the weekends, and the older two sleep pretty late on weekend mornings.

The problem is getting them up in the morning on school days. We start waking them up about 6 am. They are all hard to wake, especially our teenager. She has to be on the bus by a little before 7 am, and she always seems exhausted.

Here are my questions: How much sleep do children really n

##### Generate final response

In [89]:
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.9)
chain = LLMChain(llm=chat, prompt=chat_prompt_template)

# The prompt has exactly three document slots. When fewer than three chunks were
# retrieved, fill the remaining slots with empty strings rather than failing
# with an IndexError.
doc_texts = [doc.page_content for doc in best_k_documents[:3]]
doc_texts += [""] * (3 - len(doc_texts))

response = chain.run({
    "text_to_change": text_to_change,
    "text_all": text_all,
    "title": title,
    "doc_1": doc_texts[0],
    "doc_2": doc_texts[1],
    "doc_3": doc_texts[2]
})

print("Text to Change: ", text_to_change)
print("Expanded Variation:", response)

Text to Change:  The recommended amount of sleep is influenced by your child’s age.
Expanded Variation: The recommended amount of sleep is influenced by your child's age. According to Dr. Rachel Dawkins from Johns Hopkins All Children's Hospital, here are the guidelines for the recommended amount of sleep for children based on their age:

- Infants younger than 1 year: 12-16 hours
- Children 1-2 years old: 11-14 hours
- Children 3-5 years old: 10-13 hours
- Children 6-12 years old: 9-12 hours
- Teenagers 13-18 years old: 8-10 hours

It's important to note that each child is different, and some may need more or less sleep than the recommended guidelines. In addition to getting enough sleep, it's also beneficial to have a consistent sleep schedule for your children, even on the weekends. While it's okay to stay up an hour later and sleep in an hour later, maintaining a regular sleep routine helps promote quality sleep.

If your child is having trouble going to sleep or staying asleep, it