In [None]:
pip install langchain==0.0.305

In [None]:
pip install openai==0.28.1

In [None]:
from typing import List

from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.callbacks import StdOutCallbackHandler
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator

import os

# Credentials and endpoint are configured through environment variables.
# NOTE(review): "YOUR API KEY" is a placeholder to be replaced before running; this
# assignment also overwrites any OPENAI_API_KEY already present in the environment.
os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
# NOTE(review): presumably points the OpenAI client at a Vocareum-hosted proxy — confirm.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"


# Later cells reference `model_name` and `temperature` again, so values set here
# carry over until a cell rebinds them.
model_name = "text-davinci-003"
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)



# Schema handed to PydanticOutputParser below. Documented with `#` comments rather
# than a class docstring so that nothing extra is added to the class itself.
class Actor(BaseModel):
    # A single actor's name.
    name: str = Field(description="name of an actor")
    # Film titles, one string per film.
    film_names: List[str] = Field(description="list of names of films they starred in")


# Request that fills the {query} slot of the prompt.
actor_query = "Generate the filmography for a random actor."

# The parser provides the format instructions for the prompt and later turns the
# raw completion into an Actor instance.
parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

chain = LLMChain(prompt=prompt, llm=model, verbose=True)
output = chain.run(actor_query)

# Last expression of the cell, so the parsed result is displayed as the cell output.
parser.parse(output)

In [None]:
from langchain.llms import OpenAI

# Completion-style model used for this example.
completion_model_name = "gpt-3.5-turbo-instruct"
temperature = 0.0
# Fix: pass `completion_model_name`. The original passed the unrelated global
# `model_name`, so the model selected on the line above was never used.
completion_llm = OpenAI(model_name=completion_model_name, temperature=temperature, max_tokens=100)

# Last expression of the cell: the completion text is shown as the cell output.
completion_llm("You're a whimsical tour guide to France. Paris is a ")


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)


# Chat model used for this example.
chat_model_name = "gpt-3.5-turbo"
temperature = 0.0
# Fix: pass `chat_model_name`. The original passed the global `model_name`, which at
# this point in a top-to-bottom run still names a completion model, not a chat model.
chat_llm = ChatOpenAI(model_name=chat_model_name, temperature=temperature, max_tokens=100)

# A system message sets the persona; the human message carries the actual request.
messages = [
    SystemMessage(content="You are a French tour guide"),
    HumanMessage(content="Describe Paris in a whimsical style")
]


# Last expression of the cell: the model's reply message is shown as the cell output.
chat_llm(messages)

In [None]:
from langchain.llms import OpenAI

# Rebinds the notebook-wide `model_name`; later cells that pass `model_name` without
# reassigning it pick up this value.
model_name = "gpt-3.5-turbo"
temperature = 0.0
# NOTE(review): "gpt-3.5-turbo" is a chat model name, yet it is given to the
# completion-style `OpenAI` wrapper — confirm this is intended rather than ChatOpenAI.
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 500)

# Single prompt in, text out; the reply is printed as plain text.
output = llm("What is Paris?")
print(output)

In [None]:
from langchain.prompts import PromptTemplate
from langchain import LLMChain

# Higher temperature than the earlier cells; `model_name` is whatever a previous cell set.
temperature = 0.8
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens=500)

# Template text is reproduced verbatim; {story_type} and {context} are filled per call.
prompt_template = PromptTemplate.from_template(
    """Act as Marvin, a robot from Douglas Adams' Hithiker Guide. 
       Tell me a {story_type} about the person described in context below.
       Context: {context}"""
)
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

print("====OUTPUT=====\n")
chain_inputs = {
    "story_type": "funny joke",
    "context": "I'm a software engineer learning to use large language models",
}
# Calling the chain with a dict returns a dict, which is printed as-is.
output = llm_chain(chain_inputs)
print(output)

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate

# Deterministic settings (temperature 0) for the baseline and the few-shot run below.
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 500)

# https://browse.arxiv.org/pdf/2303.12712.pdf

# Word problem asked twice in this cell: first on its own (here), then again
# appended to the few-shot prompt built further down.
instruction = """
Andy harvests all the tomatoes from 18 plants that have 7 tomatoes each. If he dries half the
tomatoes and turns a third of the remainder into marinara sauce, how many tomatoes are left?
"""
# Baseline: the model's answer without any worked examples.
print("Original Answer: ")
print(llm(instruction))

# Worked examples for the few-shot prompt. Each answer walks through the arithmetic
# step by step so the model can imitate the reasoning pattern.
# Fixes relative to the original text: the first answer referred to "Gloria" although
# the question is about Karen; the second question misspelled "strawberries"; and the
# second answer concluded with "pears" although the question is about strawberries.
# Inconsistent examples weaken the demonstration the model is asked to copy.
question1 = """
Karen harvests all the pears from 20 trees that have 10 pears each. She throws a third of them away as they are rotten,
and turns a quarter of the remaining ones into jam. How many are left?
"""
answer1 = """
    First, let's calculate how many pears Karen harvests: it's 20 * 10 = 200.
    Then, let's calculate how many are rotten: 200 * 1/3 = 66.
    Thus, we know how many are left after she throws a third of them away: 200 - 66 = 134.
    1/4 of the remaining ones are turned into jam, or 134 * 1/4 = 33. Therefore, Karen is left with 134 - 33, or 101 pears
"""
question2 = """
Sergei harvests all the strawberries from 50 plants that have 8 strawberries each. He freezes a quarter of them,
and turns half of the remaining ones into jam. How many are left?
"""
answer2 = """
    First, let's calculate how many strawberries Sergei harvests: it's 50 * 8 = 400.
    Then, let's calculate how many are frozen: 400 * 1/4 = 100.
    Thus, we know how many are left after he freezes 100 of them: 400 - 100 = 300.
    half of the remaining ones are turned into jam, or 300 * 1/2 = 150. Therefore, Sergei is left with 300 - 150, or 150 strawberries
"""


# Each worked example is rendered as its question, a newline, then its answer.
example_prompt = PromptTemplate(template="{question}\n{answer}", input_variables=["question", "answer"])
examples = [
    {"question": worked_question, "answer": worked_answer}
    for worked_question, worked_answer in ((question1, answer1), (question2, answer2))
]

# The examples come first; the suffix then introduces the problem to solve.
cot_prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    input_variables=["input"],
    suffix="Use these questions and answers to give correct response to the problem below: {input}",
)

cot_text = cot_prompt.format(input=instruction)
print("=== Chain of Thought Prompt ===", cot_text, sep="\n")
print("=== Chain of Thought Answer ===")
cot_answer = llm(cot_text)
print(cot_answer)

In [None]:
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens=4000)

# Prompt asking for synthetic CSV reviews; {num_reviews} is the count per TV.
data_gen_template = """
generate csv formatted reviews for two different imaginary TVs. come up with a name for each one. 
for each tv, generate {num_reviews} reviews, with random number of positive and negatives reviews. 
each review will have these fields in the csv: tv name, review title, review rating (1-10), review text
be creative in your reviews, amaze us, csv format is a must.  
"""
data_gen_prompt = PromptTemplate.from_template(data_gen_template)

# The completion is only printed here; this cell does not write any file.
rendered_request = data_gen_prompt.format(num_reviews=10)
print(llm(rendered_request))

In [None]:
!pip install duckdb

In [None]:
from langchain.document_loaders import DuckDBLoader
from langchain.document_loaders.csv_loader import CSVLoader

# Load the reviews file from the working directory.
# NOTE(review): no cell shown here writes ./tv-reviews.csv — it presumably has to be
# created by hand (e.g. from the generation cell's printed output); confirm.
loader = CSVLoader(file_path='./tv-reviews.csv')
data = loader.load()

# Alternative loader kept for reference (disabled): selects content and metadata
# columns explicitly through a DuckDB query over the same CSV.
#loader = DuckDBLoader("SELECT * FROM read_csv_auto('tv-reviews.csv')",
#                        page_content_columns=["Review Title", "Review Text"],
#                        metadata_columns=["TV Name", "Review Rating"])
#data = loader.load()

# Prints every loaded document, not a sample.
print(data)



In [None]:
from langchain.output_parsers import PydanticOutputParser
# Fix: take the pydantic names from langchain.pydantic_v1, as the first cell of this
# notebook does. Importing them from `pydantic` directly yields a different BaseModel
# class than the one PydanticOutputParser is built on when pydantic v2 is installed.
from langchain.pydantic_v1 import BaseModel, Field, validator, NonNegativeInt
from typing import List
from random import sample

model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, temperature=0)

# Shape of the reply requested from the model: two lists of zero-based indices into
# the reviews placed in the prompt. NonNegativeInt rejects negative indices at parse time.
class ReviewSentiment(BaseModel):
    positives: List[NonNegativeInt] = Field(description="index of a positive TV review, starting from 0")
    negatives: List[NonNegativeInt] = Field(description="index of a negative TV review, starting from 0")

parser = PydanticOutputParser(pydantic_object=ReviewSentiment)
print(parser.get_format_instructions())

# Prompt layout: instructions, then the parser's format spec, then the reviews.
prompt = PromptTemplate(
    input_variables=["question", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="{question}\n{format_instructions}\nContext: {context}",
)
question = """
    Review TVs provided in the context. 
    Only use the reviews provided in this context, do not make up new reviews or use any existing information you know about these TVs. 
    If there are no positive or negative reviews, output an empty JSON array. 
"""

# Three reviews drawn at random; their position in this list is the index the
# model is asked to report back.
reviews_to_classify = sample(data, 3)
review_texts = [review.page_content for review in reviews_to_classify]
context = '\n'.join(review_texts)

query = prompt.format(question=question, context=context)
print(query)
output = llm(query)
print(output)
result = parser.parse(output)
print(result)

# Map the returned indices back to review text, positives first.
positive_texts = [reviews_to_classify[idx].page_content for idx in result.positives]
print("Positives:\n" + "\n".join(positive_texts))
negative_texts = [reviews_to_classify[idx].page_content for idx in result.negatives]
print("Negatives:\n" + "\n".join(negative_texts))

In [22]:
from langchain import LLMChain
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain

# Same classification as the previous cell, but driven through load_qa_chain.
# This cell relies on names created by earlier cells: `parser`, `llm`,
# `reviews_to_classify`, `question` and `PromptTemplate`. Running it on a fresh
# kernel without those cells fails with a NameError, as the recorded output shows.
# The prompt below is identical to the one built in the previous cell.
prompt = PromptTemplate(
    template="{question}\n{format_instructions}\nContext: {context}",
    input_variables=["question", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
# The documents are passed as `input_documents`; the chain's text output is parsed
# with the same parser used above.
chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
result = parser.parse(chain.run(input_documents=reviews_to_classify, question=question))

print(result)

NameError: name 'parser' is not defined

In [None]:
from langchain.document_loaders import DuckDBLoader
from langchain.document_loaders.csv_loader import CSVLoader

# Reload the reviews; `loader` is also reused by the VectorstoreIndexCreator cell below.
loader = CSVLoader(file_path='./tv-reviews.csv')
docs = loader.load()

# print(docs)

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, temperature=0, max_tokens=2000)

# Split the loaded documents into chunks of at most 1000 characters, no overlap.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

# NOTE(review): the `pip install chromadb` / `pip install tiktoken` cells appear
# after this cell in the notebook — presumably they must run first on a fresh
# environment; confirm and reorder.
search = Chroma.from_documents(split_docs, embeddings)

# "stuff" chain type: retrieved documents are placed into a single prompt.
rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=search.as_retriever())

query = """
    Based on the reviews in the context, tell me what people liked about the picture quality.  
    """
print(rag.run(query))


In [None]:
pip install chromadb

In [None]:
pip install tiktoken

In [None]:
from langchain.indexes import VectorstoreIndexCreator

# Builds an index straight from `loader`, which must already exist from an earlier cell.
index = VectorstoreIndexCreator().from_loaders([loader])

query = "Based on the reviews in the context, tell me what people liked about the picture quality"
# Last expression of the cell: the query result is shown as the cell output.
index.query_with_sources(query, verbose = True)



In [None]:
pip install google-search-results

In [None]:
import os


def read_secret(name):
    """Return the API key stored in the environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value with surrounding whitespace removed, or an empty
        string when it is unset or blank. A warning is printed in that case so
        the cell still runs and the missing key is visible before the first
        search call fails.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        print(f"WARNING: environment variable {name} is not set; calls that need it will fail.")
    return value


# Keys are read from the environment instead of being written into the notebook.
# The literal keys that used to be stored in this cell were exposed to anyone with
# access to the file and should be revoked and reissued.
SERPAPI_API_KEY = read_secret("SERPAPI_API_KEY")
SERPER_API_KEY = read_secret("SERPER_API_KEY")

In [None]:

from langchain.utilities import SerpAPIWrapper
# Rebinds `search` (an earlier cell used this name for the Chroma store).
search = SerpAPIWrapper(serpapi_api_key=SERPAPI_API_KEY)

# Last expression of the cell: the search result is shown as the cell output.
search.run("What movies are playing right now in Austin Tx")

In [None]:
from serpapi import GoogleSearch

# Parameters for a direct search through the serpapi client; both the query text
# and the location use the string "78735".
params = dict(
    q="movies playing near 78735",
    location="78735",
    hl="en",
    gl="us",
    api_key=SERPAPI_API_KEY,
)

search = GoogleSearch(params)
# The response is fetched as a dictionary and printed in full.
results = search.get_dict()
print(results)

In [None]:
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.llms.openai import OpenAI
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

import os

# GoogleSerperAPIWrapper() is constructed without arguments below, so the key is
# supplied through the environment instead.
os.environ["SERPER_API_KEY"] = SERPER_API_KEY

llm = OpenAI(temperature=0)
search = GoogleSerperAPIWrapper()
# A single search tool is exposed to the agent.
# NOTE(review): the tool name "Intermediate Answer" is presumably required by the
# self-ask-with-search agent type — confirm before renaming.
tools = [
    Tool(
        name="Intermediate Answer",
        func=search.run,
        description="useful for when you need to ask with search"
    )
]

self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)
# Last expression of the cell: the agent's final answer is shown as the cell output.
self_ask_with_search.run("Which movies are playing now near 78735")

In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)

# NOTE(review): the "google-search" tool presumably needs Google API credentials that
# no cell in this notebook sets (only SerpAPI/Serper keys are defined) — confirm.
tools = load_tools(["google-search"], llm=llm)

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

# Last expression of the cell: the agent's final answer is shown as the cell output.
agent.run("What is the weather in Pomfret?")

In [None]:
pip install wikipedia

In [None]:
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

# Wikipedia lookup tool built on the default WikipediaAPIWrapper settings.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# Prints whatever text the tool returns for this query.
print(wikipedia.run("Barbie film"))

In [None]:
pip install wikipedia-api

In [None]:
import requests


def get_movie_plot(movie_name, timeout=10):
    """Look up a film on English Wikipedia and return its overview and plot.

    The function searches Wikipedia for ``movie_name``, walks the top five hits and
    uses the first one that looks like a film article (a category containing
    "films", or an ``{{Infobox film`` template in the page wikitext). For that page
    it downloads the plain-text extract and returns the first paragraph together
    with the "== Plot ==" section.

    Args:
        movie_name: Free-text title to search for.
        timeout: Seconds to wait for each HTTP request. Optional; added so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        ``"Overview:\\n<first paragraph>\\nPlot:\\n<plot text>"`` on success;
        ``"Movie not found."`` when the title is blank, the search response has no
        results, or no hit looks like a film page; ``"Error fetching plot."`` when
        the extract response does not have the expected structure.

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts) and JSON
        decoding errors are not caught and propagate to the caller.
    """
    import re

    headers = {
        'User-Agent': 'MoviePlotFetcher/1.0'
    }
    base_url = "https://en.wikipedia.org/w/api.php"

    # Matches a level-2 heading such as "== Cast ==" at the start of a line.
    # Sub-headings ("=== Ending ===") do not match, so they stay inside the plot.
    next_top_level_heading = re.compile(r"^==[^=\n]", re.MULTILINE)

    def fetch_json(params):
        """Issue one GET against the Wikipedia API and return the decoded JSON."""
        response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
        return response.json()

    def is_movie_page(title):
        """Return True when the page's categories or wikitext indicate a film."""
        data = fetch_json({
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "categories|revisions",
            "rvprop": "content",
            "cllimit": "max"
        })

        try:
            page = list(data["query"]["pages"].values())[0]

            # Check categories for a film indication
            categories = [cat["title"] for cat in page.get("categories", [])]
            if any("films" in category.lower() for category in categories):
                return True

            # Check for a film infobox in the page content
            content = page["revisions"][0]["*"]
            return "{{Infobox film" in content
        except (KeyError, IndexError, TypeError, AttributeError):
            # Missing page, missing revisions or an unexpected payload shape:
            # treat the hit as "not a film" and let the caller try the next one.
            return False

    def extract_plot_from_text(full_text):
        """Return the text of the "== Plot ==" section, including its sub-sections."""
        marker = "== Plot =="
        marker_at = full_text.find(marker)
        if marker_at == -1:
            return "Plot section not found in the text."

        plot_start = marker_at + len(marker)
        # The section ends at the next top-level heading, or at the end of the text.
        heading = next_top_level_heading.search(full_text, plot_start)
        plot_end = heading.start() if heading else len(full_text)
        return full_text[plot_start:plot_end].strip()

    def extract_first_paragraph(full_text):
        """Return everything before the first blank line (or the whole text)."""
        end_of_first_paragraph = full_text.find("\n\n")
        if end_of_first_paragraph != -1:
            return full_text[:end_of_first_paragraph].strip()
        return full_text.strip()

    # A blank title cannot match anything; skip the network round trip.
    if not movie_name or not str(movie_name).strip():
        return "Movie not found."

    data = fetch_json({
        "action": "query",
        "format": "json",
        "list": "search",
        "srsearch": movie_name,
        "utf8": 1,
        "srlimit": 5  # Top 5 search results
    })

    # An API error payload has no "query" key; treat it as an empty result list
    # instead of failing with a KeyError.
    query_section = data.get("query") if isinstance(data, dict) else None
    search_results = (query_section or {}).get("search", [])

    # Go through top search results to find a movie page
    for search_result in search_results:
        title = search_result["title"]
        if not is_movie_page(title):
            continue

        # Fetch the plain-text extract for the movie page
        plot_data = fetch_json({
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        })

        try:
            page = list(plot_data["query"]["pages"].values())[0]
            full_text = page.get("extract", "No text...")
            return f"""Overview:\n{extract_first_paragraph(full_text)}\nPlot:\n{extract_plot_from_text(full_text)}""".strip()
        except (KeyError, IndexError, TypeError, AttributeError):
            return "Error fetching plot."

    return "Movie not found."


        
# Test the function
# Smoke call: performs live requests against Wikipedia and prints the result.
movie_name = "Nightmare before Christmas"
print(get_movie_plot(movie_name))


In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory, CombinedMemory, ChatMessageHistory
from langchain.chains import ConversationChain
from typing import Any, Dict, Optional, Tuple


model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 1000)

# Titles that are looked up through get_movie_plot() and rated one by one below.
movies = [ "Barbie", "Oppenheimer", "The Creator", "Dumb Money" ] 

# Questions and answers are matched by position: answer i belongs to question i.
personal_questions = [  "Which movie genre you like the most?", 
                        "What is your favorite color?", 
                        "What is your favorite movie?", 
                        "Pick one - dogs, cats or hamsters?",
                        "What is your favorite food?",
                        "What is your favorite drink?" ]

# Interactive collection of the answers (disabled); the fixed list below is used instead.
#personal_answers = [ ] 
#for question in personal_questions:
#    answer = input(question)
#    personal_answers.append(answer)

# Upper bound of the rating scale interpolated into the prompts below.
max_rating = 100
    
personal_answers = ['thriller', 'blue', 'inception', 'dogs', 'fish tacos', 'strawberry milkshake']
print(personal_answers)

# Fail early with a clear message when the two lists drift apart; the original index
# loop raised an IndexError part-way through seeding the history instead.
if len(personal_questions) != len(personal_answers):
    raise ValueError(
        f"Expected {len(personal_questions)} answers, got {len(personal_answers)}"
    )

# Seed the transcript: an opening instruction, then each question (as the AI) followed
# by its answer (as the user), then the AI's request for a plot summary.
history = ChatMessageHistory()
history.add_user_message(f"""You are AI that will recommend user a movie based on their answers to personal questions. Ask user {len(personal_questions)} questions""")
for asked_question, given_answer in zip(personal_questions, personal_answers):
    history.add_ai_message(asked_question)
    history.add_user_message(given_answer)

history.add_ai_message("""Now tell me a plot summary of a movie you're considering watching, and specify how you want me to respond to you with the movie rating""")

# Running summary of the ratings given so far, exposed to the prompt under
# "recommendation_summary". The initial buffer text had a stray ")" after
# "personal questions", which is removed here.
# NOTE(review): return_messages=True presumably makes this memory hand a list of
# message objects to a plain-text prompt — confirm whether False is intended.
summary_memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="recommendation_summary",
    input_key="input",
    buffer=f"The human answered {len(personal_questions)} personal questions. Use them to rate, from 1 to {max_rating}, how much they like a movie they describe to you.",
    return_messages=True)

class MementoBufferMemory(ConversationBufferMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Intentionally a no-op: the given inputs and outputs are not stored."""
        return None
    
# Exposes the pre-seeded question/answer history under "questions_and_answers".
# MementoBufferMemory.save_context does nothing, so this memory is not written to
# through that method when the chain runs.
conversational_memory = MementoBufferMemory(
    chat_memory=history,
    memory_key="questions_and_answers", 
    input_key="input"
)

# Combined
# Both memories share input_key="input"; each contributes one prompt variable.
memory = CombinedMemory(memories=[conversational_memory, summary_memory])
RECOMMENDER_TEMPLATE = """The following is a friendly conversation between a human and an AI Movie Recommender. 
                        The AI is follows human instructions and provides movie ratings for a human based on the movie plot. 

Summary of Recommendations:
{recommendation_summary}
Personal Questions and Answers:
{questions_and_answers}
Human: {input}
AI:"""
# The three input variables match the two memory keys plus the chain's own input.
PROMPT = PromptTemplate(
    input_variables=["recommendation_summary", "input", "questions_and_answers"],
    template=RECOMMENDER_TEMPLATE
)
recommender = ConversationChain(llm=llm, verbose=True, memory=memory, prompt=PROMPT)
 
# Rate each movie in turn: fetch its plot from Wikipedia, then send the rating
# instructions together with the plot as a single input to the recommender chain.
# NOTE(review): the full overview and plot text is embedded in the prompt, which
# may be long for some films — confirm it fits the model's context window.
for movie in movies:
    print("Movie: " + movie)
    movie_plot = get_movie_plot(movie)
    print(f"Plot: {movie_plot}")
    
    # `max_rating / 4` is true division, so it is rendered as a float
    # (25.0 when max_rating is 100).
    plot_rating_instructions = f"""
        Reply Instruction That Must Be Strictly Followed:
        AI will reply with a highly personalized rating based only on the plot summary human provided and human answers to personal questions. 
        AI should be very sensible to human personal preferences captured in the answers to personal questions, and should not be influenced by anything else.
        Human will expect ratings for each movie to be different. 
        AI will reply with a rating from 1 to {max_rating}, with {max_rating} meaning human will love it, and 1 meaning human will hate it, 
        and AI include an explanation for the rating that must be based on human's previous answers.
        For example, let's say human answered that they like thriller movies, and you're providing a rating about a comedy movie. 
        If this is the only piece of information you have, you'd rate the movie low - {max_rating / 4} or so. 
        AI will carefully examine the movie plot to figure out the genre of the movie, and include this as part of the explanation.
        
        Movie Plot for {movie}:
        {movie_plot}
    """
    prediction = recommender.predict(input=plot_rating_instructions)
    print(prediction)

# Final turn, sent through the same chain after all movies have been rated: ask for
# a single recommendation among them.
final_recommendation = """Now that AI has rated all the movies, AI will recommend human the one that human will like the most. 
                            AI will respond with movie recommendation, and short explanation for why human will like it over all other movies. 
                            AI will not include any ratings in your explanation, only the reasons why human will like it the most.
                            However, the movie you will pick must be one of the movies you rated the highest.
                            For example, if you rated one movie 65, and the other 60, you will recommend the movie with rating 65."""
prediction = recommender.predict(input=final_recommendation)
print(prediction)
    
    
    
    
    
    
   



    



['thriller', 'blue', 'inception', 'dogs', 'fish tacos', 'strawberry milkshake']
Movie: Barbie
Plot: Overview:
Barbie is a 2023 American fantasy comedy film directed by Greta Gerwig from a screenplay she wrote with Noah Baumbach. Based on the eponymous fashion dolls by Mattel, it is the first live-action Barbie film after numerous computer-animated films and specials. The film stars Margot Robbie as the title character and Ryan Gosling as Ken, and follows the pair on a journey of self-discovery following an existential crisis. The supporting cast includes America Ferrera, Kate McKinnon, Issa Rae, Rhea Perlman, and Will Ferrell.
A live-action Barbie film was announced in September 2009 by Universal Pictures with Laurence Mark producing. Development began in April 2014, when Sony Pictures acquired the film rights. Following multiple writer and director changes and the casting of Amy Schumer and later Anne Hathaway as Barbie, the rights were transferred to Warner Bros. Pictures in October 

KeyboardInterrupt: 

In [24]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.question_answering import load_qa_chain

# Load the reviews and prepare the model.
loader = CSVLoader(file_path='./tv-reviews.csv')
docs = loader.load()

model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, temperature=0, max_tokens=2000)

# Chunk the documents (at most 1000 characters, no overlap) and index them.
splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
split_docs = splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

db = Chroma.from_documents(split_docs, embeddings)
query = """
    Based on the reviews in the context, tell me what people liked about the picture quality.
    Make sure you do not paraphrase the reviews, and only use the information provided in the reviews.
    """

# Two equivalent routes to an answer: retrieve manually and call a "stuff" QA chain
# with a custom prompt (the default here), or let RetrievalQA do both steps.
use_chain_helper = False
if not use_chain_helper:
    similar_docs = db.similarity_search(query, k=5)
    prompt = PromptTemplate(
        input_variables=["query", "context"],
        template="{query}\nContext: {context}",
    )
    chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
    answer_text = chain.run(input_documents=similar_docs, query=query)
    print(answer_text)
else:
    rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
    print(rag.run(query))

People liked that the picture quality of the Imagix Pro and VisionMax Ultra was crystal clear, sharp, and lifelike. They also appreciated the vibrant and realistic colors, which made everything look stunning. The picture quality enhanced their movie-watching and viewing experiences, making it feel like they were watching movies in a theater.
