In [2]:
import os
import textwrap
import warnings

from langchain.chains import LLMChain
from langchain.document_loaders import YoutubeLoader
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

# Read the key from the environment so it never has to be pasted into (and
# saved with) the notebook. Falls back to "" when OPENAI_API_KEY is unset,
# which is the value this constant previously had.
OPEN_AI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Videos whose transcripts will be indexed for question answering.
all_in_video_urls = ["https://www.youtube.com/watch?v=SoTOF-plFwc"]

In [4]:
class YoutubeVideoGPT:
    """
    Query YouTube video transcripts with gpt-3.5-turbo using LangChain.

    All user inputs are specified when instantiating the class with a list of
    video URLs: each transcript is loaded, split into chunks and indexed in a
    FAISS vector store during construction. Afterwards, use ``answer_query``
    to ask questions about the videos.

    Attributes:
        video_urls: the URLs passed to the constructor.
        final_docs: every transcript chunk collected so far.
        failed_urls: ``{url: exception}`` for the videos that could not be
            loaded during the most recent ``create_db_from_video_urls`` call.
        embeddings: the ``OpenAIEmbeddings`` instance used to build the index.
        db: the FAISS store built from ``final_docs``.
        response: the most recent answer, or ``None`` before the first query.
    """
    def __init__(self, video_urls: list = None):
        """
        Load and index the transcripts of ``video_urls``.

        Args:
            video_urls: list of YouTube video URLs. ``None`` (the default) is
                treated as an empty list.

        Raises:
            ValueError: if no transcript could be loaded (see
                ``create_db_from_video_urls``).
        """
        # `None` replaces the former `[]` default, which was one list object
        # shared by every call that omitted the argument.
        self.video_urls = [] if video_urls is None else video_urls
        self.final_docs = []
        self.failed_urls = {}
        # Defined up front so reading it before the first query yields None
        # instead of raising AttributeError.
        self.response = None
        self.embeddings = OpenAIEmbeddings(openai_api_key=OPEN_AI_API_KEY)
        self.db = self.create_db_from_video_urls(self.video_urls)

        # NOTE(review): both templates are sent to the model verbatim; the
        # wording (including its typos) is deliberately left untouched here so
        # the model input does not change.
        self.system_template = """
            You are a helpful assistant answering questions about youtube videos based on their transcripts: {docs}

            Only use factual information from the transcipt to answer the question. The answers you provide should be detailed but concise, \
            and understandable by someone who has not watched the Youtube video or read the transcript.

            If you feel like you don't have enought information to answer the question, \
            repond with 'I don't have sufficient information to answer this question at the moment based on the transcript of this video alone.'
        """

        self.human_template = "Answer the following question: {question}"

    def create_db_from_video_urls(self, video_urls: list = None):
        """
        Load, split and index the transcripts of ``video_urls``.

        Loading is best-effort: a video whose transcript cannot be loaded or
        split is skipped with a warning and recorded in ``self.failed_urls``
        rather than aborting the whole run. New chunks are appended to
        ``self.final_docs`` and the index is built from that full list.

        Args:
            video_urls: list of YouTube video URLs. ``None`` (the default) is
                treated as an empty list.

        Returns:
            The FAISS vector store built from ``self.final_docs``.

        Raises:
            ValueError: if there is not a single document to index.
        """
        failures = {}
        for video_url in video_urls or []:
            try:
                loader = YoutubeLoader.from_youtube_url(video_url)
                transcript = loader.load()
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
                self.final_docs.extend(text_splitter.split_documents(transcript))
            except Exception as exc:
                # `Exception` rather than a bare `except:` so that
                # KeyboardInterrupt / SystemExit still stop the notebook.
                failures[video_url] = exc
                warnings.warn(
                    f"Skipping {video_url!r}: transcript could not be loaded ({exc!r})",
                    stacklevel=2,
                )
        self.failed_urls = failures

        if not self.final_docs:
            # Fail here with an actionable message instead of handing an empty
            # document list to the vector store.
            raise ValueError(
                "No transcript could be loaded from the supplied video URLs; "
                "cannot build the FAISS index."
            )

        return FAISS.from_documents(self.final_docs, self.embeddings)

    def print_response(self, response: str):
        """Print ``response`` wrapped to 75 columns."""
        print(textwrap.fill(response, width=75))

    def answer_query(self, query: str, k: int = 4):
        """
        Answer ``query`` from the indexed transcripts and print the answer.

        The ``k`` chunks most similar to the question are retrieved from the
        vector store, joined into one context string and passed to the chat
        model together with the question.

        Args:
            query: the question to ask.
            k: number of transcript chunks to retrieve as context.

        Returns:
            None. The answer is printed and stored in ``self.response``.
        """
        matches = self.db.similarity_search(query, k=k)
        docs_page_content = " ".join(d.page_content for d in matches)

        chat = ChatOpenAI(
            openai_api_key=OPEN_AI_API_KEY,
            model_name="gpt-3.5-turbo",
            temperature=0.2
        )
        chat_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(self.system_template),
            HumanMessagePromptTemplate.from_template(self.human_template)
        ])

        chain = LLMChain(llm=chat, prompt=chat_prompt)
        # Newlines are flattened so textwrap can re-flow the answer uniformly.
        response = chain.run(question=query, docs=docs_page_content).replace("\n", " ")
        self.response = response
        self.print_response(response = self.response)


In [5]:
# Instantiating the bot indexes the transcripts of every listed video.
query_bot = YoutubeVideoGPT(all_in_video_urls)

In [9]:
# First question: the hosts' take on the interview.
query_bot.answer_query(
    query="What do they say about Tucker's interview with Putin?"
)

The speakers in the transcript discuss Tucker Carlson's interview with
Vladimir Putin. They mention that Tucker Carlson could have done better in
the interview, but overall it wasn't bad considering the circumstances.
They highlight that it's challenging for any journalist to extract valuable
information from Putin, especially when the interviewer, like Tucker, is
sympathetic towards Russia. The speakers note that Tucker asked some
softball questions and lost control of the interview early on. They also
mention that despite Putin extending an olive branch in the interview, the
Biden Administration still rejects negotiations with Russia. Overall, they
express disappointment in how American-Russian relations have been handled
over the years, turning a potential ally into an enemy.


In [10]:
# Follow-up: drill into the claim made at the end of the previous answer.
query_bot.answer_query(
    query="What were more specific details about the mishandling of American-Russian relations over the past 25 years that were stated?"
)

The mishandling of American-Russian relations over the past 25 years was
highlighted in the transcript through the rejection of negotiations with
Russia despite Putin's offer, the turning of a potential ally into an
enemy, and the disregard of warnings from foreign policy experts such as
George Kennan and Bill Burns. The decision to expand NATO, breaking
promises made to Russia, was emphasized as a key turning point that
exacerbated tensions and led to the deterioration of relations. These
actions were seen as detrimental to Russian democracy, inflaming
nationalist and anti-western sentiments, and reigniting Cold War dynamics.


In [11]:
# Ask about one specific anecdote from the video.
query_bot.answer_query(
    query="Describe the circumstances under which Putin asked Bill Clinton if Russia could join NATO."
)

In 1999, during a meeting with Bill Clinton, Putin asked if Russia could
join NATO. Clinton initially agreed to discuss the details over dinner, but
later during the dinner, Clinton's team informed Putin that the answer was
no. This moment is seen as significant because it sheds light on the
challenges Russia faced during its transition to democracy and the emotions
that may have influenced Putin's actions since then. Putin's offer to join
NATO was ultimately rebuffed, leading to potential underlying tensions that
have shaped Russia's relationship with the West.
