<a href="https://colab.research.google.com/github/whitepanic/1-nlp/blob/main/CrewAI_Youtube_%26_web_%EA%B8%B0%EB%B0%98_RAG_%EC%97%90%EC%9D%B4%EC%A0%84%ED%8A%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U crewai crewai_tools langchain_community youtube-transcript-api youtube-search faiss-cpu

In [None]:
import os
from langchain_openai import ChatOpenAI
from crewai_tools import YoutubeVideoSearchTool
from langchain_community.tools.tavily_search import TavilySearchResults
from crewai_tools  import tool
from crewai import Crew, Task, Agent
from google.colab import userdata

#### LangChain YouTubeSearchTool을 활용해 영상 검색

In [None]:
from langchain_community.tools import YouTubeSearchTool
# Create the LangChain YouTube search tool and run one sample search so the
# shape of its output can be inspected before it is used inside a custom tool.
youtube_search_tool = YouTubeSearchTool()
youtube_search_tool.run("테디노트")

#### LLM 설정

In [None]:
from google.colab import userdata
# Store the API key first via the key icon in Colab's left-hand tab (secrets);
# it is read from there and exported as an environment variable.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

# Chat model instance passed as `llm` to every agent defined later in this notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

#### Youtube 스크립트로 RAG 실행하는 Custom Tool 정의

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.documents import Document
import ast

# Search tool instance used by `youtube_retriever` to turn a query into video URLs.
youtube_search_tool = YouTubeSearchTool()


# NOTE: the docstring below is kept verbatim on purpose -- the `@tool` decorator
# exposes it as the tool description, so it is effectively runtime text.
#
# Flow: search YouTube for `query` -> load each video's transcript -> split the
# transcripts into chunks -> index them in an in-memory FAISS store -> return the
# chunks most relevant to `query`, formatted as one string.
# Returns a short "nothing found" message instead of raising when no transcript
# text could be collected.
@tool
def youtube_retriever(query:str) -> str:
    """
    Retriever tool for the transcript of a YouTube video. query should be given in string format.
    """
    # The search tool returns the URL list rendered as a string; parse it back
    # into a Python list with the safe literal evaluator.
    found_urls = ast.literal_eval(youtube_search_tool.run(query))

    # Load one Document per video that actually has a transcript.
    docs = []
    for url in found_urls:
        loader = YoutubeLoader.from_youtube_url(
            url,
            add_video_info=True,
            language=["en", "ko"],
        )
        scripts = loader.load()
        if not scripts:
            # The loader can come back empty (e.g. no transcript available);
            # indexing [0] would raise IndexError, so skip this video.
            continue
        transcript = scripts[0]
        # Use the same fallbacks as the formatting loop below so missing video
        # info does not raise KeyError.
        docs.append(
            Document(
                page_content=transcript.page_content,
                metadata={
                    "source": url,
                    "title": transcript.metadata.get('title', 'No title available'),
                    "author": transcript.metadata.get('author', 'No author available'),
                },
            )
        )

    # Split every transcript into chunks for embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(docs)
    if not texts:
        # Building a vector store from zero chunks fails; report it to the agent instead.
        return "No video transcripts were found for the given query."

    # Index all chunks in an in-memory vector store and fetch those closest to the query.
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    retrieved_docs = retriever.invoke(query)

    video_results = []

    for doc in retrieved_docs:
        title = doc.metadata.get('title', 'No title available')
        author = doc.metadata.get('author', 'No author available')
        script_content = doc.page_content

        video_info = f"""
        Video Information:
        ------------------
        Title: {title}
        Author: {author}
        Transcript:
        {script_content}
        ------------------
        """
        video_results.append(video_info)

    # Join all video results into a single string
    all_video_results = "\n\n".join(video_results)

    return all_video_results

In [None]:
# Call the custom tool directly with a sample query to check its output
# before handing it to an agent.
youtube_retriever.run("테디노트")

#### Web 검색 툴 정의

In [None]:
# Export the Tavily API key stored in Colab secrets as an environment variable.
os.environ['TAVILY_API_KEY'] = userdata.get('TAVILY_API_KEY')
# `max_results` is the field that limits how many hits the tool returns;
# the previous `k=3` is not a field of TavilySearchResults and had no effect.
web_search_tool = TavilySearchResults(max_results=3)

#### 에이전트 설정

In [None]:
# Agent definitions
# The goals below are plain (non-f) strings on purpose: `{query}` is a template
# placeholder that CrewAI fills in from `crew.kickoff(inputs={"query": ...})`.
# Using f-strings here would require a module-level `query` variable and raise
# NameError when it is not defined.

# Agent that analyzes YouTube transcripts retrieved through the custom tool.
video_analyzer = Agent(
    role="Video Analyzer",
    goal="""
    Analyze youtube videos about user's query: {query} and Analyze YouTube video transcripts and identify main topics,
    key points, and areas needing further research. This is crucial for answering user's query.
    """,
    backstory="Expert in Youtube video analysis with a keen eye for identifying core themes and knowledge gaps",
    verbose=True,
    max_iter=2,
    llm=llm,
    tools=[youtube_retriever]
)

# Agent that supplements the video analysis with web search results.
researcher = Agent(
    role="Web Researcher",
    goal="Conduct web searches with query to find additional information on topics identified from the video to answer the user's query: {query}",
    backstory="Skilled internet researcher with a talent for finding reliable and relevant information quickly",
    verbose=True,
    llm=llm,
    max_iter=2,
    tools=[web_search_tool]
)

# Agent that writes the final answer; it is given no tools of its own.
rag_agent = Agent(
    role="RAG Agent",
    goal="""Answer user's query: {query} based on video content analysis and additional research
    If resources are not enough to answer the users' question, then you should command other agents for further research.""",
    backstory="You are a helpful RAG Agent who should refer to the analysis of video analyzer and researcher.",
    llm=llm,
    verbose=True
)

#### Task 정의

In [None]:
# Task definitions
# Descriptions and expected outputs are plain (non-f) strings: `{query}` is a
# template placeholder that CrewAI fills in from
# `crew.kickoff(inputs={"query": ...})`. Using f-strings here would require a
# module-level `query` variable and raise NameError when it is not defined.

# Step 1: analyze the video transcripts.
task1 = Task(
    description="""Analyze the video transcript,
    identify main topics, key points for each topic, and
    list questions that need further research to answer user's {query}.""",
    agent=video_analyzer,
    expected_output="A detailed analysis of the video content including main topics, key points, and questions for answering user's query."
)

# Step 2: research the query on the web and collect sources.
task2 = Task(
    description="""Research the user query:{query} identified from the video analysis and provide findings with sources.
    search query for web search tool should be string format.""",
    agent=researcher,
    expected_output="Comprehensive research findings for each identified question, including relevant information and sources(url needed)."
)

# Step 3: compose the final answer from the video and web findings.
task3 = Task(
    description="""Answer the user's query: {query} with factful resources from video and web search result.
    you should consider user's language to give great answer.
    """,
    agent=rag_agent,
    expected_output="""A well-structured, engaging and concise answer to user's query: {query} based on the video content and additional research,
    including a title, main content, and references(including URLs)."""
)

#### Crew 설정

In [None]:
# Create a crew with the tasks
crew = Crew(
    agents=[video_analyzer, researcher, rag_agent],
    tasks=[task1, task2, task3],
    verbose=True
)

In [None]:
# Run the crew, passing the user's question under the "query" input key.
result = crew.kickoff(inputs={"query":"테디노트는 누구인가요?"})

In [None]:
# Display the `raw` attribute of the kickoff result (the final answer text shown below).
result.raw

#### 마크다운 결과물 확인

### 테디노트는 누구인가요?

테디노트(실명: 이경록)는 AI 개발 유튜버이자 교육자로, 데이터 분석, 머신러닝, 딥러닝, 대규모 언어 모델(LLM) 등 다양한 주제를 다룹니다. 그는 연구보다는 실제 개발에 중점을 두고 있으며, 특히 파이썬 프로그래밍과 관련된 콘텐츠에 집중하고 있습니다.

그의 YouTube 채널에서는 RAG(Retrieval-Augmented Generation)와 같은 최신 AI 기술을 설명하고, LangChain을 활용한 AI 개발의 중요성을 강조합니다. 테디노트는 다양한 사례를 통해 기업들이 이러한 AI 기술을 어떻게 활용할 수 있는지를 보여주고 있습니다.

또한, 그는 Udemy에서 여러 강의를 제공하고 있으며, 데이터 분석, 머신러닝, 딥러닝의 기초부터 실무 적용까지 포괄적인 내용을 다룹니다. 테디노트의 강의는 실습 중심의 학습을 지향하며, AI 개발에 관심이 있는 학생들과 개발자들에게 큰 도움이 되고 있습니다. 그의 경험과 지식은 많은 사람들에게 영감을 주고 있습니다.

더 자세한 정보는 다음의 링크에서 확인하실 수 있습니다:
- [Udemy 프로필](https://www.udemy.com/user/teddynote/)
- [YouTube 채널](https://teddynote.com/)
- [LinkedIn 프로필](https://kr.linkedin.com/in/teddy-lee)
- [GitHub Lectures](https://teddylee777.github.io/lectures/)