In [90]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders.web_base import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.llms.openai import OpenAI
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.chat_models.openai import ChatOpenAI

In [91]:
# Web page whose content is loaded and indexed for question answering.
url = "https://365datascience.com/upcoming-courses"

In [92]:
# Download the page at `url` and parse it into LangChain Document objects.
loader = WebBaseLoader(web_path=url)
raw_documents = loader.load()

# Inspect what was scraped before splitting it.
print(raw_documents)

[Document(page_content="\n\n\nOnline Data Science Courses and Certification – 365 Data Science\n\n\n\n  \n          Courses\n         \n          Learning Paths  Learning Paths\n             \n                Career Paths  Career Paths\n                   See all  Career Paths See all  \n                        Data analyst\n                      \n                        Data scientist\n                      \n                        Business analyst\n                      \n                        Senior data analyst\n                      \n                        Senior data scientist\n                      \n                        Tableau developer\n                      \n                        Power BI developer\n                      \n                        Data engineer\n                      \n                        Machine learning scientist\n                      \n                        AI engineer\n                      \n                        AI agent engineer\n 

In [93]:
# Break the loaded page into smaller chunks, relying on the splitter's
# default settings (no chunk size or overlap is configured here).
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(documents=raw_documents)

# Inspect the resulting chunks.
print(documents)

[Document(page_content='Online Data Science Courses and Certification – 365 Data Science', metadata={'source': 'https://365datascience.com/upcoming-courses', 'title': 'Online Data Science Courses and Certification – 365 Data Science', 'description': 'Browse various data science courses on in-demand tools like SQL, Python, and R. Discover the perfect structured training program for beginners. Start now!', 'language': 'en'}), Document(page_content='Courses\n         \n          Learning Paths  Learning Paths\n             \n                Career Paths  Career Paths\n                   See all  Career Paths See all  \n                        Data analyst\n                      \n                        Data scientist\n                      \n                        Business analyst\n                      \n                        Senior data analyst\n                      \n                        Senior data scientist\n                      \n                        Tableau developer\n 

In [94]:
# Load variables from a local .env file; override=True lets values from .env
# replace ones that are already set in the process environment.
load_dotenv(override=True)

# Strip *before* validating: a whitespace-only value would otherwise pass the
# emptiness check and only become an empty (unusable) key afterwards.
api_key = (os.getenv('OPENAI_API_KEY') or '').strip()

if not api_key:
    raise ValueError("OPENAI_API_KEY not found in .env file.")

In [95]:
# Embedding client authenticated with the key read from the environment;
# it is used below to vectorise the document chunks.
embeddings = OpenAIEmbeddings(
    openai_api_key=api_key,
)

In [96]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [97]:
# Conversation memory stored under the "chat_history" key.
# return_messages=True requests the history as message objects rather than
# as a single formatted string.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)
print(memory)

return_messages=True memory_key='chat_history'


In [98]:
# Chat model that generates the answers; temperature=0 keeps sampling
# randomness to a minimum.
llm = ChatOpenAI(
    openai_api_key=api_key,
    model="gpt-3.5-turbo",
    temperature=0,
)

# Retriever view over the FAISS index built from the page chunks.
retriever = vectorstore.as_retriever()

# Conversational retrieval chain: combines the chat model, the retriever and
# the conversation memory into a single question-answering object.
qa = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)

In [99]:
# Question to ask the chain about the indexed page.
query = "What data science courses are available? Who are the instructors?"

# Use .invoke() instead of calling the chain object directly: Chain.__call__
# is deprecated in LangChain, while .invoke() accepts the same input dict and
# returns the same result dict (including the "answer" key).
result = qa.invoke({"question": query})

In [100]:
# Display only the generated answer text from the chain's result dict.
result["answer"]

'Some of the data science courses available are "Introduction to Data and Data Science" with Martin Ganchev and Iliya Valchanov, "Intro to AI" with Ned Krastev, "AI Strategy" with Bernard Marr, "Intro to AI Agents and Agentic AI" with Ned Krastev, "Data Strategy" with Harpreet Sahota, "Intro to Shell" with Thomas Nield, "Understanding Cloud Computing" with Andrew Jones, "Introduction to AWS Cloud" with Ernest Asena, "Intro to Microsoft Azure" with Andrew Jones, "Intro to Google Cloud" with Samantha Guerriero, "Intro to Data Engineering" with Shashank Kalanithi, "Introduction to Data Warehousing" with Rahul Singh, "SQL" with Martin Ganchev and Vladimir Saev, "Advanced SQL" with Martin Ganchev and Vladimir Saev, "SQL + Tableau" with Martin Ganchev, "Introduction to Databricks" with Dimitar Shutev, "Introduction to Jupyter" with Martin Ganchev, "Introduction to Python" with Martin Ganchev, "Python Programmer Bootcamp" with Giles McMullen-Klein, "Python 101: Kickoff" with Hristina Hristova