In [1]:
from langchain_openai import ChatOpenAI

# OpenAI
llm = ChatOpenAI(api_key="OPEN-AI-API-KEY", model="gpt-3.5-turbo")

In [19]:
from langchain_community.document_loaders import CSVLoader
import sqlite3
import pandas as pd

# Connecting to SQLite Database
sqlite_db = r"nba.db"
conn = sqlite3.connect(sqlite_db)
cur = conn.cursor()
#Getting the data from the database and Transforming it into a CSV file for easier use by langchain
df = cur.execute("SELECT athlete_display_name,COUNT(game_id) AS number_of_games, team_name,SUM(minutes),SUM(offensive_rebounds),SUM(defensive_rebounds),SUM(rebounds),SUM(assists),SUM(steals),SUM(blocks),SUM(turnovers),SUM(fouls),SUM(points),athlete_jersey,athlete_short_name,athlete_position_abbreviation FROM nba_player_boxscore GROUP BY athlete_display_name, team_name").fetchall()
df = pd.DataFrame(df, columns=[x[0] for x in cur.description])
df = df.to_csv(r"nba_player_boxscore.csv", index=False)

#loading the data into langchain
loader = CSVLoader(file_path=r"nba_player_boxscore.csv")

data = loader.load()



In [20]:
from langchain.text_splitter import CharacterTextSplitter
# Split the data into chunks of 100 characters with 10 characters overlap
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=100,
    chunk_overlap=10,
    length_function=len
)

docs = text_splitter.split_documents(data)

In [21]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
#using OpenAI embeddings to embed the data into vectors
embedding = OpenAIEmbeddings(api_key="OPEN-AI-API-KEY")

persist_directory = 'docs/chroma/1/'

vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory=persist_directory
)

In [40]:
from langchain.chains import RetrievalQA

#using the retrieval QA chain to answer questions
chat_model = ChatOpenAI(openai_api_key="OPEN-AI-API-KEY")

qa_chain = RetrievalQA.from_chain_type(
    chat_model,
    retriever=vectordb.as_retriever()
)
#making a query to the model
question = "How many points Lebron James scored?"
result = qa_chain({"query": question})
print(result["result"])

I don't have information about Lebron James' points scored in his seasons.
