In [None]:
!docker run -p 6379:6379 -it - rm falkordb/falkordb

In [None]:
%pip install langchain langchain-openai falkordb langchain-experimental pandas gradio

In [None]:
import getpass
import logging
import os
import sys

# Never store a real key in the notebook: reuse OPENAI_API_KEY when the
# environment already provides it, otherwise prompt without echoing the value.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Send INFO-level (and above) log records to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [None]:
from langchain_community.graphs import FalkorDBGraph
from langchain_community.document_loaders.csv_loader import CSVLoader
import os
import pandas as pd
# Open the "imdb" graph; no host/port is passed, so FalkorDBGraph's own
# connection defaults apply.
graph = FalkorDBGraph(database="imdb")

# The CSV is looked up in the current working directory.
working_dir = os.getcwd()
filename = os.path.join(working_dir, 'imdb_top_1000.csv')
data = pd.read_csv(filename)

In [None]:
import os
import pandas as pd

# Escape the values of one column for use inside single-quoted Cypher literals
def clean_column(column_data):
    """Escape every value of a column for a single-quoted Cypher string.

    Args:
        column_data: pandas Series holding the raw column values.

    Returns:
        A Series of the same length in which missing values are replaced by
        the placeholder 'NA' and every other value is converted to text with
        backslashes and single quotes backslash-escaped.
    """
    def escape(value):
        if pd.isnull(value):
            return 'NA'
        # str() keeps numeric cells from raising AttributeError on .replace.
        # Backslashes go first so the ones added for quotes are not doubled.
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    return column_data.apply(escape)


# 'data' must already hold the DataFrame returned by pd.read_csv().
# NOTE: the columns are overwritten in place, so running this cell a second
# time escapes the already-escaped quotes again.

# Columns that hold the star names (also used when building the graph)
star_columns = ['Star1', 'Star2', 'Star3', 'Star4']

# Every free-text column goes through the same clean-up helper
for column in ['Series_Title', 'Director', 'Certificate', 'Runtime', 'Genre'] + star_columns:
    data[column] = clean_column(data[column])

# Missing ratings become the 'NA' placeholder
data['IMDB_Rating'] = data['IMDB_Rating'].fillna('NA')

# Years that cannot be parsed as numbers become the sentinel -1
parsed_years = pd.to_numeric(data['Released_Year'], errors='coerce')
data['Released_Year'] = parsed_years.fillna(-1)

def _build_movie_query(row, star_columns):
    """Build the Cypher statement that merges one movie with its director and stars.

    Args:
        row: One row of the cleaned DataFrame. Text fields are interpolated
            into single-quoted Cypher literals as-is, so they must already be
            escaped; 'NA' marks a missing value.
        star_columns: Names of the columns that hold the star names.

    Returns:
        A single multi-clause Cypher string (one MERGE clause per line).
    """
    properties = [
        f"title: '{row['Series_Title']}'",
        f"year: {int(row['Released_Year'])}",
        f"certificate: '{row['Certificate']}'",
        f"runtime: '{row['Runtime']}'",
        f"genre: '{row['Genre']}'",
    ]
    # The rating is interpolated unquoted, so the 'NA' placeholder would be
    # parsed as an identifier and invalidate the statement; leave it out.
    rating = row['IMDB_Rating']
    if str(rating) != 'NA':
        properties.append(f"imdb_rating: {rating}")

    property_map = ", ".join(properties)
    clauses = [f"MERGE (m:Movie {{{property_map}}})"]

    # Skip a missing director the same way missing stars are skipped, so no
    # shared placeholder 'NA' Director node is created.
    director_name = row['Director']
    if director_name != 'NA':
        clauses.append(f"MERGE (d:Director {{name: '{director_name}'}})")
        clauses.append("MERGE (d)-[:DIRECTED]->(m)")

    # All Star nodes are merged first, then their STARRED_IN relationships.
    star_merges = []
    star_links = []
    for position, star_column in enumerate(star_columns, start=1):
        star_name = row[star_column]
        if star_name == 'NA':
            continue
        star_merges.append(f"MERGE (s{position}:Star {{name: '{star_name}'}})")
        star_links.append(f"MERGE (s{position})-[:STARRED_IN]->(m)")

    return "\n".join(clauses + star_merges + star_links)


# Iterate through the cleaned DataFrame and write one movie per row
for index, row in data.iterrows():
    # -1 is the sentinel the cleaning step stores for an unparseable year
    if row['Released_Year'] == -1:
        print(f"Skipping row {index} due to invalid year.")
        continue

    cypher_query = _build_movie_query(row, star_columns)
    # Print the Cypher query
    print(cypher_query)
    graph.query(cypher_query)

# Refresh the schema once, after all rows have been written
graph.refresh_schema()
print("Added graph documents to FalkorDB")

In [None]:
import random
import pandas as pd

# 'data' is expected to be the DataFrame whose 'Series_Title' values were
# already escaped for Cypher by the cleaning step, so the titles are used
# as-is (escaping them again would double the backslashes).
movie_names = data['Series_Title'].tolist()

# Demographic data ranges and options
birth_years_range = (1950, 2005)
genders = ["Male", "Female", "Other"]
locations = ["USA", "UK", "Canada", "Australia", "France", "Germany", "India", "Japan"]

num_persons = 100
num_data_points = 1000

opinions = ["liked", "loved", "ignored", "disliked", "hated"]

# Dedicated, seeded generator so a re-run produces the same synthetic data
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

# First create the Person nodes. MERGE on the name (with the attributes set
# only on creation) keeps a re-run from adding duplicate persons.
for i in range(1, num_persons + 1):
    person_name = f"Person {i}"
    birth_year = rng.randint(*birth_years_range)
    gender = rng.choice(genders)
    location = rng.choice(locations)

    person_node = (
        f"MERGE (p:Person {{name: '{person_name}'}}) "
        f"ON CREATE SET p.birthYear = {birth_year}, "
        f"p.gender = '{gender}', p.location = '{location}'"
    )
    graph.query(person_node)

# Then link random persons to random movies with a random opinion edge
for _ in range(num_data_points):
    person_index = rng.randint(1, num_persons)
    person_name = f"Person {person_index}"

    opinion_edge = rng.choice(opinions).upper()
    movie_name = rng.choice(movie_names)

    opinion_query = (
        f"MATCH (m:Movie {{title: '{movie_name}'}}), (p:Person {{name: '{person_name}'}})"
        f" MERGE (p)-[:{opinion_edge}]->(m)"
    )
    graph.query(opinion_query)


graph.refresh_schema()
print("Added graph documents to FalkorDB")

In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import FalkorDBQAChain


# Chat model used by the QA chain (temperature=0)
llm = ChatOpenAI(temperature=0)

# NOTE(review): newer langchain-community releases may require an explicit
# allow_dangerous_requests=True for graph QA chains — confirm against the
# installed version.
chain = FalkorDBQAChain.from_llm(llm, graph=graph, verbose=True)

question = "Which movies did Christopher Nolan direct?"
out1 = chain.run(question)
print(out1)

In [None]:
# Ask for the cast of a single movie
question = "Who starred in Lifeboat?"
out = chain.run(question)
print(out)

In [None]:
# Question that goes from a person through movies to their directors
question = "Which directors does Person 2 love?"
out1 = chain.run(question)
print(out1)

In [None]:
# Two questions that select persons by birth year
liked_question = "Which movies did person born in year 1975 like?"
out1 = chain.run(liked_question)
print(out1)

recommend_question = "Which movies should person born in year 1967 watch?"
out2 = chain.run(recommend_question)
print(out2)

In [None]:
# Negative recommendation for persons selected by birth year
question = "Which films should person born in year 1972 not watch?"
out1 = chain.run(question)
print(out1)

In [None]:
out1 = chain.run("Which person likes the director Christopher Nolan's films?")
# Show the answer; the assignment alone produces no cell output.
print(out1)

In [None]:
out1 = chain.run("Where do people who like the director Martin Scorsese live?")
# Show the answer; the assignment alone produces no cell output.
print(out1)

In [None]:
out1 = chain.run("What is the gender distribution of people who love films?")
# Show the answer; the assignment alone produces no cell output.
print(out1)

In [None]:
out1 = chain.run("Who are the people that liked Action, Adventure films?")
# Show the answer; the assignment alone produces no cell output.
print(out1)

In [None]:
import gradio as gr

# Requires the `chain` object (the FalkorDBQAChain) created in an earlier cell

def ask_question(question):
    """Answer one question by running it through the QA chain.

    Args:
        question: Text entered by the user.

    Returns:
        The value returned by `chain.run(question)`, or a short hint when the
        question is missing or contains only whitespace.
    """
    # Do not spend a chain call on an empty submission.
    if question is None or not question.strip():
        return "Please enter a question."
    return chain.run(question)

# Widgets: a two-line question box and a ten-line answer box
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")
answer_box = gr.Textbox(lines=10, label="Output")

# Wire ask_question between the two widgets
iface = gr.Interface(
    fn=ask_question,
    inputs=question_box,
    outputs=answer_box,
    title="FalkorDB QA System",
    description="Ask any question related to the movie database.",
)

# Launch the Gradio app (share=True is passed through to launch())
iface.launch(share=True)