In [25]:
import torch
torch.cuda.is_available()

True

In [39]:
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import pandas as pd
import os
from dotenv import load_dotenv
import pymongo

load_dotenv()

True

In [41]:
llm = Llama(
    model_path="Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
    n_gpu_layers=20, # Uncomment to use GPU acceleration
    # seed=1337, # Uncomment to set a specific seed
    # n_ctx=2048, # Uncomment to increase the context window
)
model = SentenceTransformer("BAAI/bge-large-en-v1.5", device="cuda")

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from Meta-Llama-3-8B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Meta-Llama-3-8B-Instruct-imatrix
llama_model_loader: - kv   2:                          llama.block_count u32              = 32
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   7:              llama.at

In [42]:
# <https://huggingface.co/datasets/AIatMongoDB/embedded_movies>
dataset = load_dataset("AIatMongoDB/embedded_movies")

# Convert the dataset to a pandas dataframe
dataset_df = pd.DataFrame(dataset['train'])

dataset_df.head(5)

Unnamed: 0,plot,genres,runtime,cast,num_mflix_comments,poster,title,fullplot,languages,directors,writers,awards,imdb,countries,type,plot_embedding,rated,metacritic
0,Young Pauline is left a lot of money when her ...,[Action],199.0,"[Pearl White, Crane Wilbur, Paul Panzer, Edwar...",0,https://m.media-amazon.com/images/M/MV5BMzgxOD...,The Perils of Pauline,Young Pauline is left a lot of money when her ...,[English],"[Louis J. Gasnier, Donald MacKenzie]","[Charles W. Goddard (screenplay), Basil Dickey...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 4465, 'rating': 7.6, 'votes': 744}",[USA],movie,"[0.0007293965299999999, -0.026834568000000003,...",,
1,A penniless young man tries to save an heiress...,"[Comedy, Short, Action]",22.0,"[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...",0,https://m.media-amazon.com/images/M/MV5BNzE1OW...,From Hand to Mouth,As a penniless man worries about how he will m...,[English],"[Alfred J. Goulding, Hal Roach]",[H.M. Walker (titles)],"{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 10146, 'rating': 7.0, 'votes': 639}",[USA],movie,"[-0.022837115, -0.022941574000000003, 0.014937...",TV-G,
2,"Michael ""Beau"" Geste leaves England in disgrac...","[Action, Adventure, Drama]",101.0,"[Ronald Colman, Neil Hamilton, Ralph Forbes, A...",0,,Beau Geste,"Michael ""Beau"" Geste leaves England in disgrac...",[English],[Herbert Brenon],"[Herbert Brenon (adaptation), John Russell (ad...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16634, 'rating': 6.9, 'votes': 222}",[USA],movie,"[0.00023330492999999998, -0.028511643000000003...",,
3,"Seeking revenge, an athletic young man joins t...","[Adventure, Action]",88.0,"[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...",1,https://m.media-amazon.com/images/M/MV5BMzU0ND...,The Black Pirate,A nobleman vows to avenge the death of his fat...,,[Albert Parker],"[Douglas Fairbanks (story), Jack Cunningham (a...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16654, 'rating': 7.2, 'votes': 1146}",[USA],movie,"[-0.005927917, -0.033394486, 0.0015323418, -0....",,
4,An irresponsible young millionaire changes his...,"[Action, Comedy, Romance]",58.0,"[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...",0,https://m.media-amazon.com/images/M/MV5BMTcxMT...,For Heaven's Sake,"The Uptown Boy, J. Harold Manners (Lloyd) is a...",[English],[Sam Taylor],"[Ted Wilde (story), John Grey (story), Clyde B...","{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 16895, 'rating': 7.6, 'votes': 918}",[USA],movie,"[-0.0059373598, -0.026604708, -0.0070914757000...",PASSED,


In [43]:
# Remove data point where plot column is missing
dataset_df = dataset_df.dropna(subset=['plot'])
print("\\nNumber of missing values in each column after removal:")
print(dataset_df.isnull().sum())

# Remove the plot_embedding from each data point in the dataset as we are going to create new embeddings with the new OpenAI embedding Model "text-embedding-3-small"
dataset_df = dataset_df.drop(columns=['plot_embedding'])
dataset_df.head(5)

\nNumber of missing values in each column after removal:
plot                    0
genres                  0
runtime                14
cast                    1
num_mflix_comments      0
poster                 78
title                   0
fullplot               21
languages               1
directors              13
writers                13
awards                  0
imdb                    0
countries               0
type                    0
plot_embedding          1
rated                 284
metacritic            903
dtype: int64


Unnamed: 0,plot,genres,runtime,cast,num_mflix_comments,poster,title,fullplot,languages,directors,writers,awards,imdb,countries,type,rated,metacritic
0,Young Pauline is left a lot of money when her ...,[Action],199.0,"[Pearl White, Crane Wilbur, Paul Panzer, Edwar...",0,https://m.media-amazon.com/images/M/MV5BMzgxOD...,The Perils of Pauline,Young Pauline is left a lot of money when her ...,[English],"[Louis J. Gasnier, Donald MacKenzie]","[Charles W. Goddard (screenplay), Basil Dickey...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 4465, 'rating': 7.6, 'votes': 744}",[USA],movie,,
1,A penniless young man tries to save an heiress...,"[Comedy, Short, Action]",22.0,"[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...",0,https://m.media-amazon.com/images/M/MV5BNzE1OW...,From Hand to Mouth,As a penniless man worries about how he will m...,[English],"[Alfred J. Goulding, Hal Roach]",[H.M. Walker (titles)],"{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 10146, 'rating': 7.0, 'votes': 639}",[USA],movie,TV-G,
2,"Michael ""Beau"" Geste leaves England in disgrac...","[Action, Adventure, Drama]",101.0,"[Ronald Colman, Neil Hamilton, Ralph Forbes, A...",0,,Beau Geste,"Michael ""Beau"" Geste leaves England in disgrac...",[English],[Herbert Brenon],"[Herbert Brenon (adaptation), John Russell (ad...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16634, 'rating': 6.9, 'votes': 222}",[USA],movie,,
3,"Seeking revenge, an athletic young man joins t...","[Adventure, Action]",88.0,"[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...",1,https://m.media-amazon.com/images/M/MV5BMzU0ND...,The Black Pirate,A nobleman vows to avenge the death of his fat...,,[Albert Parker],"[Douglas Fairbanks (story), Jack Cunningham (a...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16654, 'rating': 7.2, 'votes': 1146}",[USA],movie,,
4,An irresponsible young millionaire changes his...,"[Action, Comedy, Romance]",58.0,"[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...",0,https://m.media-amazon.com/images/M/MV5BMTcxMT...,For Heaven's Sake,"The Uptown Boy, J. Harold Manners (Lloyd) is a...",[English],[Sam Taylor],"[Ted Wilde (story), John Grey (story), Clyde B...","{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 16895, 'rating': 7.6, 'votes': 918}",[USA],movie,PASSED,


In [45]:
def get_embedding(text):
    """Generate an embedding for the given text using OpenAI's API."""

    # Check for valid input
    if not text or not isinstance(text, str):
        return None

    try:
        # Call OpenAI API to get the embedding
        embedding = model.encode(text)
        return embedding.tolist()
    except Exception as e:
        print(f"Error in get_embedding: {e}")
        return None

In [46]:
# dataset_df["plot_embedding_optimised"] = dataset_df['plot'].apply(get_embedding)
dataset_df["plot_embedding_optimised"] = dataset_df['plot'].head(5).apply(get_embedding)

dataset_df.head()

Unnamed: 0,plot,genres,runtime,cast,num_mflix_comments,poster,title,fullplot,languages,directors,writers,awards,imdb,countries,type,rated,metacritic,plot_embedding_optimised
0,Young Pauline is left a lot of money when her ...,[Action],199.0,"[Pearl White, Crane Wilbur, Paul Panzer, Edwar...",0,https://m.media-amazon.com/images/M/MV5BMzgxOD...,The Perils of Pauline,Young Pauline is left a lot of money when her ...,[English],"[Louis J. Gasnier, Donald MacKenzie]","[Charles W. Goddard (screenplay), Basil Dickey...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 4465, 'rating': 7.6, 'votes': 744}",[USA],movie,,,"[-0.0027107351925224066, 0.005813805852085352,..."
1,A penniless young man tries to save an heiress...,"[Comedy, Short, Action]",22.0,"[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...",0,https://m.media-amazon.com/images/M/MV5BNzE1OW...,From Hand to Mouth,As a penniless man worries about how he will m...,[English],"[Alfred J. Goulding, Hal Roach]",[H.M. Walker (titles)],"{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 10146, 'rating': 7.0, 'votes': 639}",[USA],movie,TV-G,,"[0.006872688420116901, 0.024589575827121735, -..."
2,"Michael ""Beau"" Geste leaves England in disgrac...","[Action, Adventure, Drama]",101.0,"[Ronald Colman, Neil Hamilton, Ralph Forbes, A...",0,,Beau Geste,"Michael ""Beau"" Geste leaves England in disgrac...",[English],[Herbert Brenon],"[Herbert Brenon (adaptation), John Russell (ad...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16634, 'rating': 6.9, 'votes': 222}",[USA],movie,,,"[0.04522330313920975, -0.021780306473374367, 0..."
3,"Seeking revenge, an athletic young man joins t...","[Adventure, Action]",88.0,"[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...",1,https://m.media-amazon.com/images/M/MV5BMzU0ND...,The Black Pirate,A nobleman vows to avenge the death of his fat...,,[Albert Parker],"[Douglas Fairbanks (story), Jack Cunningham (a...","{'nominations': 0, 'text': '1 win.', 'wins': 1}","{'id': 16654, 'rating': 7.2, 'votes': 1146}",[USA],movie,,,"[0.01849355176091194, 0.023532547056674957, 0...."
4,An irresponsible young millionaire changes his...,"[Action, Comedy, Romance]",58.0,"[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...",0,https://m.media-amazon.com/images/M/MV5BMTcxMT...,For Heaven's Sake,"The Uptown Boy, J. Harold Manners (Lloyd) is a...",[English],[Sam Taylor],"[Ted Wilde (story), John Grey (story), Clyde B...","{'nominations': 1, 'text': '1 nomination.', 'w...","{'id': 16895, 'rating': 7.6, 'votes': 918}",[USA],movie,PASSED,,"[-0.026248369365930557, -0.005364720709621906,..."


In [48]:
def get_mongo_client(mongo_uri):
    """Establish connection to the MongoDB."""
    try:
        client = pymongo.MongoClient(mongo_uri)
        print("Connection to MongoDB successful")
        return client
    except pymongo.errors.ConnectionFailure as e:
        print(f"Connection failed: {e}")
        return None

mongo_uri = os.getenv("MONGO_URI")
if not mongo_uri:
    print("MONGO_URI not set in environment variables")

mongo_client = get_mongo_client(mongo_uri)

# Ingest data into MongoDB
db = mongo_client['movies2']
collection = db['movie_collection']

documents = dataset_df.to_dict('records')
# collection.insert_many(documents)

print("Data ingestion into MongoDB completed")

Connection to MongoDB successful
Data ingestion into MongoDB completed


In [55]:
def vector_search(user_query, collection):
    """
    Perform a vector search in the MongoDB collection based on the user query.

    Args:
    user_query (str): The user's query string.
    collection (MongoCollection): The MongoDB collection to search.

    Returns:
    list: A list of matching documents.
    """

    # Generate embedding for the user query
    query_embedding = get_embedding(user_query)
    print(query_embedding)

    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    # Define the vector search pipeline
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "queryVector": query_embedding,
                "path": "plot_embedding_optimised",
                "numCandidates": 150,  # Number of candidate matches to consider
                "limit": 5  # Return top 5 matches
            }
        },
        {
            "$project": {
                "_id": 0,  # Exclude the _id field
                # "plot_embedding_opitimzed": 0,  # Exclude the plot_embedding_opitimzed field
                "plot": 1,  # Include the plot field
                "title": 1,  # Include the title field
                "genres": 1, # Include the genres field
                "score": {
                    "$meta": "vectorSearchScore"  # Include the search score
                }
            }
        }
    ]

    # Execute the search
    results = collection.aggregate(pipeline)
    return list(results)

In [56]:
def handle_user_query(query, collection):

    get_knowledge = vector_search(query, collection)

    search_result = ''
    for result in get_knowledge:
        search_result += f"Title: {result.get('title', 'N/A')}, Plot: {result.get('plot', 'N/A')}\\n"

    completion = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are a movie recommendation system."},
            {"role": "user", "content": "Answer this user query: " + query + " with the following context: " + search_result}
        ]
    )

    print("Answer this user query: " + query + " with the following context: " + search_result)
    return (completion['choices'][0]['message']['content']), search_result

In [57]:
# Conduct query with retrieval of sources
query = "why can't I get married now? I'm still in grade 6 of elementary school."
response, source_information = handle_user_query(query, collection)

print(f"Response: {response}")
print(f"Source Information: \\n{source_information}")

[0.0028849677182734013, -0.013520942069590092, -0.05275743827223778, 0.03825893625617027, -0.0005258011515252292, -0.005293089430779219, 0.017407221719622612, 0.03405078127980232, 0.05018270015716553, 0.045042604207992554, -0.007639624178409576, -0.017039382830262184, 0.03192805126309395, -0.01955028995871544, -0.013311472721397877, -0.010042687878012657, -0.027405789121985435, -0.00982621219009161, -0.002962846076115966, 0.026916923001408577, -0.023979946970939636, 0.011946417391300201, -0.008009045384824276, -0.007865840569138527, 0.008289700374007225, 0.025750285014510155, 0.013283106498420238, 0.018295906484127045, 0.014368566684424877, 0.03164248913526535, -0.0023752793204039335, 0.0018194292206317186, -0.06668398529291153, -0.031227238476276398, -0.016868891194462776, -0.008029854856431484, -0.006907272152602673, 0.008609458804130554, 0.000860941712744534, -0.01365893054753542, -0.02718408964574337, -0.009528519585728645, -0.014821399934589863, 0.014563562348484993, -0.0651055946

Llama.generate: 245 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =   12147.10 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   254 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   39888.47 ms /   255 tokens


Answer this user query: why can't I get married now? I'm still in grade 6 of elementary school. with the following context: Title: The Perils of Pauline, Plot: Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take ...\nTitle: From Hand to Mouth, Plot: A penniless young man tries to save an heiress from kidnappers and help her secure her inheritance.\nTitle: For Heaven's Sake, Plot: An irresponsible young millionaire changes his tune when he falls for the daughter of a downtown minister.\nTitle: Beau Geste, Plot: Michael "Beau" Geste leaves England in disgrace and joins the infamous French Foreign Legion. He is reunited with his two brothers in North Africa, where they face greater danger from their...\nTitle: The Black Pirate, Plot: Seeking revenge, an athletic young man joins the pirate band responsible for his father's death.\n
Response: I see you're fee