# Hybrid Search Demo
## Create Graph Schema

In [1]:
>>> import os
>>> os.environ["TG_HOST"] = "http://127.0.0.1"
>>> os.environ["TG_USERNAME"] = "tigergraph"
>>> os.environ["TG_PASSWORD"] = "tigergraph"

In [2]:
>>> graph_schema = {
...     "graph_name": "KGRec",
...     "nodes": {
...         "User": {
...             "primary_key": "id",
...             "attributes": {
...                 "id": "INT",
...             },
...         },
...         "Song": {
...             "primary_key": "id",
...             "attributes": {
...                 "id": "INT",
...                 "description": "STRING",
...             },
...             "vector_attributes": {"emb_1": 1536},
...         },
...     },
...     "edges": {
...         "downloaded": {
...             "is_directed_edge": False,
...             "from_node_type": "User",
...             "to_node_type": "Song",
...         },
...         "similar_to": {
...             "is_directed_edge": False,
...             "from_node_type": "Song",
...             "to_node_type": "Song",
...             "attributes": {
...                 "score": "DOUBLE",
...             },
...         },
...     },
... }

In [3]:
>>> from tigergraphx import Graph
>>> G = Graph(graph_schema)

## Load Data

In [7]:
>>> loading_job_config = {
...     "loading_job_name": "loading_job",
...     "files": [
...         {
...             "file_alias": "f_song",
...             "file_path": "/home/tigergraph/data/KGRec/song_embeddings.csv",
...             "csv_parsing_options": {
...                 "separator": ",",
...                 "header": True,
...             },
...             "node_mappings": [
...                 {
...                     "target_name": "Song",
...                     "attribute_column_mappings": {
...                         "id": "item_id",
...                         "description": "description",
...                         "emb_1": 'SPLIT($"embedding", " ")',
...                     },
...                 }
...             ],
...         },
...         {
...             "file_alias": "f_downloads",
...             "file_path": "/home/tigergraph/data/KGRec/implicit_lf_dataset.csv",
...             "csv_parsing_options": {
...                 "separator": "\t",
...                 "header": False,
...             },
...             "node_mappings": [
...                 {
...                     "target_name": "User",
...                     "attribute_column_mappings": {
...                         "id": 0,
...                     },
...                 },
...                 {
...                     "target_name": "Song",
...                     "attribute_column_mappings": {
...                         "id": 1,
...                     },
...                 }
...             ],
...             "edge_mappings": [
...                 {
...                     "target_name": "downloaded",
...                     "source_node_column": 0,
...                     "target_node_column": 1,
...                 }
...             ],
...         },
...         {
...             "file_alias": "f_similar_to",
...             "file_path": "/home/tigergraph/data/KGRec/similar_songs.csv",
...             "csv_parsing_options": {
...                 "separator": ",",
...                 "header": True,
...             },
...             "edge_mappings": [
...                 {
...                     "target_name": "similar_to",
...                     "source_node_column": "song_id_1",
...                     "target_node_column": "song_id_2",
...                     "attribute_column_mappings": {
...                         "score": "similarity_score",
...                     },
...                 }
...             ],
...         },
...     ],
... }

In [8]:
>>> G.load_data(loading_job_config)

2025-03-07 11:26:21,494 - tigergraphx.core.managers.data_manager - INFO - Initiating data load for job: loading_job...
2025-03-07 11:26:34,448 - tigergraphx.core.managers.data_manager - INFO - Data load completed successfully.


## Graph-based Similarity Search

In [82]:
>>> graph_search_results = G.run_query("graph_based_similarity_search", params={"input": 17418216, "k": 4})
>>> for result in graph_search_results:
...     for key, songs in result.items():
...         for song in songs:
...             print(song)

{'v_id': '4425', 'v_type': 'Song', 'attributes': {'id': 4425, 'description': "Thousand Foot Krutch vocalist Trevor McNevan -LRB- from NewReleaseTuesday -RRB- : `` This is another firecracker , more of an adrenaline rock song .\\nI could n't help but picture NASCAR drivers flying by on the track to this .\\nI love big , anthemic songs that are calls to action - so this one is case and point . ''", '@sum_score': 4.889628140900233, '@visited': False}}
{'v_id': '5148', 'v_type': 'Song', 'attributes': {'id': 5148, 'description': "TFK frontman/songwriter Trevor McNevan had the idea for this song for some time .\\nHe told NewReleaseTuesday : `` Although it 's in the same vein as some of our other high-octane songs , like ` Fire It Up , ' it 's quite different .\\nI wanted it to have that U2 Vertigo type vibe ; that big stadium energy with single notes on the main guitar riff , instead of chords . ''\\nThis was a challenge for McNevan to sing as its one of the highest songs vocally he 's writt

## Vector-based Similarity Search

In [83]:
>>> import numpy as np
>>> df = G.get_neighbors(start_nodes=17418216, start_node_type="User", edge_types="downloaded")
>>> song_ids = set(df['id'])
>>> songs = G.fetch_nodes(song_ids, vector_attribute_name="emb_1", node_type="Song")
>>> embeddings = np.array(list(songs.values()))
>>> user_embedding = np.mean(embeddings, axis=0)
>>> print(embeddings.shape)

(59, 1536)


In [84]:
print(user_embedding.shape)

(1536,)


In [85]:
>>> vector_search_results = G.search(
...     data=user_embedding.tolist(),
...     vector_attribute_name="emb_1",
...     node_type="Song",
...     limit=4,
...     return_attributes=["id", "description"]
... )
>>> for node in vector_search_results:
...     print(node)

{'id': 2424, 'distance': 0.08361697, 'description': "Lead singer Christian Lindskog : `` This was a possible title for the record for me .\\nIt was one of the first things I wrote and was very much tied to the intro piece .\\nDuring my time in Africa I dealt with a lot of conflict about my feelings towards what I was seeing .\\nThere is so much devastation it is hard not to become numb to it and just shut down .\\nWhen I came home I found the same thing was really true for everything we have to deal with from our TV sets .\\nI had n't seen any TV for 6 weeks and when I turned it back on I was so shocked by what I was seeing and I realized we become numb to the things that come out of our TV everyday .\\nThe lyric was actually pretty much improvised in the studio .\\nIt was one of the first vocals I did .\\nWith every record we make it seems like I never have the lyrics ready until the last minute , and sometime not even then !\\nSo I ended up working on things at the mic .\\nThis was d

## Hybrid Search

In [86]:
# Extract graph-based recommendations
graph_recs = []
for result in graph_search_results:
    if isinstance(result, dict):  # Ensure result is a dictionary
        for key, songs in result.items():
            if isinstance(songs, list):  # Ensure songs is a list
                for song in songs:
                    if isinstance(song, dict) and 'attributes' in song:
                        graph_recs.append({
                            "id": int(song.get('v_id', 0)),  # Default ID to 0 if missing
                            "graph_score": song['attributes'].get('@sum_score', 0),  # Default to 0 if missing
                            "description": song['attributes'].get('description', 'No description available')  # Default description
                        })

# Extract vector-based recommendations
vector_recs = [
    {
        "id": int(node.get("id", 0)),  # Default ID to 0 if missing
        "vector_distance": node.get("distance", 1.0),  # Default max distance to 1.0
        "description": node.get("description", "No description available")  # Default description
    }
    for node in vector_search_results
]

# Convert to DataFrame
df_graph = pd.DataFrame(graph_recs)
df_vector = pd.DataFrame(vector_recs)

# Convert `id` column to int before merging
df_graph['id'] = df_graph['id'].astype(int)
df_vector['id'] = df_vector['id'].astype(int)

# Normalize Graph Scores
if not df_graph.empty and 'graph_score' in df_graph:
    df_graph['graph_score_norm'] = (df_graph['graph_score'] - df_graph['graph_score'].min()) / \
                                   (df_graph['graph_score'].max() - df_graph['graph_score'].min())
else:
    df_graph['graph_score_norm'] = 0  # Default normalization if empty

# Normalize Vector Scores (inverse because lower is better)
if not df_vector.empty and 'vector_distance' in df_vector:
    df_vector['vector_score_norm'] = (df_vector['vector_distance'].max() - df_vector['vector_distance']) / \
                                     (df_vector['vector_distance'].max() - df_vector['vector_distance'].min())
else:
    df_vector['vector_score_norm'] = 0  # Default normalization if empty

# Merge both DataFrames
df_merged = pd.merge(df_graph, df_vector, on='id', how='outer')

# Fill missing scores and descriptions
df_merged['graph_score_norm'].fillna(0, inplace=True)
df_merged['vector_score_norm'].fillna(0, inplace=True)
df_merged['description_x'].fillna(df_merged['description_y'], inplace=True)
df_merged.rename(columns={"description_x": "description"}, inplace=True)
df_merged.drop(columns=["description_y"], inplace=True)

# Compute Hybrid Score with weight α = 0.5
alpha = 0.5
df_merged['hybrid_score'] = alpha * df_merged['graph_score_norm'] + (1 - alpha) * df_merged['vector_score_norm']

# Sort by Hybrid Score and select top 10
df_sorted = df_merged.sort_values(by='hybrid_score', ascending=False).head(4)

# Print results one by one
for _, row in df_sorted.iterrows():
    print(f"ID: {row['id']}")
    print(f"Hybrid Score: {row['hybrid_score']:.4f}")
    print(f"Description: {row['description']}\n" + "-" * 80)

ID: 4425
Hybrid Score: 0.5000
Description: Thousand Foot Krutch vocalist Trevor McNevan -LRB- from NewReleaseTuesday -RRB- : `` This is another firecracker , more of an adrenaline rock song .\nI could n't help but picture NASCAR drivers flying by on the track to this .\nI love big , anthemic songs that are calls to action - so this one is case and point . ''
--------------------------------------------------------------------------------
ID: 5996
Hybrid Score: 0.5000
Description: Frontman Justin Pierre told Alternative Press that the genesis of this song harks back to 2007 : `` The original idea for this song came while we were recording Even If It Kills Me .\nI had a few lines for verses and part of the chorus , but I was n't sure where it was going .\nThere was n't enough time to explore it back then , so we saved it for this record .\nI had this strange image in my head of two people sitting on the roof of a house at night in the fall , shivering slightly and silently together ; the

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged['graph_score_norm'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged['vector_score_norm'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are

## Drop Graph

In [7]:
>>> G.drop_graph()

2025-03-06 15:09:49,562 - tigergraphx.core.managers.schema_manager - INFO - Dropping graph: KGRec...
2025-03-06 15:09:53,129 - tigergraphx.core.managers.schema_manager - INFO - Graph dropped successfully.


---