In [1]:
# Step up one directory so the relative path used later
# ("examples/data/swipes.csv") is resolved from the parent directory.
# NOTE(review): this is relative to the current directory, so running the cell
# a second time in the same kernel moves up one more level.
%cd ..

/home/zain/projects/matchmaker


In [11]:
import pandas as pd
from matchmaker.data import Interactions

import cudf
import cugraph
import cupy as cp
import itertools
from tqdm import tqdm 

In [3]:
# Read the swipe log from disk into a host-side (CPU) pandas frame;
# it is copied to the GPU in a later cell.
print("Importing data...")
df = pd.read_csv(filepath_or_buffer="examples/data/swipes.csv")

Importing data...


In [4]:
# Copy the host-side pandas frame into a cuDF (GPU) DataFrame; all later
# cells work on `gdf` rather than `df`.
gdf = cudf.from_pandas(df)

In [None]:
# Keep only the three columns the graph needs: who swiped, who was swiped on,
# and the `like` value. `edges` is also reused by the like-count cell below.
edges = gdf[['decidermemberid', 'othermemberid', 'like']]

# Directed graph: each edge runs from the deciding member to the other member.
G = cugraph.Graph(directed=True)

# Build the graph from the trimmed `edges` frame (previously the full `gdf`
# was passed, leaving the column selection above unused by the graph).
# NOTE(review): every row becomes an edge, presumably including swipes that
# were not likes — confirm that is the intended input for PageRank.
G.from_cudf_edgelist(
    edges,
    source="decidermemberid",
    destination="othermemberid",
    edge_attr="like",  # a single column is used as the edge weight
    store_transposed=True)

In [16]:
# Score every vertex of G with cuGraph's GPU PageRank. Later cells join the
# result on its 'vertex' column and rank by its 'pagerank' column.
damping = 0.85
pr = cugraph.pagerank(G, alpha=damping)

In [17]:
# Total of the `like` column over all swipes aimed AT each member.
likes_received = (
    edges.groupby('othermemberid')['like']
    .sum()
    .reset_index()
    .rename(columns={'othermemberid': 'vertex', 'like': 'likes_received'})
)

# Total of the `like` column over all swipes made BY each member.
likes_given = (
    edges.groupby('decidermemberid')['like']
    .sum()
    .reset_index()
    .rename(columns={'decidermemberid': 'vertex', 'like': 'likes_given'})
)

# Attach both totals to the PageRank table. Left joins keep every PageRank
# row; members missing from either total come back null and are zero-filled.
stats = (
    pr.merge(likes_received, on='vertex', how='left')
    .merge(likes_given, on='vertex', how='left')
    .fillna(0)
)

# Received-to-given ratio; the small epsilon keeps the denominator non-zero
# for members who gave no likes.
epsilon = 1e-6
stats['like_ratio'] = stats['likes_received'] / (stats['likes_given'] + epsilon)


In [18]:
# Build a member -> gender lookup from both sides of every swipe row:
# the deciding member's gender and the other member's gender.
decider_gender = gdf[['decidermemberid', 'decidergender']].rename(
    columns={'decidermemberid': 'vertex', 'decidergender': 'gender'}
)
other_gender = gdf[['othermemberid', 'othergender']].rename(
    columns={'othermemberid': 'vertex', 'othergender': 'gender'}
)

# One row per member, taking the first NON-NULL gender seen. Null genders are
# dropped before de-duplicating; otherwise keep='first' could retain a null row
# even though a later row for the same member carries a value.
gender_map = (
    cudf.concat([decider_gender, other_gender])
    .dropna(subset=['gender'])
    .drop_duplicates(subset='vertex', keep='first')
)

# Attach gender to the stats table. Any 'gender' column left over from a
# previous run of this cell is removed first, so re-running does not produce
# gender_x / gender_y columns. Members without a known gender stay null.
stats = (
    stats.drop(columns=['gender'], errors='ignore')
    .merge(gender_map, on='vertex', how='left')
)

In [19]:
# Ten members with the highest PageRank score, best first.
top_users = (
    stats
    .sort_values(by='pagerank', ascending=False)
    .head(n=10)
)
print(top_users)


        vertex  pagerank  likes_received  likes_given  like_ratio gender
23473   371545  0.002049              15        17850    0.000840      M
23477   100243  0.001795               6        19316    0.000311      M
23478   498532  0.001494              20        13327    0.001501      M
23479   679634  0.001454              49        12903    0.003798      M
23482   147887  0.001429              14        12159    0.001151      M
17497  3811977  0.001380             353          847    0.416765      F
23483   855632  0.001322               8        11482    0.000697      M
28053  3866975  0.001182              87         1177    0.073917      F
23484   315033  0.001141              73         9417    0.007752      M
22530  1624195  0.000869              15         7723    0.001942      M
