In [None]:
! pip install faiss-cpu
! pip install sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from to

In [None]:
import ast

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer


def _parse_tags(value):
    """Turn one raw CSV cell into a list of tag strings.

    ``pd.read_csv`` never produces list objects: a cell that was written as a
    list comes back as text such as ``"['chess', 'hiking']"``, and an empty
    cell comes back as NaN.  Downstream code concatenates these cells with
    ``+``, joins them with ``", "`` and builds ``set``s from them, which only
    gives per-tag results when the cells are real lists (on a string those
    operations work character by character).

    Accepted inputs:
      * list / tuple / set        -> returned as a list of stripped strings
      * text of a Python or JSON list literal -> parsed with ast.literal_eval
      * any other text            -> split on commas
      * NaN / None / other types  -> empty list
    """
    if isinstance(value, (set, frozenset)):
        # Sets have no stable order; sort so the joined text is deterministic.
        value = sorted(str(tag) for tag in value)
    if isinstance(value, (list, tuple)):
        return [str(tag).strip() for tag in value if str(tag).strip()]
    if not isinstance(value, str):
        return []

    text = value.strip()
    if not text:
        return []

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError):
        parsed = None  # not a literal, e.g. "chess, hiking"

    if isinstance(parsed, (set, frozenset)):
        parsed = sorted(str(tag) for tag in parsed)
    if isinstance(parsed, (list, tuple)):
        return [str(tag).strip() for tag in parsed if str(tag).strip()]

    # Fallback: treat the cell as plain comma-separated text.
    return [part.strip() for part in text.split(",") if part.strip()]


df = pd.read_csv('test (1) (1).csv')
#df = df[df["looking_for_partner"] == True].reset_index(drop=True)

# Convert the tag columns from raw CSV text into real lists of tags before
# anything embeds or intersects them.
df["hobbies"] = df["hobbies"].map(_parse_tags)
df["interests"] = df["interests"].map(_parse_tags)

# Numeric encoding of gender; values missing from the map become NaN.
gender_map = {"male": 0, "female": 1}

df["gender_code"] = df["gender"].map(gender_map)

# Sentence-embedding model used to turn each profile's tag text into a vector.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# Create embeddings
def get_text_vector(row):
    """Embed one profile's hobbies and interests as a single vector.

    The ``hobbies`` and ``interests`` entries of ``row`` are concatenated with
    ``+``, joined into one comma-separated sentence and passed to the
    module-level ``model``.

    NOTE(review): this assumes both entries are lists of strings.  If a cell is
    still a raw string, ``+`` concatenates the two strings and ``join`` then
    iterates over individual characters.

    Returns whatever ``model.encode`` returns for that sentence.
    """
    tags = row["hobbies"] + row["interests"]
    sentence = ", ".join(tags)
    return model.encode(sentence)

# One embedding per profile row, stacked into a 2-D array (rows follow df order).
row_embeddings = df.apply(get_text_vector, axis=1)
text_vectors = np.vstack(row_embeddings)

# Numeric features: age, angkatan, gender
numeric_columns = ["age", "angkatan", "gender_code"]
numeric_frame = df[numeric_columns].astype(np.float32)
cat_features = numeric_frame.to_numpy()

# Concatenate vectors: [age, angkatan, gender_code, embedding...] per row.
# NOTE(review): the numeric columns go into the index unscaled, so a one-unit
# difference in any of them adds 1.0 to the squared distance FAISS reports,
# next to whatever the embedding contributes -- confirm this weighting is intended.
full_vectors = np.concatenate((cat_features, text_vectors), axis=1).astype(np.float32)

# FAISS index: IndexFlatL2 is an exact search that returns squared L2 distances.
_, dim = full_vectors.shape
index = faiss.IndexFlatL2(dim)
index.add(full_vectors)

# Matching function
def shared_tags(row1, row2):
    """Count the tags two profiles have in common.

    For each of the ``hobbies`` and ``interests`` fields, the distinct values
    present in both rows are counted, and the two counts are added together.
    A value that appears under ``hobbies`` in one row and ``interests`` in the
    other is not counted.

    Both arguments only need to support ``row[field]`` lookup and hold
    iterables of hashable items.
    """
    return sum(
        len(set(row1[field]).intersection(row2[field]))
        for field in ("hobbies", "interests")
    )

def find_matches(user_idx, top_k=10, max_dist=2.5, max_results=3):
    """Return the closest acceptable matches for one user.

    The user's row of ``full_vectors`` is used to query the FAISS ``index``;
    the neighbours are then filtered by hard rules and the closest survivors
    are returned.

    Args:
        user_idx: Row position of the user in ``full_vectors`` / ``df``.
        top_k: Number of neighbours to inspect, not counting the user.
        max_dist: Largest distance accepted.  ``index`` is an ``IndexFlatL2``,
            so this is compared against *squared* L2 distances.
        max_results: Maximum number of matches returned.

    Returns:
        A list of ``(name, distance, shared_tag_count)`` tuples, sorted by
        ascending distance, at most ``max_results`` long (possibly empty).

    A candidate is rejected when it is the user, is further than ``max_dist``,
    has the same ``gender`` as the user, differs in ``age`` by more than 2, or
    differs in ``angkatan`` by more than 1.

    NOTE(review): ``gender_code`` (0/1) is part of the indexed vector while
    same-gender candidates are rejected, so every accepted candidate already
    carries +1.0 of squared distance from that coordinate.  Likewise an age
    gap of 2 adds 4.0, which exceeds the default ``max_dist`` -- confirm the
    thresholds are calibrated with this in mind.
    """
    query = full_vectors[user_idx].reshape(1, -1)

    # One extra neighbour because the user's own vector is normally returned
    # too.  Never request more neighbours than the index holds: FAISS pads the
    # missing slots with label -1.
    k = min(top_k + 1, index.ntotal)
    if k < 1:
        return []
    D, I = index.search(query, k)

    # FAISS labels are row positions in full_vectors, so rows are fetched by
    # position (iloc) rather than by index label.
    user = df.iloc[user_idx]
    matches = []

    for idx, dist in zip(I[0], D[0]):
        if idx < 0:  # padding label: no neighbour in this slot
            continue
        if idx == user_idx:
            continue
        if dist > max_dist:
            continue

        candidate = df.iloc[idx]
        if candidate["gender"] == user["gender"]:
            continue
        if abs(candidate["age"] - user["age"]) > 2:
            continue
        if abs(candidate["angkatan"] - user["angkatan"]) > 1:
            continue

        shared = shared_tags(user, candidate)
        matches.append((candidate["name"], dist, shared))

    return sorted(matches, key=lambda match: match[1])[:max_results]

# Run matching: print up to three suggestions for every profile in df.
# Rows are addressed by label 0..len(df)-1, i.e. the default index is expected.
for i in range(len(df)):
    user_name, gender = df.loc[i, ["name", "gender"]]
    print(f"\nMatches for {user_name} ({gender}):")

    results = find_matches(i)
    if results:
        for match_name, dist, shared in results:
            print(f"  - {match_name} (distance={dist:.2f}, shared_tags={shared})")
        continue

    print("  No suitable matches found.")


Matches for finally to (male):
  No suitable matches found.

Matches for abdicate er (female):
  - ladder accustom (distance=1.05, shared_tags=24)
  - ack bob (distance=1.05, shared_tags=20)
  - hideous sombrero (distance=1.06, shared_tags=23)

Matches for gutter hourly (male):
  - downturn miter (distance=1.04, shared_tags=29)
  - abdicate er (distance=1.09, shared_tags=23)

Matches for brightly solid (female):
  No suitable matches found.

Matches for mint king (female):
  - nor though (distance=1.06, shared_tags=21)

Matches for stereotype till (male):
  No suitable matches found.

Matches for oh not (female):
  - sleepily meh (distance=1.07, shared_tags=14)
  - mean boo (distance=1.08, shared_tags=20)
  - nor though (distance=2.05, shared_tags=14)

Matches for grotesque yum (male):
  No suitable matches found.

Matches for reassuringly ha (female):
  No suitable matches found.

Matches for ack bob (male):
  - abdicate er (distance=1.05, shared_tags=20)

Matches for indeed courageo