In [13]:
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import faiss
import numpy as np

# Hybrid profile matching.
# 1. A dense semantic search (sentence embeddings + FAISS) selects the
#    candidate profiles closest to the query.
# 2. Those candidates are re-ranked by adding a lexical TF-IDF term.
# Both terms are "distance-like": a lower combined score is a better match.

# Load pre-trained model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sample profiles
profiles = [
    "I love hiking and outdoor adventures. Looking for someone who shares my passion for nature.",
    "Avid reader and coffee enthusiast. Looking for intellectual conversations and a great company.",
    "Tech enthusiast and gamer. Looking for someone who enjoys video games and technology discussions.",
    "Fitness lover and healthy lifestyle advocate. Seeking someone who values fitness and well-being.",
    "Art lover and museum goer. Searching for someone to explore art galleries and exhibitions with.",
    "Love to meditate & do yoga. I'm a minimalist. Trying to become a vegan. Looking for someone who is spiritual.",
]

# Convert profiles to embeddings; FAISS only accepts float32 matrices
profile_embeddings = model.encode(profiles)
profile_embeddings_np = np.asarray(profile_embeddings, dtype='float32')

# Build the FAISS index (exact brute-force search, L2 metric)
index = faiss.IndexFlatL2(profile_embeddings_np.shape[1])
index.add(profile_embeddings_np)

# TF-IDF Vectorizer fitted on the same corpus, used for the lexical signal
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(profiles)

# Query profile
query_profile = "I enjoy luxury travels and tasty food. Looking for an extroverted partner who enjoys a lavish life."
query_embedding = model.encode([query_profile]).astype('float32')

# Search the index for the top matches using embeddings.
# Never request more neighbours than the index holds: FAISS pads missing
# results with label -1, and profiles[-1] would silently return the last
# profile as if it were a genuine match.
k = min(5, index.ntotal)  # Number of top matches to retrieve
D, I = index.search(query_embedding, k)

# Get the indices of top matches from embeddings
top_embedding_matches = I[0]

# Compute TF-IDF similarity between the query and every profile
query_tfidf = vectorizer.transform([query_profile])
cosine_similarities = cosine_similarity(query_tfidf, tfidf_matrix).flatten()

# Combine results from embeddings and TF-IDF.
# NOTE(review): the embedding term is the raw FAISS L2-metric distance on
# embeddings that are not normalised in this script, while the TF-IDF term
# (1 - cosine similarity) stays within [0, 1]. The embedding term may
# therefore dominate the ranking -- consider normalising or weighting the
# two terms if the lexical signal should carry comparable influence.
combined_scores = [
    (profile_idx, distance + (1 - cosine_similarities[profile_idx]))
    for profile_idx, distance in zip(top_embedding_matches, D[0])
    if profile_idx >= 0  # defensive: skip FAISS "no result" placeholders
]
combined_scores.sort(key=lambda x: x[1])  # Sort by combined score (ascending)

# Get the indices of top combined matches
top_combined_matches = [idx for idx, score in combined_scores]

# Print the results
print("Top matches:")
for i in top_combined_matches:
    print(profiles[i])

Top matches:
Fitness lover and healthy lifestyle advocate. Seeking someone who values fitness and well-being.
Love to meditate & do yoga. I'm a minimalist. Trying to become a vegan. Looking for someone who is spiritual.
I love hiking and outdoor adventures. Looking for someone who shares my passion for nature.
Art lover and museum goer. Searching for someone to explore art galleries and exhibitions with.
Avid reader and coffee enthusiast. Looking for intellectual conversations and a great company.
