In [305]:
import pandas as pd
df = pd.read_csv('../data/clustered_df.csv')

pd.set_option('display.max_columns', None) 
pd.set_option('display.width', 1000)  
df.head(3)

Unnamed: 0,female,gender_not_specified,male,not_cis,asexual,bi/pan/demi_sexual/queer,heterosexual,homosexual,income,romantic_relationship_intent,education_level_grouped,location_density,air_zodiac_sign,earth_zodiac_sign,fire_zodiac_sign,water_zodiac_sign,age,bio_length,likes_received,emoji_usage_rate,message_sent_count,interest_lifestyle,interest_health,interest_creativity,interest_education_culture,interest_entertainment,interest_social,cluster
0,0,1,0,0,0,0,0,1,2,0,1,2,0,1,0,0,56,44,173,0.36,75,0.333333,0.333333,0.0,0.333333,0.0,0.0,7
1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,40,301,107,0.42,35,0.333333,0.0,0.333333,0.333333,0.0,0.0,6
2,0,0,0,1,0,1,0,0,0,2,2,1,0,0,1,0,30,309,91,0.41,33,0.0,0.0,0.333333,0.333333,0.333333,0.0,6


In [306]:
#had help with from chatgpt to fix problems with non-binary, asexual and all
def sexuality_analysis(user1, user2):
    def get_preferences(user):
        if user.get('female', 0) == 1:
            gender = 'female'
        elif user.get('male', 0) == 1:
            gender = 'male'
        else:
            gender = None  

        if user.get('asexual', 0) == 1:
            pref = ['male', 'female'] #friends
        elif user.get('heterosexual', 0) == 1:
            pref = ['male'] if gender == 'female' else ['female']
        elif user.get('homosexual', 0) == 1:
            pref = [gender] if gender else ['male', 'female']
        elif user.get('bi/pan/demi_sexual/queer', 0) == 1:
            pref = ['male', 'female']
        else:
            pref = ['male', 'female']  # friends

        return pref, gender

    prefs1, gender1 = get_preferences(user1)
    prefs2, gender2 = get_preferences(user2)

    if gender1 is None or gender2 is None:
        return True

    return gender2 in prefs1 and gender1 in prefs2


In [None]:
import math


def calc_age_score(user1, user2):
    dif = abs(user1['age'] - user2['age'])
    # trying to make my final score more distinct from each potential match
    return math.exp(-dif / 6) 

In [308]:
def calc_intent_score(user1, user2):
    dif = abs(user1['romantic_relationship_intent'] - user2['romantic_relationship_intent'])
    if dif == 0:
        return 1.0
    elif dif == 1:
        return 0.5
    else:
        return 0.0    

In [309]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def calc_interests_score(user1, user2):
    interests = [
        'interest_lifestyle', 'interest_health', 'interest_creativity',
        'interest_education_culture', 'interest_entertainment', 'interest_social'
    ]
    i1 = np.array([user1[interests].values])
    i2 = np.array([user2[interests].values])
    return cosine_similarity(i1, i2)[0][0] 


In [310]:
sign_compatibilities = {
    'fire_zodiac_sign': ['fire_zodiac_sign', 'air_zodiac_sign'],
    'air_zodiac_sign': ['air_zodiac_sign', 'fire_zodiac_sign'],
    'earth_zodiac_sign': ['earth_zodiac_sign', 'water_zodiac_sign'],
    'water_zodiac_sign': ['water_zodiac_sign', 'earth_zodiac_sign']
}

In [311]:
def calc_astro_score(user1, user2):
    elements = ['fire_zodiac_sign', 'air_zodiac_sign', 'earth_zodiac_sign', 'water_zodiac_sign']
    
    for elem in elements:
        if user1[elem] == 1:
            compatible = sign_compatibilities[elem]
            return 1.0 if any(user2[c] == 1 for c in compatible) else 0.0
    return 0.0 

In [312]:
def recommendation(user1, user2, w_interest=0.20, w_age=0.4, w_intent=0.34, w_sign=0.006):
    interests_score = calc_interests_score(user1, user2)
    age_score = calc_age_score(user1, user2)
    intent_score = calc_intent_score(user1, user2)
    sign_score = calc_astro_score(user1, user2)
    
    return (
        w_interest * interests_score +
        w_age * age_score +
        w_intent * intent_score +
        w_sign * sign_score
    )

In [313]:
def potential_matches(user, df, top_n=5):
    same_cluster = df[df['cluster'] == user['cluster']].copy()

    compatible = same_cluster[
        same_cluster.apply(lambda x: sexuality_analysis(user, x), axis=1)
    ]
   
    compatible['score'] = compatible.apply(lambda x: recommendation(user, x), axis=1)

    return compatible.sort_values(by='score', ascending=False).head(top_n)


In [315]:
user = df.sample(1, random_state=59) 
print("User selected:\n")
print(user)
recommended = potential_matches(user.iloc[0], df)
print("\nPotential matches:")
print(recommended)
print("\nPerfect Match:")
print(recommended.sample(1))

User selected:

       female  gender_not_specified  male  not_cis  asexual  bi/pan/demi_sexual/queer  heterosexual  homosexual  income  romantic_relationship_intent  education_level_grouped  location_density  air_zodiac_sign  earth_zodiac_sign  fire_zodiac_sign  water_zodiac_sign  age  bio_length  likes_received  emoji_usage_rate  message_sent_count  interest_lifestyle  interest_health  interest_creativity  interest_education_culture  interest_entertainment  interest_social  cluster
21822       1                     0     0        0        0                         0             0           1       1                             1                        2                 0                0                  0                 1                  0   36          58             180              0.43                  47            0.333333         0.333333                  0.0                         0.0                0.333333              0.0        1

Potential matches:
       female  gen

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  compatible['score'] = compatible.apply(lambda x: recommendation(user, x), axis=1)
