In [89]:
import pandas as pd
df = pd.read_csv('../data/clustered_df.csv')

pd.set_option('display.max_columns', None) 
pd.set_option('display.width', 1000)  
df.head(3)

Unnamed: 0,female,gender_not_specified,male,not_cis,asexual,bi/pan/demi_sexual/queer,heterosexual,homosexual,income,romantic_relationship_intent,education_level_grouped,location_density,air_zodiac_sign,earth_zodiac_sign,fire_zodiac_sign,water_zodiac_sign,age,bio_length,likes_received,emoji_usage_rate,message_sent_count,interest_lifestyle,interest_health,interest_creativity,interest_education_culture,interest_entertainment,interest_social,cluster
0,0,1,0,0,0,0,0,1,2,0,1,2,0,1,0,0,56,44,173,0.36,75,0.333333,0.333333,0.0,0.333333,0.0,0.0,7
1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,40,301,107,0.42,35,0.333333,0.0,0.333333,0.333333,0.0,0.0,6
2,0,0,0,1,0,1,0,0,0,2,2,1,0,0,1,0,30,309,91,0.41,33,0.0,0.0,0.333333,0.333333,0.333333,0.0,6


In [90]:
def sexuality_analysis(user1, user2):
    def get_preferences(user):
        if user['female'] == 1:
            gender = 'female'
        elif user['male'] == 1:
            gender = 'male'
        else:
            return [], None  # not_cis or undefined
        
        if user['heterosexual'] == 1:
            pref = ['male'] if gender == 'female' else ['female']
        elif user['homosexual'] == 1:
            pref = [gender]
        elif user['bi/pan/demi_sexual/queer'] == 1:
            pref = ['male', 'female']
        elif user['asexual'] == 1:
            pref = [] 
        else:
            pref = []
        return pref, gender

    prefs1, gender1 = get_preferences(user1)
    prefs2, gender2 = get_preferences(user2)

    if not prefs1 or not prefs2 or gender1 is None or gender2 is None:
        return False

    return gender2 in prefs1 and gender1 in prefs2


In [91]:
def calc_age_score(user1, user2):
    dif = abs(user1['age'] - user2['age'])
    if dif <= 5:
        return 1.0
    elif dif <= 10:
        return 0.5
    elif dif <= 15:
        return 0.3
    else:
        return 0.1

In [92]:
def calc_intent_score(user1, user2):
    dif = abs(user1['romantic_relationship_intent'] - user2['romantic_relationship_intent'])
    if dif == 0:
        return 1.0
    elif dif == 1:
        return 0.5
    else:
        return 0    

In [93]:
from sklearn.metrics.pairwise import cosine_similarity

def calc_interests_score(user1, user2):
    interests = [
        'interest_lifestyle', 'interest_health', 'interest_creativity',
        'interest_education_culture', 'interest_entertainment', 'interest_social'
    ]
    i1 = user1[interests].values.reshape(1, -1)
    i2 = user2[interests].values.reshape(1, -1)
    return cosine_similarity(i1, i2)[0][0] 


In [94]:
sign_compatibilities = {
    'fire_zodiac_sign': ['fire_zodiac_sign', 'air_zodiac_sign'],
    'air_zodiac_sign': ['air_zodiac_sign', 'fire_zodiac_sign'],
    'earth_zodiac_sign': ['earth_zodiac_sign', 'water_zodiac_sign'],
    'water_zodiac_sign': ['water_zodiac_sign', 'earth_zodiac_sign']
}

In [95]:
def calc_astro_score(user1, user2):
    elements = ['fire_zodiac_sign', 'air_zodiac_sign', 'earth_zodiac_sign', 'water_zodiac_sign']
    
    for elem in elements:
        if user1[elem] == 1:
            compatible = sign_compatibilities[elem]
            return 1.0 if any(user2[c] == 1 for c in compatible) else 0.0
    return 0.0 

In [96]:
def recommendation(user1, user2, w_interest=0.35, w_age=0.2, w_intent=0.35, w_sign=0.1):
    interests_score = calc_interests_score(user1, user2)
    age_score = calc_age_score(user1, user2)
    intent_score = calc_intent_score(user1, user2)
    sign_score = calc_astro_score(user1, user2)
    
    return (
        w_interest * interests_score +
        w_age * age_score +
        w_intent * intent_score +
        w_sign * sign_score
    )

In [97]:
def potential_matches(user, df, top_n=5):
    same_cluster = df[df['cluster'] == user['cluster']].copy()

    compatible = same_cluster[
        same_cluster.apply(lambda x: sexuality_analysis(user, x), axis=1)
    ]

    compatible['score'] = compatible.apply(lambda x: recommendation(user, x), axis=1)

    return compatible.sort_values(by='score', ascending=False).head(top_n)


In [102]:
user = df.sample(1, random_state=43)
print("User selected:\n")
print(user)
recommended = potential_matches(user.iloc[0], df)
print("\nPotential matches:")
print(recommended)
print("\nPerfect Match:")
print(recommended.sample(1))

User selected:

      female  gender_not_specified  male  not_cis  asexual  bi/pan/demi_sexual/queer  heterosexual  homosexual  income  romantic_relationship_intent  education_level_grouped  location_density  air_zodiac_sign  earth_zodiac_sign  fire_zodiac_sign  water_zodiac_sign  age  bio_length  likes_received  emoji_usage_rate  message_sent_count  interest_lifestyle  interest_health  interest_creativity  interest_education_culture  interest_entertainment  interest_social  cluster
7396       1                     0     0        0        0                         1             0           0       1                             0                        1                 1                1                  0                 0                  0   26         467              29              0.35                  54                 0.0              0.0             0.666667                         0.0                0.333333              0.0        5

Potential matches:
       female  gende

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  compatible['score'] = compatible.apply(lambda x: recommendation(user, x), axis=1)
