In [28]:
import random
import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Define the service categories with gender likelihood (Female, Male)
SERVICE_CATEGORIES = [
    "houseCleaning", "electricity", "plumbing", "gardening", "painting", "carpentry", 
    "pestControl", "acRepair", "vehicleRepair", "applianceInstallation", "itSupport", 
    "homeSecurity", "interiorDesign", "windowCleaning", "furnitureAssembly"
]

# Gender likelihood for each service category (Female: higher likelihood for some categories)
SERVICE_GENDER_LIKELIHOOD = {
    "houseCleaning": "Female",
    "electricity": "Male",
    "plumbing": "Male",
    "gardening": "Female",
    "painting": "Female",
    "carpentry": "Male",
    "pestControl": "Male",
    "acRepair": "Male",
    "vehicleRepair": "Male",
    "applianceInstallation": "Male",
    "itSupport": "Male",
    "homeSecurity": "Male",
    "interiorDesign": "Female",
    "windowCleaning": "Female",
    "furnitureAssembly": "Male"
}

# Define the Algerian cities with geographic ranges (same as before)
algerian_cities = {
    "Algiers": {"lat_min": 36.5, "lat_max": 37.0, "lon_min": 2.6, "lon_max": 3.3},
    "Oran": {"lat_min": 35.5, "lat_max": 36.0, "lon_min": -1.0, "lon_max": -0.4},
    "Constantine": {"lat_min": 36.1, "lat_max": 36.5, "lon_min": 6.4, "lon_max": 6.9},
    "Annaba": {"lat_min": 36.7, "lat_max": 37.2, "lon_min": 7.5, "lon_max": 8.0},
    "Blida": {"lat_min": 36.3, "lat_max": 36.8, "lon_min": 2.4, "lon_max": 3.3},
    "Sétif": {"lat_min": 35.4, "lat_max": 36.6, "lon_min": 5.2, "lon_max": 6.6},
    "Tébessa": {"lat_min": 34.4, "lat_max": 36.0, "lon_min": 7.4, "lon_max": 8.8}
}

# Define the ranges for service and user IDs by city (same as before)
city_service_id_ranges = {
    "Algiers": (1, 200),
    "Oran": (201, 350),
    "Constantine": (351, 500),
    "Annaba": (501, 650),
    "Blida": (651, 800),
    "Sétif": (801, 950),
    "Tébessa": (951, 1100)
}

city_user_id_ranges = {
    "Algiers": (1, 400),
    "Oran": (401, 700),
    "Constantine": (701, 1000),
    "Annaba": (1001, 1300),
    "Blida": (1301, 1600),
    "Sétif": (1601, 1800),
    "Tébessa": (1801, 2000)
}

# Add a random offset to the location
def add_random_offset(location):
    lat, lon = location
    lat += random.gauss(0, 0.01)
    lon += random.gauss(0, 0.01)
    return (round(lat, 6), round(lon, 6))

# Generate a random location within a city's geographic range
def generate_location(assigned_locations, city):
    lat_min, lat_max = algerian_cities[city]["lat_min"], algerian_cities[city]["lat_max"]
    lon_min, lon_max = algerian_cities[city]["lon_min"], algerian_cities[city]["lon_max"]
    
    lat = random.uniform(lat_min, lat_max)
    lon = random.uniform(lon_min, lon_max)
    location = (round(lat, 6), round(lon, 6))
    location = add_random_offset(location)
    
    while location in assigned_locations:
        lat = random.uniform(lat_min, lat_max)
        lon = random.uniform(lon_min, lon_max)
        location = (round(lat, 6), round(lon, 6))
        location = add_random_offset(location)
    
    assigned_locations.add(location)
    return location

# Calculate distance between user and service provider locations
def calculate_distance(user_location, provider_location):
    return geodesic(user_location, provider_location).km

# Generate service data with gender
def generate_service_data():
    assigned_locations = set()
    service_data = []
    
    for city, (start_id, end_id) in city_service_id_ranges.items():
        for service_id in range(start_id, end_id + 1):
            location = generate_location(assigned_locations, city)
            service_category = random.choice(SERVICE_CATEGORIES)
            # Assign gender based on the service category
            gender = SERVICE_GENDER_LIKELIHOOD.get(service_category, random.choice(["Male", "Female"]))
            
            service_data.append({
                "service_id": service_id,
                "city": city,
                "provider_location_x": location[0],
                "provider_location_y": location[1],
                "service_category": service_category,
                "gender": gender,  # Added gender to the service data
                "review_avg": round(random.uniform(3.0, 5.0), 1),
                "review_count": random.randint(1, 50),
                "click_count": random.randint(0, 100),
                "provider_age": random.randint(25, 60)
            })
    
    return pd.DataFrame(service_data)

# Generate user data (no changes needed here for gender)
def generate_user_data(service_df):
    assigned_locations = set()
    user_data = []
    
    for city, (start_id, end_id) in city_user_id_ranges.items():
        city_services = service_df[service_df['city'] == city]
        
        for user_id in range(start_id, end_id + 1):
            location = generate_location(assigned_locations, city)
            service_categories_interest = random.sample(SERVICE_CATEGORIES, random.randint(2, 4))
            available_services = city_services[city_services['service_category'].isin(service_categories_interest)]
            
            num_reviews = random.randint(0, min(3, len(available_services)))
            reviewed_service_ids = random.sample(available_services['service_id'].tolist(), num_reviews)
            
            click_count_per_service = {
                sid: random.randint(1, 10) 
                for sid in reviewed_service_ids
            }
            
            favorite_services = random.sample(available_services['service_id'].tolist(), random.randint(1, 3))
            
            user_data.append({
                "user_id": user_id,
                "city": city,
                "location_x": location[0],
                "location_y": location[1],
                "age": random.randint(18, 65),
                "gender": random.choice(["Male", "Female"]),
                "service_categories_interest": service_categories_interest,
                "reviewed_service_ids": reviewed_service_ids,
                "click_count_per_service": click_count_per_service,
                "favorites": favorite_services
            })
    
    return pd.DataFrame(user_data)

# Generate datasets
service_df = generate_service_data()
user_df = generate_user_data(service_df)

# Optionally, save the data to CSV for further analysis
service_df.to_csv('service_data.csv', index=False)
user_df.to_csv('user_data.csv', index=False)

# Display the first few rows of each dataframe
print("Service Data:")
print(service_df.head())
print("\nUser Data:")
print(user_df.head())


Service Data:
   service_id     city  provider_location_x  provider_location_y  \
0           1  Algiers            36.788574             2.651042   
1           2  Algiers            36.739646             2.745843   
2           3  Algiers            36.591374             2.701977   
3           4  Algiers            36.660633             2.660858   
4           5  Algiers            36.527135             3.260517   

  service_category  gender  review_avg  review_count  click_count  \
0   interiorDesign  Female         4.1            38           20   
1    vehicleRepair    Male         3.9            48           13   
2    vehicleRepair    Male         4.6            44           52   
3   windowCleaning  Female         3.2            22           62   
4         painting  Female         3.2            19           11   

   provider_age  
0            48  
1            29  
2            59  
3            27  
4            44  

User Data:
   user_id     city  location_x  location_

In [22]:
service_df

Unnamed: 0,service_id,city,provider_location_x,provider_location_y,service_category,gender,review_avg,review_count,click_count,provider_age
0,1,Algiers,36.698749,2.630818,applianceInstallation,Male,4.6,36,54,44
1,2,Algiers,36.845574,2.990769,windowCleaning,Female,3.6,3,46,48
2,3,Algiers,36.510432,3.259104,electricity,Male,4.9,17,57,49
3,4,Algiers,36.790717,3.165165,interiorDesign,Female,3.5,14,22,26
4,5,Algiers,36.700429,2.920078,houseCleaning,Female,5.0,6,6,25
...,...,...,...,...,...,...,...,...,...,...
1095,1096,Tébessa,35.183454,8.768447,furnitureAssembly,Male,3.8,28,33,33
1096,1097,Tébessa,35.159503,8.484917,applianceInstallation,Male,4.2,44,76,27
1097,1098,Tébessa,34.502821,8.642028,applianceInstallation,Male,4.4,42,1,37
1098,1099,Tébessa,35.067966,8.046843,houseCleaning,Female,4.0,48,42,47


In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to calculate the distance between two locations (using Euclidean distance)
def calculate_distance(x1, y1, x2, y2):
    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)

# Function to generate recommendations for a user
def recommend_services(user_id, user_df, service_df):
    # Step 1: Get the user's data
    user_data = user_df[user_df['user_id'] == user_id].iloc[0]
    user_city = user_data['city']
    user_location = (user_data['location_x'], user_data['location_y'])
    user_interests = user_data['service_categories_interest']
    user_gender = user_data['gender']
    user_age = user_data['age']
    user_favorites = user_data['favorites']
    
    # Step 2: Filter services from the same city
    services_in_city = service_df[service_df['city'] == user_city]
    
    # Step 3: Compute the distance of each service from the user
    services_in_city['distance'] = services_in_city.apply(
        lambda row: calculate_distance(user_location[0], user_location[1], row['provider_location_x'], row['provider_location_y']), axis=1
    )
    
    # Step 4: Collaborative filtering - User similarity based on behavior (click_count and review_avg)
    # We need to reshape user_df to have a user-service interaction matrix
    interaction_matrix = user_df[['user_id', 'reviewed_service_ids', 'click_count_per_service']].explode(['reviewed_service_ids', 'click_count_per_service'])
    
    # Create interaction matrix: One user can have many services they interacted with (click or review)
    interaction_matrix['service_id'] = interaction_matrix['reviewed_service_ids']
    interaction_matrix['click_count'] = interaction_matrix['click_count_per_service']
    
    # Now let's pivot the data to create a user-service matrix where interactions are stored
    user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
    
    # Calculate cosine similarity between the target user and all other users
    user_vector = user_service_matrix.loc[user_id].values.reshape(1, -1)
    similarity_scores = cosine_similarity(user_vector, user_service_matrix.values)
    similarity_df = pd.DataFrame(similarity_scores.T, index=user_service_matrix.index, columns=['similarity'])
    
    # Get the most similar users (top 10 most similar users)
    similar_users = similarity_df.sort_values(by='similarity', ascending=False).iloc[1:11]
    
    # Step 5: Calculate the weighted interactions of similar users
    similar_users_interactions = user_service_matrix.loc[similar_users.index].mean()
    
    # Step 6: Content-based filtering - Services matching the user's interest categories
    matching_services = services_in_city[services_in_city['service_category'].isin(user_interests)]
    
    # Step 7: Rank services based on collaborative filtering (click_count, reviews) and content-based (user interest)
    # We'll combine scores from both approaches
    services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
    services_in_city['cb_score'] = services_in_city['service_category'].apply(
        lambda category: 1 if category in user_interests else 0
    )
    
    # Step 8: Include favorites in the recommendation ranking
    services_in_city['favorite_match'] = services_in_city['service_id'].apply(
        lambda sid: 1 if sid in user_favorites else 0
    )
    
    # Final ranking score based on collaborative filtering, content-based filtering, proximity, and favorites
    services_in_city['final_score'] = (
        services_in_city['cf_score'] * 0.3 + 
        services_in_city['cb_score'] * 0.3 + 
        (1 / (1 + services_in_city['distance'])) * 0.2 +  # The closer the service, the higher the score
        services_in_city['favorite_match'] * 0.1  # Bonus for favorites
    )
    
    # Step 9: Sort services by final score
    recommended_services = services_in_city.sort_values(by='final_score', ascending=False)
    
    # Return the top recommended services
    return recommended_services[['service_id', 'city', 'service_category', 'review_avg', 'click_count', 'provider_experience', 'distance', 'final_score']]

# Example usage
user_id = 2  # Replace with an actual user ID from your user_df
recommended_services = recommend_services(user_id, user_df, service_df)

# Display the top recommended services
recommended_services[['service_id','city', 'service_category', 'review_avg', 'click_count', 'provider_experience', 'final_score']].head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['distance'] = services_in_city.apply(
  user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

Unnamed: 0,service_id,city,service_category,review_avg,click_count,provider_experience,final_score
123,124,Algiers,houseCleaning,3.8,41,12,15.33755
157,158,Algiers,houseCleaning,3.3,45,17,9.953004
133,134,Algiers,pestControl,4.7,70,16,8.488439
131,132,Algiers,houseCleaning,3.0,3,11,4.41921
60,61,Algiers,pestControl,3.1,48,10,4.128344
118,119,Algiers,painting,3.3,25,6,3.706608
74,75,Algiers,pestControl,3.1,14,1,2.688253
58,59,Algiers,pestControl,3.1,63,19,2.349639
5,6,Algiers,pestControl,4.8,13,9,0.645596
116,117,Algiers,pestControl,4.5,100,17,0.569113


In [26]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to calculate the distance between two locations (using Euclidean distance)
def calculate_distance(x1, y1, x2, y2):
    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)

# Function to calculate age difference score
def age_match_score(user_age, provider_age, max_diff=10):
    age_diff = abs(user_age - provider_age)
    # If the age difference is less than max_diff, we return a score based on the difference
    if age_diff <= max_diff:
        return 1 - (age_diff / max_diff)  # Score between 0 and 1
    else:
        return 0  # No match if the difference is too high

# Function to calculate gender match score
def gender_match_score(user_gender, provider_gender):
    return 1 if user_gender == provider_gender else 0

# Function to generate recommendations for a user
def recommend_services(user_id, user_df, service_df):
    # Step 1: Get the user's data
    user_data = user_df[user_df['user_id'] == user_id].iloc[0]
    user_city = user_data['city']
    user_location = (user_data['location_x'], user_data['location_y'])
    user_interests = user_data['service_categories_interest']
    user_gender = user_data['gender']
    user_age = user_data['age']
    user_favorites = user_data['favorites']
    
    # Step 2: Filter services from the same city
    services_in_city = service_df[service_df['city'] == user_city]
    
    # Step 3: Compute the distance of each service from the user
    services_in_city['distance'] = services_in_city.apply(
        lambda row: calculate_distance(user_location[0], user_location[1], row['provider_location_x'], row['provider_location_y']), axis=1
    )
    
    # Step 4: Collaborative filtering - User similarity based on behavior (click_count and review_avg)
    # We need to reshape user_df to have a user-service interaction matrix
    interaction_matrix = user_df[['user_id', 'reviewed_service_ids', 'click_count_per_service']].explode(['reviewed_service_ids', 'click_count_per_service'])
    
    # Create interaction matrix: One user can have many services they interacted with (click or review)
    interaction_matrix['service_id'] = interaction_matrix['reviewed_service_ids']
    interaction_matrix['click_count'] = interaction_matrix['click_count_per_service']
    
    # Now let's pivot the data to create a user-service matrix where interactions are stored
    user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
    
    # Calculate cosine similarity between the target user and all other users
    user_vector = user_service_matrix.loc[user_id].values.reshape(1, -1)
    similarity_scores = cosine_similarity(user_vector, user_service_matrix.values)
    similarity_df = pd.DataFrame(similarity_scores.T, index=user_service_matrix.index, columns=['similarity'])
    
    # Get the most similar users (top 10 most similar users)
    similar_users = similarity_df.sort_values(by='similarity', ascending=False).iloc[1:11]
    
    # Step 5: Calculate the weighted interactions of similar users
    similar_users_interactions = user_service_matrix.loc[similar_users.index].mean()
    
    # Step 6: Content-based filtering - Services matching the user's interest categories
    matching_services = services_in_city[services_in_city['service_category'].isin(user_interests)]
    
    # Step 7: Rank services based on collaborative filtering (click_count, reviews) and content-based (user interest)
    # We'll combine scores from both approaches
    services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
    services_in_city['cb_score'] = services_in_city['service_category'].apply(
        lambda category: 1 if category in user_interests else 0
    )
    
    # Step 8: Include favorites in the recommendation ranking
    services_in_city['favorite_match'] = services_in_city['service_id'].apply(
        lambda sid: 1 if sid in user_favorites else 0
    )
    
    # Step 9: Gender match score
    services_in_city['gender_match'] = services_in_city.apply(
    lambda row: gender_match_score(user_gender, row['gender']), axis=1
)

    
    # Step 10: Age match score
    services_in_city['age_match'] = services_in_city.apply(
        lambda row: age_match_score(user_age, row['provider_age']), axis=1
    )
    
    # Final ranking score based on collaborative filtering, content-based filtering, proximity, favorites, gender match, and age match
    services_in_city['final_score'] = (
        services_in_city['cf_score'] * 0.3 + 
        services_in_city['cb_score'] * 0.3 + 
        (1 / (1 + services_in_city['distance'])) * 0.2 +  # The closer the service, the higher the score
        services_in_city['favorite_match'] * 0.1 +  # Bonus for favorites
        services_in_city['gender_match'] * 0.05 +  # Gender match
        services_in_city['age_match'] * 0.05  # Age match
    )
    
    # Step 11: Sort services by final score
    recommended_services = services_in_city.sort_values(by='final_score', ascending=False)
    
    # Return the top recommended services
    return recommended_services[['service_id','city', 'service_category','gender', 'review_avg','review_count', 'click_count','provider_age', 'final_score']]

# Example usage
user_id = 2  # Replace with an actual user ID from your user_df
recommended_services = recommend_services(user_id, user_df, service_df)

# Display the top recommended services
recommended_services[['service_id','city', 'service_category', 'review_avg','gender','review_count', 'click_count','provider_age', 'final_score']].head(21)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['distance'] = services_in_city.apply(
  user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

Unnamed: 0,service_id,city,service_category,review_avg,gender,review_count,click_count,provider_age,final_score
189,190,Algiers,interiorDesign,4.9,Female,7,99,30,29.015587
101,102,Algiers,vehicleRepair,3.3,Male,34,3,44,12.739966
152,153,Algiers,gardening,4.7,Female,17,23,34,4.797175
140,141,Algiers,interiorDesign,4.4,Female,5,17,49,4.747676
121,122,Algiers,applianceInstallation,4.2,Male,31,49,49,3.827347
91,92,Algiers,carpentry,4.3,Male,18,64,44,2.966284
73,74,Algiers,interiorDesign,3.4,Female,27,100,31,2.844022
71,72,Algiers,applianceInstallation,3.8,Male,41,96,44,2.388283
72,73,Algiers,acRepair,4.0,Male,28,79,60,2.35786
64,65,Algiers,homeSecurity,4.1,Male,15,19,59,2.104793


In [20]:
user_df

Unnamed: 0,user_id,city,location_x,location_y,age,gender,service_categories_interest,reviewed_service_ids,click_count_per_service,favorites
0,1,Algiers,36.658461,2.701499,46,Male,"[painting, pestControl]",[171],{171: 1},"[171, 66, 168]"
1,2,Algiers,36.650230,2.971385,41,Female,"[interiorDesign, vehicleRepair]","[190, 102, 4]","{190: 3, 102: 1, 4: 5}","[74, 160]"
2,3,Algiers,36.875321,2.856457,39,Male,"[plumbing, homeSecurity]","[186, 184]","{186: 8, 184: 5}",[60]
3,4,Algiers,36.926206,3.051790,29,Female,"[applianceInstallation, furnitureAssembly]",[],{},[148]
4,5,Algiers,36.766042,3.234332,21,Female,"[applianceInstallation, electricity]",[172],{172: 3},"[63, 172]"
...,...,...,...,...,...,...,...,...,...,...
1995,1996,Tébessa,35.567262,8.550020,56,Male,"[applianceInstallation, acRepair, itSupport]","[1024, 1026, 996]","{1024: 3, 1026: 6, 996: 2}",[959]
1996,1997,Tébessa,34.686215,8.556322,38,Male,"[applianceInstallation, homeSecurity]","[1097, 1041]","{1097: 4, 1041: 10}",[968]
1997,1998,Tébessa,35.776016,8.560394,36,Male,"[interiorDesign, carpentry]",[1037],{1037: 9},"[969, 1081]"
1998,1999,Tébessa,35.544009,8.457915,42,Male,"[vehicleRepair, itSupport, homeSecurity]",[1087],{1087: 10},"[965, 1039, 959]"


In [27]:
#new
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to calculate the distance between two locations (using Euclidean distance)
def calculate_distance(x1, y1, x2, y2):
    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)

# Function to calculate age difference score
def age_match_score(user_age, provider_age, max_diff=10):
    age_diff = abs(user_age - provider_age)
    # If the age difference is less than max_diff, we return a score based on the difference
    if age_diff <= max_diff:
        return 1 - (age_diff / max_diff)  # Score between 0 and 1
    else:
        return 0  # No match if the difference is too high

# Function to calculate gender match score
def gender_match_score(user_gender, provider_gender):
    return 1 if user_gender == provider_gender else 0

# Function to handle new providers with no reviews/ratings
def handle_new_provider(row):
    if row['review_avg'] == 0 and row['review_count'] == 0:
        return 0.1  # Assign a low score for new providers
    return 1  # Providers with reviews get the normal score

# Function to generate recommendations for a user
def recommend_services(user_id, user_df, service_df):
    # Step 1: Get the user's data
    user_data = user_df[user_df['user_id'] == user_id].iloc[0]
    user_city = user_data['city']
    user_location = (user_data['location_x'], user_data['location_y'])
    user_interests = user_data['service_categories_interest']
    user_gender = user_data['gender']
    user_age = user_data['age']
    user_favorites = user_data['favorites']
    
    # Step 2: Filter services from the same city
    services_in_city = service_df[service_df['city'] == user_city]
    
    # Step 3: Compute the distance of each service from the user
    services_in_city['distance'] = services_in_city.apply(
        lambda row: calculate_distance(user_location[0], user_location[1], row['provider_location_x'], row['provider_location_y']), axis=1
    )
    
    # Step 4: Collaborative filtering - User similarity based on behavior (click_count and review_avg)
    interaction_matrix = user_df[['user_id', 'reviewed_service_ids', 'click_count_per_service']].explode(['reviewed_service_ids', 'click_count_per_service'])
    interaction_matrix['service_id'] = interaction_matrix['reviewed_service_ids']
    interaction_matrix['click_count'] = interaction_matrix['click_count_per_service']
    user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
    
    # Calculate cosine similarity between the target user and all other users
    user_vector = user_service_matrix.loc[user_id].values.reshape(1, -1)
    similarity_scores = cosine_similarity(user_vector, user_service_matrix.values)
    similarity_df = pd.DataFrame(similarity_scores.T, index=user_service_matrix.index, columns=['similarity'])
    similar_users = similarity_df.sort_values(by='similarity', ascending=False).iloc[1:11]
    
    # Step 5: Calculate the weighted interactions of similar users
    similar_users_interactions = user_service_matrix.loc[similar_users.index].mean()
    
    # Step 6: Content-based filtering - Services matching the user's interest categories
    matching_services = services_in_city[services_in_city['service_category'].isin(user_interests)]
    
    # Step 7: Rank services based on collaborative filtering (click_count, reviews) and content-based (user interest)
    services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
    services_in_city['cb_score'] = services_in_city['service_category'].apply(
        lambda category: 1 if category in user_interests else 0
    )
    
    # Step 8: Include favorites in the recommendation ranking
    services_in_city['favorite_match'] = services_in_city['service_id'].apply(
        lambda sid: 1 if sid in user_favorites else 0
    )
    
    # Step 9: Gender match score
    services_in_city['gender_match'] = services_in_city.apply(
        lambda row: gender_match_score(user_gender, row['gender']), axis=1
    )

    # Step 10: Age match score
    services_in_city['age_match'] = services_in_city.apply(
        lambda row: age_match_score(user_age, row['provider_age']), axis=1
    )
    
    # Step 11: Handle new providers (no reviews/rating)
    services_in_city['new_provider_score'] = services_in_city.apply(handle_new_provider, axis=1)
    
    # Final ranking score based on collaborative filtering, content-based filtering, proximity, favorites, gender match, and age match
    services_in_city['final_score'] = (
        services_in_city['cf_score'] * 0.3 + 
        services_in_city['cb_score'] * 0.3 + 
        (1 / (1 + services_in_city['distance'])) * 0.2 +  # The closer the service, the higher the score
        services_in_city['favorite_match'] * 0.1 +  # Bonus for favorites
        services_in_city['gender_match'] * 0.05 +  # Gender match
        services_in_city['age_match'] * 0.05  # Age match
    )
    
    # Multiply by new_provider_score to adjust ranking for new providers
    services_in_city['final_score'] *= services_in_city['new_provider_score']
    
    # Step 12: Sort services by final score
    recommended_services = services_in_city.sort_values(by='final_score', ascending=False)
    
    # Return the top recommended services
    return recommended_services[['service_id','city', 'service_category','gender', 'review_avg','review_count', 'click_count','provider_age', 'final_score']]

# Example usage
user_id = 2  # Replace with an actual user ID from your user_df
recommended_services = recommend_services(user_id, user_df, service_df)

# Display the top recommended services
recommended_services[['service_id','city', 'service_category', 'review_avg','gender','review_count', 'click_count','provider_age', 'final_score']].head(21)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['distance'] = services_in_city.apply(
  user_service_matrix = interaction_matrix.pivot_table(index='user_id', columns='service_id', values='click_count', aggfunc='sum', fill_value=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_in_city['cf_score'] = services_in_city['service_id'].map(similar_users_interactions)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

Unnamed: 0,service_id,city,service_category,review_avg,gender,review_count,click_count,provider_age,final_score
189,190,Algiers,interiorDesign,4.9,Female,7,99,30,29.015587
101,102,Algiers,vehicleRepair,3.3,Male,34,3,44,12.739966
152,153,Algiers,gardening,4.7,Female,17,23,34,4.797175
140,141,Algiers,interiorDesign,4.4,Female,5,17,49,4.747676
121,122,Algiers,applianceInstallation,4.2,Male,31,49,49,3.827347
91,92,Algiers,carpentry,4.3,Male,18,64,44,2.966284
73,74,Algiers,interiorDesign,3.4,Female,27,100,31,2.844022
71,72,Algiers,applianceInstallation,3.8,Male,41,96,44,2.388283
72,73,Algiers,acRepair,4.0,Male,28,79,60,2.35786
64,65,Algiers,homeSecurity,4.1,Male,15,19,59,2.104793
