In [262]:
# Progress message displayed before the dependency-installation cell runs.
print("Installing requirements....")

Installing requirements....


In [263]:
# Install the third-party packages imported further down (faker, scikit-surprise).
# NOTE(review): no versions are pinned here, so a fresh run may pull different
# releases — consider pinning once the working versions are confirmed.
%pip install faker scikit-surprise

Note: you may need to restart the kernel to use updated packages.


In [264]:
from faker import Faker
import polars as pl
import numpy as np
import pyarrow.parquet as pq
from surprise.model_selection import train_test_split
from surprise import SVD

import pandas as pd

from surprise import Dataset, NormalPredictor, Reader


In [265]:
def init_profil():
    """Build the table of predefined visitor profiles.

    Every profile carries a display name ("Nom"), the list of POI ids it
    likes ("pos") and the list of POI ids it dislikes ("neg"). The id lists
    are kept exactly as authored, including ids that appear more than once
    in the same list.

    Returns:
        pandas.DataFrame: one row per profile, columns "Nom", "pos", "neg".
    """
    # Each record is (profile name, liked POI ids, disliked POI ids).
    profile_rows = [
        (
            "Fan de la nature",
            [161902431, 6256757776, 5771053254, 8731063849, 9609413254, 8615466894, 9356148774, 304351397, 304374138],
            [304850843, 9964615559, 6264550437, 2514192427, 11186120804, 9663434784],
        ),
        (
            "Historique (médieval)",
            [9964615559, 2080454089, 8309291920, 9767828163, 10815282268, 11006240930, 6635074831, 205161375, 312031207, 9964615559],
            [267878387, 6264550437, 6256757776, 304882303, 310235557],
        ),
        (
            "Monument typique (Grand lieu)",
            [6580307576, 249292417, 267878387],
            [8731063849, 8731063849, 6635074831, 9767828163],
        ),
        (
            "Historique (19,20ème)",
            [267878387, 6264550437, 2514258897, 2084125343, 407714718, 34050681, 271394429, 310235557, 368996291, 393969161],
            [312415707, 291231959, 9356148774, 11186120804, 6256757776, 34050692, 304374138],
        ),
        (
            "Artistique",
            [2514192427, 9663434784, 251316201, 11186120804, 291231959],
            [9356148774, 407714718, 11006240930, 9964615559, 310235557, 393969161],
        ),
        (
            "Antiquité",
            [4729709152, 874341418, 9864187809, 34050692],
            [267878387, 9609413254, 271394429, 6635074831, 6580307576],
        ),
        (
            "Fan de musée",
            [34050681, 393969161, 34050692, 160079640, 251316201, 252382829, 291231959, 311512162, 34050692],
            [9609413254, 6580307576, 2084125343, 407714718],
        ),
        (
            "Histoire (général)",
            [251316201, 252382829, 258816379, 291232006, 304882303, 310235557],
            [6256757776, 5771053254, 2514192427, 6580307576],
        ),
        (
            "Religieux",
            [251466390, 304850843, 307675986, 304850843, 10922394383],
            [251316201],
        ),
    ]
    return pd.DataFrame(profile_rows, columns=["Nom", "pos", "neg"])


In [266]:
def init_user(df_profil):
    """Create one synthetic user per profile found in ``df_profil``.

    User ``i`` is attached to the ``i``-th row of ``df_profil`` (by position,
    so the index of ``df_profil`` does not matter) and inherits that row's
    liked/disliked POI lists. Last and first names come from Faker; the
    generator is not seeded here, so names differ between runs.

    Args:
        df_profil: DataFrame with columns "Nom", "pos" and "neg", as built by
            ``init_profil``.

    Returns:
        pandas.DataFrame: columns "Id" (0..n-1), "Nom", "Prénom", "Profil",
        "Pos" and "Neg", one row per profile.
    """
    fake = Faker()

    # Derive the user count from the profile table instead of hard-coding it,
    # so adding or removing a profile cannot desynchronise the two tables.
    profile_names = df_profil["Nom"].tolist()
    nb_profil = len(profile_names)

    # All last names are drawn before the first names.
    noms = [fake.last_name() for _ in range(nb_profil)]
    prenoms = [fake.first_name() for _ in range(nb_profil)]

    return pd.DataFrame({
        "Id": list(range(nb_profil)),
        "Nom": noms,
        "Prénom": prenoms,
        "Profil": profile_names,
        # Positional copy of each profile's liked / disliked POI lists.
        "Pos": df_profil["pos"].tolist(),
        "Neg": df_profil["neg"].tolist(),
    })



In [267]:

def add_quokka(df_user, pos, neg, quokka_id=10):
    """Append the "Quokka" user to the user table.

    The new row has an empty "Profil" and carries the caller-supplied liked
    and disliked POI lists. ``df_user`` itself is not modified; a new
    DataFrame is returned.

    Args:
        df_user: existing user table (columns "Id", "Nom", "Prénom",
            "Profil", "Pos", "Neg").
        pos: POI ids liked by Quokka; stored as a single cell.
        neg: POI ids disliked by Quokka; stored as a single cell.
        quokka_id: value of the "Id" column for the new row. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        pandas.DataFrame: ``df_user`` followed by the Quokka row, re-indexed
        from 0.

    Raises:
        ValueError: if ``quokka_id`` is already used in ``df_user["Id"]``;
            two rows sharing an id would be indistinguishable downstream.
    """
    if "Id" in df_user.columns and df_user["Id"].eq(quokka_id).any():
        raise ValueError(
            f"User id {quokka_id!r} is already present in df_user; "
            "pass a different quokka_id."
        )

    df_quokka = pd.DataFrame({
        "Id": [quokka_id],
        "Nom": ["Quokka"],
        "Prénom": ["Quokka"],
        "Profil": [""],
        # Wrapped in a list so the whole id list lands in one cell.
        "Pos": [pos],
        "Neg": [neg],
    })

    return pd.concat([df_user, df_quokka], ignore_index=True)

In [268]:

def init_poi_data(path="../../data/transformed/poi_clean_category_geo.parquet"):
    """Load the POI table from a Parquet file as a pandas DataFrame.

    The file is read with polars, the "type" and "geometry" columns are
    dropped, and the result is converted to pandas.

    Args:
        path: location of the Parquet file. Defaults to the path that was
            previously hard-coded, which is relative to the working directory.

    Returns:
        pandas.DataFrame: the POI table without "type" and "geometry".
    """
    df_geo = pl.read_parquet(path)
    return df_geo.drop(["type", "geometry"]).to_pandas()


In [269]:

def transform_to_user_item_interactions(df, df_poi):
    """Flatten per-user liked/disliked POI lists into (user, item, rating) rows.

    For every user, each id in "Pos" yields a row with rating 1, followed by
    each id in "Neg" with rating 0. Ids are emitted in list order and
    repeated ids are kept.

    Args:
        df: user table with columns "Id", "Pos" and "Neg" ("Pos"/"Neg" hold
            iterables of POI ids).
        df_poi: POI table; only its "id" column is used, to check that every
            referenced POI exists. Its index is irrelevant.

    Returns:
        pandas.DataFrame: columns "UserID", "ItemID", "rating".

    Raises:
        IndexError: if a user references a POI id that is absent from
            ``df_poi["id"]``.
    """
    # Build the membership set once instead of scanning df_poi per rating.
    known_poi_ids = set(df_poi["id"].tolist())
    interactions = []

    for user_id, liked, disliked in zip(df["Id"], df["Pos"], df["Neg"]):
        # Liked POIs first (rating 1), then disliked POIs (rating 0).
        for rating, poi_ids in ((1, liked), (0, disliked)):
            for poi_id in poi_ids:
                if poi_id not in known_poi_ids:
                    raise IndexError(
                        f"POI id {poi_id!r} referenced by user {user_id!r} "
                        "was not found in df_poi['id']"
                    )
                interactions.append((user_id, poi_id, rating))

    return pd.DataFrame(interactions, columns=["UserID", "ItemID", "rating"])



In [270]:

def find_similarity(df_user_transformed, test_size=0.33, random_state=42):
    """Fit a Surprise SVD model on the user/POI interaction table.

    The interactions are loaded with a rating scale of (0, 1) and split with
    ``train_test_split``; the model is fitted on the training part only and
    the held-out part is discarded.

    Args:
        df_user_transformed: DataFrame with columns "UserID", "ItemID" and
            "rating"; other columns are ignored.
        test_size: share of the interactions held out of training. Defaults
            to 0.33, the value previously hard-coded.
        random_state: seed used for both the split and the SVD
            initialisation, so repeated runs give the same model. Defaults
            to 42, the seed the split already used.

    Returns:
        The fitted ``SVD`` instance.
    """
    reader = Reader(rating_scale=(0, 1))

    # Surprise expects the columns in the order: user id, item id, rating.
    data = Dataset.load_from_df(df_user_transformed[["UserID", "ItemID", "rating"]], reader)
    trainset, _ = train_test_split(data, test_size=test_size, random_state=random_state)

    # Seed the factor initialisation too; seeding the split alone still left
    # the trained model different on every run.
    algo = SVD(random_state=random_state)
    algo.fit(trainset)

    return algo


In [271]:
def main_collaborative(pos, neg):
    """Run the collaborative-filtering pipeline end to end.

    Builds the profile table, derives the synthetic users, appends the
    Quokka user with the given preferences, loads the POI table, flattens
    everything into user/item/rating rows and fits the model on them.
    A progress message is printed before each stage.

    Args:
        pos: POI ids liked by the Quokka user.
        neg: POI ids disliked by the Quokka user.

    Returns:
        The fitted model returned by ``find_similarity``.
    """
    print("Init profil....")
    profiles = init_profil()

    print("Init user....")
    users = add_quokka(init_user(profiles), pos, neg)

    print("Init POI....")
    poi_table = init_poi_data()
    interactions = transform_to_user_item_interactions(users, poi_table)

    print("Find similariry....")
    return find_similarity(interactions)