In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
posts_df = pd.read_csv('datasets/postsData.csv')
users_df = pd.read_csv('datasets/users.csv')

In [3]:
melted_df = users_df.melt(id_vars=['id'],
                    value_vars=[col for col in users_df.columns if col.startswith('interested_posts__')],
                    var_name='post_type',
                    value_name='interested_posts')

melted_df = melted_df.dropna(subset=['interested_posts'])

merged_df = melted_df.merge(posts_df, left_on='interested_posts', right_on='id')

merged_df['content'] = merged_df['title'] + " " + merged_df['description']

In [4]:
merged_df[merged_df['id_x'] == '3Be4eTsgUNbRcu05ehVjm5PGbkG2']

Unnamed: 0,id_x,post_type,interested_posts,userId,id_y,title,description,image,updated_at,created_at,type,status,interest_count,content
19,3Be4eTsgUNbRcu05ehVjm5PGbkG2,interested_posts__001,IYLbeI1Zrjon644mIQsx,Hmsep7fiZROUABHOTFOsjBhDJ1d2,IYLbeI1Zrjon644mIQsx,Lukisan Pesisir Tropis,Lukisan ini menggambarkan keindahan pesisir t...,https://storage.googleapis.com/barterbuddy.app...,2024-06-12T09:57:15+00:00,2024-06-12T09:57:15+00:00,Barang,Tersedia,1,Lukisan Pesisir Tropis Lukisan ini menggambar...
163,3Be4eTsgUNbRcu05ehVjm5PGbkG2,interested_posts__003,P8yF6I2WkMX0DbKEGvGC,VvYs1Nkad2VtziFNHT8DfirnzXO2,P8yF6I2WkMX0DbKEGvGC,Kursi Rotan Bekas,"Kursi rotan bekas dengan bantal empuk, ada beb...",https://storage.googleapis.com/barterbuddy.app...,2024-06-12T09:57:57+00:00,2024-06-12T09:57:57+00:00,Barang,Tersedia,13,Kursi Rotan Bekas Kursi rotan bekas dengan ban...
301,3Be4eTsgUNbRcu05ehVjm5PGbkG2,interested_posts__004,IZ61JUYxZ49AZJnyDRbe,BXThcmLFl7WAOttPa0NobNza1bv1,IZ61JUYxZ49AZJnyDRbe,Sepatu Kasual Vans Authentic,"Vans Authentic, warna biru, ukuran 39, masih d...",https://storage.googleapis.com/barterbuddy.app...,2024-06-12T09:59:15+00:00,2024-06-12T09:59:15+00:00,Barang,Tersedia,4,"Sepatu Kasual Vans Authentic Vans Authentic, w..."
322,3Be4eTsgUNbRcu05ehVjm5PGbkG2,interested_posts__005,79hicPAJdnKHvzWYKMCv,0KX5pZM33cMkDCw2tIxPWk8Cram1,79hicPAJdnKHvzWYKMCv,Jam tangan Saphire Submariner,"Jam tangan Saphire Submariner bekas, tahan air...",https://storage.googleapis.com/barterbuddy.app...,2024-06-12T09:53:04+00:00,2024-06-12T09:53:04+00:00,Barang,Tersedia,7,Jam tangan Saphire Submariner Jam tangan Saphi...
369,3Be4eTsgUNbRcu05ehVjm5PGbkG2,interested_posts__002,i6apqdZ9tEj8kdjaBZhX,6GzrihosqMOYAYEHE5JrdjlMU5H3,i6apqdZ9tEj8kdjaBZhX,Lenovo ThinkPad X1 Carbon,"Laptop bekas Lenovo ThinkPad X1 Carbon, tahan ...",https://storage.googleapis.com/barterbuddy.app...,2024-06-12T09:56:42+00:00,2024-06-12T09:56:42+00:00,Barang,Tersedia,1,Lenovo ThinkPad X1 Carbon Laptop bekas Lenovo ...


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

merged_df['content'] = merged_df['content'].fillna('')

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())
tfidf_df['user_id'] = merged_df['id_x'].astype('category').cat.codes.values
tfidf_df['post_id'] = merged_df['id_y'].astype('category').cat.codes.values

n_users = merged_df['id_x'].nunique()
n_posts = merged_df['id_y'].nunique()

tfidf_df = tfidf_df[tfidf_df['user_id'] < n_users]
tfidf_df = tfidf_df[tfidf_df['post_id'] < n_posts]

print(n_users)
print(n_posts)
tfidf_df

300
400


Unnamed: 0,10,10t,11,12,13,14,144hz,15,19,20,...,yang,yoga,z6,zara,zenbook,zenfone,zenith,zte,user_id,post_id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0,65
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,1,138
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.38723,0.0,0.0,0.0,0.0,0.0,2,206
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,3,268
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.180361,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,4,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,83,376
494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,90,376
495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,93,376
496,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,101,260


In [6]:
# X = tfidf_df[['user_id', 'post_id']].values
# post_freq = tfidf_df['post_id'].value_counts().to_dict()
# tfidf_df['post_freq'] = tfidf_df['post_id'].map(post_freq)
# y = tfidf_df['post_freq']

In [7]:
X = tfidf_df[['user_id', 'post_id']].values
X

array([[  0,  65],
       [  1, 138],
       [  2, 206],
       [  3, 268],
       [  4,  10],
       [106,  10],
       [273,  10],
       [290,  10],
       [ 39,  10],
       [ 49,  10],
       [ 62,  10],
       [ 63,  10],
       [ 68,  10],
       [ 88,  10],
       [ 52,  10],
       [  5,  93],
       [  6,  54],
       [  7, 191],
       [  8, 372],
       [  9, 116],
       [ 10,  75],
       [ 11, 126],
       [ 12,   6],
       [ 13,  26],
       [ 14,  14],
       [ 15, 215],
       [ 16, 124],
       [ 17, 159],
       [ 18, 150],
       [ 19, 243],
       [ 20, 146],
       [ 21, 152],
       [ 22,  38],
       [ 23, 179],
       [ 24,  96],
       [ 25, 213],
       [ 26, 300],
       [ 27, 190],
       [ 28,  58],
       [ 29, 142],
       [ 30, 289],
       [ 31, 287],
       [ 32,  52],
       [ 33,  46],
       [ 34,  95],
       [ 35, 101],
       [ 36, 167],
       [ 37, 342],
       [ 38, 183],
       [ 39,   0],
       [ 40,  99],
       [ 41, 106],
       [ 42,

In [8]:
tfidf_df['post_freq'] = merged_df['interest_count']

y = tfidf_df['post_freq']

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('> Train set posts: {}'.format(len(y_train)))
print('> Test set posts: {}'.format(len(y_test)))



> Train set posts: 398
> Test set posts: 100


In [10]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
y_train = scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_test = scaler.transform(y_test.values.reshape(-1, 1)).flatten()

In [11]:
X_train

array([[103, 157],
       [195,  98],
       [242,  45],
       [194, 376],
       [188,  15],
       [ 72, 291],
       [ 53, 340],
       [159, 367],
       [ 15, 215],
       [197, 369],
       [148, 313],
       [101, 260],
       [134, 334],
       [ 32,  52],
       [ 37, 251],
       [237, 345],
       [239,  18],
       [ 97, 232],
       [193, 172],
       [265, 338],
       [ 47,  81],
       [ 40, 321],
       [270, 383],
       [ 12,   6],
       [275, 192],
       [ 36, 167],
       [ 21, 152],
       [ 95, 218],
       [247,  85],
       [236, 123],
       [112, 251],
       [195, 157],
       [217, 195],
       [ 14,  14],
       [  7, 191],
       [140,  53],
       [ 52,   3],
       [ 83, 251],
       [ 44, 212],
       [ 76,   8],
       [130,  50],
       [ 92, 217],
       [ 13,  26],
       [202, 207],
       [ 11, 223],
       [161, 308],
       [106,  10],
       [ 98,  39],
       [ 35, 101],
       [  6,  54],
       [ 81, 363],
       [295,  55],
       [  3,

In [12]:
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

In [13]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout

def RecommenderNet(n_users, n_posts, embedding_size=128):
    user_input = Input(shape=(1,), name='user')
    user_embedding = Embedding(n_users, embedding_size, name='user_embedding')(user_input)
    user_flat = Flatten()(user_embedding)

    post_input = Input(shape=(1,), name='post')
    post_embedding = Embedding(n_posts, embedding_size, name='post_embedding')(post_input)
    post_flat = Flatten()(post_embedding)

    concat = Concatenate()([user_flat, post_flat])
    dense = Dense(128, activation='relu')(concat)
    dropout = Dropout(0.5)(dense)
    dense = Dense(64, activation='relu')(dropout)
    dropout = Dropout(0.5)(dense)
    dense = Dense(32, activation='relu')(dropout)
    output = Dense(1)(dense)

    model = Model(inputs=[user_input, post_input], outputs=output)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])

    return model


model = RecommenderNet(n_users, n_posts)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 post (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 128)               38400     ['user[0][0]']                
                                                                                                  
 post_embedding (Embedding)  (None, 1, 128)               51200     ['post[0][0]']                
                                                                                              

In [14]:
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test_array, y_test)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
model.save('model/model.h5')

  saving_api.save_model(


In [16]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.feature_extraction.text import TfidfVectorizer

posts_df = pd.read_csv('datasets/postsData.csv')
users_df = pd.read_csv('datasets/users.csv')

melted_df = users_df.melt(
    id_vars=['id'],
    value_vars=[col for col in users_df.columns if col.startswith('interested_posts__')],
    var_name='post_type',
    value_name='interested_posts'
)

melted_df = melted_df.dropna(subset=['interested_posts'])

merged_df = melted_df.merge(posts_df, left_on='interested_posts', right_on='id')
merged_df['content'] = merged_df['title'] + " " + merged_df['description']

merged_df['content'] = merged_df['content'].fillna('')

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())
tfidf_df['user_id'] = merged_df['id_x'].astype('category').cat.codes.values
tfidf_df['post_id'] = merged_df['id_y'].astype('category').cat.codes.values

n_users = merged_df['id_x'].nunique()
n_posts = merged_df['id_y'].nunique()

tfidf_df = tfidf_df[tfidf_df['user_id'] < n_users]
tfidf_df = tfidf_df[tfidf_df['post_id'] < n_posts]

X = tfidf_df[['user_id', 'post_id']].values

tfidf_df['post_freq'] = merged_df['interest_count']

y = tfidf_df['post_freq']

model = load_model('model/model.h5')

def get_recommendations(user_id, model, tfidf_vectorizer, merged_df, top_n=5):
    user_code = pd.Series(user_id).astype('category').cat.codes.values[0]

    unique_post_ids = merged_df['id_y'].astype('category').cat.codes.unique()

    user_array = np.array([user_code for _ in range(len(unique_post_ids))])

    predictions = model.predict([user_array, unique_post_ids]).flatten()

    top_indices = predictions.argsort()[-top_n:][::-1]

    recommended_post_ids = merged_df['id_y'].astype('category').cat.categories[top_indices]

    return recommended_post_ids

user_id = 'B2nZIGPRbFgLKr3RGxHOT84szen1'
recommendations = get_recommendations(user_id, model, tfidf_vectorizer, merged_df)

# Print the top 5 recommendations
print(f"Top 5 recommendations for user {user_id}: {recommendations}")


Top 5 recommendations for user B2nZIGPRbFgLKr3RGxHOT84szen1: Index(['zM0ar7XkCszG3PUmbO8V', 'LlhZ4sNT8PrItxdnYLTx', '0u6gU53MePU4W2UJCpYy',
       'sQJTUXfnPp6tfLzdPunv', 'msDIapbSdL9K6PbBBxmY'],
      dtype='object')


In [25]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import load_model

posts_df = pd.read_csv('datasets/postsData.csv')
users_df = pd.read_csv('datasets/users.csv')

interests = []
for i, user in users_df.iterrows():
    for col in users_df.columns:
        if 'interested_posts__' in col and pd.notna(user[col]):
            interests.append({'user_id': user['id'], 'post_id': user[col]})

interests_df = pd.DataFrame(interests)

interests_df['user_id'] = interests_df['user_id'].astype(str)
posts_df['userId'] = posts_df['userId'].astype(str)
users_df['id'] = users_df['id'].astype(str)

merged_df = interests_df.merge(posts_df, left_on='post_id', right_on='id')
merged_df = merged_df.merge(users_df, left_on='user_id', right_on='id')

merged_df['content'] = (
    merged_df['title'].fillna('') + ' ' +
    merged_df['description'].fillna('') + ' ' +
    merged_df['type'].fillna('') + ' ' +
    merged_df['status'].fillna('')
)

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())
tfidf_df['user_id'] = merged_df['user_id'].values
tfidf_df['post_id'] = merged_df['post_id'].values

user_encoder = LabelEncoder()
post_encoder = LabelEncoder()

tfidf_df['user_id_encoded'] = user_encoder.fit_transform(tfidf_df['user_id'])
tfidf_df['post_id_encoded'] = post_encoder.fit_transform(tfidf_df['post_id'])

posts_df['mapped_post_id'] = post_encoder.transform(posts_df['id'])

model = load_model('model/model.h5')

def recommend_posts_for_user(user_id, top_n=5):
    encoded_user_id = user_encoder.transform([user_id])[0]
    user_ids = np.array([encoded_user_id] * len(posts_df))
    post_ids = posts_df['mapped_post_id'].values

    print("User IDs: ", user_ids[:10])
    print("Post IDs: ", post_ids[:10])

    predictions = model.predict([user_ids, post_ids])

    print("Predictions: ", predictions[:10])

    top_post_indices = predictions.flatten().argsort()[-top_n:][::-1]

    recommended_posts = posts_df.iloc[top_post_indices]
    recommended_posts = recommended_posts.merge(users_df, left_on='userId', right_on='id')

    return recommended_posts[['id_x', 'title', 'description', 'type', 'city', 'username']]

if __name__ == "__main__":
    user_id_example = '3Be4eTsgUNbRcu05ehVjm5PGbkG2'  

    recommendations = recommend_posts_for_user(user_id_example, top_n=5)
    print("Top 5 post recommendations for user {}: \n{}".format(user_id_example, recommendations))


User IDs:  [9 9 9 9 9 9 9 9 9 9]
Post IDs:  [0 1 2 3 4 5 6 7 8 9]
Predictions:  [[-0.00556758]
 [ 0.04716261]
 [ 0.20464341]
 [-0.01002637]
 [ 0.00424913]
 [ 0.03313614]
 [-0.01395205]
 [ 0.00606324]
 [-0.01031427]
 [-0.0051432 ]]
Top 5 post recommendations for user 3Be4eTsgUNbRcu05ehVjm5PGbkG2: 
                   id_x                        title  \
0  uZqddjEpeEXlBMiBMNkW    Jam Tangan Garmin Fenix 6   
1  P8yF6I2WkMX0DbKEGvGC            Kursi Rotan Bekas   
2  1fI2AsboOWDnWYdGByLl            Blossom Landscape   
3  bwoohB9YAly2uYXMYaJL  Les Intensif Bahasa Inggris   
4  tUJp5lLnwNRPInRS6U70       Galon Plastik 19 Liter   

                                         description    type        city  \
0  Desain sporty, kondisi bekas namun masih baik....  Barang  Kotamobagu   
1  Kursi rotan bekas dengan bantal empuk, ada beb...  Barang      Binjai   
2  Desain dan perawatan taman yang artistik dan f...    Jasa       Depok   
3  Tingkatkan kemampuan bahasa Inggris Anda denga...    Jasa 