In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
posts_df = pd.read_csv('dataset/csv/posts.csv')
users_df = pd.read_csv('dataset/csv/users.csv')
interests_df = pd.read_csv('dataset/csv/interests.csv')

In [3]:
merged_df = interests_df.merge(posts_df, left_on='post_id', right_on='id', suffixes=('_interest', '_post'))

merged_df = merged_df.merge(users_df, left_on='user_id_interest', right_on='id', suffixes=('_post', '_user'))

merged_df = merged_df[['user_id_interest', 'post_id', 'title', 'description', 'type', 'status', 'city']]

merged_df['content'] = merged_df['title'] + ' ' + merged_df['description'] + ' ' + merged_df['type'] + ' ' + merged_df['status']

merged_df = merged_df[['user_id_interest', 'post_id', 'content', 'city']]

In [4]:
merged_df

Unnamed: 0,user_id_interest,post_id,content,city
0,279,199,Televisi Sebuah Televisi bekas Barang Tersedia,Salor
1,279,61,Kamera Sebuah Kamera bekas Barang Selesai,Salor
2,279,260,Smartphone Sebuah Smartphone bekas Barang Ters...,Salor
3,194,86,Smartphone Sebuah Smartphone bekas Barang Selesai,Banda Aceh
4,194,219,Kursi Sebuah Kursi bekas Barang Selesai,Banda Aceh
...,...,...,...,...
295,91,18,Laptop Sebuah Laptop bekas Barang Tersedia,Sorong
296,172,51,Rak Buku Sebuah Rak Buku bekas Barang Tersedia,Serang
297,197,95,Sepeda Sebuah Sepeda bekas Barang Tersedia,Pontianak
298,120,234,Meja Sebuah Meja bekas Barang Tersedia,Bandung


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer()

tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])

tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())

tfidf_df['user_id'] = merged_df['user_id_interest'].values
tfidf_df['post_id'] = merged_df['post_id'].values

tfidf_df

Unnamed: 0,bahasa,barang,bekas,buku,cuci,fotografi,gitar,hewan,inggris,jam,...,sebuah,selesai,sepeda,smartphone,tangan,televisi,tersedia,tukang,user_id,post_id
0,0.0,0.155002,0.155002,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.121724,0.000000,0.000000,0.000000,0.0,0.943091,0.218428,0.0,279,199
1,0.0,0.149055,0.149055,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.117054,0.186716,0.000000,0.000000,0.0,0.000000,0.000000,0.0,279,61
2,0.0,0.183929,0.183929,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.144440,0.000000,0.000000,0.918857,0.0,0.000000,0.259192,0.0,279,260
3,0.0,0.185239,0.185239,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.145469,0.232041,0.000000,0.925402,0.0,0.000000,0.000000,0.0,194,86
4,0.0,0.180735,0.180735,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.141932,0.226399,0.000000,0.000000,0.0,0.000000,0.000000,0.0,194,219
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,0.0,0.150665,0.150665,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.118318,0.000000,0.000000,0.000000,0.0,0.000000,0.212316,0.0,91,18
296,0.0,0.126985,0.126985,0.680361,0.0,0.0,0.0,0.0,0.0,0.0,...,0.099722,0.000000,0.000000,0.000000,0.0,0.000000,0.178948,0.0,172,51
297,0.0,0.185354,0.185354,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.145559,0.000000,0.917538,0.000000,0.0,0.000000,0.261200,0.0,197,95
298,0.0,0.145978,0.145978,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.114638,0.000000,0.000000,0.000000,0.0,0.000000,0.205712,0.0,120,234


In [6]:
n_users = len(users_df)
n_posts = len(posts_df)

tfidf_df = tfidf_df[tfidf_df['user_id'] < n_users]
tfidf_df = tfidf_df[tfidf_df['post_id'] < n_posts]

In [7]:
X = tfidf_df[['user_id', 'post_id']].values
X

array([[279, 199],
       [279,  61],
       [279, 260],
       [194,  86],
       [194, 219],
       [194,  31],
       [183,  86],
       [183, 290],
       [183, 139],
       [ 78,   8],
       [ 78, 297],
       [ 53, 298],
       [ 53, 142],
       [ 53, 246],
       [241, 298],
       [241,   5],
       [274, 298],
       [168, 298],
       [168, 227],
       [168,  57],
       [ 71,  41],
       [138,  41],
       [138, 246],
       [138,  12],
       [293,  41],
       [293, 125],
       [165,  41],
       [165,  83],
       [ 51, 196],
       [123, 196],
       [192, 196],
       [192, 286],
       [223, 196],
       [223,  13],
       [ 37, 162],
       [ 37, 295],
       [ 34, 162],
       [ 34,  33],
       [ 95,  23],
       [ 95, 182],
       [269,  23],
       [105,  23],
       [105,  80],
       [255, 184],
       [255,  55],
       [ 77, 184],
       [ 77, 266],
       [ 77, 173],
       [214, 184],
       [214, 104],
       [141, 185],
       [141, 106],
       [114,

In [8]:
post_freq = tfidf_df['post_id'].value_counts().to_dict()
tfidf_df['post_freq'] = tfidf_df['post_id'].map(post_freq)

y = tfidf_df['post_freq']
y

0      1
1      1
2      1
3      2
4      2
      ..
295    1
296    1
297    1
298    1
299    1
Name: post_freq, Length: 299, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('> Train set posts: {}'.format(len(y_train)))
print('> Test set posts: {}'.format(len(y_test)))



> Train set posts: 239
> Test set posts: 60


In [10]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
y_train = scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_test = scaler.transform(y_test.values.reshape(-1, 1)).flatten()

In [11]:
X_train

array([[183,  86],
       [ 30,  46],
       [ 61, 192],
       [ 86, 165],
       [192, 196],
       [138, 246],
       [ 45,  46],
       [268, 288],
       [267, 159],
       [151, 267],
       [199, 223],
       [103, 262],
       [ 49, 287],
       [ 11, 193],
       [122,  69],
       [221,  15],
       [112, 183],
       [ 46,  71],
       [197,  95],
       [161, 142],
       [296, 177],
       [168,  57],
       [ 68, 225],
       [ 69, 255],
       [ 80,  49],
       [130,  79],
       [297,  81],
       [241,   5],
       [ 78, 297],
       [ 47, 204],
       [288, 112],
       [ 23, 294],
       [299,  13],
       [ 34,  33],
       [274, 298],
       [287, 257],
       [278, 258],
       [219, 176],
       [294, 294],
       [261, 194],
       [291, 294],
       [192, 286],
       [204, 156],
       [163, 165],
       [288,  13],
       [218, 205],
       [168, 227],
       [198, 108],
       [  1,  64],
       [262, 279],
       [239,  74],
       [ 97, 293],
       [226,

In [12]:
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

In [13]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, BatchNormalization, Concatenate, Dropout

def RecommenderNet(n_users, n_posts, embedding_size=128):
    user_input = Input(shape=(1,), name='user')
    user_embedding = Embedding(n_users, embedding_size, name='user_embedding')(user_input)
    user_flat = Flatten()(user_embedding)

    post_input = Input(shape=(1,), name='post')
    post_embedding = Embedding(n_posts, embedding_size, name='post_embedding')(post_input)
    post_flat = Flatten()(post_embedding)

    concat = Concatenate()([user_flat, post_flat])
    dense = Dense(128, activation='relu')(concat)
    dropout = Dropout(0.5)(dense)
    dense = Dense(64, activation='relu')(dropout)
    dropout = Dropout(0.5)(dense)
    dense = Dense(32, activation='relu')(dropout)
    output = Dense(1)(dense)

    model = Model(inputs=[user_input, post_input], outputs=output)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])

    return model


model = RecommenderNet(n_users, n_posts)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 post (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 128)               38400     ['user[0][0]']                
                                                                                                  
 post_embedding (Embedding)  (None, 1, 128)               38400     ['post[0][0]']                
                                                                                              

In [14]:
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test_array, y_test)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
model.save('model.h5')

  saving_api.save_model(


In [30]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from tensorflow.keras.models import load_model

posts_df = pd.read_csv('Datasets/csv/posts.csv')
users_df = pd.read_csv('Datasets/csv/users.csv')
interests_df = pd.read_csv('Datasets/csv/interests.csv')

merged_df = interests_df.merge(posts_df, left_on='post_id', right_on='id', suffixes=('_interest', '_post'))
merged_df = merged_df.merge(users_df, left_on='user_id_interest', right_on='id', suffixes=('_post', '_user'))
merged_df = merged_df[['user_id_interest', 'post_id', 'title', 'description', 'type', 'status', 'city']]
merged_df['content'] = merged_df['title'] + ' ' + merged_df['description'] + ' ' + merged_df['type'] + ' ' + merged_df['status']
merged_df = merged_df[['user_id_interest', 'post_id', 'content', 'city']]

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())
tfidf_df['user_id'] = merged_df['user_id_interest'].values
tfidf_df['post_id'] = merged_df['post_id'].values

n_users = len(users_df)
n_posts = len(posts_df)
tfidf_df = tfidf_df[tfidf_df['user_id'] < n_users]
tfidf_df = tfidf_df[tfidf_df['post_id'] < n_posts]

post_id_mapping = {post_id: i for i, post_id in enumerate(posts_df['id'])}
posts_df['mapped_post_id'] = posts_df['id'].map(post_id_mapping)

model = load_model('model/model.h5')

def recommend_posts_for_user(user_id, top_n=5):
    user_ids = np.array([user_id] * n_posts)
    post_ids = np.array([post_id_mapping[pid] for pid in posts_df['id']])

    predictions = model.predict([user_ids, post_ids])

    top_post_indices = predictions.flatten().argsort()[-top_n:][::-1]

    recommended_posts = posts_df.iloc[top_post_indices]
    recommended_posts = recommended_posts.merge(users_df, left_on='user_id', right_on='id', suffixes=('_post', '_user'))

    return recommended_posts[['id_post', 'title', 'description', 'type', 'city', 'username']]

if __name__ == "__main__":
    user_id_example = 3

    recommendations = recommend_posts_for_user(user_id_example, top_n=5)
    print("Top 5 post recommendations for user {}: \n{}".format(user_id_example, recommendations))

Top 5 post recommendations for user 3: 
   id_post              title               description    type      city  \
0      258         Smartphone   Sebuah Smartphone bekas  Barang     Salor   
1      197             Laptop       Sebuah Laptop bekas  Barang   Bandung   
2      166              Kursi        Sebuah Kursi bekas  Barang   Kendari   
3      263             Sepeda       Sebuah Sepeda bekas  Barang  Denpasar   
4      194  Jasa Tukang Kebun  Sebuah Jasa Tukang Kebun    Jasa    Sofifi   

  username  
0   user37  
1  user148  
2   user36  
3  user185  
4  user188  
