# Load dan prepare dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Raw inputs: the post table and the user table.
# NOTE(review): the recommendation cell near the end of this notebook reads the
# same file names from a 'datasets/' folder -- confirm which location is correct.
posts_df = pd.read_csv('postsData.csv')
users_df = pd.read_csv('users.csv')

# A user's interests are spread over every column whose name contains
# 'interested_posts__'; each non-missing cell becomes one (user, post) record.
# Records are produced in user order, then column order.
interest_cols = [col for col in users_df.columns if 'interested_posts__' in col]
interest_data = [
    {'user_id': row['id'], 'post_id': row[col]}
    for _, row in users_df.iterrows()
    for col in interest_cols
    if pd.notna(row[col])
]

# Naming the columns explicitly keeps the frame well-formed when no interest
# cell is populated; without it the column selection below raises KeyError.
interest_df = pd.DataFrame(interest_data, columns=['user_id', 'post_id'])
interest_df['id'] = range(1, len(interest_df) + 1)  # 1-based surrogate key
interest_df = interest_df[['id', 'user_id', 'post_id']]

In [3]:
# Attach interest_count from posts_df to every (user, post) interest record.
# The merge is an inner join, so interests whose post_id is absent from
# posts_df disappear; rows without an interest_count are dropped as well.
merged_df = (
    interest_df
    .merge(posts_df, left_on='post_id', right_on='id', suffixes=('_interest', '_post'))
    .loc[:, ['user_id', 'post_id', 'interest_count']]
    .dropna(subset=['interest_count'])
)

In [4]:
# Preview the first rows of the merged interaction table (raw string ids).
print("Data setelah digabungkan:")
merged_df.head()

Data setelah digabungkan:


Unnamed: 0,user_id,post_id,interest_count
0,0KX5pZM33cMkDCw2tIxPWk8Cram1,9taCl56Ja8xxVEWHwLgU,1
1,0KX5pZM33cMkDCw2tIxPWk8Cram1,rN2BpiVoWmB99HkaVVOq,1
2,0TmaAkUBFzghYOcfEos14BauwiI3,LuwacWWBHtjPhnuOUkYE,1
3,0TmaAkUBFzghYOcfEos14BauwiI3,jmHQeRvMDui9kvMb5hIH,1
4,0TmaAkUBFzghYOcfEos14BauwiI3,P8yF6I2WkMX0DbKEGvGC,13


# Ekstrak fitur dan encoding

In [5]:
# NOTE(review): disabled TF-IDF experiment, kept for reference only. It cannot
# simply be re-enabled as written: merged_df is reduced to the columns user_id,
# post_id and interest_count when it is built, so merged_df['content'] would
# not exist.
# from sklearn.feature_extraction.text import TfidfVectorizer

# tfidf_vectorizer = TfidfVectorizer()

# tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['content'])
# tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())

# tfidf_df['user_id'] = merged_df['user_id'].values
# tfidf_df['post_id'] = merged_df['post_id'].values

In [6]:
from sklearn.preprocessing import LabelEncoder

# Replace the raw ids with integer codes. Each column gets its own encoder,
# and both fitted encoders remain available under their names.
user_id_encoder, post_id_encoder = LabelEncoder(), LabelEncoder()
merged_df = merged_df.assign(
    user_id=user_id_encoder.fit_transform(merged_df['user_id']),
    post_id=post_id_encoder.fit_transform(merged_df['post_id']),
)

# NumPy arrays of the encoded ids and of the (unscaled) target column.
train_user_ids, train_post_ids, train_labels = (
    merged_df[name].to_numpy() for name in ('user_id', 'post_id', 'interest_count')
)

In [7]:
# Preview the table after encoding: user_id and post_id are now integer codes.
merged_df.head()

Unnamed: 0,user_id,post_id,interest_count
0,0,65,1
1,0,349,1
2,1,138,1
3,1,290,1
4,1,169,13


# Train/test split

In [8]:
from sklearn.model_selection import train_test_split

# Features are the two encoded id columns; the target is interest_count.
X = merged_df.loc[:, ['user_id', 'post_id']].astype('int32')
y = merged_df.loc[:, 'interest_count'].astype('int32')

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print('> Train set posts: {}'.format(y_train.shape[0]))
print('> Test set posts: {}'.format(y_test.shape[0]))

> Train set posts: 398
> Test set posts: 100


In [9]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the target using the training range only. The test targets
# reuse that range, so values beyond the training min/max fall outside [0, 1].
# NOTE: y_train and y_test are rebound to NumPy arrays here, so this cell can
# be run only once per split (a second run has no Series to convert).
scaler = MinMaxScaler()
y_train = scaler.fit_transform(y_train.to_numpy().reshape(-1, 1)).ravel()
y_test = scaler.transform(y_test.to_numpy().reshape(-1, 1)).ravel()

In [10]:
# One array per model input, ordered as [user ids, post ids].
X_train_array = [X_train[column].values for column in ('user_id', 'post_id')]
X_test_array = [X_test[column].values for column in ('user_id', 'post_id')]

# Membuat model

In [13]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout

# Sizes of the two embedding tables: the number of distinct encoded ids in
# each column of merged_df.
n_users = merged_df['user_id'].nunique()
n_posts = merged_df['post_id'].nunique()

def RecommenderNet(n_users, n_posts, embedding_size=128):
    """Build and compile the user/post embedding regression network.

    Two single-value inputs named 'user' and 'post' are each looked up in
    their own embedding table, flattened and concatenated. The joint vector
    goes through ReLU layers of 128, 64 and 32 units, with dropout of 0.5
    after the first two, and ends in one sigmoid unit.

    Args:
        n_users: Number of rows in the user embedding table.
        n_posts: Number of rows in the post embedding table.
        embedding_size: Width of both embedding vectors.

    Returns:
        A Keras ``Model`` taking ``[user, post]`` inputs, compiled with the
        Adam optimizer and mean-squared-error loss, reporting MAE and MSE.
    """
    def embedded_id(input_name, embedding_name, vocabulary_size):
        # Input -> Embedding -> Flatten for a single id column.
        id_input = Input(shape=(1,), name=input_name)
        vectors = Embedding(vocabulary_size, embedding_size, name=embedding_name)(id_input)
        return id_input, Flatten()(vectors)

    user_input, user_vector = embedded_id('user', 'user_embedding', n_users)
    post_input, post_vector = embedded_id('post', 'post_embedding', n_posts)

    hidden = Concatenate()([user_vector, post_vector])
    # (units, followed by dropout?) for each hidden layer, in order.
    for units, apply_dropout in ((128, True), (64, True), (32, False)):
        hidden = Dense(units, activation='relu')(hidden)
        if apply_dropout:
            hidden = Dropout(0.5)(hidden)
    prediction = Dense(1, activation='sigmoid')(hidden)

    network = Model(inputs=[user_input, post_input], outputs=prediction)
    network.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])

    return network


# Instantiate the network with the default embedding size and print its layers.
model = RecommenderNet(n_users, n_posts)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 post (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 128)               38400     ['user[0][0]']                
                                                                                                  
 post_embedding (Embedding)  (None, 1, 128)               51200     ['post[0][0]']                
                                                                                              

In [14]:
# Train for 20 epochs in batches of 64. The held-out test split is passed as
# validation data, so the per-epoch validation metrics are computed on the
# test set itself.
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test_array, y_test)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
# Write the trained network to a single HDF5 file. Only the Keras model is
# saved here; the label encoders and the target scaler are not persisted.
model.save('model/model.h5')
print("Model telah dilatih dan disimpan.")

Model telah dilatih dan disimpan.


  saving_api.save_model(


# Buat rekomendasi

In [28]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Reload the saved network and the raw CSV tables for inference.
# NOTE(review): these paths use a 'datasets/' prefix, while the loading cell at
# the top of the notebook reads the same file names without it -- confirm
# which location is correct.
model = load_model('model/model.h5')

posts_df = pd.read_csv('datasets/postsData.csv')
users_df = pd.read_csv('datasets/users.csv')

def preprocess_data(users_df, posts_df):
    """Rebuild the user/post interaction table and fit fresh id encoders.

    Args:
        users_df: User table with an 'id' column and any number of columns
            whose name contains 'interested_posts__'; each such cell holds a
            post id or a missing value.
        posts_df: Post table with an 'id' column and an 'interest_count'
            column.

    Returns:
        A tuple ``(merged_df, user_id_encoder, post_id_encoder)``.
        ``merged_df`` has the columns 'user_id' and 'post_id' (both replaced
        by their encoded values) and 'interest_count'. The two
        ``LabelEncoder`` objects are the ones fitted on those id columns.
    """
    interest_cols = [col for col in users_df.columns if 'interested_posts__' in col]

    # One record per non-missing interest cell, in user order then column order.
    interest_data = [
        {'user_id': row['id'], 'post_id': row[col]}
        for _, row in users_df.iterrows()
        for col in interest_cols
        if pd.notna(row[col])
    ]

    # Naming the columns explicitly keeps the frame well-formed when no
    # interest cell is populated; without it the selection below raises KeyError.
    interest_df = pd.DataFrame(interest_data, columns=['user_id', 'post_id'])
    interest_df['id'] = range(1, len(interest_df) + 1)
    interest_df = interest_df[['id', 'user_id', 'post_id']]

    # Inner join: interests whose post_id is absent from posts_df are dropped.
    merged_df = interest_df.merge(posts_df, left_on='post_id', right_on='id', suffixes=('_interest', '_post'))
    merged_df = merged_df[['user_id', 'post_id', 'interest_count']]
    merged_df = merged_df.dropna(subset=['interest_count'])

    user_id_encoder = LabelEncoder()
    post_id_encoder = LabelEncoder()

    # assign() builds a new frame instead of writing into a derived one.
    merged_df = merged_df.assign(
        user_id=user_id_encoder.fit_transform(merged_df['user_id']),
        post_id=post_id_encoder.fit_transform(merged_df['post_id']),
    )

    return merged_df, user_id_encoder, post_id_encoder

def recommend(user_id, user_id_encoder, post_id_encoder, posts_df, model, n_recommendations=5):
    """Return the posts with the highest predicted score for one user.

    Args:
        user_id: Raw (un-encoded) id of the user to recommend for.
        user_id_encoder: Fitted encoder whose ``transform`` maps raw user ids
            to the integer codes the model expects.
        post_id_encoder: Fitted encoder for post ids; its ``classes_`` lists
            the post ids it can encode.
        posts_df: Post table with at least the columns 'id', 'title',
            'description' and 'type'.
        model: Object whose ``predict([user_codes, post_codes])`` returns one
            score per (user, post) pair.
        n_recommendations: Maximum number of posts to return.

    Returns:
        DataFrame with the columns 'id', 'title', 'description' and 'type',
        ordered from highest to lowest predicted score. It is empty when
        ``n_recommendations`` is not positive or when no post in ``posts_df``
        is known to ``post_id_encoder``.

    Raises:
        ValueError: Propagated from ``user_id_encoder.transform`` when the
            encoder rejects ``user_id`` (a scikit-learn ``LabelEncoder`` does
            so for labels it was not fitted on).
    """
    output_columns = ['id', 'title', 'description', 'type']
    encoded_user_id = user_id_encoder.transform([user_id])[0]

    # Only posts the encoder was fitted on have a code. Encoding any other id
    # would raise, so such posts are left out of the candidate set.
    known_mask = posts_df['id'].isin(post_id_encoder.classes_)
    candidates = posts_df[known_mask]

    # A non-positive count must yield nothing: the slice [-0:] used for the
    # top-k selection would otherwise select every post.
    if n_recommendations <= 0 or candidates.empty:
        return candidates.iloc[0:0][output_columns]

    encoded_post_ids = np.asarray(post_id_encoder.transform(candidates['id'].values))
    # Pair the single user code with every candidate post code.
    encoded_user_ids = np.full(len(encoded_post_ids), encoded_user_id)

    predictions = model.predict([encoded_user_ids, encoded_post_ids])

    # argsort is ascending: take the last k positions and reverse them.
    scores = np.asarray(predictions).flatten()
    top_post_indices = scores.argsort()[-n_recommendations:][::-1]

    recommended_posts = candidates.iloc[top_post_indices]

    return recommended_posts[output_columns]

# Demo: produce and print recommendations for one hard-coded user.
if __name__ == "__main__":
    # The encoders are re-fitted from the CSV tables rather than loaded.
    # NOTE(review): presumably their codes match the ones used in training only
    # while the CSV contents are unchanged -- confirm, or persist the encoders.
    merged_df, user_id_encoder, post_id_encoder = preprocess_data(users_df, posts_df)

    example_user_id = '887MkRkNDXPJr61WHirUuYoYoqu1'

    recommendations = recommend(example_user_id, user_id_encoder, post_id_encoder, posts_df, model)

    print("Rekomendasi untuk pengguna {}:".format(example_user_id))
    print(recommendations.to_string(index=False))


Rekomendasi untuk pengguna 887MkRkNDXPJr61WHirUuYoYoqu1:
                  id                       title                                                                                                                                                                description   type
uZqddjEpeEXlBMiBMNkW   Jam Tangan Garmin Fenix 6                                                                       Desain sporty, kondisi bekas namun masih baik. Bisa ditukar dengan gadget atau perangkat elektronik. Barang
P8yF6I2WkMX0DbKEGvGC           Kursi Rotan Bekas                                                          Kursi rotan bekas dengan bantal empuk, ada beberapa bagian anyaman yang lepas. Ingin ditukar dengan tanaman hias. Barang
1fI2AsboOWDnWYdGByLl           Blossom Landscape                                                                                                                   Desain dan perawatan taman yang artistik dan fungsional.   Jasa
tUJp5lLnwNRPInRS6U70      Galon Pla