In [57]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

In [58]:
# import data
data = pd.read_csv("data_set_fixed.csv")
data.head()

Unnamed: 0,No,Nama,JK,RB1,RB2,RB3,K1,K2,K3,Jurusan
0,1,Afrelya Amara,P,82,79,78,78,75,81,Jaringan & Backend
1,2,Agris Dekar Saputra,L,76,77,78,75,76,78,Backend
2,3,Aji Saputra,L,80,78,70,77,78,76,Jaringan
3,4,Akbar maulana,L,86,80,77,78,79,78,Jaringan
4,5,Al Diksha Alfiansyah,L,83,77,78,77,78,80,Jaringan


In [59]:
# menggunakan df sebagai data split dari data
df = data[['RB1', 'RB2','RB3', 'K1', 'K2', 'K3']]
df.head()

Unnamed: 0,RB1,RB2,RB3,K1,K2,K3
0,82,79,78,78,75,81
1,76,77,78,75,76,78
2,80,78,70,77,78,76
3,86,80,77,78,79,78
4,83,77,78,77,78,80


In [60]:
# menghapus missing value 
df.dropna(inplace=True)
df.isna().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)


RB1    0
RB2    0
RB3    0
K1     0
K2     0
K3     0
dtype: int64

In [61]:
# Function to compute custom cluster for each row
def get_custom_cluster(row):
    rb1_k1_avg = (row['RB1'] + row['K1']) / 2
    rb2_k2_avg = (row['RB2'] + row['K2']) / 2
    rb3_k3_avg = (row['RB3'] + row['K3']) / 2
    
    if rb1_k1_avg == rb2_k2_avg and rb2_k2_avg == rb3_k3_avg:
        return 'Semua'
    elif rb1_k1_avg == rb2_k2_avg:
        return 'Jaringan & Desain Grafis'
    elif rb1_k1_avg == rb3_k3_avg:
        return 'Jaringan & Backend'
    elif rb2_k2_avg == rb3_k3_avg:
        return 'Desain Grafis & Backend'
    elif rb1_k1_avg != rb2_k2_avg and rb1_k1_avg != rb3_k3_avg and rb2_k2_avg != rb3_k3_avg:
        if rb1_k1_avg > rb2_k2_avg and rb1_k1_avg > rb3_k3_avg:
            return 'Jaringan'
        elif rb2_k2_avg > rb1_k1_avg and rb2_k2_avg > rb3_k3_avg:
            return 'Desain Grafis'
        elif rb3_k3_avg > rb1_k1_avg and rb3_k3_avg > rb2_k2_avg:
            return 'Backend'
    else:
        return 'error'

# Buat custom cluster berdasarkan kondisi
custom_clusters = df.apply(get_custom_cluster, axis=1)

# Add "Custom Cluster" column to the DataFrame
data['Jurusan'] = custom_clusters

data.head()

Unnamed: 0,No,Nama,JK,RB1,RB2,RB3,K1,K2,K3,Jurusan
0,1,Afrelya Amara,P,82,79,78,78,75,81,Jaringan
1,2,Agris Dekar Saputra,L,76,77,78,75,76,78,Backend
2,3,Aji Saputra,L,80,78,70,77,78,76,Jaringan
3,4,Akbar maulana,L,86,80,77,78,79,78,Jaringan
4,5,Al Diksha Alfiansyah,L,83,77,78,77,78,80,Jaringan


In [62]:
# Export dataset yang telah diolah sebelumnya
data.to_csv('hasil_custom_cluster_fixed_training.csv', index=False)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

In [3]:
# Load dataset
dataTraining = pd.read_csv("data_training_NCF_label.csv")
dataTraining.head()

Unnamed: 0,No,Nama,JK,RB1,RB2,RB3,K1,K2,K3,Jurusan
0,1.0,Afrelya Amara,P,82.0,79.0,78.0,78.0,75.0,81.0,Jaringan
1,2.0,Agris Dekar Saputra,L,76.0,77.0,78.0,75.0,76.0,78.0,Backend
2,3.0,Aji Saputra,L,80.0,78.0,70.0,77.0,78.0,76.0,Jaringan
3,4.0,Akbar maulana,L,86.0,80.0,77.0,78.0,79.0,78.0,Jaringan
4,5.0,Al Diksha Alfiansyah,L,83.0,77.0,78.0,77.0,78.0,80.0,Jaringan


In [4]:
# menghapus missing value 
dataTraining.dropna(inplace=True)
dataTraining.isna().sum()

No         0
Nama       0
JK         0
RB1        0
RB2        0
RB3        0
K1         0
K2         0
K3         0
Jurusan    0
dtype: int64

In [5]:
dataTesting = pd.read_csv("data_testing_NCF_nonLabel.csv")
dataTesting.head()

Unnamed: 0,No,Nama,JK,RB1,RB2,RB3,K1,K2,K3
0,1,Widya Ayu Lestari,P,76,75,81,80,77,77
1,2,Widya Rinjani,P,76,79,76,78,78,79
2,3,Wildan Nasution,L,85,75,80,77,80,77
3,4,Yuana Rianti,P,80,78,77,77,77,76
4,5,Achmad Fadli Ramdhani,L,75,79,77,77,77,81


In [6]:
# mengganti labeling dengan label encoder 
# 1 untuk tidak lulus
# 0 untuk lulus
from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()

dataTraining['Jurusan'] = enc.fit_transform(dataTraining['Jurusan'].values)

dataTraining.head()

Unnamed: 0,No,Nama,JK,RB1,RB2,RB3,K1,K2,K3,Jurusan
0,1.0,Afrelya Amara,P,82.0,79.0,78.0,78.0,75.0,81.0,2
1,2.0,Agris Dekar Saputra,L,76.0,77.0,78.0,75.0,76.0,78.0,0
2,3.0,Aji Saputra,L,80.0,78.0,70.0,77.0,78.0,76.0,2
3,4.0,Akbar maulana,L,86.0,80.0,77.0,78.0,79.0,78.0,2
4,5.0,Al Diksha Alfiansyah,L,83.0,77.0,78.0,77.0,78.0,80.0,2


In [7]:
num_users = len(dataTraining)
num_items = len(dataTraining[['RB1', 'RB2', 'RB3', 'K1', 'K2', 'K3']].unique())

user_encoder = {user: idx for idx, user in enumerate(dataTraining.index)}
item_encoder = {item: idx for idx, item in enumerate(dataTraining[['RB1', 'RB2', 'RB3', 'K1', 'K2', 'K3']].unique())}

dataTraining['user_id'] = dataTraining.index.map(user_encoder)
dataTraining['item_id'] = dataTraining[['RB1', 'RB2', 'RB3', 'K1', 'K2', 'K3']].map(item_encoder)

# Pisahkan data menjadi data latih dan data uji
train_data = dataTraining
test_data = dataTesting

AttributeError: 'DataFrame' object has no attribute 'unique'

In [7]:
# Preprocessing data
# user_id untuk training gpp
# Tapi item_id gal ambil dari jurusan, ambil dari RB1, RB2 ...
num_users = len(dataTraining)
num_items = len(dataTraining['Jurusan'].unique())

user_encoder = {user: idx for idx, user in enumerate(dataTraining.index)}
item_encoder = {item: idx for idx, item in enumerate(dataTraining['Jurusan'].unique())}

dataTraining['user_id'] = dataTraining.index.map(user_encoder)
dataTraining['item_id'] = dataTraining['Jurusan'].map(item_encoder)

# Pisahkan data menjadi data latih dan data uji
train_data = dataTraining
test_data = dataTesting


KeyError: ('RB1', 'RB2')

In [36]:
# Neural Collaborative Filtering model
def create_ncf_model(num_users, num_items, emb_dim=32):
    user_input = Input(shape=(1,))
    item_input = Input(shape=(1,))

    user_embedding = Embedding(num_users, emb_dim)(user_input)
    user_embedding = Flatten()(user_embedding)

    item_embedding = Embedding(num_items, emb_dim)(item_input)
    item_embedding = Flatten()(item_embedding)

    concat = Concatenate()([user_embedding, item_embedding])

    dense_layer = Dense(64, activation='relu')(concat)
    output_layer = Dense(1, activation='linear')(dense_layer)

    model = Model(inputs=[user_input, item_input], outputs=output_layer)

    return model

In [37]:
# Buat model
model = create_ncf_model(num_users, num_items)
model.compile(optimizer=Adam(lr=0.001), loss=MeanSquaredError())



In [38]:
# Training model
# Training harusnya berdasarkan value RB1, RB2 ... bukan jurusan ([rb1, RB2])
# Model.fit ambil x sebagai data training, y sebagai target data (data testing)
# https://keras.io/api/models/model_training_apis/
history = model.fit(
    x=[dataTraining['user_id'].values.astype(np.int64), dataTraining['item_id'].values.astype(np.int64)],
    y=dataTraining['Jurusan'].values,  # Ganti 'Jurusan' dengan kolom target yang sesuai
    batch_size=32,
    epochs=12,
    verbose=1
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [39]:
# Evaluasi model
test_loss = model.evaluate(
    x=[dataTraining['user_id'].values.astype(np.int64), dataTraining['item_id'].values.astype(np.int64)],
    y=dataTraining['Jurusan'].values,  # Ganti 'Jurusan' dengan kolom target yang sesuai
    verbose=0
)
print("Test Loss:", test_loss)



Test Loss: 0.03506316989660263


In [47]:
# Fungsi untuk merekomendasikan penjurusan
def recommend_penjurusan(Nama, RB1, RB2, RB3, K1, K2, K3):
    # Siapkan data input untuk model
    user_id_encoded = user_encoder[Nama]
    item_ids = np.arange(num_items)
    user_ids = np.full_like(item_ids, user_id_encoded)

    # Lakukan prediksi menggunakan model
    predictions = model.predict([user_ids, item_ids])

    # Ambil jurusan dengan nilai prediksi tertinggi sebagai rekomendasi
    recommended_item_id = item_ids[np.argmax(predictions)]
    recommended_item = list(item_encoder.keys())[list(item_encoder.values()).index(recommended_item_id)]

    return recommended_item

In [52]:
# Contoh penggunaan model untuk merekomendasikan penjurusan

Nama = 145
RB1 = 80
RB2 = 69
RB3 = 81
K1 = 60
K2 = 73
K3 = 77
hasil_rekomendasi = recommend_penjurusan(Nama, RB1, RB2, RB3, K1, K2, K3)
print(f"Rekomendasi penjurusan untuk",Nama," adalah :", hasil_rekomendasi)


Rekomendasi penjurusan untuk 145  adalah : 2


In [43]:
# Cetak keterangan kode
# Diubah manggil data testing dan panggil recommend_penjurusan
print("Keterangan Kode:")
for idx, jurusan in enumerate(enc.classes_):
    print(f"Kode {idx}: {jurusan}")

print("\nData setelah diubah:")
print(dataTraining)

Keterangan Kode:
Kode 0: 0
Kode 1: 1
Kode 2: 2

Data setelah diubah:
        No                  Nama JK   RB1   RB2   RB3    K1    K2    K3  \
0      1.0        Afrelya Amara   P  82.0  79.0  78.0  78.0  75.0  81.0   
1      2.0   Agris Dekar Saputra  L  76.0  77.0  78.0  75.0  76.0  78.0   
2      3.0           Aji Saputra  L  80.0  78.0  70.0  77.0  78.0  76.0   
3      4.0         Akbar maulana  L  86.0  80.0  77.0  78.0  79.0  78.0   
4      5.0  Al Diksha Alfiansyah  L  83.0  77.0  78.0  77.0  78.0  80.0   
..     ...                   ... ..   ...   ...   ...   ...   ...   ...   
145  146.0          Nur Hafidzah  L  81.0  79.0  78.0  78.0  77.0  80.0   
146  147.0     Nurul Septiarahma  P  81.0  85.0  77.0  80.0  78.0  80.0   
147  148.0        Rafles Wijaya   L  80.0  78.0  77.0  80.0  77.0  80.0   
148  149.0         Rara Almahera  P  86.0  79.0  77.0  83.0  77.0  77.0   
149  150.0          Salsabila. R  P  83.0  78.0  76.0  84.0  76.0  77.0   

     Jurusan  user_id  item_id