In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Raw GitHub URL of the SNBT TPS question CSV.
link_tps = (
    'https://raw.githubusercontent.com/Capstone-Buddies/Machine-Learning/main/'
    'Dataset/SNBT%20Datasets%20-%20TPS.csv'
)

In [None]:
# Raw GitHub URL of the users' TPS answer-history CSV.
link_answer_history_tps = (
    'https://raw.githubusercontent.com/Capstone-Buddies/Machine-Learning/main/'
    'Dataset/SNBT%20Datasets%20-%20Answer_History_TPS.csv'
)

# Load User Answer History for TPS Questions

In [None]:
# Read the answer-history CSV at `link_answer_history_tps` into a DataFrame.
data = pd.read_csv(link_answer_history_tps)
# Bare expression so the notebook renders the frame as this cell's output.
data

Unnamed: 0,ID_USER,ID_QUESTION,Question_Category,Question_Description,USER_ANSWER,CORRECT_ANSWER,Duration
0,1,59,PU,"1, 0, 8, 3, 3, 7, 5, 6, 6, 7, 9, 5, 9, 12, x, ...",0,0,76
1,1,481,PK,Seorang mahasiswa mendapat nilai untuk keempat...,0,1,113
2,1,66,PU,"Menjelang liburan sekolah, Budi berencana meng...",2,3,55
3,1,51,PU,Pajak merupakan pungutan wajib yang dibebankan...,2,1,71
4,1,348,PPU,"Menurut Sari (2012:34), stand up comedy adalah...",2,3,53
...,...,...,...,...,...,...,...
355,35,149,PU,Pada sebuah pertemuan yang terdiri atas para k...,2,0,27
356,35,291,PPU,(1) Perkembangan dunia Ilmu Pengetahuan dan Te...,0,2,118
357,35,309,PPU,(1) Kemajuan teknologi tidak dapat dipisahkan ...,0,0,18
358,35,484,PK,Apabila x adalah luas bujur-sangkar yang panja...,2,0,96


# Load SNBT TPS Questions

In [None]:
# Read the question CSV at `link_tps` into a DataFrame.
tps_question_data = pd.read_csv(link_tps)
# Bare expression so the notebook renders the frame as this cell's output.
tps_question_data

## Check for null and duplicate values

In [None]:
# Count the missing values in every column
null_check = tps_question_data.isna().sum()
print("Jumlah nilai null di setiap kolom:")
print(null_check)

# Show the rows that contain at least one missing value
rows_with_nulls = tps_question_data.loc[tps_question_data.isna().any(axis='columns')]
print("\nBaris dengan nilai null:")
print(rows_with_nulls)

# Count repeated question texts in the 'Questions_Descriptions' column
duplicate_check = tps_question_data.duplicated(subset=['Questions_Descriptions']).sum()
print("\nJumlah duplikat di kolom 'Questions_Descriptions':", duplicate_check)

# Show every row whose 'Questions_Descriptions' text occurs more than once
duplicate_rows = tps_question_data.loc[
    tps_question_data.duplicated(subset=['Questions_Descriptions'], keep=False)
]
print("\nBaris dengan konten duplikat di kolom 'Questions_Descriptions':")
print(duplicate_rows)

Jumlah nilai null di setiap kolom:
ID                        0
Question_Category         0
Questions_Descriptions    0
Choice_1                  0
Choice_2                  0
Choice_3                  0
Choice_4                  0
Right_Answer              0
dtype: int64

Baris dengan nilai null:
Empty DataFrame
Columns: [ID, Question_Category, Questions_Descriptions, Choice_1, Choice_2, Choice_3, Choice_4, Right_Answer]
Index: []

Jumlah duplikat di kolom 'Questions_Descriptions': 0

Baris dengan konten duplikat di kolom 'Questions_Descriptions':
Empty DataFrame
Columns: [ID, Question_Category, Questions_Descriptions, Choice_1, Choice_2, Choice_3, Choice_4, Right_Answer]
Index: []


# Recommendation System

In [None]:
# Step 1: Flag every attempt whose submitted answer equals the answer key
# (rows where the flag is False are the incorrect answers)
data['is_correct'] = data['USER_ANSWER'].eq(data['CORRECT_ANSWER'])

In [None]:
# Step 2: Count each user's incorrect answers per question category
user_errors = (
    data.loc[~data['is_correct']]
    .groupby(['ID_USER', 'Question_Category'])
    .size()
    .reset_index(name='errors')
)

In [None]:
# Splitting the data into training and testing sets:
# test_size=0.3 holds out 30% of the rows in `data` as test_data, and
# random_state=42 fixes the shuffle so the split is reproducible.
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)

In [None]:
# Define a function to create recommendations
def recommend_next_quiz(user_id, user_errors, total_questions=10, max_category_questions=6):
    """Decide how many questions of each category the user's next quiz should contain.

    Categories in which the user made more errors receive proportionally more
    questions. The returned counts always add up to ``total_questions``.

    Args:
        user_id: Value matched against the ``ID_USER`` column of ``user_errors``.
        user_errors: DataFrame with the columns ``ID_USER``, ``Question_Category``
            and ``errors`` (number of wrong answers for that user/category pair).
        total_questions: Total number of questions to hand out.
        max_category_questions: Upper bound for a single category. It is only
            exceeded when ``total_questions`` cannot be reached otherwise, i.e.
            when every known category is already at the bound.

    Returns:
        dict mapping category name to a plain ``int`` question count. Categories
        taken from the user's error rows come first (in row order), followed by
        the remaining entries of ``['PU', 'PPU', 'PBM', 'PK']``.
    """
    all_categories = ['PU', 'PPU', 'PBM', 'PK']

    user_specific_errors = user_errors[user_errors['ID_USER'] == user_id]

    # Sum the errors per category; repeated rows for one category are added up
    # instead of silently overwriting each other.
    errors_by_category = {}
    for category, errors in zip(user_specific_errors['Question_Category'],
                                user_specific_errors['errors']):
        errors_by_category[category] = errors_by_category.get(category, 0) + errors
    total_errors = sum(errors_by_category.values())

    # Proportional share per category, capped. Skipped when there is no error
    # history (also avoids a division by zero), in which case the even fill
    # below hands out all questions.
    recommended_proportions = {}
    if total_errors > 0:
        for category, errors in errors_by_category.items():
            share = float(errors) / float(total_errors)
            recommended_proportions[category] = min(
                max_category_questions, int(round(share * total_questions))
            )

    for category in all_categories:
        recommended_proportions.setdefault(category, 0)

    # Independent rounding can hand out too many questions (e.g. 3.5, 3.5, 3.0
    # rounds to 4 + 4 + 3 = 11); take the surplus back from the largest category.
    surplus = sum(recommended_proportions.values()) - total_questions
    while surplus > 0:
        largest = max(recommended_proportions, key=recommended_proportions.get)
        if recommended_proportions[largest] <= 0:
            break
        recommended_proportions[largest] -= 1
        surplus -= 1

    # Distribute remaining questions evenly among all categories, skipping
    # categories that already reached the cap.
    remaining_questions = total_questions - sum(recommended_proportions.values())
    respect_cap = True
    while remaining_questions > 0:
        progressed = False
        for category in all_categories:
            if remaining_questions <= 0:
                break
            if respect_cap and recommended_proportions[category] >= max_category_questions:
                continue
            recommended_proportions[category] += 1
            remaining_questions -= 1
            progressed = True
        if not progressed:
            # Every category is at the cap: reaching total_questions takes
            # precedence, so the cap is lifted for the leftover questions.
            respect_cap = False

    return recommended_proportions

In [None]:
# Example: build the per-category question mix for one user and print it
user_id = 1
recommendations = recommend_next_quiz(user_id=user_id, user_errors=user_errors)
print(f'Recommendations for User {user_id}: {recommendations}')

Recommendations for User 1: {'PK': 2, 'PPU': 5, 'PU': 3, 'PBM': 0}


# Get Questions Based on Recommendation

In [None]:
# Function to retrieve questions based on category recommendations
def get_recommended_questions(user_id, user_errors, question_data, total_questions=10,
                              max_category_questions=6, random_state=42):
    """Draw concrete questions that match the user's category recommendation.

    The per-category counts come from ``recommend_next_quiz``; for each category
    that many rows are sampled from ``question_data``.

    Args:
        user_id: User to build the quiz for (forwarded to ``recommend_next_quiz``).
        user_errors: Per-user, per-category error counts (forwarded as well).
        question_data: DataFrame of available questions; must contain a
            ``Question_Category`` column.
        total_questions: Forwarded to ``recommend_next_quiz``.
        max_category_questions: Forwarded to ``recommend_next_quiz``.
        random_state: Seed passed to ``DataFrame.sample``. The default of 42
            reproduces the same selection on every call; pass ``None`` to get a
            different draw each time.

    Returns:
        DataFrame with the sampled rows of ``question_data``. If a category holds
        fewer questions than recommended, all of its questions are returned
        instead of raising. If nothing can be selected, an empty frame with the
        columns of ``question_data`` is returned.
    """
    recommendations = recommend_next_quiz(user_id, user_errors, total_questions, max_category_questions)

    recommended_questions = []
    for category, count in recommendations.items():
        if count <= 0:
            continue
        category_questions = question_data[question_data['Question_Category'] == category]
        available = len(category_questions)
        if available == 0:
            # Nothing to draw from; sampling an empty frame would raise.
            continue
        # Never request more rows than exist (sampling is without replacement).
        recommended_questions.append(
            category_questions.sample(n=min(int(count), available), random_state=random_state)
        )

    if not recommended_questions:
        # pd.concat([]) raises, so hand back an empty frame with the same columns.
        return question_data.iloc[0:0]
    return pd.concat(recommended_questions)

In [None]:
# Fetch the recommended questions for one user and print the question columns
user_id = 1
recommended_questions = get_recommended_questions(
    user_id=user_id,
    user_errors=user_errors,
    question_data=tps_question_data,
)
print(f'Recommended Questions for User {user_id}:')
print(
    recommended_questions[
        [
            'ID',
            'Question_Category',
            'Questions_Descriptions',
            'Choice_1',
            'Choice_2',
            'Choice_3',
            'Choice_4',
            'Right_Answer',
        ]
    ]
)

Recommended Questions for User 1:
      ID Question_Category                             Questions_Descriptions  \
563  564                PK  Diketahui A={9, 7, 6, 5, 4, 3, 2, 1}. Lima ang...   
532  533                PK                     Jika p = 2a maka nilai 5p + 5a   
360  361               PPU  Minggu-minggu pertama setelah proklamasi adala...   
280  281               PPU  Kain adat Ulos, Uis Karo, dan Uis Beka yang di...   
295  296               PPU  (1) Saat ini, pelaku UMKM harus melek masalah ...   
423  424               PPU  (1)Minum air putih atau air mineral merupakan ...   
393  394               PPU  Meskipun angka pecandu narkoba di Indonesia tu...   
76    77                PU  (1) Perkembangan teknologi informasi (TI) saat...   
18    19                PU  Semua pegawai negeri dapat berbahasa Inggris.\...   
82    83                PU  (1) Sebuah studi menunjukkan bahwa anak yang d...   

                                              Choice_1  \
563             

# Evaluate Model

In [None]:
# Evaluating the model
def evaluate_recommendations(test_data, user_errors):
    """Score the category recommendations against held-out answer rows.

    For every user present in ``test_data`` the per-category counts are obtained
    from ``recommend_next_quiz``. For each category that user has rows for in
    ``test_data``, the rows flagged ``is_correct`` are counted and the count
    recommended for that category is added to a running total.

    Args:
        test_data: DataFrame with ``ID_USER``, ``Question_Category`` and a
            boolean ``is_correct`` column.
        user_errors: Per-user, per-category error counts, forwarded unchanged
            to ``recommend_next_quiz``.

    Returns:
        Tuple ``(accuracy, precision)``:
        accuracy is the number of correct rows divided by ``len(test_data)``;
        precision is the number of correct rows divided by the summed
        recommended counts. Either value is ``0`` when its denominator is 0.

    NOTE(review): the correct-row count comes straight from ``is_correct`` and
    does not depend on what was recommended, so "accuracy" here equals the share
    of correct answers in ``test_data``. Confirm this is the intended definition
    before reading these numbers as classifier metrics.
    """
    correct_answers = 0
    recommended_total = 0

    for uid in test_data['ID_USER'].unique():
        rows_for_user = test_data.loc[test_data['ID_USER'] == uid]
        category_counts = recommend_next_quiz(uid, user_errors)
        seen_categories = rows_for_user['Question_Category'].unique()

        # Correctly answered rows of this user, summed over the categories seen
        correct_answers += sum(
            len(rows_for_user[(rows_for_user['Question_Category'] == cat) & rows_for_user['is_correct']])
            for cat in seen_categories
        )
        # Questions recommended for those same categories (0 if not recommended)
        recommended_total += sum(category_counts.get(cat, 0) for cat in seen_categories)

    n_rows = len(test_data)

    precision = 0
    if recommended_total > 0:
        precision = correct_answers / recommended_total

    accuracy = 0
    if n_rows > 0:
        accuracy = correct_answers / n_rows

    return accuracy, precision



In [None]:
# Build the per-user error profile from the training split only. Using the
# counts derived from the full dataset would let the held-out test answers
# shape the very recommendations that are evaluated against them.
train_user_errors = (
    train_data[~train_data['is_correct']]
    .groupby(['ID_USER', 'Question_Category'])
    .size()
    .reset_index(name='errors')
)
accuracy, precision = evaluate_recommendations(test_data, train_user_errors)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')

Accuracy: 0.25
Precision: 0.10843373493975904
