In [1]:
import os
import random

import numpy as np
from ScratchRnn import ScratchRNNModel
from EmbeddingLayer import EmbeddingLayer
from SimpleRnnLayer import SimpleRNNLayer
from BidirectionalLayer import BidirectionalLayer
from DropoutLayer import DropoutLayer
from DenseLayer import DenseLayer
from sklearn.metrics import f1_score


ModuleNotFoundError: No module named 'sklearn'

In [1]:
SEED = 42

# Configure the process environment BEFORE TensorFlow is imported:
# TF_CPP_MIN_LOG_LEVEL is consulted when the native runtime is loaded, so
# assigning it after `import tensorflow` is too late to silence start-up logs.
# The remaining variables are set alongside it so the whole environment is in
# place before TensorFlow initialises.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide INFO and WARNING output from the C++ backend
os.environ['TF_DETERMINISTIC_OPS'] = '1'  # request deterministic op implementations
# NOTE: PYTHONHASHSEED is read at interpreter start-up. Setting it here does not
# re-seed hashing in the already-running kernel; it is only inherited by child
# processes started from this notebook.
os.environ['PYTHONHASHSEED'] = str(SEED)

import tensorflow as tf

# Seed each random number generator used by the notebook.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

ImportError: Traceback (most recent call last):
  File "c:\Users\ASUS\AppData\Local\Programs\Python\Python312\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

In [None]:
def create_rnn_model_from_keras(keras_model, dropout_rate=0.5) -> ScratchRNNModel:
    """Mirror a Keras RNN classifier with the from-scratch layer classes.

    The Keras model is read positionally: ``layers[1]`` supplies the embedding
    matrix, and the layers from index 2 onward are consumed for as long as
    their names contain ``'rnn'`` or ``'bidirectional'``.  A dropout layer and
    a softmax dense layer (taken from the last Keras layer whose name contains
    ``'dense'``) are appended afterwards.  One line is printed for every RNN,
    dropout and dense layer that gets added.

    Args:
        keras_model: Keras model exposing ``layers``; every layer used here
            must provide ``name`` and ``get_weights()``.
        dropout_rate: Rate handed to the scratch ``DropoutLayer``.

    Returns:
        A ``ScratchRNNModel`` wrapping the converted layer list.
    """
    def make_rnn(weight_list, units):
        # Weight slots 0, 1 and 2 feed W_xh, W_hh and b_h respectively.
        return SimpleRNNLayer(
            W_xh=weight_list[0],
            W_hh=weight_list[1],
            b_h=weight_list[2],
            rnn_units=units,
        )

    # 1. Embedding: first weight array of the Keras layer at index 1.
    embedding_matrix = keras_model.layers[1].get_weights()[0]
    converted = [EmbeddingLayer(embedding_matrix)]

    # 2. Recurrent stack: walk forward from index 2 until a layer that is
    #    neither an RNN nor a Bidirectional wrapper (or the end of the model).
    cursor = 2
    while True:
        source_layer = keras_model.layers[cursor]
        is_bidirectional = 'bidirectional' in source_layer.name
        if not (is_bidirectional or 'rnn' in source_layer.name):
            break

        if is_bidirectional:
            forward_weights = source_layer.forward_layer.get_weights()
            backward_weights = source_layer.backward_layer.get_weights()

            # Unit count comes from the forward recurrent matrix and is
            # reused for the backward direction.
            rnn_units = forward_weights[1].shape[0]

            converted.append(
                BidirectionalLayer(
                    make_rnn(forward_weights, rnn_units),
                    make_rnn(backward_weights, rnn_units),
                )
            )
            print(f"Layer {len(converted)-1}: Bidirectional(SimpleRNN), units: {rnn_units}")
        else:
            plain_weights = source_layer.get_weights()
            rnn_units = plain_weights[1].shape[0]
            converted.append(make_rnn(plain_weights, rnn_units))
            print(f"Layer {len(converted)-1}: SimpleRNN, units: {rnn_units}")

        cursor += 1
        if cursor >= len(keras_model.layers):
            break

    # 3. Dropout: always appended, configured from the function argument.
    converted.append(DropoutLayer(rate=dropout_rate))
    print(f"Layer {len(converted)-1}: Dropout, rate: {dropout_rate}")

    # 4. Dense head: weights and bias of the last layer named like 'dense'.
    dense_candidates = [layer for layer in keras_model.layers if 'dense' in layer.name]
    dense_weights, dense_bias = dense_candidates[-1].get_weights()
    converted.append(DenseLayer(weights=dense_weights, bias=dense_bias, activation='softmax'))
    print(f"Layer {len(converted)-1}: Dense, shape: {dense_weights.shape}, activation: softmax")

    return ScratchRNNModel(converted)

In [None]:


def compare_models(keras_model, scratch_model, X_test, y_test):
    """
    Compare the predictions and macro F1 scores of the Keras and scratch models.

    Both models predict on ``X_test``; the class with the highest probability
    along axis 1 becomes the predicted label.  The macro F1 of each model
    against ``y_test``, their absolute difference, and the largest element-wise
    gap between the two probability outputs are printed.

    Returns:
        Tuple ``(keras_f1, scratch_f1, keras_predictions, scratch_predictions)``.
    """
    print("\nMemperoleh prediksi dari model Keras...")
    keras_pred_probs = keras_model.predict(X_test, verbose=0)
    keras_predictions = np.argmax(keras_pred_probs, axis=1)

    print("Memperoleh prediksi dari model scratch...")
    # The scratch model is queried through its own .predict() method.
    scratch_pred_probs = scratch_model.predict(X_test)
    scratch_predictions = np.argmax(scratch_pred_probs, axis=1)

    # Macro-averaged F1 for each model, Keras first.
    keras_f1, scratch_f1 = (
        f1_score(y_test, predicted, average='macro')
        for predicted in (keras_predictions, scratch_predictions)
    )

    print(f"\nSkor F1 Keras: {keras_f1:.6f}")
    print(f"Skor F1 Scratch: {scratch_f1:.6f}")

    # Eight decimals so that very small gaps remain visible.
    difference = abs(keras_f1 - scratch_f1)
    print(f"Perbedaan F1 (absolut): {difference:.8f}")

    # Stricter check: largest element-wise gap between the probability outputs.
    absolute_gap = np.abs(keras_pred_probs - scratch_pred_probs)
    prob_diff = absolute_gap.max()
    print(f"Perbedaan maksimum probabilitas output: {prob_diff:.8e}")

    # 1e-6 is the tolerance allowed for floating-point noise.
    verdict = (
        "Output probabilitas model Keras dan Scratch sangat mirip."
        if prob_diff < 1e-6
        else "PERINGATAN: Output probabilitas model Keras dan Scratch memiliki perbedaan signifikan."
    )
    print(verdict)

    return keras_f1, scratch_f1, keras_predictions, scratch_predictions


In [None]:
from src.rnn.preprocessing.text_vectorizer import create_text_vectorizer


def prepare_test_data(test_df, train_df, vocab_size, sequence_length):
    """
    Prepare the test split: build the vectorizer, map labels, create sequences.

    Args:
        test_df: DataFrame with a ``'text'`` column and a ``'label'`` column
            holding ``'negative'``, ``'neutral'`` or ``'positive'``.
        train_df: DataFrame whose ``'text'`` column is handed to
            ``create_text_vectorizer``.
        vocab_size: Forwarded to the vectorizer as ``max_tokens``.
        sequence_length: Forwarded to the vectorizer as
            ``output_sequence_length``.

    Returns:
        Tuple ``(test_sequences, y_test_mapped)``: the vectorized test texts as
        a NumPy array and the labels encoded as 0 / 1 / 2.

    Raises:
        ValueError: If ``test_df['label']`` holds a value (including a missing
            one) that is not in the label map.  Without this check such rows
            would silently become NaN in the returned label array.
    """
    print("\nMembuat text vectorizer menggunakan data training...")
    # vocab_size and sequence_length are passed through unchanged.
    # NOTE(review): the returned object is presumably a Keras TextVectorization
    # layer (or compatible); it must be callable on a string tensor and return
    # something with .numpy() - confirm against create_text_vectorizer.
    vectorizer = create_text_vectorizer(
        train_df['text'].values,
        max_tokens=vocab_size,
        output_sequence_length=sequence_length
    )

    # Label mapping
    label_map = {'negative': 0, 'neutral': 1, 'positive': 2}
    mapped_labels = test_df['label'].map(label_map)

    # Series.map yields NaN for anything outside label_map; fail loudly here
    # instead of returning a float array containing NaN.
    unmapped = mapped_labels.isna()
    if unmapped.any():
        unknown = sorted(test_df['label'][unmapped].astype(str).unique())
        raise ValueError(
            f"Unknown label value(s) in test data: {unknown}; "
            f"expected one of {sorted(label_map)}"
        )
    y_test_mapped = mapped_labels.values

    # Convert the texts to a string tensor, vectorize, and return a NumPy array.
    test_texts_tensor = tf.convert_to_tensor(test_df['text'].values, dtype=tf.string)
    test_sequences = vectorizer(test_texts_tensor).numpy()

    return test_sequences, y_test_mapped


In [None]:
import pandas as pd
from src.rnn.keras_model import build_simple_rnn_model

# ---------------------------------------------------------------------------
# Configuration shared by the text vectorizer, the Keras model builder and the
# scratch-model conversion below.
# ---------------------------------------------------------------------------
VOCAB_SIZE = 10000        # vectorizer max_tokens and the Keras model's vocab_size
SEQ_LENGTH = 100          # vectorizer output length and the Keras model's sequence_length
EMBEDDING_DIM = 128       # passed to the builder as embedding_dim
RNN_UNITS = 32            # passed to the builder as rnn_units; also part of the result file name
NUM_CLASSES = 3           # passed to the builder as num_classes
NUM_LAYERS = 3            # passed to the builder as num_layers; also part of the result file name
BIDIRECTIONAL = True      # passed to the builder as bidirectional; written as 0/1 in the result file name
KERAS_DROPOUT_RATE = 0.5  # dropout rate for both the Keras builder and the scratch model

print("Parameter Model:")
print(f"  VOCAB_SIZE: {VOCAB_SIZE}, SEQ_LENGTH: {SEQ_LENGTH}, EMBEDDING_DIM: {EMBEDDING_DIM}")
print(f"  RNN_UNITS: {RNN_UNITS}, NUM_CLASSES: {NUM_CLASSES}, NUM_LAYERS: {NUM_LAYERS}")
print(f"  BIDIRECTIONAL: {BIDIRECTIONAL}, KERAS_DROPOUT_RATE: {KERAS_DROPOUT_RATE}")

# All input and output paths are resolved relative to base_path.
base_path = '.'
test_df = pd.read_csv(os.path.join(base_path, 'dataset/test.csv'))
train_df = pd.read_csv(os.path.join(base_path, "dataset/train.csv"))
print("Dataset berhasil dimuat.")

# Vectorize the test texts (vectorizer built from the training texts) and
# encode the labels as integers.
print("\nMempersiapkan data tes...")
X_test, y_test = prepare_test_data(test_df, train_df, VOCAB_SIZE, SEQ_LENGTH)

print(f"\nBentuk data tes (X_test): {X_test.shape}")
print(f"Bentuk label tes (y_test): {y_test.shape}")
print(f"Label unik: {np.unique(y_test)}")

# Build the Keras model from the constants above, then load saved weights.
# NOTE(review): the architecture built here presumably has to match the one
# that produced the weight file for load_weights to succeed - confirm.
print("\nMembangun model Keras...")
keras_model = build_simple_rnn_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    num_classes=NUM_CLASSES,
    sequence_length=SEQ_LENGTH,
    bidirectional=BIDIRECTIONAL,
    num_layers=NUM_LAYERS,
    dropout_rate=KERAS_DROPOUT_RATE 
)


weights_path = os.path.join(base_path, f'weight_comparison_rnn.weights.h5')
print(f"Mencoba memuat bobot Keras dari: {weights_path}")
keras_model.load_weights(weights_path)
print("Bobot Keras berhasil dimuat!")

print("\nRingkasan Model Keras:")
keras_model.summary(line_length=100)

# Convert the Keras model into the scratch implementation, using the same
# dropout rate that was given to the Keras builder.
print("\nMembuat model scratch menggunakan komponen yang telah direfactor...")
scratch_model = create_rnn_model_from_keras(keras_model, dropout_rate=KERAS_DROPOUT_RATE)

print("\n" + "="*80)
print("MEMBANDINGKAN MODEL KERAS DAN SCRATCH")
print("="*80)

# Run both models on the test set; compare_models prints its own report and
# returns the two macro F1 scores plus each model's predicted labels.
keras_f1, scratch_f1, keras_preds, scratch_preds = compare_models(keras_model, scratch_model, X_test, y_test)

print(f"\n" + "="*80)
print("HASIL AKHIR PERBANDINGAN")
print("="*80)
print(f"Skor F1 Keras: {keras_f1:.6f}")
print(f"Skor F1 Scratch: {scratch_f1:.6f}")

difference_f1 = abs(keras_f1 - scratch_f1)
print(f"Perbedaan F1 (absolut): {difference_f1:.8f}")

# The two F1 scores count as matching when they differ by less than 1e-7.
if difference_f1 < 1e-7: 
    print("\nBERHASIL: Skor F1 model Keras dan Scratch sangat cocok!")
else:
    print("\nMASALAH: Skor F1 model Keras dan Scratch TIDAK cocok.")

# Persist a per-sample comparison so individual disagreements can be inspected.
results_dir = os.path.join(base_path, "results/comparison")
os.makedirs(results_dir, exist_ok=True)

results_df = pd.DataFrame({
    'true_label': y_test,
    'keras_pred_label': keras_preds,
    'scratch_pred_label': scratch_preds,
    'match': keras_preds == scratch_preds
})

# The file name encodes layer count (L), units (U) and bidirectionality (B as 0/1).
comparison_filename = os.path.join(results_dir, f'model_comparison_L{NUM_LAYERS}_U{RNN_UNITS}_B{int(BIDIRECTIONAL)}.csv')
results_df.to_csv(comparison_filename, index=False)
print(f"\nHasil perbandingan detail disimpan ke: '{comparison_filename}'")

# Fraction of test samples on which both models predict the same label.
prediction_agreement = results_df['match'].mean()
print(f"Kecocokan prediksi (label): {prediction_agreement:.4f}")

# Success requires every predicted label to agree AND the F1 gap to be below 1e-7.
if prediction_agreement == 1.0 and difference_f1 < 1e-7:
    print("Semua prediksi label cocok dan skor F1 identik. Implementasi scratch berhasil!")
else:
    print("Ada perbedaan dalam prediksi label atau skor F1. Perlu investigasi lebih lanjut.")