# **Fake News Detection - Method 2 (Training)**

Suppress Python warnings and reduce TensorFlow log verbosity

In [1]:
import os
import warnings

# Silence every Python-level warning for the rest of the session
warnings.filterwarnings(action="ignore")
# Raise TensorFlow's native log threshold (level "2" hides INFO and WARNING lines);
# set here, ahead of the TensorFlow import in the training cell
os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

Constants and hyperparameters

In [2]:
from pathlib import Path


# --- Constants ---
# Datasets to train on, in processing order
DATASETS = ["ISOT", "LIAR"]

# Per-dataset input files:
#   train / test -> processed splits
#   aux          -> feature file selected in method 1
PATHS = dict(
    ISOT=dict(
        train="data/processed/ISOT/isot_train.pkl",
        test="data/processed/ISOT/isot_test.pkl",
        aux="data/features/ISOT/bow_min30_chi2700.joblib",
    ),
    LIAR=dict(
        train="data/processed/LIAR/liar_train.pkl",
        test="data/processed/LIAR/liar_test.pkl",
        aux="data/features/LIAR/tfidf_min40_chi2700.joblib",
    ),
)

# Root folder for every artifact written by this notebook
SAVED_MODELS_PATH = Path("saved_models/method2")

# --- Hyperparameters ---
# Word2Vec dimensions
W2V_DIMS = [100, 200]
# Word2Vec window sizes
WINDOW_SIZES = [5, 8]
# Pad length: every token sequence is padded/truncated to this many ids
MAX_LEN = 250

Helper functions for tokenizing text, training Word2Vec, and building the embedding matrix

In [3]:
import numpy as np
from gensim.models import Word2Vec


def split_text(text):
    """Split a string into tokens on runs of whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        list[str]: The tokens in their original order; an empty list when
        ``text`` is empty or contains only whitespace.
    """
    tokens = text.split()
    return tokens

def build_w2v(sentences, dim, window):
    """Train a skip-gram Word2Vec model on tokenized sentences.

    Args:
        sentences: The tokenized corpus, passed straight to ``Word2Vec``.
        dim: Size of each word vector.
        window: Context window size.

    Returns:
        The ``Word2Vec`` model produced by the constructor call.
    """
    # NOTE(review): gensim documents that a fixed seed gives fully reproducible
    # vectors only with a single worker thread; with workers=4 results may vary
    # slightly between runs -- confirm whether that matters here.
    w2v_params = {
        "sentences": sentences,
        "vector_size": dim,  # dimension
        "window": window,    # window size
        "min_count": 2,      # minimum count
        "sg": 1,             # skip-gram
        "workers": 4,
        "epochs": 5,
        "seed": 42,
    }
    return Word2Vec(**w2v_params)

def build_embedding_matrix(word_index, w2v_model, dim, seed=42):
    """Build the embedding matrix that maps token ids to vectors.

    Row ``i`` holds the vector for the token whose id is ``i``. Tokens present
    in the Word2Vec vocabulary get their trained vector; every other token gets
    a small random vector. Row 0 is kept at zero because id 0 is the value that
    ``pad_sequences`` pads with, so padding positions carry no random signal.

    Args:
        word_index: Mapping of word -> integer id.
        w2v_model: Object exposing ``.wv`` that supports ``word in wv`` and
            ``wv[word]`` lookups.
        dim: Width of each embedding row; must match the Word2Vec vector size.
        seed: Seed for the random fallback vectors. The default keeps the matrix
            identical across runs; pass ``None`` for non-deterministic values.

    Returns:
        np.ndarray: float32 array of shape ``(len(word_index) + 1, dim)``.
    """
    # A local generator makes the result reproducible without touching global RNG state
    rng = np.random.default_rng(seed)

    # Start every row with small random values (fallback for unknown words)
    emb = rng.uniform(-0.05, 0.05, (len(word_index) + 1, dim)).astype(np.float32)

    # Padding row: zero it so padded timesteps are not fed random noise.
    # If word_index itself maps a known word to id 0, the loop below overwrites it.
    emb[0] = 0.0

    # Overwrite rows of known words with their Word2Vec vectors
    vectors = w2v_model.wv
    for word, idx in word_index.items():
        if word in vectors:
            emb[idx] = vectors[word]

    return emb

Train the model (Bi-LSTM)

In [4]:
from tensorflow.keras import mixed_precision # type: ignore
# Set the global Keras dtype policy to "mixed_float16"; this runs before any layer in this cell is created
mixed_precision.set_global_policy("mixed_float16")

import itertools
import joblib
from scipy import sparse
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, callbacks, optimizers # type: ignore
from tensorflow.keras.preprocessing.text import Tokenizer # type: ignore
from keras.preprocessing.sequence import pad_sequences


# Word2Vec.save() and joblib.dump() write into these folders but do not create
# them, so make sure they exist; otherwise a fresh checkout fails on first save.
for subdir in ("w2v", "tokenizer", "bilstm"):
    (SAVED_MODELS_PATH / subdir).mkdir(parents=True, exist_ok=True)

# Train one Bi-LSTM per (dataset, Word2Vec dimension, Word2Vec window) combination.
for ds in DATASETS:
    # Load train set and features that were best at method 1
    train_df = joblib.load(PATHS[ds]["train"])
    aux_obj = joblib.load(PATHS[ds]["aux"]) # method 1 features
    X_aux = aux_obj["X"]
    if sparse.issparse(X_aux):
        # Densify so the matrix can be split and fed to the model as a plain array
        X_aux = X_aux.toarray().astype("float16")
    y = train_df["label"].values

    # The aux features are paired with the training rows by position, so the
    # row counts must agree; fail early with a readable message if they do not.
    assert X_aux.shape[0] == len(train_df), (
        f"{ds}: aux features have {X_aux.shape[0]} rows but the train set has {len(train_df)}"
    )

    # Split text that was already cleaned
    tokens_list = train_df["cleaned"].apply(split_text).tolist()

    # The tokenizer, the padded sequences and the train/validation split depend
    # only on the dataset (not on the Word2Vec settings), so build them once here
    # instead of repeating the same work for every (dim, win) pair.

    # Build tokenizer and tokenize
    tok_path = SAVED_MODELS_PATH / "tokenizer" / f"{ds}.pkl"
    if tok_path.exists():
        tok = joblib.load(tok_path)
    else:
        # NOTE(review): Tokenizer lower-cases by default while Word2Vec is trained on
        # the raw tokens -- confirm the "cleaned" column is already lower-case.
        tok = Tokenizer(oov_token="<UNK>", filters="") # when tokenizer encounters a new word, it is marked as unknown
        tok.fit_on_texts(tokens_list) # gives each word an id
        joblib.dump(tok, tok_path)

    # Create sequences
    sequences = tok.texts_to_sequences(tokens_list)
    X_seq = pad_sequences(sequences, maxlen=MAX_LEN, padding="post", truncating="post") # padding for fitting into input vector of Bi-LSTM

    # Split dataset into train and validation (fixed random_state -> same split for every config)
    X1_tr, X1_val, X2_tr, X2_val, y_tr, y_val = train_test_split(
        X_seq, X_aux, y, test_size=0.2, stratify=y, random_state=42
    )

    for dim, win in itertools.product(W2V_DIMS, WINDOW_SIZES):
        tag = f"{ds}_{dim}d_win{win}"
        print(f"\nTraining on: dataset={ds}, w2v_dim={dim}, w2v_window_size={win} model=Bi-LSTM")

        # Build Word2Vec (or reuse the one cached on disk for this configuration)
        w2v_path = SAVED_MODELS_PATH / "w2v" / f"{tag}.model"
        if w2v_path.exists():
            w2v_model = Word2Vec.load(str(w2v_path))
        else:
            w2v_model = build_w2v(tokens_list, dim, win)
            w2v_model.save(str(w2v_path))

        # Build embedding matrix
        emb_matrix = build_embedding_matrix(tok.word_index, w2v_model, dim)

        # Create input layers
        seq_input = layers.Input(shape=(MAX_LEN,), name="seq_input") # sequences
        aux_input = layers.Input(shape=(X_aux.shape[1],), name="aux_input") # method 1 features

        # Embedding layer: frozen, initialised from the Word2Vec-based matrix.
        # The sequence length comes from seq_input, so the deprecated
        # `input_length` argument is not passed.
        emb = layers.Embedding(
            input_dim=emb_matrix.shape[0],
            output_dim=dim,
            weights=[emb_matrix],
            trainable=False,
            name="embedding",
        )(seq_input)

        # Bi-LSTM layers
        x = layers.SpatialDropout1D(0.25)(emb)

        x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
        x = layers.GlobalMaxPooling1D()(x)

        merged = layers.concatenate([x, aux_input]) # merge LSTM output vector with method 1 features
        merged = layers.Dropout(0.4)(merged) # randomly turns neurons off to prevent overfitting
        merged = layers.Dense(128, activation="relu")(merged)
        # Output layer is pinned to float32 even though the global policy is mixed_float16
        out = layers.Dense(1, activation="sigmoid", dtype="float32")(merged)

        # Build model
        model = models.Model([seq_input, aux_input], out, name=f"BiLSTM_{tag}")
        model.compile(
            optimizer=optimizers.Adam(learning_rate=1e-3),
            loss="binary_crossentropy",
            metrics=["accuracy"]
        )
        model.summary(line_length=120)

        # Callbacks
        ckpt_path = SAVED_MODELS_PATH / "bilstm" / f"{tag}.keras"
        callback_list = [
            callbacks.ModelCheckpoint(str(ckpt_path), monitor="val_loss", save_best_only=True, verbose=1), # saves only best model
            callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2, verbose=1), # reduces learning rate adaptively
            callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True, verbose=1) # early stopping after 5 unimproved epochs
        ]

        # Training model
        model.fit(
            x=[X1_tr, X2_tr], y=y_tr,
            validation_data=([X1_val, X2_val], y_val),
            epochs=15,
            batch_size=512,
            verbose="auto",
            callbacks=callback_list
        )

2025-07-25 02:17:40.517688: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753399060.529201  163962 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753399060.532606  163962 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753399060.542757  163962 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1753399060.542772  163962 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1753399060.542773  163962 computation_placer.cc:177] computation placer alr


Training on: dataset=ISOT, w2v_dim=100, w2v_window_size=5 model=Bi-LSTM


I0000 00:00:1753399069.302677  163962 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Epoch 1/15


2025-07-25 02:17:52.059174: E tensorflow/core/util/util.cc:131] oneDNN supports DT_HALF only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.
I0000 00:00:1753399072.801762  165367 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - accuracy: 0.8831 - loss: 0.2884
Epoch 1: val_loss improved from inf to 0.01205, saving model to saved_models/method2/bilstm/ISOT_100d_win5.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 111ms/step - accuracy: 0.8842 - loss: 0.2859 - val_accuracy: 0.9971 - val_loss: 0.0120 - learning_rate: 0.0010
Epoch 2/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - accuracy: 0.9941 - loss: 0.0178
Epoch 2: val_loss improved from 0.01205 to 0.00724, saving model to saved_models/method2/bilstm/ISOT_100d_win5.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 105ms/step - accuracy: 0.9941 - loss: 0.0178 - val_accuracy: 0.9975 - val_loss: 0.0072 - learning_rate: 0.0010
Epoch 3/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.9959 - loss: 0.0138
Epoch 3: val_loss improved from 0.00724 to 0.00482, saving model to saved

Epoch 1/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - accuracy: 0.8654 - loss: 0.3065
Epoch 1: val_loss improved from inf to 0.01172, saving model to saved_models/method2/bilstm/ISOT_100d_win8.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 120ms/step - accuracy: 0.8667 - loss: 0.3038 - val_accuracy: 0.9961 - val_loss: 0.0117 - learning_rate: 0.0010
Epoch 2/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 0.9939 - loss: 0.0199
Epoch 2: val_loss improved from 0.01172 to 0.00796, saving model to saved_models/method2/bilstm/ISOT_100d_win8.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 111ms/step - accuracy: 0.9939 - loss: 0.0199 - val_accuracy: 0.9981 - val_loss: 0.0080 - learning_rate: 0.0010
Epoch 3/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 0.9954 - loss: 0.0148
Epoch 3: val_loss improved from 0.00796 to 0.00599, saving mode

Epoch 1/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.8754 - loss: 0.3014
Epoch 1: val_loss improved from inf to 0.00997, saving model to saved_models/method2/bilstm/ISOT_200d_win5.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 133ms/step - accuracy: 0.8766 - loss: 0.2987 - val_accuracy: 0.9972 - val_loss: 0.0100 - learning_rate: 0.0010
Epoch 2/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - accuracy: 0.9949 - loss: 0.0191
Epoch 2: val_loss improved from 0.00997 to 0.00514, saving model to saved_models/method2/bilstm/ISOT_200d_win5.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 123ms/step - accuracy: 0.9949 - loss: 0.0191 - val_accuracy: 0.9985 - val_loss: 0.0051 - learning_rate: 0.0010
Epoch 3/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.9974 - loss: 0.0082
Epoch 3: val_loss improved from 0.00514 to 0.00431, saving 

Epoch 1/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.8574 - loss: 0.3267
Epoch 1: val_loss improved from inf to 0.01065, saving model to saved_models/method2/bilstm/ISOT_200d_win8.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 187ms/step - accuracy: 0.8588 - loss: 0.3238 - val_accuracy: 0.9968 - val_loss: 0.0106 - learning_rate: 0.0010
Epoch 2/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - accuracy: 0.9953 - loss: 0.0177
Epoch 2: val_loss improved from 0.01065 to 0.00561, saving model to saved_models/method2/bilstm/ISOT_200d_win8.keras
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 122ms/step - accuracy: 0.9953 - loss: 0.0177 - val_accuracy: 0.9983 - val_loss: 0.0056 - learning_rate: 0.0010
Epoch 3/15
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.9965 - loss: 0.0118
Epoch 3: val_loss improved from 0.00561 to 0.00546, saving 

Epoch 1/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.5652 - loss: 0.6823
Epoch 1: val_loss improved from inf to 0.66529, saving model to saved_models/method2/bilstm/LIAR_100d_win5.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 154ms/step - accuracy: 0.5650 - loss: 0.6821 - val_accuracy: 0.5882 - val_loss: 0.6653 - learning_rate: 0.0010
Epoch 2/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.5898 - loss: 0.6678
Epoch 2: val_loss improved from 0.66529 to 0.66393, saving model to saved_models/method2/bilstm/LIAR_100d_win5.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 129ms/step - accuracy: 0.5900 - loss: 0.6678 - val_accuracy: 0.5945 - val_loss: 0.6639 - learning_rate: 0.0010
Epoch 3/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.6031 - loss: 0.6620
Epoch 3: val_loss improved from 0.66393 to 0.65911, saving m

Epoch 1/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.5309 - loss: 0.6890
Epoch 1: val_loss improved from inf to 0.67389, saving model to saved_models/method2/bilstm/LIAR_100d_win8.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 167ms/step - accuracy: 0.5319 - loss: 0.6888 - val_accuracy: 0.5745 - val_loss: 0.6739 - learning_rate: 0.0010
Epoch 2/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 0.5698 - loss: 0.6719
Epoch 2: val_loss improved from 0.67389 to 0.66184, saving model to saved_models/method2/bilstm/LIAR_100d_win8.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 130ms/step - accuracy: 0.5705 - loss: 0.6716 - val_accuracy: 0.5848 - val_loss: 0.6618 - learning_rate: 0.0010
Epoch 3/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy: 0.5970 - loss: 0.6655
Epoch 3: val_loss improved from 0.66184 to 0.65906, saving m

Epoch 1/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - accuracy: 0.5520 - loss: 0.6848
Epoch 1: val_loss improved from inf to 0.66601, saving model to saved_models/method2/bilstm/LIAR_200d_win5.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 162ms/step - accuracy: 0.5522 - loss: 0.6846 - val_accuracy: 0.5804 - val_loss: 0.6660 - learning_rate: 0.0010
Epoch 2/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.5902 - loss: 0.6655
Epoch 2: val_loss improved from 0.66601 to 0.66053, saving model to saved_models/method2/bilstm/LIAR_200d_win5.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 143ms/step - accuracy: 0.5901 - loss: 0.6655 - val_accuracy: 0.5936 - val_loss: 0.6605 - learning_rate: 0.0010
Epoch 3/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.6067 - loss: 0.6577
Epoch 3: val_loss improved from 0.66053 to 0.65795, saving m

Epoch 1/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.5336 - loss: 0.6858
Epoch 1: val_loss improved from inf to 0.67122, saving model to saved_models/method2/bilstm/LIAR_200d_win8.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 161ms/step - accuracy: 0.5350 - loss: 0.6854 - val_accuracy: 0.5828 - val_loss: 0.6712 - learning_rate: 0.0010
Epoch 2/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - accuracy: 0.5767 - loss: 0.6701
Epoch 2: val_loss improved from 0.67122 to 0.65952, saving model to saved_models/method2/bilstm/LIAR_200d_win8.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 138ms/step - accuracy: 0.5771 - loss: 0.6699 - val_accuracy: 0.5950 - val_loss: 0.6595 - learning_rate: 0.0010
Epoch 3/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.6150 - loss: 0.6583
Epoch 3: val_loss improved from 0.65952 to 0.65861, saving m