# Step 1: Data Loading and normalization

In [15]:
# Install sklearn if not already installed
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [5]:
# Install pywt if not already installed
%pip install PyWavelets

Collecting PyWaveletsNote: you may need to restart the kernel to use updated packages.

  Downloading pywavelets-1.6.0-cp311-cp311-win_amd64.whl.metadata (9.0 kB)
Downloading pywavelets-1.6.0-cp311-cp311-win_amd64.whl (4.3 MB)
   ---------------------------------------- 0.0/4.3 MB ? eta -:--:--
   ---------------------------------------- 0.1/4.3 MB 2.7 MB/s eta 0:00:02
   ---------------------------------------- 0.1/4.3 MB 2.7 MB/s eta 0:00:02
    --------------------------------------- 0.1/4.3 MB 787.7 kB/s eta 0:00:06
    --------------------------------------- 0.1/4.3 MB 512.0 kB/s eta 0:00:09
    --------------------------------------- 0.1/4.3 MB 512.0 kB/s eta 0:00:09
    --------------------------------------- 0.1/4.3 MB 393.8 kB/s eta 0:00:11
    --------------------------------------- 0.1/4.3 MB 393.8 kB/s eta 0:00:11
    --------------------------------------- 0.1/4.3 MB 393.8 kB/s eta 0:00:11
    --------------------------------------- 0.1/4.3 MB 393.8 kB/s eta 0:00:11
    --

In [16]:
# Install tensorflow if not already installed
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow)
  Downloading tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading gast-0.5.4-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow-

In [17]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

# Paths to your data folders
# Make sure you are running this notebook in the same directory as the data folders
truth_path = '/Truth_Sessions/1_BandPass_Filtered/'
lie_path = '/Lie_Sessions/1_BandPass_Filtered/'

# Function to load all CSVs and concatenate them into a single DataFrame
def load_and_concatenate_data(path):
    data_frames = []
    for filename in os.listdir(path):
        if filename.endswith('.csv'):
            df = pd.read_csv(os.path.join(path, filename))
            data_frames.append(df)
    concatenated_df = pd.concat(data_frames, ignore_index=True)
    return concatenated_df

truth_data = load_and_concatenate_data(truth_path)
lie_data = load_and_concatenate_data(lie_path)

# Function to normalize data
scaler = StandardScaler()
def normalize_data(df):
    return scaler.fit_transform(df)

truth_data = normalize_data(truth_data)
lie_data = normalize_data(lie_data)



# Step 2: Segmenting the Data

In [18]:
# Function to segment data
def segment_data(df, window_size=128, overlap=64):
    segments = []
    for start in range(0, len(df) - window_size, overlap):
        segment = df[start:start + window_size, :]
        segments.append(segment)
    return np.array(segments)

# Segment the data
window_size = 128  # 1 second of data
overlap = 64  # 50% overlap

truth_segments = segment_data(truth_data, window_size, overlap)
lie_segments = segment_data(lie_data, window_size, overlap)

# Step 3: Feature Extraction with DWT

In [19]:
import pywt

# Function to extract DWT features
def extract_dwt_features(segments, wavelet='db4', level=4):
    features = []
    for segment in segments:
        segment_features = []
        for channel in range(segment.shape[1]):
            coeffs = pywt.wavedec(segment[:, channel], wavelet, level=level)
            coeffs_flat = np.hstack(coeffs)
            segment_features.append(coeffs_flat)
        features.append(np.hstack(segment_features))
    return np.array(features)

# Extract DWT features
truth_features = extract_dwt_features(truth_segments)
lie_features = extract_dwt_features(lie_segments)

# Step 4: Preparing Data for CNN

In [21]:
# Create labels: 1 for truth, 0 for lie
truth_labels = np.ones(truth_features.shape[0])
lie_labels = np.zeros(lie_features.shape[0])

# Combine features and labels
X = np.vstack((truth_features, lie_features))
y = np.hstack((truth_labels, lie_labels))

# Split data into training and testing sets
from sklearn.model_selection import train_test_split

# Ensure both classes are in the test set by using stratified split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Verify the split
print("Training set class distribution:", np.bincount(y_train.astype(int)))
print("Test set class distribution:", np.bincount(y_test.astype(int)))

Training set class distribution: [3238 3238]
Test set class distribution: [810 810]


# Step 5: Define and Train the CNN

In [22]:
import tensorflow as tf
from tensorflow.keras import layers, models,callbacks

# Define the CNN model
model = models.Sequential()

# Convolution Layers
# Stage 1
model.add(layers.Conv1D(256, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.MaxPooling1D(2))
model.add(layers.Dropout(0.25))

# Stage 2
model.add(layers.Conv1D(128, 3, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.MaxPooling1D(2))
model.add(layers.Dropout(0.25))

# Stage 3
model.add(layers.Conv1D(64, 3, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.MaxPooling1D(2))
model.add(layers.Dropout(0.25))

# Flatten
model.add(layers.Flatten())

# Fully Connected Layers
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))

# Output
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Reshape data for the CNN
X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Define callbacks for early stopping and model checkpointing
callbacks = [
    callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    callbacks.ModelCheckpoint('best_model.keras', save_best_only=True)
]

# Train the model
history = model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test), callbacks=callbacks)

# Load the best model
model.load_weights('best_model.keras')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 211ms/step - accuracy: 0.5063 - loss: 0.6208 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 214ms/step - accuracy: 0.7168 - loss: 0.5351 - val_accuracy: 0.5790 - val_loss: 0.6563
Epoch 3/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 214ms/step - accuracy: 0.8857 - loss: 0.4785 - val_accuracy: 0.7160 - val_loss: 0.5647
Epoch 4/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 216ms/step - accuracy: 0.8274 - loss: 0.4834 - val_accuracy: 0.8901 - val_loss: 0.4207
Epoch 5/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 201ms/step - accuracy: 0.8626 - loss: 0.4359 - val_accuracy: 0.8105 - val_loss: 0.4709
Epoch 6/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 190ms/step - accuracy: 0.8601 - loss: 0.4267 - val_accuracy: 0.8895 - val_loss: 0.3807
Epoch 7/10

# Step 6: Evaluate the Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score

# Make predictions
y_pred = model.predict(X_test_cnn)
y_pred_classes = (y_pred > 0.5).astype("int32")

# Evaluate the model
print(classification_report(y_test, y_pred_classes, zero_division=1))
print("F1 Score:", f1_score(y_test, y_pred_classes, zero_division=1))


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step
              precision    recall  f1-score   support

         0.0       0.99      0.84      0.91       810
         1.0       0.86      1.00      0.92       810

    accuracy                           0.92      1620
   macro avg       0.93      0.92      0.92      1620
weighted avg       0.93      0.92      0.92      1620

F1 Score: 0.9237822349570202
