In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

In [2]:

# Define dataset paths
DATASET_PATH = r"C:\Users\PC\OneDrive\Documents\Spam Call Project\audiodataset"  # Update with your dataset folder
classes = ["real_calls", "scam_calls"]  # Real = Not Spam, Fraud = Spam

In [3]:

# Set fixed length for MFCC features (ensures uniform shape)
max_pad_len = 100  # Adjust this based on your dataset

In [4]:
# Feature extraction function
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=22050)  # Load audio
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)  # Extract MFCC features
    return np.mean(mfcc, axis=1)  # Compute mean across time dimension

In [5]:
X, y = [], []
for label in classes:
    folder = os.path.join(DATASET_PATH, label)
    for file in os.listdir(folder):
        file_path = os.path.join(folder, file)
        features = extract_features(file_path)
        X.append(features)
        y.append(label)

  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


In [6]:
X

[array([-3.5601968e+02,  1.0245705e+02, -2.1800255e+01,  3.1256548e+01,
        -6.8070459e-01,  1.0318842e+01, -1.0685477e+01, -1.9429264e+00,
        -4.5692601e+00,  7.4524417e+00, -7.2160673e+00, -5.8790582e-01,
         3.4491367e+00, -3.4453478e+00,  3.6376250e+00, -1.6576549e+00,
        -2.9500148e+00,  3.3844063e+00, -2.0156531e+00,  2.6432378e+00,
        -2.7474527e+00,  1.1478071e+00,  1.4474545e+00, -1.7456318e+00,
        -4.2926079e-01,  1.4500946e-01, -1.6296147e-01, -4.3466637e-01,
         1.1794472e-01, -2.0186496e-01, -8.0052131e-01,  3.4058514e-01,
        -2.2561425e-01, -7.0972514e-01,  4.2432323e-01, -7.3025912e-01,
        -3.0902994e-01, -1.0544515e+00,  2.6774782e-01, -1.0900500e+00],
       dtype=float32),
 array([-3.4051233e+02,  1.0719338e+02, -1.7955534e+01,  3.3404202e+01,
        -2.8066454e+00,  8.3003864e+00, -5.6812782e+00,  7.0646918e-01,
        -5.3435674e+00,  1.7370563e+00, -7.9517560e+00, -1.6033194e+00,
         3.7580743e+00, -3.3920414e+00, 

In [7]:
y


['real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_calls',
 'real_cal

In [8]:
from collections import Counter
print(Counter(y))  # Check how many real vs scam calls


Counter({'real_calls': 5284, 'scam_calls': 635})


In [9]:
from sklearn.utils import class_weight
weights = class_weight.compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weights = {i: weights[i] for i in range(len(weights))}


In [10]:
# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

In [11]:
# Encode labels (Real = 0, Fraud = 1)
encoder = LabelEncoder()
y = encoder.fit_transform(y)
joblib.dump(encoder, "label_encoder.pkl")  # Save label encoder

['label_encoder.pkl']

In [12]:
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)
joblib.dump(scaler, "scaler.pkl")  # Save scaler

['scaler.pkl']

In [13]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Reshape for CNN + LSTM
X_train = X_train.reshape(-1, 40, 1)
X_test = X_test.reshape(-1, 40, 1)

In [15]:
# Build CNN + LSTM model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(64, kernel_size=3, activation="relu", input_shape=(40, 1)),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Conv1D(128, kernel_size=3, activation="relu"),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")  # Binary classification
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=12, batch_size=16, validation_data=(X_test, y_test))

# Save the model
model.save("fraud_detection_model.h5")
print("✅ Model trained and saved successfully!") 


Epoch 1/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8716 - loss: 0.3565 - val_accuracy: 0.9046 - val_loss: 0.2460
Epoch 2/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9125 - loss: 0.2264 - val_accuracy: 0.9265 - val_loss: 0.2083
Epoch 3/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9354 - loss: 0.1856 - val_accuracy: 0.9383 - val_loss: 0.1883
Epoch 4/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9396 - loss: 0.1749 - val_accuracy: 0.9392 - val_loss: 0.1958
Epoch 5/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9445 - loss: 0.1646 - val_accuracy: 0.9443 - val_loss: 0.1796
Epoch 6/12
[1m296/296[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9512 - loss: 0.1500 - val_accuracy: 0.9375 - val_loss: 0.1946
Epoch 7/12
[1m296/296[0m 



✅ Model trained and saved successfully!
