In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from matplotlib.figure import figaspect
import warnings
import os
# Suppress all warnings for the rest of the session (blanket "ignore" filter,
# no category or module restriction).
warnings.filterwarnings("ignore")
# Make the root of drive C: the working directory; the relative
# "./Train-Test Data/..." CSV paths read further down resolve against it.
# NOTE(review): hard-coded, machine-specific location - presumably this only
# works where the data folder sits directly under drive C:; consider a
# configurable data directory instead.
os.chdir('C:\\')


In [3]:
def unroll(data, sequence_length):
    """Slice ``data`` into overlapping windows of ``sequence_length`` rows.

    Window ``i`` is ``data[i : i + sequence_length]``. The start index runs from
    0 up to (but not including) ``len(data) - sequence_length``, so the window
    that would end on the very last row is not produced.

    Args:
        data: Sliceable sequence supporting ``len()`` (e.g. a NumPy array).
        sequence_length: Number of consecutive rows in every window.

    Returns:
        ``np.ndarray`` with the windows stacked along a new leading axis
        (an empty array when there are no valid start indices).
    """
    window_count = len(data) - sequence_length
    windows = [data[start:start + sequence_length] for start in range(window_count)]
    return np.asarray(windows)

# Load the pre-split train and test tables; both paths are relative to the
# current working directory.
train_csv_path = "./Train-Test Data/data_train_16.csv"
test_csv_path = "./Train-Test Data/data_test_16.csv"

df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

In [6]:
# Row counts of both splits, followed by the test-to-train size ratio
n_train_rows = len(df_train)
n_test_rows = len(df_test)
print(n_train_rows)
print(n_test_rows)
print(n_test_rows / n_train_rows)

# Preview the first rows of the test table
df_test.head()

778998
56144
0.07207207207207207


Unnamed: 0,t,Input,Output,Frequency,Pressure,Normalized Input,Output mean,Normalized Output,Output_div_mean
0,0.0,0.239258,3.041992,30,4.8859,2.324901,3.165,0.961135,1.055085
1,0.000285,0.22583,3.066406,30,4.8859,2.250964,3.165,0.968849,1.063553
2,0.00057,0.224609,3.076172,30,4.8859,2.201673,3.165,0.971934,1.06694
3,0.000855,0.211182,3.056641,30,4.8859,2.119521,3.165,0.965763,1.060166
4,0.00114,0.20874,3.041992,30,4.8859,2.012723,3.165,0.961135,1.055085


In [7]:
# Columns used as model inputs and the single column to be predicted
feature_columns = ['t', 'Frequency', 'Normalized Input']
target_column = 'Output_div_mean'

# Same column selection applied to both splits
features_train = df_train[feature_columns]
features_test = df_test[feature_columns]
target_train = df_train[target_column]
target_test = df_test[target_column]

# Independent standardizers: one for the input columns, one for the target
scaler_features, scaler_target = StandardScaler(), StandardScaler()

In [12]:
# The target Series is reshaped into a single-column 2-D array before scaling
target_train_column = target_train.values.reshape(-1, 1)
target_test_column = target_test.values.reshape(-1, 1)

# Each scaler is fitted on the training split only; the test split is
# transformed with those already-fitted statistics.
features_train_scaled = scaler_features.fit_transform(features_train)
features_test_scaled = scaler_features.transform(features_test)
target_train_scaled = scaler_target.fit_transform(target_train_column)
target_test_scaled = scaler_target.transform(target_test_column)

In [13]:

# Key parameters of the sequence setup
prediction_time = 1   # number of trailing rows removed from every array below
unroll_length = 20    # window length later handed to unroll()

# One shared slice: keep everything except the last `prediction_time` rows
trimmed = slice(None, -prediction_time)

# Train data
x_train, y_train = features_train_scaled[trimmed], target_train_scaled[trimmed]

# Test data
x_test, y_test = features_test_scaled[trimmed], target_test_scaled[trimmed]

In [14]:
# Turn the flat feature rows into overlapping windows of `unroll_length` steps.
# NOTE: x_* / y_* are overwritten in place, so this cell must be run exactly once
# after the trimming cell.
x_train = unroll(x_train, unroll_length)
x_test = unroll(x_test, unroll_length)

# Keep as many targets as there are windows, taken from the end of each target array
y_train = y_train[-len(x_train):]
y_test = y_test[-len(x_test):]

# Report the resulting array shapes
for label, array in (("x_train", x_train), ("y_train", y_train),
                     ("x_test", x_test), ("y_test", y_test)):
    print(label, array.shape)

# Model input shape: (time steps per window, features per step),
# i.e. `unroll_length` steps with 3 features each
input_shape = (unroll_length, 3)

x_train (778977, 20, 3)
y_train (778977, 1)
x_test (56123, 20, 3)
y_test (56123, 1)


In [15]:
def create_transformer_model(input_shape, num_heads=4, ff_dim=32):
    """Build the (uncompiled) Keras model: a Conv1D stack feeding one self-attention layer.

    Pipeline:
      1. Four stages of Conv1D(kernel 3, tanh, 'same') -> AvgPool1D(2, 'same')
         -> BatchNormalization, with 16, 32, 64 and 128 filters.
      2. The outputs of stages 1-3 are brought to the time length of the
         stage-4 output (cropped at the end if longer, zero-padded at the end
         if shorter); all four tensors are then concatenated.
      3. Dense(ff_dim, relu) -> MultiHeadAttention with the same tensor passed
         as both arguments -> Dropout(0.1) -> LayerNormalization.
         No residual connection is added around the attention layer.
      4. Dense(32, relu) -> Flatten -> Dense(1, linear).

    Args:
        input_shape: Shape of one sample, passed straight to ``layers.Input``.
        num_heads: Number of attention heads.
        ff_dim: Width of the Dense layer before attention; also used as the
            attention ``key_dim``.

    Returns:
        A ``models.Model`` mapping ``inputs`` to the single linear output.
    """
    inputs = layers.Input(shape=input_shape)

    # Convolutional stages: every stage consumes the previous stage's output,
    # and each stage output is kept for the multi-scale concatenation below.
    stage_outputs = []
    features = inputs
    for n_filters in (16, 32, 64, 128):
        features = layers.Conv1D(n_filters, 3, activation='tanh', padding='same')(features)
        features = layers.AvgPool1D(2, padding='same')(features)
        features = layers.BatchNormalization()(features)
        stage_outputs.append(features)

    # The last stage defines the common time length for all branches.
    deepest = stage_outputs[-1]
    target_len = deepest.shape[1]

    def fit_length(tensor):
        """Pad (with zeros) or crop the end of the time axis to ``target_len``."""
        gap = target_len - tensor.shape[1]
        if gap > 0:
            return layers.ZeroPadding1D((0, gap))(tensor)
        if gap < 0:
            return layers.Cropping1D((0, -gap))(tensor)
        return tensor

    # Align stages 1-3 to the last stage, then join all four branches.
    aligned = [fit_length(tensor) for tensor in stage_outputs[:-1]]
    merged = layers.Concatenate()(aligned + [deepest])

    # Projection to ff_dim channels ahead of the attention layer
    projected = layers.Dense(ff_dim, activation="relu")(merged)

    # Attention block: self-attention, dropout, layer normalization
    attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)
    attended = attention(projected, projected)
    attended = layers.Dropout(0.1)(attended)
    attended = layers.LayerNormalization(epsilon=1e-6)(attended)

    # Regression head producing one linear output per sample
    head = layers.Dense(32, activation="relu")(attended)
    head = layers.Flatten()(head)
    outputs = layers.Dense(1, activation="linear")(head)

    return models.Model(inputs=inputs, outputs=outputs)

In [16]:
# Build one instance of the network for inspection and print its
# layer-by-layer summary. `input_shape` is the (sequence length, features)
# tuple defined in the data-preparation cell.
model = create_transformer_model(input_shape=input_shape)
model.summary()

In [17]:
# Fresh model instance for training: Adam optimizer, MSE loss, MAE reported as a metric
transformer_model = create_transformer_model(input_shape)
transformer_model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

# Callbacks, both watching the validation loss:
#   early_stopping - patience of 5 epochs, restoring the best weights
#   reduce_lr      - scale the learning rate by 0.2 after 3 stagnant epochs, floor 1e-5
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-5,
)

In [19]:
# Fit the model on the training windows; 10% of the training samples are
# set aside by Keras for validation (validation_split).
history = transformer_model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    # Only the learning-rate scheduler is active here; early stopping is
    # deliberately left out (alternative: callbacks=[early_stopping, reduce_lr]).
    callbacks=[reduce_lr],
)

Epoch 1/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 7ms/step - loss: 0.3390 - mae: 0.4279 - val_loss: 0.1988 - val_mae: 0.3323 - learning_rate: 0.0010
Epoch 2/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m401s[0m 18ms/step - loss: 0.2264 - mae: 0.3547 - val_loss: 0.2297 - val_mae: 0.3594 - learning_rate: 0.0010
Epoch 3/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m549s[0m 25ms/step - loss: 0.2136 - mae: 0.3446 - val_loss: 0.2174 - val_mae: 0.3491 - learning_rate: 0.0010
Epoch 4/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m337s[0m 15ms/step - loss: 0.2077 - mae: 0.3403 - val_loss: 0.2209 - val_mae: 0.3510 - learning_rate: 0.0010
Epoch 5/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 5ms/step - loss: 0.1743 - mae: 0.3149 - val_loss: 0.1896 - val_mae: 0.3320 - learning_rate: 2.0000e-04
Epoch 6/20
[1m21909/21909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 