In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed


In [None]:
# Folder containing the CSV files. Built with os.path.join so the same
# notebook runs on Windows and POSIX (a hard-coded backslash only works on Windows).
folder_path = os.path.join('Preprocessed_Data', 'node22c Oct23_Jan24')

# Collect every CSV in the folder. os.listdir returns entries in arbitrary
# order, so sort the paths to make the concatenation order deterministic.
csv_files = sorted(
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith('.csv')
)
if not csv_files:
    # pd.concat on an empty list raises an opaque "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {folder_path!r}")

# Read and combine all CSV files into one frame with a fresh 0..n-1 index.
data = pd.concat((pd.read_csv(file) for file in csv_files), ignore_index=True)

# Ensure 'Date Time' is in datetime format, then order rows chronologically.
# A stable sort keeps rows with identical timestamps in file order.
data['Date Time'] = pd.to_datetime(data['Date Time'])
data = data.sort_values('Date Time', kind='stable').reset_index(drop=True)


In [None]:
# Label "burst" rows from the pressure channel: anything strictly above the
# 95th percentile of the observed pressure counts as a burst (example cut-off).
pressure_series = data['Slave_Device1_CH3_Pressure BAR']
burst_threshold = pressure_series.quantile(0.95)

# Binary indicator column: 1 where pressure exceeds the threshold, else 0.
data['Burst'] = pressure_series.gt(burst_threshold).astype(int)


In [None]:
# Model inputs (flow, velocity, pressure) and the binary label column.
features = ['Slave_Device1_CH1_FLOW m3/h', 'Slave_Device1_CH2_VELOCITY m/s', 'Slave_Device1_CH3_Pressure BAR']
target = 'Burst'

# Scale features to [0, 1] into a separate array. `data` itself is left in its
# original units so later cells that plot or report pressure still see raw values.
# NOTE(review): the scaler is fit on every row, including rows that end up in
# the test split below; fit on the training rows only if that leakage matters.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(data[features])

# Number of consecutive timesteps fed to the model per sample.
sequence_length = 30

if len(data) <= sequence_length:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, got {len(data)}"
    )

# Sample k uses rows [k, k + sequence_length) as input and the label of the
# row immediately after that window. Slicing the pre-computed array avoids
# re-selecting the feature columns from the DataFrame on every iteration.
X = np.array([
    scaled_features[i - sequence_length:i]
    for i in range(sequence_length, len(data))
])
y = data[target].to_numpy()[sequence_length:].copy()


In [None]:
# Chronological split: the first 80% of sequences train the model, the
# remaining 20% are held out for testing (no shuffling across the boundary).
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling repeat between runs (as far as the backend allows).
set_random_seed(42)

# Two stacked LSTM layers with dropout, ending in a single sigmoid unit that
# outputs the burst probability. The input shape is declared with an explicit
# Input layer rather than the deprecated `input_shape=` layer argument.
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),  # Sigmoid activation for binary classification
])

# Bursts are the minority class, so accuracy alone can look good for a model
# that never predicts a burst; precision and recall are tracked alongside it.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')],
)
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


In [None]:
# Score the held-out sequences and threshold the sigmoid output at 0.5.
y_pred = model.predict(X_test)
y_pred_class = (y_pred.ravel() > 0.5).astype(int)

# Test sample j was labelled from data row (split_index + sequence_length + j),
# so that slice of 'Date Time' lines up one-to-one with the predictions.
# Keep only the timestamps whose predicted class is 1.
predicted_burst_dates = (
    data['Date Time']
    .iloc[split_index + sequence_length:]
    .iloc[np.flatnonzero(y_pred_class == 1)]
)
print("Predicted burst dates:", predicted_burst_dates, sep="\n")


In [None]:
# Plot the Pressure with actual and predicted bursts highlighted.
# NOTE(review): the y-axis shows whatever the pressure column of `data`
# currently holds; if that column was min-max scaled in place upstream, the
# values are scaled units rather than BAR.
pressure_col = 'Slave_Device1_CH3_Pressure BAR'
fig, ax = plt.subplots(figsize=(14, 6))

# Pressure curve over the whole period.
ax.plot(data['Date Time'], data[pressure_col], label='Pressure', color='blue')

# Actual burst events (rows labelled Burst == 1).
actual_bursts = data.loc[data['Burst'] == 1]
ax.scatter(actual_bursts['Date Time'], actual_bursts[pressure_col],
           color='red', label='Actual Burst Event', marker='x')

# Predicted burst events. Look the pressure up by row index rather than by
# matching timestamps: `isin` on 'Date Time' returns extra rows when a
# timestamp occurs more than once, which makes x and y differ in length.
predicted_pressure = data.loc[predicted_burst_dates.index, pressure_col]
ax.scatter(predicted_burst_dates, predicted_pressure,
           color='green', label='Predicted Burst Event', marker='o')

# Formatting the plot
ax.set_xlabel('Date Time')
ax.set_ylabel('Pressure (BAR)')
ax.set_title('Pressure over Time with Burst Events')
ax.legend()
plt.show()
