# Artificial Neural Network - Predict energy output of CCPP

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd 
import tensorflow as tf

In [2]:
tf.__version__, pd.__version__, np.__version__

('2.16.2', '2.2.3', '1.26.4')

## Data Preprocessing

### Importing the dataset

In [3]:
dataset = pd.read_excel('Folds5x2_pp.xlsx')
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting into independent (matrix) and dependent variable (vector)

In [4]:
X = dataset.iloc[:,:-1]
y = dataset.iloc[:,-1]
X, y, type(X), type(y)

(         AT      V       AP     RH
 0     14.96  41.76  1024.07  73.17
 1     25.18  62.96  1020.04  59.08
 2      5.11  39.40  1012.16  92.14
 3     20.86  57.32  1010.24  76.64
 4     10.82  37.50  1009.23  96.62
 ...     ...    ...      ...    ...
 9563  16.65  49.69  1014.01  91.00
 9564  13.19  39.18  1023.67  66.78
 9565  31.32  74.33  1012.92  36.48
 9566  24.48  69.45  1013.86  62.39
 9567  21.60  62.52  1017.23  67.87
 
 [9568 rows x 4 columns],
 0       463.26
 1       444.37
 2       488.56
 3       446.48
 4       473.90
          ...  
 9563    460.03
 9564    469.62
 9565    429.57
 9566    435.74
 9567    453.28
 Name: PE, Length: 9568, dtype: float64,
 pandas.core.frame.DataFrame,
 pandas.core.series.Series)

### Spliitting the dataset into Training and Test set

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test

(         AT      V       AP     RH
 5487  21.92  49.02  1009.29  88.56
 3522  11.09  40.43  1025.47  74.97
 6916   8.49  39.61  1021.05  87.74
 7544  11.43  44.78  1013.43  82.45
 7600  17.28  39.99  1007.09  74.25
 ...     ...    ...      ...    ...
 5734  23.39  61.87  1010.35  48.91
 5191  11.63  40.55  1022.89  87.12
 5390  27.79  69.23  1012.86  45.71
 860   12.26  41.50  1014.87  89.41
 7270  11.75  40.55  1018.09  97.70
 
 [7654 rows x 4 columns],
          AT      V       AP     RH
 2513  19.64  48.06  1014.81  74.96
 9411  28.26  69.23  1013.01  42.10
 8745  27.98  67.17  1007.32  75.29
 9085  28.64  69.23  1013.11  37.13
 4950   9.34  38.08  1019.56  67.74
 ...     ...    ...      ...    ...
 7204  29.06  64.96  1000.88  62.07
 1599   9.87  40.81  1017.17  84.25
 5697   8.02  39.04  1018.49  68.07
 350   26.48  69.14  1009.31  84.11
 6210  15.34  71.14  1019.79  77.56
 
 [1914 rows x 4 columns])

In [6]:
y_train, y_test

(5487    443.31
 3522    490.96
 6916    483.94
 7544    471.09
 7600    463.28
          ...  
 5734    444.03
 5191    474.67
 5390    435.06
 860     471.13
 7270    472.43
 Name: PE, Length: 7654, dtype: float64,
 2513    455.27
 9411    436.31
 8745    440.68
 9085    434.40
 4950    482.06
          ...  
 7204    430.24
 1599    473.20
 5697    479.53
 350     435.76
 6210    457.10
 Name: PE, Length: 1914, dtype: float64)

## Building the ANN

### Initialize the ANN

In [7]:
from tensorflow.keras.models import Sequential
ann_model = Sequential()
ann_model

<Sequential name=sequential, built=False>

### Adding the input layer

In [8]:
from tensorflow.keras.layers import Input, Dense
ann_model.add(Input(shape=(4,)))

### Adding the 1st hidden layer

In [9]:
ann_model.add(Dense(units=64, activation='relu'))

2025-09-22 16:12:25.948643: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-09-22 16:12:25.948914: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-09-22 16:12:25.948925: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-09-22 16:12:25.949244: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-22 16:12:25.949277: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


### Adding the 2nd hidden layer

In [10]:
ann_model.add(Dense(units=32, activation='relu'))

<Sequential name=sequential, built=True>

### Adding the output layer

In [11]:
ann_model.add(Dense(units=1, activation='linear'))

## Training the ANN

### Compiling the ANN

In [12]:
ann_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['r2_score'])

### Training the ANN model on the Training set

In [13]:
ann_model.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100


2025-09-22 16:40:43.286828: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 1912.8959 - r2_score: -5.5633 
Epoch 2/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 146.7149 - r2_score: 0.4970
Epoch 3/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 98.0356 - r2_score: 0.6637 
Epoch 4/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 68.3168 - r2_score: 0.7657
Epoch 5/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 52.8787 - r2_score: 0.8185
Epoch 6/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 46.6692 - r2_score: 0.8399
Epoch 7/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 40.3962 - r2_score: 0.8613
Epoch 8/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 38.3098 - r2_score: 0.8686
Epoch 9/100
[1m240/240[0m 

[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 33.5178 - r2_score: 0.8850
Epoch 68/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 30.4327 - r2_score: 0.8956
Epoch 69/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 32.0836 - r2_score: 0.8899
Epoch 70/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 30.9946 - r2_score: 0.8937
Epoch 71/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 31.1614 - r2_score: 0.8931
Epoch 72/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 32.9457 - r2_score: 0.8870
Epoch 73/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 33.0886 - r2_score: 0.8865
Epoch 74/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 32.3706 - r2_score: 0.8890
Epoch 75/100
[1m240/240[0m 

<keras.src.callbacks.history.History at 0x354db9160>

## Predicting the result set on test set

In [14]:
y_pred = ann_model.predict(X_test)
y_pred

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[456.23676],
       [438.52042],
       [433.47278],
       ...,
       [480.36438],
       [435.5445 ],
       [459.2343 ]], dtype=float32)

In [25]:
y_pred_series = pd.Series(y_pred.squeeze(), name='Predicted', index=X_test.index)
y_pred_series

2513    456.236755
9411    438.520416
8745    433.472778
9085    438.427063
4950    479.021515
           ...    
7204    430.355713
1599    474.599396
5697    480.364380
350     435.544495
6210    459.234314
Name: Predicted, Length: 1914, dtype: float32

In [28]:
predicted_df = pd.concat([y_test, y_pred_series], axis=1)
predicted_df['diff'] = predicted_df['PE'] - predicted_df['Predicted']
predicted_df.max()

PE           495.350000
Predicted    494.461792
diff          14.890631
dtype: float64