## Importing necessary libraries

In [25]:
import pandas as pd
import numpy 
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
import warnings
warnings.filterwarnings('ignore')

## Import Data

In [3]:
#  fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

Gasturbine = pd.read_csv('gas_turbines.csv')
Gasturbine

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5000,19.663,1059.2,550.00,114.70,10.605,3.1547,82.722
1,6.7850,1008.4,97.118,3.4998,19.728,1059.3,550.00,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.670
4,7.3978,1009.7,95.150,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311
...,...,...,...,...,...,...,...,...,...,...,...
15034,9.0301,1005.6,98.460,3.5421,19.164,1049.7,546.21,111.61,10.400,4.5186,79.559
15035,7.8879,1005.9,99.093,3.5059,19.414,1046.3,543.22,111.78,10.433,4.8470,79.917
15036,7.2647,1006.3,99.496,3.4770,19.530,1037.7,537.32,110.19,10.483,7.9632,90.912
15037,7.0060,1006.8,99.008,3.4486,19.377,1043.2,541.24,110.74,10.533,6.2494,93.227


## Data Understanding

In [4]:
Gasturbine.dtypes

AT      float64
AP      float64
AH      float64
AFDP    float64
GTEP    float64
TIT     float64
TAT     float64
TEY     float64
CDP     float64
CO      float64
NOX     float64
dtype: object

In [5]:
Gasturbine.shape

(15039, 11)

In [6]:
Gasturbine.describe()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
count,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0
mean,17.764381,1013.19924,79.124174,4.200294,25.419061,1083.79877,545.396183,134.188464,12.102353,1.972499,68.190934
std,7.574323,6.41076,13.793439,0.760197,4.173916,16.527806,7.866803,15.829717,1.103196,2.222206,10.470586
min,0.5223,985.85,30.344,2.0874,17.878,1000.8,512.45,100.17,9.9044,0.000388,27.765
25%,11.408,1008.9,69.75,3.7239,23.294,1079.6,542.17,127.985,11.622,0.858055,61.3035
50%,18.186,1012.8,82.266,4.1862,25.082,1088.7,549.89,133.78,12.025,1.3902,66.601
75%,23.8625,1016.9,90.0435,4.5509,27.184,1096.0,550.06,140.895,12.578,2.1604,73.9355
max,34.929,1034.2,100.2,7.6106,37.402,1100.8,550.61,174.61,15.081,44.103,119.89


In [7]:
Gasturbine.isnull().sum()

AT      0
AP      0
AH      0
AFDP    0
GTEP    0
TIT     0
TAT     0
TEY     0
CDP     0
CO      0
NOX     0
dtype: int64

## Data Preparation

## As per problem statement we need input variable which are ambient and out Energy yield
#### Input 
* Ambient Temp
* Ambient pressure
* Ambient humidity

#### Output
* Turbine energy yeild

In [12]:
X = Gasturbine[['AT','AP','AH']]
y = Gasturbine['TEY']

In [11]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(X)
x_scaled

array([[-1.4397781 , -0.82664395,  1.28143632],
       [-1.44960109, -0.74864748,  1.30456402],
       [-1.43472138, -0.68625031,  1.21908576],
       ...,
       [-1.38626659, -1.07623263,  1.47697056],
       [-1.42042259, -0.99823616,  1.44159024],
       [-1.43073409, -0.93583899,  1.33465179]])

## Model Building

In [14]:
X_train,X_test,y_train,y_test = train_test_split(x_scaled,y,test_size=0.20,random_state=15)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((12031, 3), (3008, 3), (12031,), (3008,))

In [15]:
model = Sequential()
model.add(Dense(3,input_dim=3,activation='relu'))
model.add(Dense(3,activation='relu'))
model.add(Dense(1,activation='sigmoid'))

In [16]:
# Compiling the model

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

## Model Training

In [17]:
# fit the model
model.fit(X_train,y_train,epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1d15ecdbd88>

## Model Testing

In [18]:
y_pred_train = model.predict(X_train)

In [19]:
y_pred_test = model.predict(X_test)

## Model Evaluation

In [31]:
print(mean_squared_error(y_train,y_pred_train,squared=False))

133.97425852116916


In [34]:
print(mean_squared_error(y_test,y_pred_test,squared=False))

134.73021135758717
