In [25]:
import torch
from torch import nn
from rich import print
import os
import plotly.graph_objects as go
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

## Unzipping the data

In [6]:
# Unzipping the data
!unzip "/content/Gas_turbine.zip"

Archive:  /content/Gas_turbine.zip
  inflating: test.zip                
  inflating: train.zip               


In [7]:
# Unzipping the training and test data
!unzip "/content/train.zip"
!unzip "/content/test.zip"

Archive:  /content/train.zip
   creating: train/
  inflating: train/ex_24.csv         
  inflating: train/ex_1.csv          
  inflating: train/ex_21.csv         
  inflating: train/ex_20.csv         
  inflating: train/ex_23.csv         
  inflating: train/ex_9.csv          
Archive:  /content/test.zip
   creating: test/
  inflating: test/ex_22.csv          
  inflating: test/ex_4.csv           


[Dataset Source](https://archive.ics.uci.edu/dataset/994/micro+gas+turbine+electrical+energy+prediction)

**For what purpose was the dataset created?**

Its original purpose was to learn the gas turbine's input-output temporal behavior with machine learning.

In the original experiments we used experiments 1, 9, 20, 21, 23 and 24 for training and experiments 4 and 22 for testing. We used RMSE as the performance metric. See our referenced paper for experimental details.

## Loading the training and test data

In [8]:
root_paths = ["/content/train","/content/test"]


def _read_csv_folder(folder):
    """Read every file in `folder` with pandas and stack the rows into one DataFrame.

    Files are read in sorted name order so the resulting row order does not
    depend on the order in which the filesystem happens to list them.
    All frames are collected first and concatenated once, instead of growing
    a DataFrame with a concat per file.

    Args:
        folder: Path of the directory whose files are read with `pd.read_csv`.

    Returns:
        A DataFrame with the rows of every file (original per-file index
        values are kept), or an empty DataFrame if the folder has no files.
    """
    frames = [
        pd.read_csv(os.path.join(folder, file_name))
        for file_name in sorted(os.listdir(folder))
    ]
    # pd.concat raises on an empty list, so fall back to an empty frame
    return pd.concat(frames, axis=0) if frames else pd.DataFrame()


# First entry is the training folder, second the test folder
train_root, test_root = root_paths
train_df = _read_csv_folder(train_root)
test_df = _read_csv_folder(test_root)

In [9]:
print(train_df.shape,test_df.shape)

In [10]:
# Checking any 5 random rows
train_df.sample(5)

Unnamed: 0,time,input_voltage,el_power
3452,4429.26,3.0,1077.817034
4913,6165.44,9.263158,3090.930301
58,906.46,3.0,1104.822436
9736,10537.82328,3.0,951.756932
2669,3647.68,3.0,1375.094137


In [11]:
# Getting descriptive summary of the data
train_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
time,52940.0,5606.444209,2960.330233,810.0,3086.93,5431.03389,7782.30137,12636.84
input_voltage,52940.0,5.505534,2.549721,3.0,3.0,4.842105,7.421053,10.0
el_power,52940.0,1843.315949,723.126377,932.83726,1205.7439,1605.753998,2399.419112,3249.891217


Before loading the data into the DataLoaders, let's first standardize it. The scalers are fit on the training data only and then applied unchanged to the test data.

In [12]:
# Feature matrices: every column except the target 'el_power'
train_input = train_df.drop(columns=['el_power']).to_numpy()
test_input = test_df.drop(columns=['el_power']).to_numpy()

# Target vectors
train_output = train_df['el_power'].to_numpy()
test_output = test_df['el_power'].to_numpy()

# One scaler for the features and a separate one for the target
scaling_obj_input = StandardScaler()
scaling_obj_output = StandardScaler()

# Learn the feature statistics from the training split, then apply them to both splits
scaling_obj_input.fit(train_input)
train_input_scaled = scaling_obj_input.transform(train_input)
test_input_scaled = scaling_obj_input.transform(test_input)

# The scaler works on 2-D arrays, so the target is reshaped to a single column
# for scaling and flattened back to 1-D afterwards
scaling_obj_output.fit(train_output.reshape(-1, 1))
train_output_scaled = scaling_obj_output.transform(train_output.reshape(-1, 1)).flatten()
test_output_scaled = scaling_obj_output.transform(test_output.reshape(-1, 1)).flatten()

In [9]:
# help(StandardScaler())

## Loading the data into dataloaders

In [13]:
# Separating features from labels and creating tensors.
# The tensors are created as float32 up front (PyTorch's default parameter dtype)
# rather than inheriting float64 from the NumPy arrays.
train_input = torch.tensor(train_input_scaled, dtype=torch.float32)
train_output = torch.tensor(train_output_scaled, dtype=torch.float32)

test_input = torch.tensor(test_input_scaled, dtype=torch.float32)
test_output = torch.tensor(test_output_scaled, dtype=torch.float32)

# Wrapping the tensors into the TensorDataset object
train_dataset = TensorDataset(train_input,train_output)
test_dataset = TensorDataset(test_input,test_output)

# Loading the datasets into the loaders.
# Only the training loader is shuffled; the test loader keeps a fixed order so
# evaluation batches are the same on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [14]:
# Pull a single batch from the training loader and show its labels
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
  X, y = first_batch
  print(y)

## Defining the Network

In [15]:
# Pick the compute backend in order of preference: CUDA, then MPS, then CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

In [16]:
class Neural_network(nn.Module):
  """Small fully connected network: Linear -> ReLU -> Linear -> ReLU -> Linear.

  Args:
    in_features: Number of input features per sample (default 2).
    hidden_features: Width of both hidden layers (default 4).
    out_features: Number of values produced per sample (default 1).

  With the defaults the layer sizes are 2 -> 4 -> 4 -> 1.
  """

  def __init__(self, in_features=2, hidden_features=4, out_features=1):
    super().__init__()
    # The attribute name is kept as-is because it is part of the state_dict keys
    self.Relu_stack = nn.Sequential(
        nn.Linear(in_features, hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, out_features)
    )

  def forward(self,x):
    """Pass `x` through the layer stack and return the result.

    Args:
      x: Tensor whose last dimension has `in_features` entries.

    Returns:
      Tensor with the same leading dimensions and `out_features` in the last one.
    """
    return self.Relu_stack(x)

# Build the network, then move its parameters onto the selected device
model = Neural_network()
model = model.to(device)
print(model)

In [17]:
# Add up the number of scalar entries in each of the model's parameter tensors
total_params = 0
for parameter in model.parameters():
    total_params += parameter.numel()
print(f'Total number of parameters: {total_params}')

## Defining the training and testing of Model

In [18]:
# Hyperparameters for the run.
# NOTE(review): the DataLoaders are created in an earlier cell with a literal
# batch_size=64, so this variable is not what sets their batch size.
batch_size = 64
# Step size handed to the optimizer below
learning_rate = 1e-3
# Number of train/test passes made by the training cell
epochs = 10

# Mean Squared Error for regression
loss_func = nn.MSELoss()
# Adam updates every parameter of `model` using `learning_rate`
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [23]:
# Lists to store loss values
train_losses = []
test_losses = []

def train_loop(dataloader, model, loss_fn, optimizer):
    model.train()
    total_loss = 0
    size = len(dataloader.dataset)

    for X, y in dataloader:
        X = X.to(device).float()  # Ensure X is of type float
        y = y.to(device).float()  # Ensure y is of type float

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred.flatten(), y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * len(X)  # Accumulate loss

    avg_loss = total_loss / size
    train_losses.append(avg_loss)
    print(f"Epoch Loss: {avg_loss:.6f}")

def test_loop(dataloader, model, loss_fn):
    model.eval()
    total_loss = 0
    size = len(dataloader.dataset)

    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device).float()  # Ensure X is of type float
            y = y.to(device).float()  # Ensure y is of type float

            pred = model(X)
            total_loss += loss_fn(pred.flatten(), y).item() * len(X)  # Accumulate loss

    avg_loss = total_loss / size
    test_losses.append(avg_loss)
    print(f"Test Avg Loss: {avg_loss:.6f}")

In [24]:
# Alternate one training pass and one evaluation pass per epoch
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train_loop(train_dataloader, model, loss_func, optimizer)
    test_loop(test_dataloader, model, loss_func)
print("Done!")

## Visualizing the Results

In [26]:
# Plotting the losses using Plotly
fig = go.Figure()

# One trace per loss history. The x-axis is built from each list's own length
# rather than from `epochs`, so the epoch numbers stay aligned with the values
# actually recorded (the lists keep growing if the training cell is run again).
for trace_name, trace_color, loss_values in (
    ('Training Loss', 'blue', train_losses),
    ('Test Loss', 'red', test_losses),
):
    fig.add_trace(go.Scatter(
        x=list(range(1, len(loss_values) + 1)),
        y=loss_values,
        mode='lines+markers',
        name=trace_name,
        line=dict(color=trace_color),
        marker=dict(size=8)
    ))

# Update layout
fig.update_layout(
    title='Training and Test Loss Over Epochs',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    legend_title='Loss Type',
    template='plotly_white'
)

# Show the plot
fig.show()


We can see that our network is overfitting for now, so in the next modules we will try to deal with this issue.