In [1]:
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
import torch
from torch.utils.data import DataLoader

## Train Data

In [15]:
# Read the training CSV into a DataFrame; the DATETIME column is parsed as
# dates using day-first ordering.
data_path = 'BATADAL_dataset03.csv'
data_df = pd.read_csv(
    data_path,
    dayfirst=True,
    parse_dates=['DATETIME'],
)

In [16]:
# Keep the timestamps on the side; DATETIME is excluded from the feature
# columns selected below.
dates_train = data_df['DATETIME']

# Feature columns are everything except the timestamp and the ATT_FLAG column.
sensor_cols = [name for name in data_df.columns if name not in ('DATETIME', 'ATT_FLAG')]

# Min-max scale each feature column and rebuild a DataFrame that keeps the
# original row index and column names.
# NOTE(review): the scaler is fitted on every row of data_df, i.e. before the
# train/validation split that happens in a later cell.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data_df[sensor_cols])
X = pd.DataFrame(scaled_values, index=data_df.index, columns=sensor_cols)

In [17]:
# Random 80/20 split of the scaled rows into train and validation frames.
# random_state pins the shuffle so the split is reproducible.
# X is passed once: there is no separate target array, and unpacking unused
# duplicates into `_` would rebind the name IPython uses for the last output.
X_train, X_val = train_test_split(X, test_size=0.2, random_state=42)

In [18]:
# Report the (rows, columns) shape of each split, one per line.
print(
    f'Train dataset size: {X_train.values.shape}',
    f'Validation dataset size: {X_val.values.shape}',
    sep='\n',
)

Train dataset size: (7008, 43)
Validation dataset size: (1753, 43)


In [29]:

def _to_float_tensor(frame):
    """Return the values of a DataFrame as a torch.float32 tensor."""
    return torch.from_numpy(frame.values).float()


# Each dataset pairs the features with a second tensor built from the same
# frame, so the "target" of every sample equals its input
# (presumably for reconstruction-style training; confirm against the model code).
train_dataset = TensorDataset(_to_float_tensor(X_train), _to_float_tensor(X_train))
val_dataset = TensorDataset(_to_float_tensor(X_val), _to_float_tensor(X_val))

# Shapes of the input tensors (first tensor of each dataset).
print(
    f'Train dataset size: {train_dataset.tensors[0].shape}',
    f'Validation dataset size: {val_dataset.tensors[0].shape}',
    sep='\n',
)

Train dataset size: torch.Size([7008, 43])
Validation dataset size: torch.Size([1753, 43])


In [32]:
# Persist only the first (input) tensor of each dataset to disk; the second
# tensor of each dataset is not written.
torch.save(obj=train_dataset.tensors[0], f="BATADAL_train_dataset.pt")
torch.save(obj=val_dataset.tensors[0], f="BATADAL_val_dataset.pt")

In [13]:
# data loader
# The training loader shuffles every epoch. A dedicated, explicitly seeded
# generator makes that batch order reproducible after a kernel restart, and
# re-running this cell resets it. The seed matches the random_state used for
# the train/validation split.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
# Validation batches are served in dataset order, so no generator is needed.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Sanity check: the loaders wrap the datasets built earlier, unchanged.
print(f'Train dataset size: {train_loader.dataset.tensors[0].shape}')
print(f'Validation dataset size: {val_loader.dataset.tensors[0].shape}')


Train dataset size: torch.Size([7008, 43])
Validation dataset size: torch.Size([1753, 43])


## Test Data

In [22]:
# Load the test CSV; DATETIME is parsed as day-first dates.
test_data_path = 'BATADAL_dataset04.csv'
test_df = pd.read_csv(test_data_path, parse_dates=['DATETIME'], dayfirst=True)
dates_test = test_df['DATETIME']
sensor_cols = [col for col in test_df.columns if col not in ['DATETIME', 'ATT_FLAG']]

# Scale the test features with the scaler that was fitted on the training
# data (`scaler`, from the train-data cells). Fitting a fresh MinMaxScaler on
# the test set would map train and test features with different min/max
# values, so the two sets would not be on the same scale.
# The test CSV header may carry leading spaces (e.g. ' L_T1'), so names are
# compared after stripping whitespace and then relabelled to the exact names
# the scaler was fitted with (the columns of X).
train_cols = list(X.columns)
if [str(c).strip() for c in sensor_cols] != [str(c).strip() for c in train_cols]:
    raise ValueError(
        "Test sensor columns do not match the columns the scaler was fitted on: "
        f"{sensor_cols} vs {train_cols}"
    )

test_features = test_df[sensor_cols].copy()
test_features.columns = train_cols

# Values can fall outside [0, 1] when the test data exceeds the range seen in
# training; MinMaxScaler does not clip unless asked to.
X_test = pd.DataFrame(index=test_df.index, columns=sensor_cols,
                      data=scaler.transform(test_features))

# TODO: `y_test` is not defined anywhere in the visible notebook; derive the
# labels from test_df['ATT_FLAG'] before enabling the lines below.
# test_dataset = TensorDataset(torch.from_numpy(X_test.values).float(), 
#                             torch.from_numpy(y_test.values).float())



In [25]:
# Inspect the test frame: its column names, the raw ATT_FLAG column, and how
# often each distinct ATT_FLAG value occurs.
att_flag = test_df['ATT_FLAG']

print(test_df.columns)
print(att_flag)
print("\nUnique values in ATT_FLAG:")
print(att_flag.value_counts())


Index(['DATETIME', ' L_T1', ' L_T2', ' L_T3', ' L_T4', ' L_T5', ' L_T6',
       ' L_T7', ' F_PU1', ' S_PU1', ' F_PU2', ' S_PU2', ' F_PU3', ' S_PU3',
       ' F_PU4', ' S_PU4', ' F_PU5', ' S_PU5', ' F_PU6', ' S_PU6', ' F_PU7',
       ' S_PU7', ' F_PU8', ' S_PU8', ' F_PU9', ' S_PU9', ' F_PU10', ' S_PU10',
       ' F_PU11', ' S_PU11', ' F_V2', ' S_V2', ' P_J280', ' P_J269', ' P_J300',
       ' P_J256', ' P_J289', ' P_J415', ' P_J302', ' P_J306', ' P_J307',
       ' P_J317', ' P_J14', ' P_J422', 'ATT_FLAG'],
      dtype='object')
0      -999
1      -999
2      -999
3      -999
4      -999
       ... 
4172   -999
4173   -999
4174   -999
4175   -999
4176   -999
Name: ATT_FLAG, Length: 4177, dtype: int64

Unique values in ATT_FLAG:
-999    3958
 1       219
Name: ATT_FLAG, dtype: int64
