In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import tensorflow as tf
from pathlib import Path
from tqdm import tqdm
import os

from keras.models import load_model


import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if gpu_devices:
    print('Using GPU')
    # Memory growth must be configured identically on every visible GPU;
    # enabling it on the first device only makes TensorFlow raise on
    # multi-GPU hosts once the runtime is initialised.
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    print('Using CPU')

In [None]:
# Names of the channels in each series; the order must match the last axis of the data.
feature_columns = [
    'Ipv', 'Vpv', 'Vdc',
    'ia', 'ib', 'ic',
    'va', 'vb', 'vc',
    'Iabc', 'If', 'Vabc', 'Vf',
]

seq_len = 200                  # time steps per generated noise series
n_seq = len(feature_columns)   # channels per time step (13)
batch_size = 16                # labels / noise series fed to the generator per call

# Real data

In [None]:
# Folder holding the training arrays.
dataset_folder = '/kaggle/input/gpvs-ts-npy'

x_train_load = np.load(os.path.join(dataset_folder, 'X_train.npy'))
y_train_load = np.load(os.path.join(dataset_folder, 'y_train.npy'))

# Fit the scaler per channel: collapse every leading axis so each column is
# one feature, scale, then restore the original array shape.
scaler = MinMaxScaler()
n_features = x_train_load.shape[-1]
x_train_flat = x_train_load.reshape(-1, n_features)
x_train = (scaler.fit_transform(x_train_flat)
           .reshape(x_train_load.shape)
           .astype(np.float32))

# Map the raw labels to consecutive integer class ids.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train_load.ravel())

n_classes = len(label_encoder.classes_)
print(x_train.shape, y_train.shape)

In [None]:
# Encoded class ids present in the training labels, with the sample count of each.
uniq_vals, uniq_counts = np.unique(y_train, return_counts=True)
(uniq_vals, uniq_counts)

In [None]:
# How many synthetic samples each class needs to match the largest class.
n_samples_to_gen = uniq_counts.max() - uniq_counts
n_samples_to_gen

In [None]:
# One encoded label per sample to synthesise: class i appears n_samples_to_gen[i] times.
y_to_gen = np.repeat(uniq_vals, n_samples_to_gen)

In [None]:
# Shuffled label batches of exactly batch_size elements. The trailing partial
# batch is dropped, so up to batch_size - 1 of the requested labels are not generated.
y_series = tf.data.Dataset.from_tensor_slices(y_to_gen)
y_series = y_series.shuffle(buffer_size=len(y_to_gen))
y_series = y_series.batch(batch_size, drop_remainder=True)

# Random series generator

In [None]:
def make_random_data():
    """Yield an endless stream of uniform noise arrays.

    Each item is drawn from U[0, 1) and has shape ``(seq_len, n_seq)``,
    where both dimensions are read from the notebook-level constants.
    """
    while True:
        noise_shape = (seq_len, n_seq)
        yield np.random.uniform(0, 1, noise_shape)

In [None]:
# Endless iterator over float32 noise batches produced by make_random_data.
noise_dataset = tf.data.Dataset.from_generator(make_random_data, output_types=tf.float32)
random_series = iter(noise_dataset.batch(batch_size).repeat())

# Generate Synthetic Data

In [None]:
# Despite its name, `synthetic_data` is the loaded model: it is later called
# with a [noise batch, label batch] pair to produce the synthetic series.
synthetic_data_path = '/kaggle/input/ctimegan-output/time_gan/experiment_00/synthetic_data'
synthetic_data = load_model(synthetic_data_path)

In [None]:
# Run the generator once per label batch, pairing each with a fresh noise batch.
generated_data, labels_enc = [], []
for label_batch in tqdm(y_series):
    noise_batch = next(random_series)
    generated_data.append(synthetic_data([noise_batch, label_batch]))
    labels_enc.append(label_batch)

In [None]:
# Collapse the per-batch outputs into single arrays; the stacking functions
# already return new ndarrays, so no extra copy is needed.
# NOTE: this rebinds both names from lists to arrays, so run the cell once per generation pass.
generated_data = np.vstack(generated_data)
labels_enc = np.hstack(labels_enc)

(generated_data.shape, labels_enc.shape)

## Rescale

In [None]:
# Map the generator output back to the original value range with the scaler fitted
# on the training data: flatten to (rows, channels), invert, then restore the shape.
# NOTE: generated_data is overwritten, so re-running this cell would invert twice.
generated_shape = generated_data.shape
generated_flat = generated_data.reshape(-1, generated_shape[-1])
generated_data = scaler.inverse_transform(generated_flat).reshape(generated_shape)
generated_data.shape

# Plot

In [None]:
# One panel per feature; the row count follows the number of features so the
# grid is not tied to a hard-coded 5x3 layout.
n_cols = 3
n_rows = -(-len(feature_columns) // n_cols)  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(14, 7))
axes = axes.flatten()

# Pick one random synthetic series together with its encoded label.
idx = np.random.randint(generated_data.shape[0])
synthetic = generated_data[idx]
label = labels_enc[idx]

# Pick a random real (unscaled) training series with the same label for comparison.
real_same_label = x_train_load[y_train == label]
real = real_same_label[np.random.randint(real_same_label.shape[0]), :, :]

for j, feature in enumerate(feature_columns):
    # The synthetic curve is drawn on a secondary y-axis, so the two curves
    # are compared by shape rather than by absolute scale.
    (pd.DataFrame({'Real': real[:, j],
                   'Synthetic': synthetic[:, j]})
     .plot(ax=axes[j],
           title=feature,
           secondary_y='Synthetic', style=['-', '--'],
           lw=1))

# Hide the grid cells that have no feature assigned, so no empty frames are drawn.
for unused_ax in axes[len(feature_columns):]:
    unused_ax.set_visible(False)

sns.despine()
fig.suptitle(f'Label: {label}')
fig.tight_layout()

# Save augmented dataset

In [None]:
# Decode the integer class ids back to the original label values and show
# how many synthetic samples were produced per label.
labels = label_encoder.inverse_transform(labels_enc)
np.unique(labels, return_counts=True)

In [None]:
# Write the rescaled synthetic series and their decoded labels to the working directory.
for out_name, out_array in (('generated_data.npy', generated_data),
                            ('generated_labels.npy', labels)):
    np.save(out_name, out_array)