In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout

<b>Load and preprocess data</b>

In [2]:
data = pd.read_csv('product_sales.csv')

data_relevant = data[['Retail_Price', 'Seasonal', 'Units_Sold']].copy()
label_encoder = LabelEncoder()
data_relevant['Seasonal'] = label_encoder.fit_transform(data_relevant['Seasonal'])
scaler = MinMaxScaler()
data_relevant[['Retail_Price', 'Units_Sold']] = scaler.fit_transform(data_relevant[['Retail_Price', 'Units_Sold']])


<b>Define improved generator and discriminator</b>

In [3]:
def build_generator():
    model = Sequential([
        tf.keras.layers.Input(shape=(2,)),
        Dense(64),
        LeakyReLU(negative_slope=0.2),
        BatchNormalization(momentum=0.8),
        Dense(128),
        LeakyReLU(negative_slope=0.2),
        BatchNormalization(momentum=0.8),
        Dense(1, activation='linear')
    ])
    return model

def build_discriminator():
    model = Sequential([
        tf.keras.layers.Input(shape=(3,)),
        Dense(64),
        LeakyReLU(negative_slope=0.2),
        Dropout(0.3),
        Dense(32),
        LeakyReLU(negative_slope=0.2),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    return model

discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5), metrics=['accuracy'])

generator = build_generator()

<b>Combine generator and discriminator into GAN</b>

In [4]:
z = tf.keras.layers.Input(shape=(2,))
generated_units_sold = generator(z)
discriminator.trainable = False 
validity = discriminator(tf.keras.layers.concatenate([z, generated_units_sold]))

gan = tf.keras.models.Model(z, validity)
gan.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5))


In [5]:
epochs = 500  
batch_size = 64 
half_batch = int(batch_size / 2)

X_train = data_relevant[['Retail_Price', 'Seasonal']].values
y_train = data_relevant['Units_Sold'].values

<b>Create dataset function</b>

In [6]:
def create_dataset(X, y, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.shuffle(buffer_size=len(X)).batch(batch_size).repeat()
    return dataset

train_dataset = create_dataset(X_train, y_train, half_batch)
train_iterator = iter(train_dataset)

<b>Training</b>

In [7]:
for epoch in range(epochs):
   
    discriminator.trainable = True  
    real_inputs, real_units_sold = next(train_iterator)
    real_units_sold = real_units_sold.numpy().reshape(-1, 1)
    
    noise = np.random.normal(0, 1, (half_batch, 2))
    gen_units_sold = generator.predict(noise)
    
    real = np.hstack((real_inputs.numpy(), real_units_sold))
    fake = np.hstack((noise, gen_units_sold))
    
    d_loss_real = discriminator.train_on_batch(real, np.ones((half_batch, 1)))
    d_loss_fake = discriminator.train_on_batch(fake, np.zeros((half_batch, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    
    discriminator.trainable = False  
    
    noise = np.random.normal(0, 1, (batch_size, 2))
    valid_y = np.array([1] * batch_size)
    g_loss = gan.train_on_batch(noise, valid_y)
    
    if epoch % 50 == 0 or epoch == epochs - 1:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
0 [D loss: 0.5809131264686584, acc.: 82.03125%] [G loss: 0.6264446377754211]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/st

<b>Generate synthetic data</b>


In [8]:
noise = np.random.normal(0, 1, (X_train.shape[0], 2))
synthetic_units_sold = generator.predict(noise)

synthetic_units_sold = scaler.inverse_transform(np.concatenate([np.zeros_like(synthetic_units_sold), synthetic_units_sold], axis=1))[:, 1]

synthetic_data = np.hstack((X_train, synthetic_units_sold.reshape(-1, 1)))

synthetic_df = pd.DataFrame(synthetic_data, columns=['Retail_Price', 'Seasonal', 'Units_Sold'])
synthetic_df['Retail_Price'] = scaler.inverse_transform(np.concatenate([synthetic_df[['Retail_Price']], np.zeros_like(synthetic_df[['Retail_Price']])], axis=1))[:, 0]

synthetic_df.head()

[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 299us/step


Unnamed: 0,Retail_Price,Seasonal,Units_Sold
0,459.67,0.0,-2.160845
1,239.95,1.0,78.171066
2,209.92,1.0,50.602005
3,125.57,0.0,57.317307
4,281.4,0.0,8.768553


<b>Analyze the synthetic data</b>

In [9]:
print("Synthetic Data Description:\n", synthetic_df.describe())
print("Real Data Description:\n", data_relevant.describe())


Synthetic Data Description:
         Retail_Price       Seasonal     Units_Sold
count  100000.000000  100000.000000  100000.000000
mean      255.357113       0.501130      31.627649
std       141.318357       0.500001      60.675122
min        10.000000       0.000000    -313.340454
25%       132.790000       0.000000      -1.354063
50%       255.550000       1.000000      32.903221
75%       377.315000       1.000000      69.600296
max       499.990000       1.000000     283.491302
Real Data Description:
         Retail_Price       Seasonal     Units_Sold
count  100000.000000  100000.000000  100000.000000
mean        0.500739       0.501130       0.499163
std         0.288411       0.500001       0.291212
min         0.000000       0.000000       0.000000
25%         0.250597       0.000000       0.244898
50%         0.501133       1.000000       0.500000
75%         0.749638       1.000000       0.755102
max         1.000000       1.000000       1.000000
