In [12]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler


In [13]:
data = pd.read_csv('/content/sample_data/screentime_analysis.csv')
data.head()

Unnamed: 0,Date,App,Usage (minutes),Notifications,Times Opened
0,2024-08-07,Instagram,81,24,57
1,2024-08-08,Instagram,90,30,53
2,2024-08-26,Instagram,112,33,17
3,2024-08-22,Instagram,82,11,38
4,2024-08-12,Instagram,59,47,16


In [14]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data['App'] = le.fit_transform(data['App'])
data.head()

Unnamed: 0,Date,App,Usage (minutes),Notifications,Times Opened
0,2024-08-07,2,81,24,57
1,2024-08-08,2,90,30,53
2,2024-08-26,2,112,33,17
3,2024-08-22,2,82,11,38
4,2024-08-12,2,59,47,16


In [15]:
data_gen = data.drop(columns=['Date'])
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data_gen)
normalized_df = pd.DataFrame(normalized_data, columns=data_gen.columns)
normalized_df.head()


Unnamed: 0,App,Usage (minutes),Notifications,Times Opened
0,0.285714,0.677966,0.163265,0.571429
1,0.285714,0.754237,0.204082,0.530612
2,0.285714,0.940678,0.22449,0.163265
3,0.285714,0.686441,0.07483,0.377551
4,0.285714,0.491525,0.319728,0.153061


Let’s start building the GAN. The generator will take a latent noise vector as input and generate a synthetic sample similar to the data. Use the LeakyReLU activation for better gradient flow


In [16]:
latent_dim = 100  # size of the random noise vector

latent_dim = 100  # latent space dimension (size of the random noise input)

def build_generator(latent_dim):
    model = Sequential([
        Dense(128, input_dim=latent_dim),
        LeakyReLU(alpha=0.01),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.01),
        BatchNormalization(momentum=0.8),
        Dense(512),
        LeakyReLU(alpha=0.01),
        BatchNormalization(momentum=0.8),
        Dense(4, activation='sigmoid')  # output layer for generating 4 features
    ])
    return model

# create the generator
generator = build_generator(latent_dim)
generator.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Generating data using generator network

In [17]:
# generate random noise for 1000 samples
noise = np.random.normal(0, 1, (1000, latent_dim))

# generate synthetic data using the generator
generated_data = generator.predict(noise)

# display the generated data
generated_data[:10]

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


array([[0.57674444, 0.44977343, 0.46098232, 0.518422  ],
       [0.60469663, 0.5089969 , 0.4479859 , 0.5988509 ],
       [0.50113976, 0.45793003, 0.54583263, 0.5498803 ],
       [0.5528166 , 0.5259393 , 0.43748823, 0.57015103],
       [0.5782038 , 0.47108227, 0.43338254, 0.61768985],
       [0.6115892 , 0.5566055 , 0.49769187, 0.56002724],
       [0.61735636, 0.54403096, 0.518031  , 0.58937615],
       [0.5827881 , 0.51824564, 0.46968943, 0.5474821 ],
       [0.52387923, 0.4898819 , 0.527112  , 0.5967843 ],
       [0.550922  , 0.47256318, 0.5601312 , 0.57048684]], dtype=float32)

Now, the discriminator will take a real or synthetic data sample and classify it as real or fake:



In [18]:
def build_discriminator():
    model = Sequential([
        Dense(512, input_shape=(4,)),
        LeakyReLU(alpha=0.01),
        Dense(256),
        LeakyReLU(alpha=0.01),
        Dense(128),
        LeakyReLU(alpha=0.01),
        Dense(1, activation='sigmoid')  # output: 1 neuron for real/fake classification
    ])
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model

# create the discriminator
discriminator = build_discriminator()
discriminator.summary()

Next, we will freeze the discriminator’s weights when training the generator to ensure only the generator is updated during those training steps:



In [19]:
def build_gan(generator, discriminator):
    # freeze the discriminator’s weights while training the generator
    discriminator.trainable = False

    model = Sequential([generator, discriminator])
    model.compile(loss='binary_crossentropy', optimizer=Adam())
    return model

# create the GAN
gan = build_gan(generator, discriminator)
gan.summary()

Now, we will train the GAN using the following steps:


1. Generate random noise.
2. Use the generator to create fake data.
3. Train the discriminator on both real and fake data.
4. Train the generator via the GAN to fool the discriminator.


In [21]:
def train_gan(gan, generator, discriminator, data, epochs=10000, batch_size=128, latent_dim=100):
    for epoch in range(1, epochs + 1):  # Start from 1 instead of 0
        # select a random batch of real data
        idx = np.random.randint(0, data.shape[0], batch_size)
        real_data = data[idx]

        # generate a batch of fake data
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        fake_data = generator.predict(noise)

        # labels for real and fake data
        real_labels = np.ones((batch_size, 1))  # real data has label 1
        fake_labels = np.zeros((batch_size, 1))  # fake data has label 0

        # train the discriminator
        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)

        # train the generator via the GAN
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_labels = np.ones((batch_size, 1))
        g_loss = gan.train_on_batch(noise, valid_labels)

        # print the progress every 10 epochs
        if epoch % 10 == 0:
            # Access the loss values from the returned arrays using indexing [0]
            d_loss = 0.5 * np.add(d_loss_real[0], d_loss_fake[0])
            print(f"Epoch {epoch}/{epochs}: D Loss: {d_loss:.4f}, G Loss: {g_loss[0]:.4f}") # Accessing the loss value using g_loss[0]


train_gan(gan, generator, discriminator, normalized_data, epochs=1000, batch_size=128, latent_dim=latent_dim)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Epoch 10/1000: D Loss: 0.7055, G Loss: 0.7056
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━

In [23]:
# generate new data
noise = np.random.normal(0, 1, (1000, latent_dim))  # generate 10000 synthetic samples
generated_data = generator.predict(noise)

# convert the generated data back to the original scale
generated_data_rescaled = scaler.inverse_transform(generated_data)

# convert to DataFrame
generated_df = pd.DataFrame(generated_data_rescaled, columns=data_gen.columns)

generated_df.head()

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


Unnamed: 0,App,Usage (minutes),Notifications,Times Opened
0,0.000369,118.980209,146.991608,1.022839
1,0.000415,118.984947,146.994141,1.03405
2,0.000538,118.969688,146.990173,1.04544
3,0.000238,118.992714,146.996475,1.005592
4,0.000372,118.989243,146.997864,1.020606


In the task of generating synthetic data using Generative AI, specifically leveraging Generative Adversarial Networks (GANs). The process began with preprocessing a dataset containing app usage insights, concentrating on key features such as App, Usage, Notifications, and Times Opened. These features were normalized to prepare the data for GAN training. The GAN architecture consisted of a generator designed to produce synthetic data and a discriminator tasked with differentiating between real and generated data.