In [None]:
#!pip install amulet-core

**Import libs**

In [None]:
# Import the library to mount Google Drive
from google.colab import drive
# Mount the Google Drive at /content/drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/My Drive/MC_AI')

import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
#import libs.N_mc2data as MC_DATA
from libs.N_mc2data import MCReader, Block
from tensorflow.keras import regularizers
import gc

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Define models**

In [None]:
from tensorflow.keras import layers, models, backend as K

def generate_continuous_noise(batch_size, chunk_shape, seed):
    """Draw uniform floating-point noise, one value per voxel of every chunk.

    Args:
        batch_size: Size of the leading (batch) dimension of the result.
        chunk_shape: Iterable with the per-chunk dimensions.
        seed: Value passed to ``tf.random.set_seed`` before sampling.

    Returns:
        Tensor of shape ``(batch_size, *chunk_shape)`` drawn with
        ``minval=0.0`` and ``maxval=5.0``.
    """
    # NOTE: this resets TensorFlow's global seed as a side effect.
    tf.random.set_seed(seed)
    noise_shape = (batch_size, *chunk_shape)
    return tf.random.uniform(shape=noise_shape, minval=0.0, maxval=5.0)

def generate_discrete_noise(batch_size, chunk_shape, seed):
    """Draw uniform integer noise, one block ID per voxel of every chunk.

    Args:
        batch_size: Size of the leading (batch) dimension of the result.
        chunk_shape: Iterable with the per-chunk dimensions.
        seed: Value passed to ``tf.random.set_seed`` before sampling.

    Returns:
        ``tf.int32`` tensor of shape ``(batch_size, *chunk_shape)`` with values
        0..5 (``maxval=6`` is exclusive).
    """
    # NOTE: this resets TensorFlow's global seed as a side effect.
    tf.random.set_seed(seed)
    noise_shape = (batch_size, *chunk_shape)
    return tf.random.uniform(shape=noise_shape, minval=0, maxval=6, dtype=tf.int32)

# NOTE(review): the Conv3D/TConv3D helpers below take their own `krn_size`
# parameter (default 3), so this module-level value is not read by them and is
# not referenced elsewhere in the visible code.
krn_size = 4
# Activation name passed to every convolution built by Conv3D/TConv3D.
activ = 'leaky_relu'
# Denoise unet - and hope :)
# Process with convolutional layers
def Conv3D(x, filters, krn_size = 3, strides_shape = (1,1,1), normalize = True):
    """Apply a 'same'-padded 3-D convolution, optionally followed by batch norm.

    Args:
        x: Input tensor.
        filters: Number of output channels.
        krn_size: Convolution kernel size.
        strides_shape: Strides along the three spatial axes.
        normalize: When true, append a ``BatchNormalization`` layer.

    Returns:
        The convolved (and possibly normalized) tensor.
    """
    conv = layers.Conv3D(filters, kernel_size=krn_size, padding='same', strides=strides_shape, activation=activ)
    out = conv(x)
    return layers.BatchNormalization()(out) if normalize else out
def TConv3D(x, filters, krn_size = 3, strides_shape = (1,1,1), normalize = True):
    """Apply a 'same'-padded transposed 3-D convolution, optionally with batch norm.

    Args:
        x: Input tensor.
        filters: Number of output channels.
        krn_size: Kernel size of the transposed convolution.
        strides_shape: Strides along the three spatial axes.
        normalize: When true, append a ``BatchNormalization`` layer.

    Returns:
        The transformed (and possibly normalized) tensor.
    """
    tconv = layers.Conv3DTranspose(filters, kernel_size=krn_size, padding='same', strides=strides_shape, activation=activ)
    out = tconv(x)
    return layers.BatchNormalization()(out) if normalize else out
def upsample(x, factor=2):
    """Upsample ``x`` by ``factor`` along all three spatial axes."""
    scale = (factor, factor, factor)
    return layers.UpSampling3D(size=scale)(x)

# apply conditions
def attention_thing(bottleneck_features, guidance_value):
    """Re-weight features with a softmax gate driven by a scalar guidance value.

    The guidance value is reshaped to ``(1, 1, 1, 1)`` per sample so it
    broadcasts against the feature tensor, multiplied in, passed through a
    softmax over the last (channel) axis, and the resulting weights scale the
    original features.

    Args:
        bottleneck_features: Feature tensor to be re-weighted.
        guidance_value: Per-sample scalar conditioning value.

    Returns:
        ``bottleneck_features`` multiplied element-wise by the attention weights.
    """
    guidance = layers.Reshape((1, 1, 1, 1))(guidance_value)
    scores = layers.Multiply()([bottleneck_features, guidance])
    weights = layers.Softmax(axis=-1)(scores)
    return layers.Multiply()([bottleneck_features, weights])

def cave_attention(x, cave_map, depth, use_cave_attention):
    """Gate a feature map with a sigmoid mask derived from the cave map.

    The cave map is projected to a single channel by a 1x1x1 convolution whose
    stride equals ``depth`` on every axis, which brings it to the resolution of
    the encoder level being gated. The sigmoid of that projection multiplies
    ``x``.

    Args:
        x: Feature tensor of the current encoder level.
        cave_map: Full-resolution cave map tensor.
        depth: Stride used to downsample the cave map (1, 2, 4, 8 at the call sites).
        use_cave_attention: Currently unused; the gated features are always returned.

    Returns:
        ``x`` multiplied element-wise by the sigmoid gate.
    """
    stride = (depth, depth, depth)
    projected = layers.Conv3D(1, kernel_size=1, padding='same', strides=stride)(cave_map)
    gate = layers.Activation('sigmoid')(projected)
    return layers.Multiply()([x, gate])

class ConditionalCaveMapLayer(layers.Layer):
    """Builds a 3-D cave map from a 2-D heightmap scaled by a cave density.

    Pipeline (shapes exclude the batch axis):
      heightmap (16, 16) -> reshape (16, 16, 1) -> Conv2D to 320 channels
      -> multiply by the cave density -> permute to (16, 320, 16)
      -> reshape (16, 320, 16, 1) -> three Conv3D layers (32/64/128 filters)
      -> sigmoid Conv3D producing a single-channel map (16, 320, 16, 1).

    All sub-layers, including the weight-free ``Reshape`` and ``Multiply``, are
    created once in ``__init__`` rather than on every ``call``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Layers that own weights (creation order kept so weight order is unchanged).
        self.conv2d = layers.Conv2D(320, kernel_size=3, strides=(1, 1), padding='same')
        self.permute = layers.Permute((1, 3, 2))
        self.reshape = layers.Reshape((16, 320, 16, 1))
        self.conv3d1 = layers.Conv3D(32, kernel_size=3, padding='same', activation='leaky_relu')
        self.conv3d2 = layers.Conv3D(64, kernel_size=3, padding='same', activation='leaky_relu')
        self.conv3d3 = layers.Conv3D(128, kernel_size=3, padding='same', activation='leaky_relu')
        self.conv3d = layers.Conv3D(1, kernel_size=3, padding='same', activation='sigmoid')
        # Weight-free helpers, previously instantiated inside call().
        self.heightmap_reshape = layers.Reshape((16, 16, 1))
        self.density_scale = layers.Multiply()

    def call(self, inputs):
        """Compute the cave map.

        Args:
            inputs: Sequence ``(input_heightmap, input_cave_density,
                use_cave_attention)``. The third element is accepted for
                interface compatibility but is not used.

        Returns:
            Tensor with one sigmoid-activated channel per voxel.
        """
        input_heightmap, input_cave_density, _use_cave_attention = inputs

        # 2-D stage: lift the heightmap to 320 channels and scale by density.
        cave_init = self.conv2d(self.heightmap_reshape(input_heightmap))
        cave_init = self.density_scale([cave_init, input_cave_density])

        # Move the 320 channels into the second spatial axis, then add a channel axis.
        x = self.reshape(self.permute(cave_init))

        # 3-D refinement stage.
        x = self.conv3d1(x)
        x = self.conv3d2(x)
        x = self.conv3d3(x)
        return self.conv3d(x)


def build_generator():
    """Build the conditional 3-D U-Net generator.

    Inputs (batch axis omitted):
        init_chunks: one-hot chunk volume, shape (16, 320, 16, 6).
        input_heightmap: shape (16, 16).
        input_cave_density: shape (1,).
        use_cave_attention: scalar float flag per sample.

    The encoder has four levels (64/128/256/512 filters); levels 1-3 halve the
    spatial resolution with a stride-2 convolution. After each level the
    features are gated by ``cave_attention`` using the cave map downsampled by
    1, 2, 4 and 8 respectively. The decoder upsamples, halves the channel count
    with a transposed convolution, concatenates the matching encoder output and
    applies two more transposed convolutions.

    Returns:
        ``models.Model`` with outputs ``[per-voxel softmax over 6 classes, cave_map_3d]``.
    """
    # Inputs
    init_chunks_indices = layers.Input(shape=(16, 320, 16, 6), name='init_chunks')  # (None, 16, 320, 16, 6)
    input_heightmap = layers.Input(shape=(16, 16), name='input_heightmap')  # (None, 16, 16)
    input_cave_density = layers.Input(shape=(1,), name='input_cave_density')  # (None, 1)
    use_cave_attention = layers.Input(shape=(), dtype=tf.float32, name='use_cave_attention')

    cave_map_3d = ConditionalCaveMapLayer()([input_heightmap, input_cave_density, use_cave_attention])

    unit_stride = (1, 1, 1)
    half_stride = (2, 2, 2)

    def encoder_stage(tensor, filters, cave_stride, first_strides, first_normalize):
        # Three 3x3x3 convolutions followed by cave-map gating.
        tensor = Conv3D(tensor, filters, 3, first_strides, first_normalize)
        tensor = Conv3D(tensor, filters, 3, unit_stride)
        tensor = Conv3D(tensor, filters, 3, unit_stride)
        return cave_attention(tensor, cave_map_3d, cave_stride, use_cave_attention)

    def decoder_stage(upsampled, skip, filters):
        # Halve the channels, merge with the skip connection, refine twice.
        reduced = TConv3D(upsampled, filters, 3, unit_stride)
        merged = layers.Concatenate()([skip, reduced])
        merged = TConv3D(merged, filters, 3, unit_stride)
        return TConv3D(merged, filters, 3, unit_stride)

    # Encoder: 16x320x16 -> 8x160x8 -> 4x80x4 -> 2x40x2 (bottleneck).
    # The very first convolution is the only one without batch normalization.
    skip_0 = encoder_stage(init_chunks_indices, 64, 1, unit_stride, False)
    skip_1 = encoder_stage(skip_0, 128, 2, half_stride, True)
    skip_2 = encoder_stage(skip_1, 256, 4, half_stride, True)
    bottleneck = encoder_stage(skip_2, 512, 8, half_stride, True)

    # Decoder: back up to 16x320x16 (shape prints kept from the original).
    print(np.shape(bottleneck))
    up_2 = upsample(bottleneck)
    print(np.shape(up_2))
    dec_2 = decoder_stage(up_2, skip_2, 256)
    dec_1 = decoder_stage(upsample(dec_2), skip_1, 128)
    dec_0 = decoder_stage(upsample(dec_1), skip_0, 64)

    # Per-voxel class probabilities over the 6 block types.
    output = layers.Conv3DTranspose(6, kernel_size=1, padding='same', strides=(1, 1, 1), activation='softmax')(dec_0)

    return models.Model(
        inputs=[init_chunks_indices, input_heightmap, input_cave_density, use_cave_attention],
        outputs=[output, cave_map_3d],
    )


# Note: `strides` is the step by which the convolution kernel shifts across the input.




def build_discriminator(heightmap_shape=(16, 16, 1), cave_density_shape=(1,), chunk_shape=(16, 320, 16, 6)):
    """Build the U-Net shaped discriminator that scores every voxel.

    Args:
        heightmap_shape: Unused; kept for interface compatibility.
        cave_density_shape: Unused; kept for interface compatibility.
        chunk_shape: Shape of the one-hot chunk volume fed to the model.

    The topology mirrors the generator's encoder/decoder (64/128/256/512
    filters, stride-2 downsampling, skip connections) without cave attention.
    The first convolution uses a kernel of size 5, in the hope of capturing
    large caves better.

    Returns:
        ``models.Model`` mapping a chunk to a single sigmoid channel per voxel.
    """
    input_chunk = layers.Input(shape=chunk_shape)

    unit_stride = (1, 1, 1)
    half_stride = (2, 2, 2)

    def encoder_stage(tensor, filters, first_kernel, first_strides, first_normalize):
        # Three convolutions; only the first one may change kernel/stride/normalization.
        tensor = Conv3D(tensor, filters, first_kernel, first_strides, first_normalize)
        tensor = Conv3D(tensor, filters, 3, unit_stride)
        return Conv3D(tensor, filters, 3, unit_stride)

    def decoder_stage(upsampled, skip, filters):
        # Halve the channels, merge with the skip connection, refine twice.
        reduced = TConv3D(upsampled, filters, 3, unit_stride)
        merged = layers.Concatenate()([skip, reduced])
        merged = TConv3D(merged, filters, 3, unit_stride)
        return TConv3D(merged, filters, 3, unit_stride)

    # Encoder: 16x320x16 -> 8x160x8 -> 4x80x4 -> 2x40x2 (bottleneck).
    skip_0 = encoder_stage(input_chunk, 64, 5, unit_stride, False)
    skip_1 = encoder_stage(skip_0, 128, 3, half_stride, True)
    skip_2 = encoder_stage(skip_1, 256, 3, half_stride, True)
    bottleneck = encoder_stage(skip_2, 512, 3, half_stride, True)

    # Decoder (shape print kept from the original).
    up_2 = upsample(bottleneck)
    print(np.shape(up_2))
    dec_2 = decoder_stage(up_2, skip_2, 256)
    dec_1 = decoder_stage(upsample(dec_2), skip_1, 128)
    dec_0 = decoder_stage(upsample(dec_1), skip_0, 64)

    # One real/fake probability per voxel.
    output = layers.Conv3D(1, kernel_size=1, strides=(1,1,1), padding='same', activation='sigmoid')(dec_0)

    return models.Model(inputs=[input_chunk], outputs=output)




**Build models**

In [None]:
# Instantiate the models. Both builders print intermediate tensor shapes while
# constructing the graph, which is where the shape tuples in this cell's output
# come from.
generator = build_generator()
discriminator = build_discriminator()

#generator.summary()
#discriminator.summary()

(None, 2, 40, 2, 512)
(None, 4, 80, 4, 512)
(None, 4, 80, 4, 512)


**Load dataset**

In [None]:
# Create the project's dataset reader and load the saved dataset.
# NOTE(review): the meaning of the constructor argument (128) is defined in
# libs.N_mc2data, not here — presumably the world size in chunks; confirm.
data = MCReader(128)
# The path is relative, so it depends on the notebook's working directory
# (presumably /content on Colab — confirm).
data.load("drive/MyDrive/MC_AI/datasets/DATA123")
#data.chunk_info(0,0)

Estimated size: 1342.18 MB (thats also a minimum requirement of free RAM)


True

**Train GAN**

In [None]:
import random

# Optimizers (initial-training learning rate).
# NOTE: these two assignments are immediately overridden by the fine-tuning
# pair below; comment that pair out to train with the higher learning rate.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005, beta_1=0.5)

# Fine-tuning (lower learning rate) — this is the pair actually in effect.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5)

# Loss function shared by the generator and discriminator adversarial terms.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)
# from_logits must be False when the discriminator has an activation on its last
# layer (i.e. when its output is already scaled to 0..1, as with the sigmoid here).

def discriminator_loss(real_output, fake_output):
    """Binary cross-entropy loss for the discriminator.

    Real outputs are compared against all-ones targets and fake outputs against
    all-zeros targets; the two terms are summed.

    Args:
        real_output: Discriminator predictions for real chunks.
        fake_output: Discriminator predictions for fake chunks.

    Returns:
        Scalar loss tensor.
    """
    real_targets = tf.ones_like(real_output)
    fake_targets = tf.zeros_like(fake_output)
    return cross_entropy(real_targets, real_output) + cross_entropy(fake_targets, fake_output)

def MSE(real, predict):
    """Return the mean of the squared element-wise differences."""
    error = real - predict
    return tf.reduce_mean(tf.square(error))

def MAE(real, predict):
    """Return the mean of the absolute element-wise differences."""
    error = real - predict
    return tf.reduce_mean(tf.abs(error))

def generator_loss(fake_output, generated_chunks, real_chunks, generated_cave_map):
    """Generator loss: adversarial term plus class-weighted cross-entropy.

    Args:
        fake_output: Discriminator predictions for the generated chunks.
        generated_chunks: Generator output, per-voxel probabilities over 6 classes.
        real_chunks: One-hot encoded target chunks with the same shape.
        generated_cave_map: Currently unused (cave-map specific losses are disabled).

    Returns:
        Tuple ``(total_gen_loss, 0, 0, 0)``. The three zeros are placeholders
        for the disabled cave-map, volume and height losses that callers still
        unpack.

    History: earlier experiments added an L1 reconstruction term, cave-volume
    and cave-air-count penalties, a penalty for cave air above height 126, and
    MSE/MAE/BCE losses on the cave map. MSE over-penalised outliers and MAE
    tolerated stray blocks better, but in the end neither worked, so only the
    two terms below are active.
    """
    # Per-class weights, indexed by block ID (0 air, 1 dirt, 2 sand, 3 stone,
    # 4 cave_air, 5 bedrock). Dirt and bedrock are rare and sand is rarer still,
    # so they are weighted up. Adjust as needed.
    class_weights = tf.constant([1.0, 2.0, 5.0, 1.0, 2.0, 2.0])

    # Because real_chunks is one-hot, this picks the weight of each voxel's true class.
    voxel_weights = tf.reduce_sum(real_chunks * class_weights, axis=-1)

    # Un-reduced categorical cross-entropy so that it can be weighted per voxel.
    categorical_ce = tf.keras.losses.CategoricalCrossentropy(from_logits=False, reduction='none')
    per_voxel_ce = categorical_ce(real_chunks, generated_chunks)
    ce_loss = tf.reduce_mean(per_voxel_ce * voxel_weights)

    # Adversarial term: the generator wants the discriminator to output ones.
    adv_loss = cross_entropy(tf.ones_like(fake_output), fake_output)

    return adv_loss + ce_loss, 0, 0, 0

def gradient_penalty(real_chunks, fake_chunks):
    """Gradient penalty on random interpolations of real and fake chunks.

    For every sample a random ``alpha`` in [0, 1) blends the real and fake
    chunk; the penalty is the mean squared deviation from 1 of the L2 norm of
    the discriminator's gradient with respect to that blend.

    Args:
        real_chunks: Batch of real chunks, rank 5 (batch + 4 axes).
        fake_chunks: Batch of fake chunks with the same shape.

    Returns:
        Scalar penalty tensor.
    """
    # Take the batch size from the data instead of the global `batch_size`, so
    # a final partial batch (or any other batch size) broadcasts correctly.
    n_samples = tf.shape(real_chunks)[0]
    alpha = tf.random.uniform(shape=[n_samples, 1, 1, 1, 1], minval=0., maxval=1.)
    interpolated = alpha * real_chunks + (1 - alpha) * fake_chunks

    with tf.GradientTape() as gp_tape:
        gp_tape.watch(interpolated)
        d_interpolated = discriminator([interpolated], training=True)

    gradients = gp_tape.gradient(d_interpolated, [interpolated])[0]
    squared_norm = tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3, 4])
    # The derivative of sqrt is infinite at 0; the epsilon keeps the outer
    # tape from producing NaN gradients when a sample's gradient norm is zero.
    gradients_l2 = tf.sqrt(squared_norm + 1e-12)
    return tf.reduce_mean((gradients_l2 - 1.0) ** 2)

# NOTE(review): real_label / fake_label are not referenced by the loss functions
# in the visible code (those use tf.ones_like / tf.zeros_like targets).
real_label = 0.9  # Real label is smoothed
fake_label = 0.1  # Fake label is slightly noisy
# Multiplier applied to the cave densities in generate_chunks(). It is declared
# trainable, but no visible optimizer step updates it, and the scaling is
# commented out in train_step() — confirm this train/inference difference is intended.
cave_scale = tf.Variable(2.0, trainable=True, dtype=tf.float32)

# Chunk shape: (batch_size, 16, 320, 16), Values: 0.0...5.0
def apply_noise(chunks, seed = None):
    """Blend a block-ID volume 50/50 with random block IDs.

    Args:
        chunks: Float tensor of block IDs; reshaped/used as
            ``(batch, 16, 320, 16)`` by the callers.
        seed: Seed for the noise generator. When ``None`` a value in 1..100 is
            drawn with Python's ``random`` module.
            NOTE(review): inside a ``tf.function`` this Python-level draw
            presumably happens only while tracing — confirm.

    Returns:
        Tensor of the same shape holding rounded values clipped to 0..5.
    """
    if seed is None:
        seed = random.randint(1, 100)

    n_chunks = tf.shape(chunks)[0]
    noise_intensity = 0.5
    random_ids = tf.cast(generate_discrete_noise(n_chunks, (16, 320, 16), seed), tf.float32)

    # Linear blend of original IDs and random IDs, snapped back to valid block IDs.
    blended = (1 - noise_intensity) * chunks + noise_intensity * random_ids
    return tf.clip_by_value(tf.round(blended), 0, 5)

@tf.function
def D_train_step(real_chunks_one_hot, collapsed_chunks):
    """Run one discriminator-only update.

    Args:
        real_chunks_one_hot: Batch of real one-hot chunks.
        collapsed_chunks: Batch of chunks treated as the "fake" samples.

    Returns:
        The discriminator loss (binary cross-entropy plus 10x gradient penalty).
    """
    # Only one gradient is taken from this tape, so it does not need to be
    # persistent (a persistent tape holds its resources until it is deleted).
    with tf.GradientTape() as tape:
        real_output = discriminator([real_chunks_one_hot], training=True)
        fake_output = discriminator([collapsed_chunks], training=True)
        gp = gradient_penalty(real_chunks_one_hot, collapsed_chunks)
        disc_loss = discriminator_loss(real_output, fake_output) + (10.0 * gp)
    gradients_of_discriminator = tape.gradient(disc_loss, discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    return disc_loss

@tf.function
def train_step(real_chunks, heightmaps, cave_densities, input_volume):
    """Run one simultaneous generator + discriminator update.

    Args:
        real_chunks: One-hot target chunks, shape (batch, 16, 320, 16, 6).
        heightmaps: Heightmaps fed to the generator.
        cave_densities: Cave densities fed to the generator (passed unscaled).
        input_volume: Block-ID volume that is noised and one-hot encoded to
            form the generator's chunk input.

    Returns:
        Tuple ``(gen_loss, disc_loss, cave_map_loss, volume_loss, height_loss,
        cave_air_blocks)`` where ``cave_air_blocks`` counts generated voxels
        whose class-4 (cave_air) probability exceeds 0.95 across the batch.
    """
    # Corrupt the input volume with discrete noise, then one-hot encode it.
    # [TODO] what about collapsed chunks?
    input_chunks = apply_noise(input_volume)
    input_chunks = tf.one_hot(tf.cast(input_chunks, tf.uint8), depth=6)

    # An earlier experiment randomly alternated between chunks with caves and
    # cave-free "collapsed" chunks (attention flag 0). It is disabled, so the
    # real chunks are always the targets and the flag is always 1.
    real_chunks_one_hot = real_chunks
    # NOTE: the flag has shape (1,) regardless of the batch size; the generator
    # does not currently use its value.
    use_cave_attention = tf.convert_to_tensor([1.], dtype=tf.float32)

    # Persistent because two separate gradients are taken from the same tape.
    with tf.GradientTape(persistent=True) as tape:
        # generated_chunks: (batch_size, 16, 320, 16, 6)
        generated_chunks, gen_cave_map_3d = generator([input_chunks, heightmaps, cave_densities, use_cave_attention], training=True)

        real_output = discriminator([real_chunks_one_hot], training=True)
        fake_output = discriminator([generated_chunks], training=True)

        gp = gradient_penalty(real_chunks_one_hot, generated_chunks)

        gen_loss, cave_map_loss, volume_loss, height_loss = generator_loss(fake_output, generated_chunks, real_chunks_one_hot,
                                                 gen_cave_map_3d)
        disc_loss = discriminator_loss(real_output, fake_output) + (10.0 * gp)

    gradients_of_generator = tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = tape.gradient(disc_loss, discriminator.trainable_variables)
    # Drop the reference so the persistent tape's resources can be released.
    del tape

    # Clip every gradient tensor to an L2 norm of at most 1.
    gradients_of_generator = [tf.clip_by_norm(g, 1.0) for g in gradients_of_generator]
    gradients_of_discriminator = [tf.clip_by_norm(g, 1.0) for g in gradients_of_discriminator]

    # Skip an update entirely if any of its gradients contains a NaN.
    generator_nan_check = tf.reduce_any([tf.reduce_any(tf.math.is_nan(g)) for g in gradients_of_generator])
    discriminator_nan_check = tf.reduce_any([tf.reduce_any(tf.math.is_nan(g)) for g in gradients_of_discriminator])

    if not generator_nan_check:
        generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    else:
        tf.print("NaNs detected in generator gradients, skipping update.")

    if not discriminator_nan_check:
        discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    else:
        tf.print("NaNs detected in discriminator gradients, skipping update.")

    cave_air_blocks = tf.reduce_sum(tf.cast(generated_chunks[..., 4] > 0.95, tf.float32))
    return gen_loss, disc_loss, cave_map_loss, volume_loss, height_loss, cave_air_blocks


num_epochs = 4  # Number of passes over the training set
# Number of samples used for training: half of the Wx * Wz grid.
# NOTE(review): presumably one sample per chunk column — confirm against MCReader.
data_size = int(data.Wx * data.Wz / 2)
batch_size = 8


# Fetch the raw training arrays; the fifth returned value is not used here.
DATASET_INPUT_HEIGHTMAP, DATASET_INPUT_CAVES, DATASET_INPUT_VOLUME, DATASET_OUTPUT_REAL, _ = data.get_ml_input_set(0, data_size)

in_heightmaps = tf.convert_to_tensor(DATASET_INPUT_HEIGHTMAP, dtype=tf.float32) / 320.0   # 0...320   -> 0.0...1.0
# The normalisation by 16*126*16 is commented out, so cave values stay unscaled
# (presumably raw counts in 0...32256 — confirm).
in_caves = tf.convert_to_tensor(DATASET_INPUT_CAVES, dtype=tf.float32) #/ (16*(126)*16)
in_volume = tf.convert_to_tensor(DATASET_INPUT_VOLUME, dtype=tf.float32)                  # 0...5     -> 0.0...5.0

# Free the source arrays now that tensors exist. DATASET_OUTPUT_REAL is kept
# because the baseline statistics below still read it.
del DATASET_INPUT_HEIGHTMAP, DATASET_INPUT_CAVES, DATASET_INPUT_VOLUME
gc.collect()

# Block IDs 0...5 -> one-hot vectors of length 6.
# Shape: (batch_size, x, y, z) -> (batch_size, x, y, z, 6)
out_real_one_hot = tf.one_hot(tf.cast(DATASET_OUTPUT_REAL, tf.uint8), depth=6)
#out_real_collapsed_one_hot = tf.one_hot(tf.cast(DATASET_OUTPUT_REAL_COLLAPSED, tf.uint8), depth=6)
#del DATASET_OUTPUT_REAL_COLLAPSED
gc.collect()

# Per-step loss history (one entry per training step).
training_history = dict(step=[], gen_loss=[], disc_loss=[], cave_map_loss=[])

# Per-epoch block counts of the generated sample batch, plus the reference
# ("baseline") counts taken from the first `batch_size` real chunks.
training_stats = dict(
    epoch=[],
    air=[],
    dirt=[],
    sand=[],
    stone=[],
    cave_air=[],
    bedrock=[],
    air_baseline=None,
    dirt_baseline=None,
    sand_baseline=None,
    stone_baseline=None,
    cave_air_baseline=None,
    bedrock_baseline=None,
)

# Block IDs: 0 air, 1 dirt, 2 sand, 3 stone, 4 cave_air, 5 bedrock.
training_stats.update(
    air_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 0),
    dirt_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 1),
    sand_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 2),
    stone_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 3),
    cave_air_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 4),
    bedrock_baseline=np.count_nonzero(DATASET_OUTPUT_REAL[0:batch_size] == 5),
)

def print_progress(txt):
    """Print a progress line, overwriting the previous one where possible.

    When running under IDLE (``idlelib.run`` is loaded) the text is printed as
    a normal line. Otherwise it is prefixed with a carriage return, printed
    without a trailing newline and flushed.
    """
    running_in_idle = 'idlelib.run' in sys.modules
    if running_in_idle:
        print(txt)
        return
    print(f"\r{txt}", end="", flush=True)


test_name = "one-hot-final"

# Running values shown by the progress line; they hold the results of the most
# recent training step (0 before the first step).
step = 1
cave_loss_temp = 0
gen_loss = 0
disc_loss = 0
volume_loss = 0
height_loss = 0
cave_air_blocks = 0


# Defined once, before the loop (it used to be re-defined on every epoch).
def generate_chunks(amount):
    """Generate `amount` chunks from the first `amount` dataset samples.

    Returns:
        The generator's chunk prediction (the cave-map output is discarded).
    """
    # NOTE(review): densities are scaled by cave_scale here but passed
    # unscaled during training — confirm this difference is intended.
    cave_dens = in_caves[0:amount] * cave_scale
    cave_dens = tf.reshape(cave_dens, (amount, 1))

    use_cave_attention = tf.convert_to_tensor([1.] * amount, dtype=tf.float32)
    use_cave_attention = tf.reshape(use_cave_attention, (amount, 1))

    heightmap = in_heightmaps[0:amount]
    heightmap = tf.reshape(heightmap, (amount, 16, 16))

    test_chunk = apply_noise(tf.reshape(in_volume[0:amount], (amount, 16, 320, 16)))
    test_chunk = tf.one_hot(tf.cast(test_chunk, tf.uint8), depth=6)

    # Batch_size 1 because there is limitation within the model
    prediction, _ = generator.predict([test_chunk, heightmap, cave_dens, use_cave_attention], batch_size=1)
    return prediction


for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    for batch_offset in range(0, data_size, batch_size):
        print_progress(f"{batch_offset}/{data_size} [{cave_air_blocks}](cl: {cave_loss_temp}, gl: {gen_loss}, dl: {disc_loss}) (vl: {volume_loss}, hl: {height_loss})")

        a = batch_offset
        b = batch_offset+batch_size
        # Perform training step
        gen_loss, disc_loss, cave_map_loss, volume_loss, height_loss, cave_air_blocks = train_step(out_real_one_hot[a:b], in_heightmaps[a:b], in_caves[a:b],
                                                        in_volume[a:b])
        cave_loss_temp=cave_map_loss.numpy()
        training_history['step'].append(step)
        step += 1
        training_history['gen_loss'].append(gen_loss.numpy())  # Convert to numpy to avoid storing tensors
        training_history['disc_loss'].append(disc_loss.numpy())
        training_history['cave_map_loss'].append(cave_map_loss.numpy())

    # Generate a sample batch at the end of the epoch
    prediction = generate_chunks(batch_size)
    # First chunk info
    data.chunk_info2(prediction[0])
    decoded_chunks = np.zeros((batch_size, 16, 320, 16), dtype=np.uint8)
    # Separate index name so the batch slice bound `b` above is not reused.
    for chunk_idx in range(batch_size):
        decoded_chunks[chunk_idx] = data.decode(prediction[chunk_idx])

    # Gather per-block-type counts of the generated sample batch
    training_stats['epoch'].append(epoch + 1)
    training_stats['air'].append(np.count_nonzero(decoded_chunks == 0))
    training_stats['dirt'].append(np.count_nonzero(decoded_chunks == 1))
    training_stats['sand'].append(np.count_nonzero(decoded_chunks == 2))
    training_stats['stone'].append(np.count_nonzero(decoded_chunks == 3))
    training_stats['cave_air'].append(np.count_nonzero(decoded_chunks == 4))
    training_stats['bedrock'].append(np.count_nonzero(decoded_chunks == 5))

    # Tile Pattern KL-Divergence
    # --its too slow to do it in colab--
    #np.save(f"drive/MyDrive/MC_AI/TESTS/{test_name}/epoch_{epoch}", decoded_chunks)


    # Overwritten every epoch: raw prediction of the first generated chunk
    np.save("drive/MyDrive/MC_AI/res1", prediction[0])
    print(f"Epoch {epoch + 1}, Generator Loss: {gen_loss.numpy()}, Discriminator Loss: {disc_loss.numpy()}, Cave-map Loss: {cave_map_loss.numpy()}")

Epoch 1/4
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
[Chunk info]
 Unique blocks [0 1 2 3 5]
 Blocks:
  [0] 'air' : 51976 (63.45 %)
  [1] 'dirt' : 713 (0.87 %)
  [2] 'sand' : 80 (0.1 %)
  [3] 'stone' : 28228 (34.46 %)
  [4] 'cave_air' : 0 (0.0 %)
  [5] 'bedrock' : 923 (1.13 %)
 Caves density: 0 (0.0 %)
 Elevation:
  Highest point (0)
  Lowest point (-27)
 Radius:
  Highest point (126) (In-game: 62)
  Lowest point (99) (In-game: 35)
 Neighbours:
  [TODO]
 Heightmap (In-game):
[[46 46 46 46 46 46 46 46 46 46 46 47 47 47 47 47]
 [46 46 46 46 46 46 46 46 46 46 47 48 49 49 49 48]
 [46 46 46 46 46 46 46 46 46 47 49 50 51 51 51 51]
 [46 46 46 46 46 46 46 46 46 48 50 51 52 53 55 57]
 [46 46 45 45 45 45 45 45 45 48 51 52 53 55 58 59]
 [46 45 45 44 44 44 44 45 45 48 51 52 54 57 59 60]
 [45 45 45 43 43 43 43 44 44 48 51 52 54 59 60 60]
 [45 45 44 43 42 42 42 43 43 45 51 53 58 59 60 60]
 [44 44 43 41 40 40 40 41 42 44 51 57 59 60 60 60]
 [44 43 42 40 36 35 35 40 41 46 5

In [None]:
def generate_chunks(amount):
    """Generate `amount` chunks and their cave maps from the first dataset samples.

    Args:
        amount: Number of leading dataset samples to use.

    Returns:
        Tuple ``(prediction, cave_map)`` as returned by ``generator.predict``.
    """
    # Conditioning inputs, each reshaped to have `amount` as the leading axis.
    cave_dens = tf.reshape(in_caves[0:amount] * cave_scale, (amount, 1))
    use_cave_attention = tf.reshape(
        tf.convert_to_tensor([1.] * amount, dtype=tf.float32), (amount, 1)
    )
    heightmap = tf.reshape(in_heightmaps[0:amount], (amount, 16, 16))

    # Noised block-ID volume, one-hot encoded over the 6 block types.
    noisy_volume = apply_noise(tf.reshape(in_volume[0:amount], (amount, 16, 320, 16)))
    test_chunk = tf.one_hot(tf.cast(noisy_volume, tf.uint8), depth=6)

    # Batch_size 1 because there is limitation within the model
    prediction, cave_map = generator.predict([test_chunk, heightmap, cave_dens, use_cave_attention], batch_size=1)
    return prediction, cave_map

# Generate a single chunk and store its cave map on Drive.
# NOTE(review): the global name `map` shadows Python's builtin map() for the
# rest of the session; consider renaming it (e.g. cave_map) if nothing later
# depends on this name.
pred, map = generate_chunks(1)
np.save("drive/MyDrive/MC_AI/res_map_one_hot_unguided", map[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step


**Save training history**

In [None]:
import pandas as pd
import os

# Every artefact of this run goes into one per-test folder on Drive.
_out_dir = f"drive/MyDrive/MC_AI/TESTS/{test_name}"
os.makedirs(_out_dir, exist_ok=True)

# Step-level losses: one column per tracked series.
training_history_df = pd.DataFrame({
    column: training_history[column]
    for column in ('step', 'gen_loss', 'disc_loss', 'cave_map_loss')
})

# Save to CSV (semicolon-separated, decimal comma)
training_history_df.to_csv(f"{_out_dir}/training_history.csv", index=False, sep=';', decimal=',')

# Epoch-level data: the per-block series first, then each block's baseline
# value repeated once per recorded 'air' entry so it fills a whole column.
_stat_blocks = ('air', 'dirt', 'sand', 'stone', 'cave_air', 'bedrock')
_n_rows = len(training_stats['air'])

_epoch_columns = {'epoch': training_stats['epoch']}
_epoch_columns.update({block: training_stats[block] for block in _stat_blocks})
_epoch_columns.update({
    f'{block}_baseline': [training_stats[f'{block}_baseline']] * _n_rows
    for block in _stat_blocks
})
epoch_data_df = pd.DataFrame(_epoch_columns)

epoch_data_df.to_csv(f"{_out_dir}/training_stats.csv", index=False, sep=';', decimal=',')

In [None]:
# Both networks are written as .h5 files into the per-test folder on Drive.
_model_out_dir = f'drive/MyDrive/MC_AI/TESTS/{test_name}'

# Generator
generator.save(f'{_model_out_dir}/generator_model_{test_name}.h5')

# Discriminator
discriminator.save(f'{_model_out_dir}/discriminator_model_{test_name}.h5')



In [None]:
# Pick the saved experiment whose weights should be restored.
test_name = "one-hot"
_weights_dir = f'drive/MyDrive/MC_AI/TESTS/{test_name}'

# Load the generator weights from its saved .h5 file
generator.load_weights(f'{_weights_dir}/generator_model_{test_name}.h5')

# Load the discriminator weights from its saved .h5 file
discriminator.load_weights(f'{_weights_dir}/discriminator_model_{test_name}.h5')

In [None]:
# NOTE(review): `generate_chunks` feeds the generator four inputs with a
# one-hot volume, while this cell feeds three inputs with an integer-valued
# volume -- confirm which signature the loaded generator expects.
# Assumes `random` was imported in an earlier cell.

# Conditioning inputs taken from entry [0, 0] of the loaded data.
# Cave density is divided by 16*(64+62)*16 and then multiplied by 500.
cave_dens = tf.reshape(
    tf.cast(data.cave_densities[0, 0], tf.float32) / (16 * (64 + 62) * 16) * 500,
    (1, 1),
)
# Heightmap is divided by 320, the size of the chunk's second axis in this cell.
heightmap = tf.reshape(
    tf.cast(data.heightmaps[0, 0], tf.float32) / 320.0,
    (1, 16, 16),
)

# Blend the first initial chunk 50/50 with discrete noise, then snap the
# result back onto the block ids 0..5.
noise_intensity = 0.5
seed = random.randint(1, 100)
noise = generate_discrete_noise(1, (16, 320, 16), seed)
# init_chunks (batch_size, 16, 320, 16, 1)
test_chunk = tf.clip_by_value(
    tf.round(  # Round to nearest integer
        (1 - noise_intensity) * init_chunks_data[0].reshape(1, 16, 320, 16)
        + noise_intensity * tf.cast(noise, tf.float32)
    ),
    0,
    5,
)
print(np.shape(test_chunk))

# Predict, print the chunk summary, and store the first result on Drive.
prediction = generator.predict([test_chunk, heightmap, cave_dens])
data.chunk_info2(prediction[0])
np.save(file="drive/MyDrive/MC_AI/res1", arr=prediction[0])

(1, 16, 320, 16)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265ms/step
[Chunk info]
 Unique blocks [0 1 3 5]
 Blocks:
  [0] 'air' : 52290 (63.83 %)
  [1] 'dirt' : 766 (0.94 %)
  [2] 'sand' : 0 (0.0 %)
  [3] 'stone' : 27855 (34.0 %)
  [4] 'cave_air' : 0 (0.0 %)
  [5] 'bedrock' : 1009 (1.23 %)
 Caves density: 0 (0.0 %)
 Elevation:
  Highest point (0)
  Lowest point (-27)
 Radius:
  Highest point (126) (In-game: 62)
  Lowest point (99) (In-game: 35)
 Neighbours:
  [TODO]
 Heightmap (In-game):
[[46 46 46 46 46 46 46 46 46 46 47 47 47 47 47 46]
 [46 46 46 46 46 46 46 46 46 46 47 48 48 48 48 47]
 [46 46 46 46 46 46 46 46 46 46 49 49 49 49 48 48]
 [46 46 46 45 45 45 45 45 45 46 50 50 50 51 52 57]
 [46 46 45 45 45 44 45 45 45 45 50 51 51 52 57 59]
 [45 45 45 45 44 44 44 44 44 45 50 51 53 55 59 59]
 [45 45 45 44 43 43 43 44 44 45 49 51 54 56 59 60]
 [44 44 44 40 38 38 39 41 42 44 48 51 56 59 60 60]
 [44 44 40 37 35 35 36 39 41 42 45 52 58 60 60 61]
 [43 43 39 36 35 35 35 36 4

**Other experiments (scratch cells)**

In [None]:
# Build a 50/50 blend of the first initial chunk and discrete noise, snapped
# back onto the block ids 0..5.
# Assumes `random` was imported in an earlier cell.
noise_intensity = 0.5
seed = random.randint(1, 100)
noise = generate_discrete_noise(1, (16, 320, 16), seed)
# init_chunks (batch_size, 16, 320, 16, 1)
test_chunk = tf.clip_by_value(
    tf.round(  # Round to nearest integer
        (1 - noise_intensity) * init_chunks_data[0].reshape(1, 16, 320, 16)
        + noise_intensity * tf.cast(noise, tf.float32)
    ),
    0,
    5,
)
print(np.shape(test_chunk))

# NOTE(review): the generator is given the un-noised initial chunk here;
# `test_chunk` is only used for the shape print above -- confirm this is intended.
prediction = generator.predict(
    [
        init_chunks_data[0].reshape(1, 16, 320, 16),
        data.cave_densities[0, 0].reshape(1, 1),
    ]
)
np.save(file="drive/MyDrive/MC_AI/res1", arr=prediction[0])

(1, 16, 320, 16)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step


In [None]:

# One-hot initial chunk: every (x, z) column is set to block id 3 ('stone' in
# the chunk-info listing) from y = 0 up to that column's heightmap value.
_stone_one_hot = [0, 0, 0, 1, 0, 0]
_column_heights = data.heightmaps[0, 0]

init_chunk3 = np.zeros(shape=(16, 320, 16, 6), dtype=np.float32)
for x, z in np.ndindex(16, 16):
    init_chunk3[x, 0:_column_heights[x, z], z] = _stone_one_hot

# Feed random normal noise, the stone-column chunk and the cave density to the
# generator, then print the summary of the first predicted chunk.
random_noise = tf.random.normal(shape=[1, noise_dim])
prediction = generator.predict(
    [
        random_noise,
        init_chunk3.reshape(1, 16, 320, 16, 6),
        data.cave_densities[0, 0].reshape(1, 1),
    ]
)
data.chunk_info2(prediction[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[Chunk info]
 Unique blocks [0 1 3 4]
 Blocks:
  [0] 'air' : 77314 (94.38 %)
  [1] 'dirt' : 4451 (5.43 %)
  [2] 'sand' : 0 (0.0 %)
  [3] 'stone' : 123 (0.15 %)
  [4] 'cave_air' : 32 (0.04 %)
  [5] 'bedrock' : 0 (0.0 %)
 Caves density: 0 (0.0 %)
 Elevation:
  Highest point (193)
  Lowest point (-126)
 Radius:
  Highest point (319) (In-game: 255)
  Lowest point (0) (In-game: -64)
 Neighbours:
  [TODO]
 Heightmap (In-game):
[[  251   247 65472    98 65472 65472 65472   180 65472 65472 65472 65472
  65472    81 65472   255]
 [  252   239   191   225   135   248   154   206   248   248   221   230
    252   250   234   255]
 [  252   252   246   214   150   210   236   238    58   215   209   216
    159   203   217 65472]
 [  252   252   209   202   206   159   226   203   237   159   207   247
    243   231   215   255]
 [  251   234   236   137 65472   149   225   251   174   218   251   247
    241   247   239   255]


In [None]:
# Fetch the ML input set with arguments (0, 4) and report, for each returned
# array in order, its shape followed by its Python type.
DATASET_INPUT_HEIGHTMAP, DATASET_INPUT_CAVES, DATASET_OUTPUT = data.get_ml_input_set(0, 4)

print(np.shape(DATASET_INPUT_HEIGHTMAP), type(DATASET_INPUT_HEIGHTMAP), sep="\n")
print(np.shape(DATASET_INPUT_CAVES), type(DATASET_INPUT_CAVES), sep="\n")
print(np.shape(DATASET_OUTPUT), type(DATASET_OUTPUT), sep="\n")

(4, 16, 16)
<class 'numpy.ndarray'>
(4, 1)
<class 'numpy.ndarray'>
(4, 16, 320, 16)
<class 'numpy.ndarray'>


In [None]:
# Run the generator on one random normal noise vector of length `noise_dim`,
# together with the heightmap and cave density of entry [0, 0].
random_noise = tf.random.normal(shape=[1, noise_dim])
prediction = generator.predict(
    [
        random_noise,
        data.heightmaps[0, 0].reshape(1, 16, 16),
        data.cave_densities[0, 0].reshape(1, 1),
    ]
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [None]:
# Store the first element of the latest prediction on Drive
# (np.save appends the ".npy" extension to the given path).
np.save(file="drive/MyDrive/MC_AI/res1", arr=prediction[0])

In [None]:
# Final cell: ask Colab to unassign this runtime once everything above has run.
# NOTE(review): presumably this disconnects and frees the VM, discarding all
# in-memory state -- make sure every result was written to Drive first.
from google.colab import runtime
runtime.unassign()