In [1]:
import numpy as np
import tensorflow as tf
import keras

# Enable on-demand memory growth for every visible GPU.
for g in tf.config.list_physical_devices(device_type="GPU"):
    tf.config.experimental.set_memory_growth(device=g, enable=True)

# Show every device TensorFlow detected (CPU and GPU).
print(tf.config.list_physical_devices())

# Seed NumPy and TensorFlow so the random draws below are repeatable.
np.random.seed(seed=42)
tf.random.set_seed(seed=42)

2025-09-16 17:27:32.117679: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-16 17:27:32.667645: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-16 17:27:34.735512: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Data Preparation

In [10]:
# Transition table of the grammar: entry i describes node i, and each item of
# "next_node" is a (destination node, emitted character) pair.
# Node 5 lists its only edge twice so that a random branch index of 0 or 1
# is valid at every node; destination 6 has no entry of its own.
nodes = [
    {"current_node": 0, "next_node": [(1, "T"), (2, "P")]},
    {"current_node": 1, "next_node": [(1, "S"), (4, "X")]},
    {"current_node": 2, "next_node": [(2, "T"), (3, "V")]},
    {"current_node": 3, "next_node": [(4, "P"), (5, "V")]},
    {"current_node": 4, "next_node": [(2, "X"), (5, "S")]},
    {"current_node": 5, "next_node": [(6, "E"), (6, "E")]},
]

In [11]:
import pandas as pd

# Tabular view of the transition table: one row per node.
df = pd.DataFrame(data=nodes)
df  # to visualize the graph of nodes

Unnamed: 0,current_node,next_node
0,0,"[(1, T), (2, P)]"
1,1,"[(1, S), (4, X)]"
2,2,"[(2, T), (3, V)]"
3,3,"[(4, P), (5, V)]"
4,4,"[(2, X), (5, S)]"
5,5,"[(6, E), (6, E)]"


In [80]:
def pick_path(randomized_node=False):
    """Draw a random integer used to steer the walk through the grammar.

    Args:
        randomized_node: When falsy (default) the result is a branch index,
            0 or 1. When truthy the result is a node id between 0 and 5.

    Returns:
        The integer drawn from NumPy's global random state.
    """
    upper_bound = 6 if randomized_node else 2
    return np.random.randint(0, upper_bound)


def generate_reber_string(nodes, is_reber=True, is_generator=False, **kwargs):
    """Create Reber-grammar strings, either one at a time or as a labelled stream.

    Args:
        nodes: Transition table; ``nodes[i]["next_node"]`` holds two
            ``(destination, character)`` pairs for node ``i``. Node id 6 is
            treated as the terminal state and has no entry.
        is_reber: ``True`` for a grammatical string, ``False`` for a
            non-grammatical one. In generator mode the string ``"random"``
            draws a fresh True/False label for every sample.
        is_generator: When falsy, a single string is returned. When truthy, a
            generator of ``(string, label)`` tuples is returned.
        **kwargs: ``dataset_size`` (default 10000) sets how many tuples the
            generator yields.

    Returns:
        A ``str`` when ``is_generator`` is falsy, otherwise a generator that
        yields ``(str, bool)`` tuples.

    Raises:
        IndexError: If the walk reaches a node id that has no entry in
            ``nodes``.
    """
    end_node = 6    # node id reached after emitting "E"
    max_steps = 17  # symbols appended after "B" in a non-grammatical string

    def advance(node):
        """Take one random branch out of ``node``; return (next node, char)."""
        branch = pick_path()
        next_node, symbol = nodes[node]["next_node"][branch]
        return next_node, symbol

    def create_reber_string(nodes, is_reber):
        if is_reber:
            # Walk from the start node until the terminal state is reached.
            node, reber = 0, "B"
            while node < end_node:
                node, symbol = advance(node)
                reber += symbol
            return reber

        # Non-grammatical string: walk a fixed number of steps and, whenever
        # the terminal state is hit, resume from a random node.
        # NOTE(review): these strings always have 1 + max_steps characters,
        # so length alone may separate the two classes - confirm this is OK.
        while True:
            node, reber = 0, "B"
            for _ in range(max_steps):
                node, symbol = advance(node)
                reber += symbol
                if node == end_node:
                    node = pick_path(True)
            # If the first "E" is also the last character, the walk never
            # resumed and the result is a grammatical string; draw again so
            # the False label is never wrong.
            if reber.find("E") != len(reber) - 1:
                return reber

    if not is_generator:
        return create_reber_string(nodes, is_reber)

    dataset_size = kwargs.get("dataset_size", 10000)
    random_labels = isinstance(is_reber, str) and is_reber == "random"

    def labelled_samples():
        for _ in range(dataset_size):
            # Resolve the label once per sample so the string and the label
            # that is yielded with it always agree.
            label = bool(pick_path()) if random_labels else bool(is_reber)
            yield create_reber_string(nodes, label), label

    return labelled_samples()

In [81]:
# Print five strings produced with the default (grammatical) setting.
for i in range(5):
    reber_sample = generate_reber_string(nodes)
    print(reber_sample)

BTXSE
BTXXVPSE
BTXSE
BTSSXXVPSE
BPVPXTTTTVVE


In [82]:
# Print ten strings, each next to the coin-flip label it was generated with.
for i in range(10):
    is_reber = bool(pick_path())
    reber_sample = generate_reber_string(nodes, is_reber)
    print(reber_sample, is_reber)

BTSXXTVPSE True
BTXXTTVVE True
BTXSE True
BTXXVPSE True
BPVPSEPXTTVPSEVPSE False
BTSSSXSE True
BPVVE True
BPTVPXVVEPVVEEETVP False
BTSSXSEEPVVEPTVVES False
BPVVEXTTTTTVVEXXTV False


In [140]:
# Wrap the Python generator in a tf.data pipeline. Each element is a scalar
# string paired with a scalar boolean label; the generator is asked for
# 20,000 randomly labelled samples.
dataset = tf.data.Dataset.from_generator(
    lambda: generate_reber_string(
        nodes, is_reber="random", is_generator=True, dataset_size=20000
    ),
    output_signature=(
        tf.TensorSpec(shape=(), dtype=tf.string),
        tf.TensorSpec(shape=(), dtype=tf.bool),
    ),
)
# Shuffle with a 10,000-element buffer.
dataset = dataset.shuffle(buffer_size=10000)