In [4]:
import random
from datetime import datetime, timedelta

# Advance a timestamp by a random, strictly positive number of milliseconds
def random_timestamp(start, max_millisec_increment):
    """Return ``start`` moved forward by a random whole number of milliseconds.

    Args:
        start: The ``datetime`` to advance.
        max_millisec_increment: Largest step, in milliseconds. The step is
            drawn with ``random.randint(1, max_millisec_increment)``, so both
            bounds are inclusive.

    Returns:
        A ``datetime`` that lies 1..``max_millisec_increment`` ms after ``start``.
    """
    step_ms = random.randint(1, max_millisec_increment)
    return start + timedelta(milliseconds=step_ms)

# Render one synthetic log entry for a given id and timestamp
def generate_log_line(i, timestamp):
    """Format a log line as ``"<timestamp> - <event> [ID: <i>]"``.

    Args:
        i: Identifier printed in the ``[ID: ...]`` suffix.
        timestamp: ``datetime`` rendered with millisecond precision.

    Returns:
        The formatted line; the event text is picked with ``random.choice``
        from a fixed catalogue of 15 messages.
    """
    catalogue = (
        "User logged in",
        "Session started successfully",
        "Connection error",
        "Reconnection attempt",
        "Database query",
        "Database updated",
        "Security alert issued",
        "Backup started",
        "Backup completed",
        "Configuration file loaded",
        "User logged out",
        "Apache server restarted",
        "RAM memory exceeding limit",
        "Low disk space",
        "Cache cleared successfully",
    )
    message = random.choice(catalogue)
    # %f prints six microsecond digits; dropping the last three leaves milliseconds
    stamp = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    return f"{stamp} - {message} [ID: {i}]"

# Reference instant; every generated entry is stamped after it
start_time = datetime(2023, 1, 1, 0, 0, 0)

# Build 200 entries whose timestamps advance by a random 1-5000 ms step
num_lines = 200
logs = []
current_time = start_time
for line_id in range(num_lines):
    # 5000 ms upper bound, i.e. consecutive entries are at most 5 seconds apart
    current_time = random_timestamp(current_time, 5000)
    logs += [generate_log_line(line_id, current_time)]

# Plant an explicit cause/effect pair every 20 lines: a login at the anchor
# line and the matching session start 1 to 5 lines later
for cause_idx in range(0, num_lines - 10, 20):
    effect_idx = cause_idx + random.randint(1, 5)
    # The first 23 characters of a line are its "YYYY-MM-DD HH:MM:SS.mmm" stamp,
    # which is kept while the event text is replaced
    cause_stamp = logs[cause_idx][:23]
    effect_stamp = logs[effect_idx][:23]
    logs[cause_idx] = f"{cause_stamp} - User logged in [ID: {cause_idx}]"
    logs[effect_idx] = f"{effect_stamp} - Session started successfully [ID: {effect_idx}]"

# Persist the entries, one per line
with open("logs.log", "w") as log_file:
    log_file.writelines(f"{entry}\n" for entry in logs)

print("Log file successfully generated in logs.log")


Log file successfully generated in logs.log


In [1]:
import random
import string
NUM_SEQUENCES = 100  # number of sequences to generate; raise to scale up
EVENT_POOL = list(string.ascii_uppercase)  # Events A-Z
# Causal chains: within a sequence, the surviving events of a chain keep this order
causal_rules = [
    ['A', 'B', 'C'],
    ['D', 'E'],
    ['F', 'G', 'H'],
]
# Common cause pattern: H → I and H → J (but no direct I ↔ J)
common_causes = [('H', ['I', 'J'])]
# Spurious reversed causality: for each pair (a, b), b is emitted before a,
# i.e. L appears before K in sequences
reversed_pairs = [('K', 'L')]
# Independent noisy events
noise_events = ['X', 'Y', 'Z']


def _interleave(segments):
    """Randomly merge ``segments`` into one list, keeping each segment's own order."""
    # One slot per event, labelled with the index of the segment it belongs to;
    # shuffling the labels selects an interleaving uniformly at random.
    slots = [idx for idx, segment in enumerate(segments) for _ in segment]
    random.shuffle(slots)
    cursors = [iter(segment) for segment in segments]
    return [next(cursors[idx]) for idx in slots]


def generate_sequence():
    """Build one synthetic event sequence that embeds the configured patterns.

    Each pattern contributes an ordered segment:

    * every chain in ``causal_rules``, with each event dropped independently
      with 10% probability;
    * every ``(cause, effects)`` in ``common_causes``: the cause is always
      emitted, each effect is kept with 95% probability, and the kept effects
      follow the cause in random relative order;
    * every ``(a, b)`` in ``reversed_pairs``, emitted as ``b`` then ``a``;
    * one to three single events drawn from ``noise_events``.

    The segments are then randomly interleaved. A plain shuffle of the whole
    sequence would erase the orderings above, so the merge keeps the order
    inside every segment and only randomizes how segments overlap.

    Returns:
        list[str]: event labels in emission order. ``'H'`` can occur twice,
        because it is both the tail of a chain and a common cause.
    """
    segments = []
    # Real causal chains
    for chain in causal_rules:
        segments.append([e for e in chain if random.random() > 0.1])  # 10% chance to drop event
    # Common cause-based correlations
    for cause, effects in common_causes:
        kept = [e for e in effects if random.random() > 0.05]
        # The effects share a cause but have no order between themselves
        random.shuffle(kept)
        segments.append([cause] + kept)
    # Spurious reversed causality
    for a, b in reversed_pairs:
        segments.append([b, a])
    # Add noise: every noise event is its own segment, so it can land anywhere
    for _ in range(random.randint(1, 3)):
        segments.append([random.choice(noise_events)])
    return _interleave(segments)
# Write the sequences to disk: one per line, events separated by single spaces
with open("synthetic_sequences_large.txt", "w") as out_file:
    for _ in range(NUM_SEQUENCES):
        print(*generate_sequence(), file=out_file)