New data generation file

In [None]:
import numpy as np
import pandas as pd
from noise import *
import random
import os 
from sklearn.model_selection import train_test_split  # Import train_test_split

Functions Used for Data Generation

In [None]:
# x values
def generate_x_values(start_x, end_x, n_datapoints):
    # create n x values
    xs = np.linspace(start_x, end_x, n_datapoints)
    return xs

In [None]:
# True functions
def x(xs):
    return x

def x_sq(xs):
    return xs**2

def sin(xs):
    return np.sin(xs)

In [90]:
# Noise functions
def pink(xs):
    return pink_noise(xs)

def uniform(xs, lower_bound, upper_bound):
    return np.random.uniform(lower_bound, upper_bound)

In [91]:
# SNR
def calculate_snr(y_true, y_noise): ### write own code later
    # Calculate signal power
    signal_power = np.mean(y_true ** 2)
    # Calculate noise power
    noise_power = np.mean((y_noise - y_true) ** 2)
    # Calculate SNR
    snr = signal_power / noise_power
    # Convert to dB
    snr_db = 10 * np.log10(snr)
    return snr, snr_db

Change Parameters for Data Generation Here

In [92]:
# Dataset parameters
n_datapoints = 100  # number of (total) datapoints generated
start_x, end_x = -10, 10  # x values to be used

# Train/validation split parameters
train_size = 0.7
validation_size = 0.2
test_size = 0.1

# Function parameters
func = sin  # Available: x, x_sq, sin

# Noise parameters
noise = uniform  # Available: pink, uniform
upper_bound, lower_bound = -1, 1  # If uniform noise

Generation of Datasets

In [93]:
# Dataset generation
xs = generate_x_values(start_x, end_x, n_datapoints)  # x values
y_true = function(xs)
if noise == pink:
    y_added_noise = noise(xs)
elif noise == uniform:
    y_added_noise = noise(xs, lower_bound, upper_bound)
y_noise = y_true + y_added_noise

# Calculate snr, snr_db for the entire dataset
snr, snr_db = calculate_snr(y_true, y_noise)

In [94]:
# Gather data
data = {'x': xs,'y_noise': y_noise, 'y_true': y_true}
df = pd.DataFrame(data)

# Split into train/validation/test sets
train_df, test_val_test_df = train_test_split(df, train_size=train_size, random_state=42)
if test_size == 0:  # to be able to run the code when we don't need a test set
    validation_df = test_val_test_df
    test_df = None
else:
    validation_df, test_df = train_test_split(test_val_test_df, train_size=validation_size/(validation_size+test_size), random_state=42)

In [95]:
# Convert DataFrame to CSV
print(train_df.head())
print("------------")
print(test_df.head())
print("------------")
print(validation_df.head())

           x   y_noise    y_true
11 -7.777778 -1.400727 -0.997098
47 -0.505051 -0.887481 -0.483852
85  7.171717  0.372518  0.776147
28 -4.343434  0.529076  0.932705
93  8.787879  0.191076  0.594705
------------
           x   y_noise    y_true
96  9.393939 -0.372795  0.030834
4  -9.191919 -0.634389 -0.230760
42 -1.515152 -1.402081 -0.998452
77  5.555556 -1.068731 -0.665102
10 -7.979798 -1.395725 -0.992096
------------
           x   y_noise    y_true
44 -1.111111 -1.299821 -0.896192
76  5.353535 -1.205040 -0.801411
39 -2.121212 -1.255936 -0.852307
33 -3.333333 -0.213061  0.190568
30 -3.939394  0.312193  0.715822


Save Data

In [None]:
# Naming of the folder and file
function_name = function.__name__
noise_name = noise.__name__
folder_name = noise_name + "_" + function_name
print(folder_name)

# Create folder
os.makedirs(f'./datasets/{folder_name}', exist_ok=True)

# Save training, validation, and test sets to CSV files
df.to_csv(f'./datasets/{folder_name}/true_data.csv', index=False)
train_df.to_csv(f'./datasets/{folder_name}/train_data.csv', index=False)
validation_df.to_csv(f'./datasets/{folder_name}/validation_data.csv', index=False)
test_df.to_csv(f'./datasets/{folder_name}/test_data.csv', index=False)

# Save parameters to a text file
file_path = f'./datasets/{folder_name}/params.txt'

with open(file_path, "w") as file:
    file.write(f"n_datapoints: {n_datapoints}\n")
    file.write(f"start_x, end_x: {start_x, end_x }\n")
    file.write(f"snr, snr_db: {snr, snr_db}\n")
    file.write(f"function name: {func_name}\n")
    file.write(f"noise:{noise_name}")

pink_sin
