In [5]:
import pandas as pd
import numpy as np

# Row counts: the fully random base block and each single-gas sweep block.
num_val = 300
num_sweep = 50

# Sampling range (low, high) for each gas: NO, NO2 and N2O.
# NOTE(review): every value is multiplied by 1e-9 before it is stored, so the
# ranges are presumably in ppb -- confirm the intended unit.
no_range = (0.1, 300)
no2_range = (0.1, 200)
n2o_range = (260, 420)

# Factor applied to every sampled or fixed value before it goes into a frame.
unit_scale = 1e-9

# Seed the global NumPy generator so all draws below are reproducible.
# The draws happen in a fixed order (base NO, NO2, N2O, then the NO, NO2 and
# N2O sweeps); reordering them changes the generated data.
np.random.seed(42)

# Base block: num_val rows, all three gases drawn independently and uniformly.
no_samples = np.random.uniform(*no_range, num_val)
no2_samples = np.random.uniform(*no2_range, num_val)
n2o_samples = np.random.uniform(*n2o_range, num_val)

df = pd.DataFrame({
    'NO': no_samples * unit_scale,
    'NO2': no2_samples * unit_scale,
    'N2O': n2o_samples * unit_scale,
})

# Midpoint of each range; used for the gases that are held constant in a sweep.
fixed_no = np.mean(no_range)
fixed_no2 = np.mean(no2_range)
fixed_n2o = np.mean(n2o_range)
fixed_values = {'NO': fixed_no, 'NO2': fixed_no2, 'N2O': fixed_n2o}


def _single_gas_block(varied_gas, varied_values):
    """Build a frame in which one gas varies and the other two are constant.

    Parameters
    ----------
    varied_gas : str
        Column that receives ``varied_values``: 'NO', 'NO2' or 'N2O'.
    varied_values : numpy.ndarray
        Unscaled samples for the varied gas, one per output row.

    Returns
    -------
    pandas.DataFrame
        Columns 'NO', 'NO2', 'N2O' in that order, ``len(varied_values)`` rows,
        with every value multiplied by ``unit_scale``. The non-varied columns
        hold the scaled midpoint of their range.
    """
    columns = {
        gas: np.full(len(varied_values), midpoint * unit_scale)
        for gas, midpoint in fixed_values.items()
    }
    # Re-assigning an existing key keeps its position, so column order is stable.
    columns[varied_gas] = varied_values * unit_scale
    return pd.DataFrame(columns)


# Sweep where only NO varies
no_varied = np.random.uniform(*no_range, num_sweep)
df_no_only = _single_gas_block('NO', no_varied)

# Sweep where only NO2 varies
no2_varied = np.random.uniform(*no2_range, num_sweep)
df_no2_only = _single_gas_block('NO2', no2_varied)

# Sweep where only N2O varies
n2o_varied = np.random.uniform(*n2o_range, num_sweep)
df_n2o_only = _single_gas_block('N2O', n2o_varied)

# Stack the random base block and the three single-gas sweep blocks into one
# frame; ignore_index=True renumbers the rows 0..n-1.
label_blocks = [df, df_no_only, df_no2_only, df_n2o_only]
df = pd.concat(label_blocks, ignore_index=True)

# Sanity checks: no rows lost, the three gas columns lined up across blocks
# (mismatched column names would add extra columns filled with NaN).
expected_rows = sum(len(block) for block in label_blocks)
assert df.shape == (expected_rows, 3), df.shape
assert list(df.columns) == ['NO', 'NO2', 'N2O'], list(df.columns)
assert not df.isna().any().any()

print(df.shape)  # (450, 3): 300 random rows + 3 sweeps of 50 rows each

# Save to CSV (without the index column) in the current working directory
output_path = "NOx_labels.csv"
df.to_csv(output_path, index=False)

# Last expression of the cell: display the path that was written.
output_path

(450, 3)


'NOx_labels.csv'

In [6]:
# Preview the first five rows; a bare last-line expression lets the notebook
# render the frame with its rich display instead of plain printed text.
df.head()

             NO           NO2           N2O
0  1.124246e-07  1.043118e-08  2.870296e-07
1  2.852192e-07  1.063178e-07  3.045745e-07
2  2.196250e-07  1.081730e-07  2.883217e-07
3  1.796377e-07  1.275222e-07  2.741924e-07
4  4.688999e-08  1.452457e-07  2.793017e-07
