In [None]:
import numpy as np
import pandas as pd

# Sample size and a fixed global NumPy seed, so every run draws the same data.
n_samples = 1000
np.random.seed(42)

# (low, high) bounds of the uniform distribution each input column is drawn from.
# Insertion order matters: it fixes both the column order of the frame and the
# order in which the seeded generator is consumed.
feature_bounds = {
    "material_E": (190, 220),
    "material_density": (7.5, 8.2),
    "surface_finish": (0.1, 3.0),
    "load_mean": (100, 800),
    "load_amplitude": (50, 500),
    "temperature": (40, 160),
    "rpm": (500, 6000),
    "stress_max": (50, 600),
    "stress_ratio": (-1.0, 0.5),
    "geometry_factor": (0.8, 1.5),
    "simulation_time": (0.1, 5.0),
}

data = pd.DataFrame({
    column: np.random.uniform(low, high, n_samples)
    for column, (low, high) in feature_bounds.items()
})

# Synthetic fatigue-life model: a deterministic trend built from three factors,
# plus additive Gaussian noise.
stress_term = 1e7 / (data["stress_max"] * data["geometry_factor"])
thermal_term = np.exp(-0.002 * (data["temperature"] - 40))
ratio_term = 1 - 0.2 * data["stress_ratio"]
noise = np.random.normal(0, 1e5, n_samples)

# Keep the noisy value inside [1e4, 1e7].
noisy_life = stress_term * thermal_term * ratio_term + noise
data["predicted_fatigue_life"] = noisy_life.clip(lower=1e4, upper=1e7)

data.head()


Unnamed: 0,material_E,material_density,surface_finish,load_mean,load_amplitude,temperature,rpm,stress_max,stress_ratio,geometry_factor,simulation_time,predicted_fatigue_life
0,201.236204,7.629593,0.858946,570.892096,307.398145,87.236262,4065.413247,71.3397,0.080402,1.439505,1.93084,10000.0
1,218.521429,7.879331,0.816239,657.676978,412.444548,96.812279,1448.124992,152.724891,0.030925,1.167752,1.731269,59993.310577
2,211.959818,8.111062,2.728138,275.327529,392.072418,142.545687,5298.170098,507.185193,-0.856369,1.307437,0.963154,10000.0
3,207.959755,8.012557,0.823684,537.41187,119.254957,80.800526,3872.139315,471.722596,0.383859,1.105234,3.075607,99441.456643
4,194.680559,8.064593,0.888654,500.222188,117.162261,144.357962,1364.62136,242.85348,-0.147292,1.241024,2.435458,114878.583438


In [2]:
import numpy as np
import pandas as pd

# Re-seed the global NumPy generator so this cell is reproducible on its own:
# with the same seed and the same draw order, the values below do not depend on
# what was executed before it.
np.random.seed(42)
n_samples = 1000

# Base input features, each drawn uniformly between the two given bounds
data = pd.DataFrame({
    "material_E": np.random.uniform(190, 220, n_samples),
    "material_density": np.random.uniform(7.5, 8.2, n_samples),
    "surface_finish": np.random.uniform(0.1, 3.0, n_samples),
    "load_mean": np.random.uniform(100, 800, n_samples),
    "load_amplitude": np.random.uniform(50, 500, n_samples),
    "temperature": np.random.uniform(40, 160, n_samples),
    "rpm": np.random.uniform(500, 6000, n_samples),
    "stress_max": np.random.uniform(50, 600, n_samples),
    "stress_ratio": np.random.uniform(-1.0, 0.5, n_samples),
    "geometry_factor": np.random.uniform(0.8, 1.5, n_samples),
    "simulation_time": np.random.uniform(0.1, 5.0, n_samples)
})

# Simulated FEM-predicted fatigue life: a deterministic trend in stress,
# geometry, temperature and stress ratio, plus additive Gaussian noise,
# limited to [1e4, 1e7].
# NOTE(review): the noise std (1e5) is of the same order as the trend itself
# (at most 1e7 / (50 * 0.8) * 1.2 = 3e5), so the 1e4 floor is reached regularly
# (rows 0 and 2 of data.head() already sit on it) - confirm this is intended.
data["predicted_fatigue_life"] = (
    1e7 / (data["stress_max"] * data["geometry_factor"]) *
    np.exp(-0.002 * (data["temperature"] - 40)) *
    (1 - 0.2 * data["stress_ratio"]) +
    np.random.normal(0, 1e5, n_samples)
).clip(1e4, 1e7)

# Multiplicative deviation of the "measurement" from the prediction
bias_factor = (
    1.0
    + 0.05 * np.sin(data["temperature"] / 50)  # systematic bias due to temperature
    - 0.03 * (data["surface_finish"] - 1.0)    # effect of surface quality
    + np.random.normal(0, 0.05, n_samples)     # random test variability
)

# "Measured" fatigue life = FEM life * bias_factor, limited to [1e4, 1e7].
# The only random scatter is the normal term inside bias_factor; no separate
# additive scatter is applied here.
# NOTE(review): when the prediction sits on the 1e4 floor and bias_factor < 1,
# this clip pins the measurement to 1e4 as well, so the relative error of that
# row is exactly 0 - keep this in mind when reading the error distribution.
data["measured_fatigue_life"] = (
    data["predicted_fatigue_life"] * bias_factor
).clip(1e4, 1e7)

# Signed error of the measurement relative to the prediction, in percent.
# The denominator is never zero because the prediction is clipped to >= 1e4.
data["relative_error_%"] = (
    100 * (data["measured_fatigue_life"] - data["predicted_fatigue_life"])
    / data["predicted_fatigue_life"]
)

# Leave the frame as the cell's last expression so the notebook renders it as a
# rich table instead of wrapped plain text from print().
data.head()


   material_E  material_density  surface_finish   load_mean  load_amplitude  \
0  201.236204          7.629593        0.858946  570.892096      307.398145   
1  218.521429          7.879331        0.816239  657.676978      412.444548   
2  211.959818          8.111062        2.728138  275.327529      392.072418   
3  207.959755          8.012557        0.823684  537.411870      119.254957   
4  194.680559          8.064593        0.888654  500.222188      117.162261   

   temperature          rpm  stress_max  stress_ratio  geometry_factor  \
0    87.236262  4065.413247   71.339700      0.080402         1.439505   
1    96.812279  1448.124992  152.724891      0.030925         1.167752   
2   142.545687  5298.170098  507.185193     -0.856369         1.307437   
3    80.800526  3872.139315  471.722596      0.383859         1.105234   
4   144.357962  1364.621360  242.853480     -0.147292         1.241024   

   simulation_time  predicted_fatigue_life  measured_fatigue_life  \
0         1