# Fault diagnostic using simulated data

We have computers and we want to know whether their CPU is faulty or not.   
To find out, we use a digital twin to build a model of the CPU.   
A digital twin is a software model of a physical object that can be simulated and visualized on a computer.  
This model lets us generate a large amount of data that we will use to train an AI model.

In [1]:
import random as rd
import pandas as pd

from cosapp.drivers import NonLinearSolver, RungeKutta, LinearDoE, RunSingleCase
from cosapp.recorders import DataFrameRecorder

from cpu.systems import CPUSystem

## Sequence definition of dataset simulation

To create our datasets we use two digital twins of a CPU assembly made up of a fan, a CPU, a heat exchanger and a controller. The first works without issue; the second has a broken fan and cannot cool the CPU. The simulated scenario is 100% usage for 20 seconds followed by 0% usage for 10 seconds. We record the temperature of the CPU at the end of those 30 seconds.   

We then run each digital twin over one thousand evenly spaced air-temperature points from 0 to 30°C, for a total of two thousand cases. We then randomly choose 200 samples from the broken twin and 800 from the working one.   
To avoid putting exactly the same data in the training and test sets, we do not use the same number of points when running the digital twins: for the test set we generate one thousand and one points. We then take the first two thousand cases (one thousand broken and one thousand working) to create our test set.   
The data in the dataset affects how our neural network learns. Try changing the dataset: its size, the percentage of each class...   

The data in our datasets are: the temperature of the air, the temperature of the CPU and the voltage (`fan.tension`) that should be applied to make the fan spin.

In [2]:
# Seed the `random` module so the row sampling further down is reproducible.
rd.seed(9)

# Number of air-temperature points swept by the design of experiments.
# NOTE(review): the text above assigns 1001 points to the test set and 1000 to
# the training set, but this single value feeds the only sweep in the notebook.
sampleNumbers = 1001
# Number of rows in the sampled dataset.
datasetSize = 1000
# Share of the sampled rows, in percent, taken from the broken-fan run.
percentageBroken = 20

In [3]:
# Build the CPU system and set the fan's mass-flow scalar to 1.0
# (presumably the healthy-fan value; the broken-fan scenario later uses 0.0).
cpu = CPUSystem("cpu")
cpu.fan.mass_flow_scalar = 1.0

# Design step: a non-linear solver with a single-case runner, extended with the
# system's "exchanger_surface" design method.
design = cpu.add_driver(NonLinearSolver("solver"))
runner = design.add_driver(RunSingleCase("runner"))
design.extend(cpu.design_methods["exchanger_surface"])

# Operating point imposed on the design case: air temperature 40.0, CPU
# temperature 80.0, usage 100.0 (units presumably degrees C and % - TODO confirm).
design.runner.set_values({"fan.T_air": 40.0, "T_cpu": 80.0, "cpu.usage": 100.0})

# run design
cpu.run_drivers()

In [4]:
# Display the heat-exchanger surface held by the system after the design run.
cpu.exchanger.surface

np.float64(0.008467741935483872)

In [5]:
# Attach a LinearDoE driver to the system; the time simulation is nested under it below.
doe = cpu.add_driver(LinearDoE("doe"))

In [6]:
# Nest a third-order Runge-Kutta time driver under the DoE, with its own
# non-linear solver (max_iter=10, factor=1.0) as a child.
time_driver = doe.add_child(RungeKutta(order=3))
solver = time_driver.add_child(NonLinearSolver("solver", max_iter=10, factor=1.0))
# Simulate over the interval [0, 30] with a time step of 0.5.
time_driver.time_interval = [0, 30]
time_driver.dt = 0.5

# Define a simulation scenario
time_driver.set_scenario(
    # Initial conditions: T_cpu at 30 and a fan mass-flow scalar of 1.0.
    init={"T_cpu": 30, "fan.mass_flow_scalar": 1.0},
    values={
        # "fan.T_air": 40., keep this commented out!!! otherwise it gets reset whenever time_driver is called
        # Usage profile: 100 while time < 20, then 0.
        "cpu.usage": "100 if time <20 else 0."
    },
)

In [7]:
# Sweep fan.T_air between 0.0 and 30.0 using `sampleNumbers` points.
doe.add_input_var({"fan.T_air": {"lower": 0.0, "upper": 30.0, "count": sampleNumbers}})

In [8]:
# Record the listed variables into a DataFrame-backed recorder attached to the DoE.
doe.add_recorder(
    DataFrameRecorder(
        includes=["fan.tension", "cpu.usage", "T_cpu", "fan.T_air", "exchanger.surface"]
    )
)

<cosapp.recorders.dataframe_recorder.DataFrameRecorder at 0x7043ee2ef950>

In [9]:
# Display the current value of fan.T_air on the system before launching the sweep.
cpu.fan.T_air

40.0

In [10]:
# Run the system's drivers; the recorder content is read right after as the working-fan data.
cpu.run_drivers()

# Dataset creation

In [11]:
# Recorded results of the run with fan.mass_flow_scalar initialised to 1.0.
df = doe.recorder.data

In [12]:
# One True label per recorded row: every case of this run has a working fan.
working = [True] * len(df)

In [13]:
# Add the `working` label column to the recorded data, then display the frame.
df = df.assign(working=working)
df

Unnamed: 0,Section,Status,Error code,Reference,T_cpu,cpu.usage,exchanger.surface,fan.T_air,fan.tension,working
0,,,0,0,29.541101,0.0,0.008468,0.00,0.0,True
1,,,0,1,29.568053,0.0,0.008468,0.03,0.0,True
2,,,0,2,29.595005,0.0,0.008468,0.06,0.0,True
3,,,0,3,29.621957,0.0,0.008468,0.09,0.0,True
4,,,0,4,29.648909,0.0,0.008468,0.12,0.0,True
...,...,...,...,...,...,...,...,...,...,...
996,,,0,996,44.464961,0.0,0.008468,29.88,6.0,True
997,,,0,997,44.493687,0.0,0.008468,29.91,6.0,True
998,,,0,998,44.522414,0.0,0.008468,29.94,6.0,True
999,,,0,999,44.551140,0.0,0.008468,29.97,6.0,True


In [14]:
# Redefine the scenario with a fan mass-flow scalar of 0.0 (the broken-fan case);
# the usage profile is unchanged.
time_driver.set_scenario(
    init={"T_cpu": 30, "fan.mass_flow_scalar": 0.0},
    values={
        # "fan.T_air": 40., keep this commented out!!! otherwise it gets reset whenever time_driver is called
        # Usage profile: 100 while time < 20, then 0.
        "cpu.usage": "100 if time <20 else 0."
    },
)

In [15]:
# Run the drivers again with the fan.mass_flow_scalar = 0.0 scenario.
cpu.run_drivers()

# Test set creation

In [16]:
# Recorded results of the run with fan.mass_flow_scalar initialised to 0.0.
df2 = doe.recorder.data

In [17]:
# One False label per recorded row: every case of this run has a broken fan.
working = [False] * len(df2)

In [18]:
# Add the `working` label column to the broken-fan data, then display the frame.
df2 = df2.assign(working=working)
df2

Unnamed: 0,Section,Status,Error code,Reference,T_cpu,cpu.usage,exchanger.surface,fan.T_air,fan.tension,working
0,,,0,0,50.298624,0.0,0.008468,0.00,6.0,False
1,,,0,1,50.321205,0.0,0.008468,0.03,6.0,False
2,,,0,2,50.343786,0.0,0.008468,0.06,6.0,False
3,,,0,3,50.366367,0.0,0.008468,0.09,6.0,False
4,,,0,4,50.388948,0.0,0.008468,0.12,6.0,False
...,...,...,...,...,...,...,...,...,...,...
996,,,0,996,72.789485,0.0,0.008468,29.88,12.0,False
997,,,0,997,72.812066,0.0,0.008468,29.91,12.0,False
998,,,0,998,72.834647,0.0,0.008468,29.94,12.0,False
999,,,0,999,72.857228,0.0,0.008468,29.97,12.0,False


In [19]:
# Working-fan frame without the Section/Status/Error code/Reference columns and
# without cpu.usage and exchanger.surface; T_cpu, fan.T_air, fan.tension and
# the `working` label remain.
dfclean = df.drop(
    columns=[
        "Section",
        "Status",
        "Error code",
        "Reference",
        "cpu.usage",
        "exchanger.surface",
    ]
)

In [20]:
# Broken-fan frame without the Section/Status/Error code/Reference columns and
# without cpu.usage and exchanger.surface; T_cpu, fan.T_air, fan.tension and
# the `working` label remain.
df2clean = df2.drop(
    columns=[
        "Section",
        "Status",
        "Error code",
        "Reference",
        "cpu.usage",
        "exchanger.surface",
    ]
)

In [21]:
# Build the sampled dataset: rows with k < datasetSize * percentageBroken / 100
# come from the broken-fan frame (df2clean), the others from the working-fan
# frame (dfclean). Rows are drawn at random without replacement.
#
# The positions not yet drawn are tracked in plain lists, so the source frames
# are never modified (no rebuild needed afterwards) and no DataFrame is copied
# on each draw.
broken_positions = list(range(len(df2clean)))
working_positions = list(range(len(dfclean)))
broken_quota = datasetSize * percentageBroken / 100

dataset = []
for k in range(datasetSize):
    if k < broken_quota:
        source, remaining = df2clean, broken_positions
    else:
        source, remaining = dfclean, working_positions
    if not remaining:
        raise ValueError(
            f"cannot draw row {k}: the source frame has no unused rows left"
        )
    # Pick among the rows still available; pop() removes the chosen position
    # so the same row cannot be drawn twice.
    i = rd.randint(0, len(remaining) - 1)
    row = source.iloc[remaining.pop(i)]
    # The Series name becomes the row label once the list is turned into a DataFrame.
    row.name = k
    dataset.append(row)

In [22]:
# Disabled draft of the sampling loop, kept as a bare string literal so it is
# never executed. It does not remove drawn rows (duplicates are possible), uses
# len(dfclean) as the bound for both frames, and refers to the names
# tailleDataset / percentageCasse rather than datasetSize / percentageBroken.
"""
dataset=[]
for k in range(tailleDataset):
    if k < tailleDataset*percentageCasse/100:
        i=rd.randint(0, len(dfclean)-1)
        dataset.append(df2clean.iloc[i])
    else:
        i=rd.randint(0, len(dfclean)-1)
        dataset.append(dfclean.iloc[i])
    dataset[k].name=k
"""

'\ndataset=[]\nfor k in range(tailleDataset):\n    if k < tailleDataset*percentageCasse/100:\n        i=rd.randint(0, len(dfclean)-1)\n        dataset.append(df2clean.iloc[i])\n    else:\n        i=rd.randint(0, len(dfclean)-1)\n        dataset.append(dfclean.iloc[i])\n    dataset[k].name=k\n'

In [23]:
# Check what type a single row selected with iloc has.
type(dfclean.iloc[1])

pandas.core.series.Series

In [24]:
# Assemble the sampled rows into a DataFrame with an explicit column order.
cols = ["T_cpu", "fan.T_air", "fan.tension", "working"]
dfFinal = pd.DataFrame(dataset, columns=cols)

In [25]:
# Display the assembled dataset.
dfFinal

Unnamed: 0,T_cpu,fan.T_air,fan.tension,working
0,61.002106,14.22,12.0,False
1,64.479608,18.84,12.0,False
2,58.924637,11.46,6.0,False
3,56.463287,8.19,6.0,False
4,53.482571,4.23,6.0,False
...,...,...,...,...
995,37.374200,18.15,0.0,True
996,31.414915,2.76,0.0,True
997,36.791209,16.17,0.0,True
998,33.335178,6.36,0.0,True


In [26]:
# Write the dataset to a CSV file under data/ without the index column; the
# file name encodes datasetSize and percentageBroken.
dfFinal.to_csv(
    f"data/dataset_{datasetSize}_cases_{percentageBroken}_percent_broken.csv", index=False
)

In [27]:
# Build the test set by interleaving the first `datasetSize` rows of each
# frame: even labels hold broken-fan rows, odd labels hold working-fan rows.
# NOTE(review): these rows come from the same frames the sampled dataset was
# drawn from, so the two sets can share rows - confirm this is intended.
dataset = []
for k in range(datasetSize):
    broken_row = df2clean.iloc[k]
    working_row = dfclean.iloc[k]
    broken_row.name = 2 * k
    working_row.name = 2 * k + 1
    dataset.extend((broken_row, working_row))

In [28]:
# Number of rows collected for the test set.
len(dataset)

2000

In [29]:
# Assemble the test rows into a DataFrame and write it to CSV without the index column.
# NOTE(review): the file name reuses datasetSize and percentageBroken, although
# the loop that fills `dataset` appends 2 * datasetSize rows, half of them from
# the broken-fan frame - the name does not describe the file's content.
cols = ["T_cpu", "fan.T_air", "fan.tension", "working"]
testfinal = pd.DataFrame(dataset, columns=cols)
testfinal.to_csv(
    f"data/test_set_{datasetSize}_cases_{percentageBroken}_percent_broken.csv", index=False
)