In [1]:
import pandas as pd
import numpy as np
from fault_injector import DataFrameFault
from fault_injector import DataFrameInjector

# Original DF

In [2]:
# Baseline ("healthy") data: 5 rows x 4 columns, where every cell holds its own
# 1-D NumPy array of 5 integers. Each column repeats a single pattern on every row.
column_patterns = {
    "ColA": [10, 20, 30, 40, 50],
    "ColB": [0, 0, 0, 0, 0],
    "ColC": [100, 200, 300, 400, 500],
    "ColD": [1, 2, 3, 4, 5],
}

original_df = pd.DataFrame({
    # A fresh array per row, so no two cells share the same underlying buffer.
    name: [np.array(pattern) for _ in range(5)]
    for name, pattern in column_patterns.items()
})

original_df.head()

Unnamed: 0,ColA,ColB,ColC,ColD
0,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
1,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
2,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
3,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
4,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"


# Faulty DF

In [3]:
# Generator configuration, kept as data so the settings can be read at a glance.
generator_settings = {
    "col_names": ["ColA", "ColB", "ColC"],
    "horizontal": True,
    "fault_length": 5,   # default number of values per generated cell
    "df_length": 3,      # number of rows in the generated fault frame
    "repeat": True,
    "multi_fault": False,
    "include_individual_faults": False,
    "persist_state": True,
}
df_fault_gen_obj = DataFrameFault(**generator_settings)

# Faults assigned by keyword, in the order they must be registered.
# ColC overrides the default length (4 instead of 5) and passes a list of stuck
# values; in the displayed result each generated row uses the next list entry.
keyword_fault_specs = [
    {
        "col": "ColA",
        "fault_type": "StuckValueFault",
        "params": {"stuck_val": 1},
    },
    {
        "col": "ColB",
        "fault_type": "DriftFault",
        "params": {"drift_rate": 1, "continuous": True},
    },
    {
        "col": "ColC",
        "fault_type": "StuckValueFault",
        "params": {"stuck_val": [1, 2, 3]},
        "fault_length": 4,
    },
]
for fault_spec in keyword_fault_specs:
    df_fault_gen_obj.assign_fault(**fault_spec)

# NOTE(review): ColD and ColE are not listed in `col_names` above; the displayed
# result nonetheless contains both columns. These two calls pass their
# arguments positionally.
positional_fault_specs = [
    ("ColD", "StuckValueFault", {"stuck_val": 2}),
    ("ColE", "DriftFault", {"drift_rate": 1, "continuous": True}),
]
for fault_args in positional_fault_specs:
    df_fault_gen_obj.assign_fault(*fault_args)

# Materialise the fault frame, print a label, then show the frame as cell output.
fault_df_4 = df_fault_gen_obj.generate_faults()
print("\n--- AFTER ADDING ColD=2 AND ColE=drift ---")
fault_df_4.head()

Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!
Fault Updated!

--- AFTER ADDING ColD=2 AND ColE=drift ---


Unnamed: 0,ColA,ColB,ColC,ColD,ColE
0,"[1, 1, 1, 1, 1]","[1, 2, 3, 4, 5]","[1, 1, 1, 1]","[2, 2, 2, 2, 2]","[1, 2, 3, 4, 5]"
1,"[1, 1, 1, 1, 1]","[6, 7, 8, 9, 10]","[2, 2, 2, 2]","[2, 2, 2, 2, 2]","[6, 7, 8, 9, 10]"
2,"[1, 1, 1, 1, 1]","[11, 12, 13, 14, 15]","[3, 3, 3, 3]","[2, 2, 2, 2, 2]","[11, 12, 13, 14, 15]"


# Injector

In [4]:
# Build an injector around the baseline frame created earlier in the notebook.
injector = DataFrameInjector(original_df)
# Display the frame held by the injector; the rendered output matches `original_df`.
injector.original_df

Unnamed: 0,ColA,ColB,ColC,ColD
0,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
1,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
2,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
3,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
4,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"


# Inject Faulty DF into Original DF

## Just ColB

In [5]:
# Inject only the ColB fault. In the displayed result rows 1-3 are replaced and
# row 4 is untouched, i.e. `stop=4` behaves as an exclusive bound here; the
# injected arrays are rendered as floats.
colb_injection = {
    "faulty_df": fault_df_4,
    "columns_to_inject": ["ColB"],
    "start": 1,
    "stop": 4,
    "replace": True,
}
updated_df_1 = injector.inject_faults(**colb_injection)
updated_df_1.head()

Unnamed: 0,ColA,ColB,ColC,ColD
0,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
1,"[10, 20, 30, 40, 50]","[1.0, 2.0, 3.0, 4.0, 5.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
2,"[10, 20, 30, 40, 50]","[6.0, 7.0, 8.0, 9.0, 10.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
3,"[10, 20, 30, 40, 50]","[11.0, 12.0, 13.0, 14.0, 15.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
4,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"


## ColA and ColB

In [6]:
# Presumably resets the injector to the unmodified frame before the next
# demonstration (per the method name) -- TODO confirm against the library.
injector.restore_original()

# Same row window as the ColB-only example, now covering two columns. The fault
# source is the generator's `df_final` attribute (presumably the frame produced
# by `generate_faults()` -- verify).
two_column_injection = dict(
    faulty_df=df_fault_gen_obj.df_final,
    columns_to_inject=["ColA", "ColB"],
    start=1,
    stop=4,
    replace=True,
)
injector.inject_faults(**two_column_injection)


Unnamed: 0,ColA,ColB,ColC,ColD
0,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
1,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 2.0, 3.0, 4.0, 5.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
2,"[1.0, 1.0, 1.0, 1.0, 1.0]","[6.0, 7.0, 8.0, 9.0, 10.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
3,"[1.0, 1.0, 1.0, 1.0, 1.0]","[11.0, 12.0, 13.0, 14.0, 15.0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
4,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"


## ColA and ColC get the same fault

In [7]:
# Presumably resets the injector to the unmodified frame first (per the method
# name) -- TODO confirm against the library.
injector.restore_original()

# Keys are the columns being overwritten, values are the fault columns to read
# from: in the displayed result ColC receives ColA's stuck-at-1 fault, so ColC's
# own generated fault is not used.
col_map = dict(
    ColA="ColA",
    ColB="ColB",
    ColC="ColA",
)

mapped_injection = dict(
    faulty_df=df_fault_gen_obj.df_final,
    columns_to_inject=list(col_map),  # ["ColA", "ColB", "ColC"], in insertion order
    column_map=col_map,
    start=1,
    stop=4,
    replace=True,
)
injector.inject_faults(**mapped_injection)

Unnamed: 0,ColA,ColB,ColC,ColD
0,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"
1,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 2.0, 3.0, 4.0, 5.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[1, 2, 3, 4, 5]"
2,"[1.0, 1.0, 1.0, 1.0, 1.0]","[6.0, 7.0, 8.0, 9.0, 10.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[1, 2, 3, 4, 5]"
3,"[1.0, 1.0, 1.0, 1.0, 1.0]","[11.0, 12.0, 13.0, 14.0, 15.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[1, 2, 3, 4, 5]"
4,"[10, 20, 30, 40, 50]","[0, 0, 0, 0, 0]","[100, 200, 300, 400, 500]","[1, 2, 3, 4, 5]"


## Reuse same faults for a different dataset

In [None]:
# Second dataset with the same layout as the first (5 rows x ColA..ColD), but
# every cell is its own all-zero array of length 5.
new_df = pd.DataFrame({
    name: [np.array([0, 0, 0, 0, 0]) for _ in range(5)]
    for name in ("ColA", "ColB", "ColC", "ColD")
})

# A separate injector bound to the new frame.
injector2 = DataFrameInjector(new_df)

# Re-apply the previously generated faults, reusing the `col_map` defined in
# the preceding cell (ColC takes ColA's fault).
reused_injection = dict(
    faulty_df=df_fault_gen_obj.df_final,
    columns_to_inject=["ColA", "ColB", "ColC"],
    column_map=col_map,
    start=1,
    stop=4,
    replace=True,
)
injector2.inject_faults(**reused_injection)

Unnamed: 0,ColA,ColB,ColC,ColD
0,"[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]"
1,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 2.0, 3.0, 4.0, 5.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[0, 0, 0, 0, 0]"
2,"[1.0, 1.0, 1.0, 1.0, 1.0]","[6.0, 7.0, 8.0, 9.0, 10.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[0, 0, 0, 0, 0]"
3,"[1.0, 1.0, 1.0, 1.0, 1.0]","[11.0, 12.0, 13.0, 14.0, 15.0]","[1.0, 1.0, 1.0, 1.0, 1.0]","[0, 0, 0, 0, 0]"
4,"[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]","[0, 0, 0, 0, 0]"
