Package manager:
Conda

Environment: Python 3.10

Packages:
pyarrow, ydata-synthetic, jupyter, pandas

In [11]:
from pathlib import Path

import pandas as pd

Reading original data

In [12]:
# Load the original dataset from its parquet file into a DataFrame.
original_data = pd.read_parquet('data/test_107_reduced.parquet')

Defining the relevant column groups

In [13]:
# Grouping keys: every distinct combination of values in these columns forms one
# failure group. They are also the columns of the group metadata table.
failure_columns = [
	'cd_counter_rollers_installation',
	'ld_counter_rollers_installation',
	'closing_device_failure',
	'misalignment_cd_vs_ld',
	'misalignment_cd_vs_ld_sill_gap',
	'belt_tension',
	'pulley_is_touching_belt',
	'zero_position',
]

# Columns carried unchanged into every exported group file.
environment_columns = ['cmcouplerfriction', 'cmdoorfriction', 'cmelectronicage', 'cmvibration']
static_state_columns = ['doorcyclecounter']

# Columns holding one sequence per row; save_groups() explodes them so that each
# element of the sequence becomes its own row.
# 'time' is currently left out, so it is neither selected nor exploded.
# NOTE(review): confirm whether 'time' should be part of the export.
time_state_columns = ['doorforce', 'doorspeed', 'doorposition']

Organizing the data based on failure types

In [14]:
# Partition the rows by every distinct combination of failure-column values.
# NOTE(review): groupby defaults to dropna=True, which leaves out rows that have
# a missing value in any key column — confirm that is intended.
failure_groups = original_data.groupby(failure_columns)

In [17]:
# Collect the key tuple of every failure group, in groupby iteration order, into
# one metadata table. Row i of this table describes the group that save_groups()
# writes to group_{i}.parquet.
failure_groups_meta = pd.DataFrame(
	[group_key for group_key, _group_rows in failure_groups],
	columns=failure_columns,
)

# This is the first write into data/preprocessed/ and to_parquet does not create
# missing parent directories, so make sure the directory exists (no-op when it
# is already there).
Path('data/preprocessed').mkdir(parents=True, exist_ok=True)
failure_groups_meta.to_parquet('data/preprocessed/group_meta.parquet')
failure_groups_meta

Unnamed: 0,cd_counter_rollers_installation,ld_counter_rollers_installation,closing_device_failure,misalignment_cd_vs_ld,misalignment_cd_vs_ld_sill_gap,belt_tension,pulley_is_touching_belt,zero_position
0,-1.0,0.0,False,0.0,0.0,0.0,False,0.0
1,0.0,-1.0,False,0.0,0.0,0.0,False,0.0
2,0.0,0.0,False,-1.0,0.0,0.0,False,0.0
3,0.0,0.0,False,0.0,0.0,-1.0,False,0.0
4,0.0,0.0,False,0.0,0.0,0.0,False,0.0
5,0.0,0.0,False,0.0,0.0,0.0,False,1.0
6,0.0,0.0,False,0.0,0.0,1.0,False,0.0
7,0.0,0.0,False,0.0,1.0,0.0,False,0.0
8,0.0,0.0,False,1.0,0.0,0.0,False,0.0
9,0.0,0.0,True,0.0,0.0,0.0,False,0.0


Saving the data in separate files based on failure types

In [19]:
def save_groups():
	"""Write each failure group to its own parquet file, one row per sample.

	Iterates the notebook-level ``failure_groups``. For every group it keeps the
	environment, static-state and time-state columns, explodes the time-state
	columns so each sequence element becomes a row, and writes the result to
	``data/preprocessed/group_{i}.parquet``, where ``i`` is the group's position
	in iteration order. The exploded rows keep the index label of the row they
	came from.

	Raises:
		AssertionError: if the key of group ``i`` differs from row ``i`` of
			``failure_groups_meta``, or if the number of exploded rows differs
			from the total number of ``doorforce`` elements in the group.
	"""
	ref_values = list(failure_groups_meta.itertuples(index=False, name=None))

	for group_index, (values, group) in enumerate(failure_groups):
		# Files are named by position only, so position i must describe the same
		# group as row i of the metadata table.
		assert ref_values[group_index] == values, (
			f'group {group_index}: key {values!r} does not match metadata row '
			f'{ref_values[group_index]!r}'
		)

		g = group[environment_columns + static_state_columns + time_state_columns]
		g_exploded = g.explode(time_state_columns)

		# Sanity check: exploding must yield exactly one row per sequence element.
		# The lengths are taken column-wise instead of via group.iloc[j], which
		# would build a whole row Series per row just to read one cell.
		# An empty sequence explodes to a single NaN row and therefore trips
		# this check, exactly as it did before.
		expected_num_rows = group['doorforce'].map(len).sum()
		assert expected_num_rows == g_exploded.shape[0], (
			f'group {group_index}: expected {expected_num_rows} exploded rows, '
			f'got {g_exploded.shape[0]}'
		)

		g_exploded.to_parquet(f'data/preprocessed/group_{group_index}.parquet')

save_groups()

Viewing a failure group file

In [21]:
# Position of the group file to inspect; it corresponds to the row with the same
# position in group_meta.parquet.
group_index = 0
current_group = pd.read_parquet(f'data/preprocessed/group_{group_index}.parquet')
# Bare last expression so the notebook renders the frame.
current_group

Unnamed: 0,cmcouplerfriction,cmdoorfriction,cmelectronicage,cmvibration,doorcyclecounter,doorforce,doorspeed,doorposition
1573,113.800003,107.099998,1.02,1.2,3082,0.000000,-0.0017,0.0067
1573,113.800003,107.099998,1.02,1.2,3082,0.000000,-0.0017,0.0067
1573,113.800003,107.099998,1.02,1.2,3082,0.000000,-0.0017,0.0067
1573,113.800003,107.099998,1.02,1.2,3082,-0.100000,-0.0017,0.0067
1573,113.800003,107.099998,1.02,1.2,3082,0.500000,-0.0017,0.0067
...,...,...,...,...,...,...,...,...
1635,108.199997,148.300003,0.82,1.0,3232,-88.699997,-0.0982,-0.0400
1635,108.199997,148.300003,0.82,1.0,3232,-121.400002,-0.0079,-0.0403
1635,108.199997,148.300003,0.82,1.0,3232,-93.400002,-0.0537,-0.0423
1635,108.199997,148.300003,0.82,1.0,3232,-142.500000,-0.0072,-0.0424
