In [39]:
import pyreadr
import numpy as np
import xarray as xr
from bcolz import ctable
import pandas as pd
import shutil

In [29]:
training_data_paths = [
    ('tennesee-eastman-archive/TEP_FaultFree_Training.RData', 'fault_free_training'),
    ('tennesee-eastman-archive/TEP_Faulty_Training.RData', 'faulty_training')
]

testing_data_paths = [
    ('tennesee-eastman-archive/TEP_FaultFree_Testing.RData', 'fault_free_testing'),
    ('tennesee-eastman-archive/TEP_Faulty_Testing.RData', 'faulty_testing')
]

In [50]:
def load_data(spec, output_name):
    output = None
    for file, key in spec:
        print(f'Read {file}')
        data = pyreadr.read_r(file, use_objects=[key])[key]
        print('Convert to bcolz.ctable')
        if output is None:
            shutil.rmtree(f'{output_name}.bcolz', ignore_errors=True)
            output = ctable.fromdataframe(data, rootdir=f'{output_name}.bcolz')
        else:
            shutil.rmtree(f'{key}.bcolz', ignore_errors=True)
            output.append(ctable.fromdataframe(data, rootdir=f'{key}.bcolz'))
            shutil.rmtree(f'{key}.bcolz', ignore_errors=False)
        for rep in range(10):
#             shutil.rmtree(f'{key}.bcolz', ignore_errors=False)
            output.append(ctable.fromdataframe(data, rootdir=f'{key}.bcolz'))
            shutil.rmtree(f'{key}.bcolz', ignore_errors=False)
        print(f'Done {file}')
        del data
    return output

In [51]:
training_data = load_data(training_data_paths, 'training_data')
training_data

Read tennesee-eastman-archive/TEP_FaultFree_Training.RData
Convert to bcolz.ctable
Done tennesee-eastman-archive/TEP_FaultFree_Training.RData
Read tennesee-eastman-archive/TEP_Faulty_Training.RData
Convert to bcolz.ctable
Done tennesee-eastman-archive/TEP_Faulty_Training.RData


ctable((57750000,), [('faultNumber', '<f8'), ('simulationRun', '<f8'), ('sample', '<i4'), ('xmeas_1', '<f8'), ('xmeas_2', '<f8'), ('xmeas_3', '<f8'), ('xmeas_4', '<f8'), ('xmeas_5', '<f8'), ('xmeas_6', '<f8'), ('xmeas_7', '<f8'), ('xmeas_8', '<f8'), ('xmeas_9', '<f8'), ('xmeas_10', '<f8'), ('xmeas_11', '<f8'), ('xmeas_12', '<f8'), ('xmeas_13', '<f8'), ('xmeas_14', '<f8'), ('xmeas_15', '<f8'), ('xmeas_16', '<f8'), ('xmeas_17', '<f8'), ('xmeas_18', '<f8'), ('xmeas_19', '<f8'), ('xmeas_20', '<f8'), ('xmeas_21', '<f8'), ('xmeas_22', '<f8'), ('xmeas_23', '<f8'), ('xmeas_24', '<f8'), ('xmeas_25', '<f8'), ('xmeas_26', '<f8'), ('xmeas_27', '<f8'), ('xmeas_28', '<f8'), ('xmeas_29', '<f8'), ('xmeas_30', '<f8'), ('xmeas_31', '<f8'), ('xmeas_32', '<f8'), ('xmeas_33', '<f8'), ('xmeas_34', '<f8'), ('xmeas_35', '<f8'), ('xmeas_36', '<f8'), ('xmeas_37', '<f8'), ('xmeas_38', '<f8'), ('xmeas_39', '<f8'), ('xmeas_40', '<f8'), ('xmeas_41', '<f8'), ('xmv_1', '<f8'), ('xmv_2', '<f8'), ('xmv_3', '<f8'), ('xm

In [11]:
testing_data = load_data(testing_data_paths, 'testing_data')
testing_data

Read tennesee-eastman-archive/TEP_FaultFree_Testing.RData
Read tennesee-eastman-archive/TEP_Faulty_Testing.RData


ctable((10080000,), [('faultNumber', '<i4'), ('simulationRun', '<f8'), ('sample', '<i4'), ('xmeas_1', '<f8'), ('xmeas_2', '<f8'), ('xmeas_3', '<f8'), ('xmeas_4', '<f8'), ('xmeas_5', '<f8'), ('xmeas_6', '<f8'), ('xmeas_7', '<f8'), ('xmeas_8', '<f8'), ('xmeas_9', '<f8'), ('xmeas_10', '<f8'), ('xmeas_11', '<f8'), ('xmeas_12', '<f8'), ('xmeas_13', '<f8'), ('xmeas_14', '<f8'), ('xmeas_15', '<f8'), ('xmeas_16', '<f8'), ('xmeas_17', '<f8'), ('xmeas_18', '<f8'), ('xmeas_19', '<f8'), ('xmeas_20', '<f8'), ('xmeas_21', '<f8'), ('xmeas_22', '<f8'), ('xmeas_23', '<f8'), ('xmeas_24', '<f8'), ('xmeas_25', '<f8'), ('xmeas_26', '<f8'), ('xmeas_27', '<f8'), ('xmeas_28', '<f8'), ('xmeas_29', '<f8'), ('xmeas_30', '<f8'), ('xmeas_31', '<f8'), ('xmeas_32', '<f8'), ('xmeas_33', '<f8'), ('xmeas_34', '<f8'), ('xmeas_35', '<f8'), ('xmeas_36', '<f8'), ('xmeas_37', '<f8'), ('xmeas_38', '<f8'), ('xmeas_39', '<f8'), ('xmeas_40', '<f8'), ('xmeas_41', '<f8'), ('xmv_1', '<f8'), ('xmv_2', '<f8'), ('xmv_3', '<f8'), ('xm

In [26]:
training_data[training_data.cols.names[3:]][[10, 20, 30]]

array([(0.2348 , 3677.4, 4489.8, 9.3199, 26.695, 42.014, 2703.9, 75.193, 120.39, 0.35435, 80.241, 48.487, 2632.9, 26.304, 48.203, 3102.2, 23.319, 65.774, 230.92, 341.2 , 94.645, 77.569, 32.148, 8.9493, 26.111, 6.7796, 18.826, 1.6824, 32.876, 13.811, 23.921, 1.2803, 18.58 , 2.2525, 4.8822, 2.2569, 0.014489 , 0.81503, 0.1046  , 54.279, 44.352, 62.645, 54.542, 23.133, 61.425, 21.93 , 42.262, 33.648, 42.375, 47.328, 40.344, 17.198),
       (0.27833, 3649.7, 4479.9, 9.3486, 26.387, 42.564, 2701.5, 75.073, 120.4 , 0.33729, 80.384, 50.172, 2630.3, 27.059, 50.066, 3097.6, 22.868, 65.649, 226.86, 341.45, 94.518, 77.321, 32.294, 9.0822, 26.056, 6.9624, 18.749, 1.6767, 33.065, 13.966, 23.52 , 1.3563, 18.466, 2.2213, 4.8492, 2.232 , 0.018032 , 0.87043, 0.10962 , 53.559, 43.529, 63.137, 53.947, 27.761, 60.589, 21.743, 39.398, 38.607, 46.686, 46.688, 41.585, 18.294),
       (0.22515, 3689.6, 4525.4, 9.4095, 27.133, 42.395, 2698.9, 75.073, 120.41, 0.32419, 80.437, 50.174, 2627.2, 24.056, 51.358, 3096

In [27]:
del training_data
del testing_data