In [35]:
import numpy as np
from numpy import nan
import pandas as pd

import pickle
import collections
from collections import defaultdict 

from matplotlib import pyplot as plt
from os import listdir
from os.path import isfile, join

import re

In [36]:
# Convert field names to dict for easy access.
# Can be hard coded 
# 
fields_path = '../../input/training_validation_2/fields.csv'  
fields_df = pd.read_csv(fields_path)
fields_df.columns = ['name', 'f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6']

fields_dict = {}

for idx in range(fields_df.shape[0]):
    name = fields_df.loc[idx, 'name']

    _fields = []
    
    for f in fields_df.columns[1:]:
        if not (str(fields_df.loc[idx, f]) == 'nan'):
            _fields.append(name + "_" + str(fields_df.loc[idx, f]))
    
    fields_dict[idx] = {'name': fields_df.loc[idx, 'name'] , 'fields': _fields}

fields_dict

{0: {'name': 'CpuTemperature',
  'fields': ['CpuTemperature_vMax',
   'CpuTemperature_vMin',
   'CpuTemperature_vStd',
   'CpuTemperature_value']},
 1: {'name': 'DurationPickToPick',
  'fields': ['DurationPickToPick_vCnt',
   'DurationPickToPick_vFreq',
   'DurationPickToPick_vMax',
   'DurationPickToPick_vMin',
   'DurationPickToPick_vStd',
   'DurationPickToPick_vTrend',
   'DurationPickToPick_value']},
 2: {'name': 'DurationRobotFromFeederToTestBench',
  'fields': ['DurationRobotFromFeederToTestBench_vCnt',
   'DurationRobotFromFeederToTestBench_vFreq',
   'DurationRobotFromFeederToTestBench_vMax',
   'DurationRobotFromFeederToTestBench_vMin',
   'DurationRobotFromFeederToTestBench_vStd',
   'DurationRobotFromFeederToTestBench_vTrend',
   'DurationRobotFromFeederToTestBench_value']},
 3: {'name': 'DurationRobotFromTestBenchToFeeder',
  'fields': ['DurationRobotFromTestBenchToFeeder_vCnt',
   'DurationRobotFromTestBenchToFeeder_vFreq',
   'DurationRobotFromTestBenchToFeeder_vMax',
  

In [37]:
# Get class id and run id from filename
def parse_class_name(fname):
    p = re.compile("^class[^\d]*(\d+)_(\d+).*.csv")
    m = p.match(fname)
    
    return m.groups()   


In [38]:
# Load one data file and return in a data frame
def load_data_file(path, fname):
    fullpath = join(path,fname)
    df = pd.read_csv(fullpath)
    df.columns = ['name', 'data']
    
    dfx = []
    
    for f in fields_dict:
        name = fields_dict[f]['name']
        fields = fields_dict[f]['fields']
        
        data = eval(df.loc[f,'data']) # convert data to array
        
        new_df = pd.DataFrame(data)
        if (f==33) and (new_df.shape[1] == 6): # NumberFuseDetected has a special case!
            new_df[6] = new_df[5]
            new_df[5] = np.NaN
            

        new_df.columns = fields_dict[f]['fields']
        
        dfx.append(new_df)
        
    merged_df = pd.concat(dfx, axis=1) # Merge columns
    
    c, r = parse_class_name(fname) # Get class id and run id

    # Add class labels and run id
    merged_df['class'] = int(c)
    merged_df['run'] = int(r)

    return merged_df

In [39]:
# Load data files from a directory and return merged data frame
def load_data_files(path):
    
    print ("In", path)
    files = [] 
    for f in listdir(path):
        if (isfile(join(path, f)) and (f.startswith("class"))):
            files.append(f)
    
    data_df_list = []
    for fname in files:
        print ("Loading:", fname)
        
        df = load_data_file(path, fname)
        
        data_df_list.append(df)

    data_df = pd.concat(data_df_list, axis=0) # Merge data frames
    
    return data_df

In [None]:
%%time
data_df_1 = load_data_files("../../input/training_validation_1/")
data_df_2 = load_data_files("../../input/training_validation_2/")
data_df_3 = load_data_files("../../input/ModelRefinement/")

In ../../input/training_validation_1/
Loading: class_ 0_36_data.csv
Loading: class_ 0_18_data.csv
Loading: class_ 0_76_data.csv
Loading: class_ 0_5_data.csv
Loading: class_ 0_45_data.csv
Loading: class_ 0_57_data.csv
Loading: class_ 0_47_data.csv
Loading: class_ 0_43_data.csv
Loading: class_ 0_8_data.csv
Loading: class_ 0_55_data.csv
Loading: class_ 0_9_data.csv
Loading: class_ 0_19_data.csv
Loading: class_ 0_31_data.csv
Loading: class_ 0_35_data.csv
Loading: class_ 0_74_data.csv
Loading: class_ 0_20_data.csv
Loading: class_ 0_27_data.csv
Loading: class_ 0_1_data.csv
Loading: class_ 0_32_data.csv
Loading: class_ 0_11_data.csv
Loading: class_ 0_67_data.csv
Loading: class_ 0_56_data.csv
Loading: class_ 0_0_data.csv
Loading: class_ 0_60_data.csv
Loading: class_ 0_29_data.csv
Loading: class_ 0_41_data.csv
Loading: class_ 0_25_data.csv
Loading: class_ 0_66_data.csv
Loading: class_ 0_23_data.csv
Loading: class_ 0_6_data.csv
Loading: class_ 0_75_data.csv
Loading: class_ 0_17_data.csv
Loading:

In [None]:
data_df_1.shape, data_df_2.shape, data_df_3.shape

In [None]:
# Uncomment to store data frames

data_df_1.to_csv("../../data/training_validation_1.csv", index=False)
data_df_2.to_csv("../../data/training_validation_2.csv", index=False)
data_df_3.to_csv("../../data/model_refinement.csv", index=False)
