# PHME21 Data Challenge Submission by Team-GTU
## Test Performance

Tested with
* Python 3.7
* Scikit-learn 0.23.2
* XGBoost 1.4.0

In [None]:
from numpy import nan
import pandas as pd
import numpy as np
import os
import pickle
from os.path import isfile, join
import re
from collections import Counter

In [None]:
# Get class id and run id from filename
def parse_class_name(fname):
    """Extract the class id and run id from a data file name.

    The name must start with ``class``, optionally followed by non-digit
    characters, then ``<class id>_<run id>``, and must contain ``.csv``
    after that (e.g. ``"class_ 4_4_data.csv"``).

    Args:
        fname: File name including the ``.csv`` extension.

    Returns:
        Tuple ``(class_id, run_id)`` of digit strings.

    Raises:
        ValueError: If ``fname`` does not follow the expected pattern.
    """
    # Raw string so that \d is a regex escape, and an escaped dot so that
    # only a literal ".csv" is accepted.
    match = re.match(r"^class[^\d]*(\d+)_(\d+).*\.csv", fname)
    if match is None:
        raise ValueError(
            "Cannot parse class id and run id from file name: {!r}".format(fname)
        )

    return match.groups()

In [None]:
# Load one data file and return in a data frame
def load_data_file(path, fname, fields_dict):
    """Load one experiment CSV and return its signals as a single data frame.

    Args:
        path: Directory that contains the file.
        fname: File name without the ``.csv`` extension.
        fields_dict: Mapping from a row label of the CSV to a dict whose
            ``'fields'`` entry lists the column names for that row's data.

    Returns:
        A data frame with the columns of every entry in ``fields_dict``
        placed side by side, plus integer ``'class'`` and ``'run'`` columns
        parsed from the file name.
    """
    fname = "{}.csv".format(fname)
    df = pd.read_csv(join(path, fname))
    df.columns = ['name', 'data']

    frames = []

    for f in fields_dict:
        fields = fields_dict[f]['fields']

        # NOTE(security): eval() executes whatever expression the file holds;
        # only use this on trusted data files. It presumably relies on the
        # module-level `nan` import to parse missing values - verify before
        # swapping in a stricter parser.
        data = eval(df.loc[f, 'data'])  # convert data to array

        new_df = pd.DataFrame(data)
        if (f == 33) and (new_df.shape[1] == 6):  # NumberFuseDetected has a special case!
            # Move the last column one slot to the right and leave a NaN
            # column in its place so the frame has 7 columns.
            new_df[6] = new_df[5]
            new_df[5] = np.nan  # np.NaN alias was removed in NumPy 2.0

        new_df.columns = fields

        frames.append(new_df)

    merged_df = pd.concat(frames, axis=1)  # Merge columns

    c, r = parse_class_name(fname)  # Get class id and run id

    # Add class labels and run id
    merged_df['class'] = int(c)
    merged_df['run'] = int(r)

    return merged_df

In [None]:
def fill_nan_values(data, name, fields):
    """Linearly interpolate missing values in the given columns of ``data``.

    Args:
        data: Data frame to process; it is modified in place when any of the
            selected columns contains a missing value.
        name: Not used by this function.
        fields: List of column names to process.

    Returns:
        The ``fields`` columns of ``data`` after filling.
    """
    selected = data[fields]

    # Nothing to fill: hand back the selected columns untouched.
    if not selected.isnull().values.any():
        return data[fields]

    # limit_direction='both' also fills gaps at the start and end.
    data[fields] = selected.interpolate(method='linear', limit_direction='both')
    return data[fields]

In [None]:
# split a sequence into samples
def create_sequence(sequence, n_steps):
    """Split ``sequence`` into overlapping windows of ``n_steps`` items.

    A window starts at every position from which ``n_steps`` items are
    still available; consecutive windows are shifted by one item.

    Args:
        sequence: Indexable, sliceable sequence (e.g. a 2-D array of rows).
        n_steps: Window length.

    Returns:
        ``np.array`` of the windows, in order of their start position.
    """
    total = len(sequence)
    windows = [
        sequence[start:start + n_steps]
        for start in range(total)
        if start + n_steps <= total  # window must not run past the end
    ]
    return np.array(windows)

def create_dataset_for_run(df, ws, sensor_list):
    """Build flattened sliding-window samples and their labels for one run.

    Args:
        df: Data frame of a single run; must contain a ``'class'`` column.
        ws: Window size (number of consecutive rows per sample).
        sensor_list: Column names to keep as model inputs.

    Returns:
        Tuple ``(seq, labels)`` where ``seq`` has one row per window with
        the window's values flattened into 1-D, and ``labels`` holds the
        first ``len(seq)`` values of the ``'class'`` column.
    """
    sensor_values = df.filter(sensor_list).values

    # One window per start position, then flatten each window to a vector.
    windows = create_sequence(sensor_values, n_steps=ws)
    window_count = windows.shape[0]
    flat_windows = windows.reshape((window_count, -1))

    # One label per window.
    window_labels = df['class'].values[:window_count]

    # TODO: What is RUL_Max in this context?

    return flat_windows, window_labels

# TODO: X_t, X_tp1, y_t, y_tp1 should be calculated per run.
# TODO: Then should be merged into one X_t, X_tp1, y_t, y_tp1.

def create_datasets(df, ws, sensor_list):
    """Build the windowed dataset for every run in ``df`` and stack them.

    Args:
        df: Data frame with a ``'runId'`` column, a ``'class'`` column and
            the sensor columns.
        ws: Window size passed on to ``create_dataset_for_run``.
        sensor_list: Sensor column names used as model inputs.

    Returns:
        Tuple ``(X, y, run_list, l_len_runs)``: the stacked samples, the
        flattened labels, the unique run ids in order of appearance, and
        the number of samples produced by each run.
    """
    run_ids = df['runId'].unique()

    feature_frames = []
    label_frames = []
    samples_per_run = []

    for run_id in run_ids:
        run_rows = df[df['runId'] == run_id]
        features, labels = create_dataset_for_run(run_rows, ws, sensor_list)

        feature_frames.append(pd.DataFrame(features))
        label_frames.append(pd.DataFrame(labels))
        samples_per_run.append(len(features))

    # Stack the per-run frames row-wise.
    all_features = pd.concat(feature_frames, axis=0)
    all_labels = pd.concat(label_frames, axis=0)

    return all_features.values, all_labels.values.flatten(), run_ids, samples_per_run

In [None]:
def get_optimum_point_degradation(y_pred, ws):
    """Pick the final class and the earliest point at which it is the mode.

    The final class is the most frequent value in ``y_pred``. Predictions
    are then consumed in chunks of 5; the first chunk boundary at which the
    most frequent value seen so far equals the final class determines the
    returned time.

    Args:
        y_pred: Non-empty, sliceable sequence of hashable predictions.
        ws: Window size; added as an offset to the returned time.

    Returns:
        Tuple ``(label, time)``. ``time`` is ``ws + 5`` when only one class
        was predicted, otherwise ``ws`` plus the end index of the first
        chunk at which the running mode matches ``label``.
    """
    chunk_size = 5

    overall_counts = Counter(y_pred)
    majority_class = overall_counts.most_common(1)[0][0]

    # Only one class was ever predicted: the first chunk is enough.
    if len(overall_counts) == 1:
        return majority_class, ws + chunk_size

    running_counts = Counter()
    for start in range(0, len(y_pred), chunk_size):
        running_counts.update(list(y_pred[start:start + chunk_size]))

        running_mode = running_counts.most_common(1)[0][0]
        if running_mode == majority_class:
            return majority_class, ws + start + chunk_size

    return majority_class, len(y_pred)

In [None]:
def get_rank_class(label):
    """Return the hard-coded feature ranking associated with ``label``.

    Labels 0, 2, 3, 4, 5, 7, 9, 11 and 12 each have their own ranking; any
    other label gets the same ranking as label 0.

    Args:
        label: Predicted class label.

    Returns:
        A new list of feature names in ranked order.
    """
    # Ranking for label 0, also used as the fallback for unknown labels.
    default_ranking = ['Vacuum', 'CpuTemperature', 'DurationPickToPick', 'TotalCpuLoadNormalized', 'NumberFuseDetected', 'FuseOutsideOperationalSpace', 'ErrorFrame', 'EPOSCurrent', 'FuseHeatSlopeOK', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'FuseIntoFeeder', 'NumberFuseEstimated', 'ValidFrame', 'FeederAction2', 'FusePicked', 'EPOSVelocity', 'FeederBackgroundIlluminationIntensity', 'IntensityTotalImage', 'FeederAction4', 'FeederAction1', 'SharpnessImage', 'FeederAction3', 'DurationTestBenchClosed', 'FuseHeatSlope', 'ValidFrameOptrisPIIRCamera', 'Pressure', 'LightBarrierActiveTaskDuration1', 'DurationRobotFromTestBenchToFeeder', 'ProcessCpuLoadNormalized', 'FuseHeatSlopeNOK', 'DurationRobotFromFeederToTestBench', 'VacuumFusePicked', 'FuseTestResult', 'SmartMotorSpeed', 'Temperature', 'TotalMemoryConsumption', 'Humidity', 'FuseCycleDuration', 'ProcessMemoryConsumption', 'LightBarrierPassiveTaskDuration1', 'VacuumValveClosed', 'TemperatureThermoCam', 'EPOSPosition', 'IntensityTotalThermoImage', 'SmartMotorPositionError']

    # (label, ranking) pairs, checked in order with ==.
    ranking_table = (
        (0, default_ranking),
        (2, ['CpuTemperature', 'FuseHeatSlopeNOK', 'FuseCycleDuration', 'ValidFrameOptrisPIIRCamera', 'SharpnessImage', 'ErrorFrame', 'FuseHeatSlopeOK', 'FuseHeatSlope', 'FuseOutsideOperationalSpace', 'NumberFuseDetected', 'FeederBackgroundIlluminationIntensity', 'IntensityTotalImage', 'SmartMotorPositionError', 'IntensityTotalThermoImage', 'EPOSCurrent', 'FeederAction3', 'Vacuum', 'LightBarrierTaskDuration', 'NumberEmptyFeeder', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'EPOSVelocity', 'FeederAction4', 'EPOSPosition', 'FuseTestResult', 'Temperature', 'FeederAction2', 'FuseIntoFeeder', 'LightBarrierPassiveTaskDuration1', 'SmartMotorSpeed', 'DurationPickToPick', 'LightBarrierActiveTaskDuration1', 'TotalMemoryConsumption', 'ValidFrame', 'DurationRobotFromFeederToTestBench', 'FusePicked', 'FeederAction1', 'Humidity', 'TemperatureThermoCam', 'ProcessCpuLoadNormalized', 'NumberFuseEstimated', 'DurationTestBenchClosed', 'TotalCpuLoadNormalized', 'Pressure', 'DurationRobotFromTestBenchToFeeder', 'VacuumValveClosed', 'VacuumFusePicked', 'ProcessMemoryConsumption']),
        (3, ['DurationPickToPick', 'IntensityTotalThermoImage', 'Pressure', 'VacuumValveClosed', 'TotalCpuLoadNormalized', 'DurationRobotFromTestBenchToFeeder', 'TotalMemoryConsumption', 'NumberFuseDetected', 'FeederAction1', 'SharpnessImage', 'EPOSVelocity', 'IntensityTotalImage', 'Vacuum', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'ErrorFrame', 'VacuumFusePicked', 'FuseIntoFeeder', 'FuseHeatSlopeNOK', 'CpuTemperature', 'TemperatureThermoCam', 'NumberFuseEstimated', 'FeederAction2', 'FeederAction4', 'FuseCycleDuration', 'ProcessCpuLoadNormalized', 'ProcessMemoryConsumption', 'FuseHeatSlope', 'FeederBackgroundIlluminationIntensity', 'FusePicked', 'FuseOutsideOperationalSpace', 'DurationRobotFromFeederToTestBench', 'Humidity', 'SmartMotorPositionError', 'ValidFrameOptrisPIIRCamera', 'DurationTestBenchClosed', 'FuseHeatSlopeOK', 'LightBarrierActiveTaskDuration1', 'FuseTestResult', 'EPOSCurrent', 'FeederAction3', 'ValidFrame', 'EPOSPosition', 'Temperature', 'LightBarrierPassiveTaskDuration1', 'SmartMotorSpeed']),
        (4, ['TotalCpuLoadNormalized', 'CpuTemperature', 'EPOSPosition', 'ProcessCpuLoadNormalized', 'IntensityTotalThermoImage', 'EPOSVelocity', 'LightBarrierPassiveTaskDuration1', 'FuseCycleDuration', 'Pressure', 'TemperatureThermoCam', 'FeederAction1', 'VacuumValveClosed', 'FeederAction4', 'IntensityTotalImage', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'ErrorFrame', 'FeederBackgroundIlluminationIntensity', 'NumberFuseDetected', 'FuseHeatSlopeOK', 'Humidity', 'VacuumFusePicked', 'FusePicked', 'ValidFrame', 'FuseHeatSlope', 'FeederAction2', 'SharpnessImage', 'LightBarrierActiveTaskDuration1', 'FeederAction3', 'NumberFuseEstimated', 'FuseIntoFeeder', 'ValidFrameOptrisPIIRCamera', 'DurationTestBenchClosed', 'FuseOutsideOperationalSpace', 'DurationRobotFromTestBenchToFeeder', 'FuseHeatSlopeNOK', 'Temperature', 'FuseTestResult', 'DurationRobotFromFeederToTestBench', 'ProcessMemoryConsumption', 'DurationPickToPick', 'EPOSCurrent', 'TotalMemoryConsumption', 'SmartMotorPositionError', 'Vacuum', 'SmartMotorSpeed']),
        (5, ['VacuumValveClosed', 'LightBarrierPassiveTaskDuration1', 'Vacuum', 'VacuumFusePicked', 'TotalCpuLoadNormalized', 'DurationRobotFromTestBenchToFeeder', 'CpuTemperature', 'SmartMotorPositionError', 'FuseHeatSlopeOK', 'EPOSPosition', 'ValidFrameOptrisPIIRCamera', 'EPOSVelocity', 'ValidFrame', 'FeederAction2', 'ProcessMemoryConsumption', 'FusePicked', 'NumberFuseDetected', 'Pressure', 'FuseHeatSlopeNOK', 'TemperatureThermoCam', 'TotalMemoryConsumption', 'FeederAction1', 'IntensityTotalThermoImage', 'FuseOutsideOperationalSpace', 'DurationTestBenchClosed', 'FuseIntoFeeder', 'EPOSCurrent', 'FeederAction3', 'ErrorFrame', 'NumberFuseEstimated', 'NumberEmptyFeeder', 'LightBarrierPassiveTaskDuration2', 'LightBarrierTaskDuration', 'LightBarrierActiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'FuseHeatSlope', 'Temperature', 'FuseTestResult', 'DurationPickToPick', 'FeederBackgroundIlluminationIntensity', 'SharpnessImage', 'DurationRobotFromFeederToTestBench', 'IntensityTotalImage', 'LightBarrierActiveTaskDuration1', 'Humidity', 'FeederAction4', 'ProcessCpuLoadNormalized', 'FuseCycleDuration', 'SmartMotorSpeed']),
        (7, ['ProcessMemoryConsumption', 'FuseCycleDuration', 'FeederAction4', 'NumberFuseEstimated', 'DurationPickToPick', 'LightBarrierActiveTaskDuration1', 'Temperature', 'FuseHeatSlopeNOK', 'SmartMotorPositionError', 'ValidFrameOptrisPIIRCamera', 'ProcessCpuLoadNormalized', 'FeederAction2', 'FusePicked', 'VacuumValveClosed', 'FeederBackgroundIlluminationIntensity', 'FeederAction3', 'DurationRobotFromFeederToTestBench', 'FuseOutsideOperationalSpace', 'Pressure', 'TemperatureThermoCam', 'EPOSPosition', 'TotalCpuLoadNormalized', 'CpuTemperature', 'DurationTestBenchClosed', 'TotalMemoryConsumption', 'FuseHeatSlopeOK', 'SharpnessImage', 'EPOSCurrent', 'FeederAction1', 'FuseHeatSlope', 'Vacuum', 'DurationRobotFromTestBenchToFeeder', 'NumberFuseDetected', 'Humidity', 'LightBarrieActiveTaskDuration2', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'LightBarrierPassiveTaskDuration1', 'ErrorFrame', 'IntensityTotalImage', 'ValidFrame', 'EPOSVelocity', 'IntensityTotalThermoImage', 'FuseIntoFeeder', 'FuseTestResult', 'VacuumFusePicked', 'SmartMotorSpeed']),
        (9, ['SmartMotorSpeed', 'VacuumValveClosed', 'ErrorFrame', 'FuseHeatSlopeNOK', 'EPOSPosition', 'EPOSVelocity', 'VacuumFusePicked', 'Vacuum', 'ValidFrameOptrisPIIRCamera', 'DurationRobotFromTestBenchToFeeder', 'LightBarrierActiveTaskDuration1', 'NumberFuseDetected', 'FusePicked', 'Temperature', 'Pressure', 'ProcessCpuLoadNormalized', 'TotalMemoryConsumption', 'FuseHeatSlope', 'CpuTemperature', 'FuseCycleDuration', 'EPOSCurrent', 'ValidFrame', 'FeederAction1', 'FeederAction2', 'FeederBackgroundIlluminationIntensity', 'Humidity', 'ProcessMemoryConsumption', 'NumberFuseEstimated', 'FeederAction3', 'FuseIntoFeeder', 'FuseTestResult', 'FeederAction4', 'FuseHeatSlopeOK', 'TemperatureThermoCam', 'TotalCpuLoadNormalized', 'FuseOutsideOperationalSpace', 'DurationTestBenchClosed', 'DurationPickToPick', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'SharpnessImage', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'IntensityTotalImage', 'LightBarrieActiveTaskDuration2', 'SmartMotorPositionError', 'IntensityTotalThermoImage', 'DurationRobotFromFeederToTestBench', 'LightBarrierPassiveTaskDuration1']),
        (11, ['FuseHeatSlope', 'FuseCycleDuration', 'CpuTemperature', 'FuseOutsideOperationalSpace', 'ValidFrame', 'FeederAction4', 'LightBarrierActiveTaskDuration1', 'ValidFrameOptrisPIIRCamera', 'FeederAction3', 'VacuumValveClosed', 'IntensityTotalImage', 'ErrorFrame', 'FeederAction2', 'NumberFuseEstimated', 'FeederAction1', 'IntensityTotalThermoImage', 'DurationTestBenchClosed', 'FuseIntoFeeder', 'ProcessMemoryConsumption', 'SharpnessImage', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierActiveTaskDuration1b', 'LightBarrierPassiveTaskDuration1b', 'LightBarrieActiveTaskDuration2', 'FusePicked', 'Vacuum', 'LightBarrierPassiveTaskDuration1', 'SmartMotorPositionError', 'FeederBackgroundIlluminationIntensity', 'Pressure', 'NumberFuseDetected', 'FuseHeatSlopeOK', 'Temperature', 'FuseHeatSlopeNOK', 'ProcessCpuLoadNormalized', 'DurationPickToPick', 'TemperatureThermoCam', 'EPOSVelocity', 'FuseTestResult', 'EPOSPosition', 'EPOSCurrent', 'Humidity', 'DurationRobotFromFeederToTestBench', 'TotalMemoryConsumption', 'TotalCpuLoadNormalized', 'DurationRobotFromTestBenchToFeeder', 'VacuumFusePicked', 'SmartMotorSpeed']),
        (12, ['ValidFrame', 'FuseIntoFeeder', 'SharpnessImage', 'ErrorFrame', 'LightBarrieActiveTaskDuration2', 'NumberEmptyFeeder', 'LightBarrierTaskDuration', 'LightBarrierPassiveTaskDuration2', 'LightBarrierPassiveTaskDuration1b', 'LightBarrierActiveTaskDuration1b', 'FeederAction4', 'IntensityTotalThermoImage', 'EPOSVelocity', 'ValidFrameOptrisPIIRCamera', 'DurationRobotFromFeederToTestBench', 'NumberFuseDetected', 'NumberFuseEstimated', 'EPOSCurrent', 'TemperatureThermoCam', 'IntensityTotalImage', 'FuseHeatSlopeOK', 'DurationRobotFromTestBenchToFeeder', 'FeederAction2', 'FeederBackgroundIlluminationIntensity', 'TotalMemoryConsumption', 'FeederAction1', 'Temperature', 'FuseTestResult', 'ProcessCpuLoadNormalized', 'TotalCpuLoadNormalized', 'FeederAction3', 'FuseHeatSlope', 'VacuumFusePicked', 'FusePicked', 'LightBarrierActiveTaskDuration1', 'DurationTestBenchClosed', 'Humidity', 'Vacuum', 'EPOSPosition', 'CpuTemperature', 'FuseOutsideOperationalSpace', 'LightBarrierPassiveTaskDuration1', 'FuseHeatSlopeNOK', 'VacuumValveClosed', 'SmartMotorPositionError', 'ProcessMemoryConsumption', 'DurationPickToPick', 'Pressure', 'FuseCycleDuration', 'SmartMotorSpeed']),
    )

    for known_label, ranking in ranking_table:
        if label == known_label:
            return ranking

    return default_ranking
    

In [None]:
def load_pickle_files():
    """Load the pickled pipeline objects from ``models_to_submit.pkl``.

    The file is read from the current working directory and must unpickle
    to a container whose items 0-5 are, in order: fields dict, sensor list,
    scaler, LDA transformer and two models.

    Returns:
        Tuple ``(fields_dict, sensor_list, scaler, lda, model4, model)``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load a
    # models file that comes from a trusted source.
    with open("models_to_submit.pkl", "rb") as pickle_file:
        stored_objects = pickle.load(pickle_file)

    fields_dict, sensor_list, scaler, lda, model4, model = (
        stored_objects[position] for position in range(6)
    )
    return fields_dict, sensor_list, scaler, lda, model4, model

In [None]:
#TestClassification is the prototype of the function that each team must develop to classify new data
#This function must handle all the operations needed to: read the input file in streaming order, make the earliest possible classification, and return the required data
#Input: 
# - Folder Name: The name of the folder where the experiment file is stored
# - Experiment: The experiment name that must be read 
#Output:
# - Predicted Label: the label predicted by the classifier
# - Time for classification: how much time of the input data was required to perform the classification task
# - Ranking: The Features ranked according to the team solution

def TestClassification(FolderName, Experiment):
    """Classify one experiment file and report label, time and ranking.

    Steps: load the pickled pipeline objects, read the experiment file,
    interpolate missing values, scale the sensor columns, build flattened
    windows of 40 rows, project them with the LDA transformer, predict a
    class per window, and derive the final label and time from those
    predictions.

    Args:
        FolderName: Folder that contains the experiment file.
        Experiment: Experiment name (file name without ``.csv``).

    Returns:
        Tuple ``(Label, Time, Ranking)``: the predicted label, the time
        value computed by ``get_optimum_point_degradation``, and the feature
        ranking returned by ``get_rank_class`` for that label.
    """
    window_size = 40

    # The fifth pickled object is used as the classifier; the sixth is unused.
    fields_dict, sensor_list, scaler, lda, classifier, _ = load_pickle_files()

    experiment_df = load_data_file(FolderName, Experiment, fields_dict)

    # Fill missing values one field group at a time, per (class, run).
    for field_id in fields_dict:
        group_name = fields_dict[field_id]['name']
        group_fields = fields_dict[field_id]['fields']

        filled = experiment_df.groupby(["class", "run"]).apply(fill_nan_values, group_name, group_fields)
        filled.reset_index(drop=True, inplace=True)
        experiment_df[group_fields] = filled[group_fields]

    # Keep only the model inputs plus the identifiers.
    experiment_df = experiment_df[sensor_list + ["class", "run"]]
    experiment_df = experiment_df.rename(columns={'run': 'runId'})

    id_cols = ["class", "runId"]
    X_test_df = experiment_df[sensor_list + id_cols].copy()

    # Scale the sensor columns and put the identifiers back in front.
    scaler_cols = sensor_list.copy()
    scaled_values = scaler.transform(X_test_df[scaler_cols])
    scaled_df = pd.DataFrame(scaled_values, index=X_test_df.index, columns=scaler_cols)
    X_test_df = pd.concat([X_test_df[id_cols], scaled_df], axis=1)

    X_test, _y_test, _run_ids, _run_lengths = create_datasets(X_test_df, window_size, sensor_list)
    X_test = lda.transform(X_test)

    y_pred = classifier.predict(X_test)

    # Outputs
    Label, Time = get_optimum_point_degradation(y_pred, window_size)
    Ranking = get_rank_class(Label)

    return Label, Time, Ranking

In [None]:
def RecordPerformance(Experiment, Label, Time, Ranking):
    """Store the performance achieved in the first classification run.

    Writes ``First/<Experiment>.csv`` (the folder is created if missing)
    with a header line and one ``;``-separated record.

    Args:
        Experiment: Experiment name; also used as the output file name.
        Label: Predicted label.
        Time: Time for classification.
        Ranking: Feature ranking; stored as ``str(Ranking)``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('First', exist_ok=True)

    # The context manager closes the file even if a write fails.
    with open("First/%s.csv" % Experiment, "w") as PerformanceOutput:
        PerformanceOutput.write("Experiment;Label;Time;Ranking\n")
        PerformanceOutput.write(Experiment + ";" + str(Label) + ";" + str(Time) + ";" + str(Ranking) + "\n")

    return

def CutExperiment(FolderName, Experiment, Time):
    """Cut the input experiment in time order.

    Every row of ``<FolderName>/<Experiment>.csv`` holds a field name and a
    list of records; each list is truncated to its first ``Time`` records
    (the time to classification declared by the team) and the result is
    written to ``Cut/<Experiment>.csv`` (the folder is created if missing).

    Args:
        FolderName: Folder that contains the original experiment file.
        Experiment: Experiment name (file name without ``.csv``).
        Time: Number of leading records to keep for every field.
    """
    os.makedirs('Cut', exist_ok=True)

    data = pd.read_csv(os.path.join(FolderName, "%s.csv" % Experiment), sep=",")

    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    cut_rows = []
    for field, raw_records in zip(data.iloc[:, 0], data.iloc[:, 1]):
        # NOTE(security): eval() executes whatever expression the file holds;
        # only use this on trusted experiment files.
        records = eval(raw_records)[:Time]
        cut_rows.append({"c1": field, "c2": records})

    df = pd.DataFrame(cut_rows, columns=["c1", "c2"])

    # Empty name for the first column, original name for the data column.
    df.columns = ["", data.columns[1]]
    df.to_csv("Cut/%s.csv" % Experiment, index=False)
    return

def ComparePerformance(Experiment,Label, Time, Ranking):
    """Check whether the second run matches the recorded first run.

    Reads ``First/<Experiment>.csv`` and compares its first record with the
    given values; the ranking is compared in its ``str()`` form.

    Returns:
        ``True`` if label, time and ranking all match, ``False`` otherwise.
    """
    first_run = pd.read_csv("First/%s.csv"%Experiment,sep=";")

    expected_by_column = (
        ("Label", Label),
        ("Time", Time),
        ("Ranking", str(Ranking)),
    )
    for column, expected in expected_by_column:
        if first_run[column].iloc[0] != expected:
            return False

    return True

def GetWorst(Experiment, Label, Time, Ranking):
    """Return the worst performance when the first and second run differ.

    Reads the first-run record from ``First/<Experiment>.csv`` and compares
    it with the given second-run values.

    Returns:
        * ``(first label, -1, first ranking)`` if the labels differ;
        * ``(first label, first time, first ranking)`` if the time or the
          ranking (compared as ``str(Ranking)``) differs;
        * ``None`` if everything matches.
        The ranking in the result is the value read from the file, not the
        ``Ranking`` argument.
    """
    first_run = pd.read_csv("First/%s.csv"%Experiment,sep=";")

    first_label = first_run["Label"].iloc[0]
    if first_label != Label:
        return first_label, -1, first_run["Ranking"].iloc[0]

    first_time = first_run["Time"].iloc[0]
    first_ranking = first_run["Ranking"].iloc[0]
    if first_time != Time or first_ranking != str(Ranking):
        return first_label, first_time, first_ranking

    return None

def LogPerformance(Experiment,Label, Time, Ranking):
    """Store the final performance of an experiment.

    Writes ``Results/<Experiment>.csv`` (the folder is created if missing)
    with a header line and one ``;``-separated record. Only this
    performance is used to compute the penalty score of each team.

    Args:
        Experiment: Experiment name; also used as the output file name.
        Label: Final label.
        Time: Final time for classification.
        Ranking: Final feature ranking; stored as ``str(Ranking)``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('Results', exist_ok=True)

    # The context manager closes the file even if a write fails.
    with open("Results/%s.csv" % Experiment, "w") as PerformanceOutput:
        PerformanceOutput.write("Experiment;Label;Time;Ranking\n")
        PerformanceOutput.write(Experiment + ";" + str(Label) + ";" + str(Time) + ";" + str(Ranking) + "\n")

    return

In [None]:
#Example of the validation pipeline using a single experiment.
#Data_Test/ is the folder where the experiment is stored
#"class_ 4_4_data" is the experiment name
#Cut/ is the folder where only the cut experiment will be saved

def main():
    """Run the two-pass validation pipeline for one example experiment.

    The experiment is classified once on the full file, the result is
    recorded and the file is cut at the reported time. The cut file is then
    classified again; if the second result differs from the first, the
    result returned by ``GetWorst`` is logged instead.
    """
    experiment = "class_ 4_4_data"

    # First pass: full experiment file.
    label, time, ranking = TestClassification("Data_Test/", experiment)
    RecordPerformance(experiment, label, time, ranking)
    CutExperiment("Data_Test/", experiment, time)

    # Second pass: only the records kept by the cut.
    label, time, ranking = TestClassification("Cut/", experiment)

    same_result = ComparePerformance(experiment, label, time, ranking)
    print("Equal:", same_result, label, time, ranking)
    if not same_result:
        label, time, ranking = GetWorst(experiment, label, time, ranking)

    LogPerformance(experiment, label, time, ranking)
    return
    
    
# Run the example validation pipeline defined in main().
main()