In [None]:
import numpy as np
from glob import glob
import pandas as pd
import csv
from tsfresh import extract_features
from tsfresh.utilities.distribution import MultiprocessingDistributor

# tsfresh feature extraction.
#
# For every CSV matched under data/, extract tsfresh features from the
# time-series columns, copy the per-file constant columns (K1/K2/K3) onto the
# feature row, and stack everything into one frame `df`. `target` collects the
# TOTAL_QUALITY label of each processed file, in processing order.

files = glob('data/*.csv')
totals = ['Total_PorosityQuantity', 'Total_PorosityQuality', 'Total_UnfilledZones', 'Total_FillingQuality', 'TOTAL_QUALITY']

# Label (first value of a file's last column) a file must carry to be
# processed. This cell used to read `target_value` without defining it, which
# raised NameError on a fresh kernel. None disables the filter so every file
# is processed.
# TODO(review): set this to the intended class if filtering is required.
target_value = None

df_list = list()
target = list()

for i, file in enumerate(files):
    file_df = pd.read_csv(file)

    # Skip files whose label does not match the requested one (filter is off
    # when target_value is None).
    if target_value is not None and file_df.iloc[:, -1].unique()[0] != target_value:
        continue

    file_df['id'] = i  # tsfresh groups rows by this id column
    target.append(file_df.TOTAL_QUALITY.unique()[0])
    file_df = file_df.drop(axis=1, columns=totals)

    # Columns are split by name suffix: constants vs. time-series signals.
    consta = [col for col in file_df if col.endswith(('K1', 'K2', 'K3'))]
    series = [col for col in file_df if col.endswith(('Time', 'id', 'Flow rate', 'Pressure'))]

    # A new distributor is built for every file, as in the original cell.
    # NOTE(review): presumably extract_features shuts the pool down when it
    # finishes, so the distributor cannot be reused across files - confirm.
    distributor = MultiprocessingDistributor(n_workers=8, disable_progressbar=True, progressbar_title="Feature Extraction")
    extracted_features = extract_features(file_df[series], column_id='id', column_sort='Time', distributor=distributor)

    # Each constant column contributes its first unique value as a feature.
    for j in consta:
        extracted_features[j] = file_df[j].unique()[0]

    df_list.append(extracted_features)

# pd.concat raises "No objects to concatenate" on an empty list; report the
# situation explicitly and fall back to an empty frame instead.
if df_list:
    df = pd.concat(df_list)
else:
    print('No features extracted: check the data/*.csv pattern and target_value')
    df = pd.DataFrame()
df.head()

# TS-Featurizer

In [None]:
import numpy as np
import pandas as pd
import os
from glob import glob
import multiprocessing
from ts_featurizer.base import TimeSeriesFeaturizer

# ts_featurizer feature extraction.
#
# Each CSV in the data folder is featurized on its own and the resulting rows
# are appended to `output_file`, so results are written after every file.
# NOTE: rows are always appended; re-running the cell while the output file
# still exists adds the same files a second time.

# Raw string: the plain literal relied on '\m', '\L', '\c', '\d' and '\*'
# being unrecognised escape sequences, which Python warns about and plans to
# reject. The resulting path is character-for-character the same.
files = glob(r'D:\master\LIP\composite-analysis\data\*.csv')
totals = ['Total_PorosityQuantity', 'Total_PorosityQuality', 'Total_UnfilledZones', 'Total_FillingQuality', 'TOTAL_QUALITY']

jobs = multiprocessing.cpu_count()
print("CPU count: "+ str(jobs))

output_file = 'D:/master/LIP/composite-analysis/tmp/extracted_data_ts_featurizer.csv'

for i, file in enumerate(files):
    # Case-sensitive guard kept from the original: glob matching follows the
    # platform's case rules, so this can still skip names such as 'X.CSV'.
    if not file.endswith('.csv'):
        continue

    print("Extracting file: " + str(i) + " (" + file + ")")
    df = pd.read_csv(file)
    df['id'] = i
    target = df.TOTAL_QUALITY.unique()[0]  # label stored next to the features
    df = df.drop(axis=1, columns=totals)

    # Columns are split by name suffix: constants vs. time-series signals.
    consta = [col for col in df if col.endswith(('K1', 'K2', 'K3'))]
    series = [col for col in df if col.endswith(('Time', 'id', 'Flow rate', 'Pressure'))]

    tseries = TimeSeriesFeaturizer()
    extracted_features = tseries.featurize(df[series], n_jobs=jobs)

    # TODO(review): the constant columns in `consta` are not copied onto the
    # feature rows here, unlike the tsfresh cell - confirm this is intended.

    extracted_features['target'] = target

    # Write the header only when the file is being created; afterwards append
    # rows without repeating it.
    write_header = not os.path.isfile(output_file)
    extracted_features.to_csv(output_file, mode='a', header=write_header)

# The old message said "Tsfresh" (and misspelled "successfully") although this
# cell runs ts_featurizer.
print('TS-Featurizer successfully finished')
    


CPU count: 8
Extracting file: 0 (D:\master\LIP\composite-analysis\data\0.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:19<00:00,  1.57it/s]


Extracting file: 1 (D:\master\LIP\composite-analysis\data\1.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:23<00:00,  1.29it/s]


Extracting file: 2 (D:\master\LIP\composite-analysis\data\10.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:20<00:00,  1.46it/s]


Extracting file: 3 (D:\master\LIP\composite-analysis\data\100.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:20<00:00,  1.44it/s]


Extracting file: 4 (D:\master\LIP\composite-analysis\data\1000.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:22<00:00,  1.35it/s]


Extracting file: 5 (D:\master\LIP\composite-analysis\data\10000.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:20<00:00,  1.46it/s]


Extracting file: 6 (D:\master\LIP\composite-analysis\data\10001.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:20<00:00,  1.44it/s]


Extracting file: 7 (D:\master\LIP\composite-analysis\data\10002.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:23<00:00,  1.27it/s]


Extracting file: 8 (D:\master\LIP\composite-analysis\data\10003.csv)


-------------------------------------------------- Modeling started --------------------------------------------------


  0%|                                                                                                                                        | 0/30 [00:00<?, ?it/s]