In [None]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from scipy.signal import spectrogram

In [None]:
#input/output directories
base_path = r"C:\Users\lenovo\thesis\classification\csv"
output_path=  r"C:\Users\lenovo\thesis\classification\stats"

# One drive-end time-signal CSV per recording, processed in this order.
# NOTE(review): these paths are built from `output_path` (the stats folder) while
# `base_path` (the csv folder) is never used -- confirm which root is intended.
input_files = [
    output_path+"\\B007_1_123\\X123_DE_time.csv",
    output_path+"\\B014_1_190\\B014_1_190X190_DE_time.csv",
    output_path+"\\B021_1_227\\B021_1_227X227_DE_time.csv",

    output_path+"\\OR007_6_1_136\\X136_DE_time.csv",
    output_path+"\\OR014_6_1_202\\OR014_6_1_202X202_DE_time.csv",
    output_path+"\\OR021_6_1_239\\OR021_6_1_239X239_DE_time.csv",

    output_path+"\\IR007_1_110\\X110_DE_time.csv",
    output_path+"\\IR014_1_175\\IR014_1_175X175_DE_time.csv",
    output_path+"\\IR021_1_214\\IR021_1_214X214_DE_time.csv",
]

# Destination folder for each entry of `input_files` (paired by position).
# Every element needs its own trailing comma: a string literal followed by a
# name on the next line is a SyntaxError, not an implicit concatenation.
output_directories = [
    output_path+"\\bopolar7",
    output_path+"\\bopolar14",
    output_path+"\\bopolar21",
    output_path+"\\orpolar7",
    output_path+"\\orpolar14",
    output_path+"\\orpolar21",
    output_path+"\\irpolar7",
    output_path+"\\irpolar14",
    output_path+"\\irpolar21",
]


In [None]:
#calculate mean, rms, standard deviation, skewness, kurtosis, maximum
def calculate_statistical_features(data):
    """Compute summary statistics of a one-dimensional signal.

    Parameters
    ----------
    data : array-like
        Signal samples. Converted to a float numpy array, so plain lists and
        integer arrays are accepted.

    Returns
    -------
    tuple
        ``(mean, rms, standard_deviation, skewness, kurtosis, maximum)``.
        Skewness is the third central moment divided by ``variance ** 1.5``;
        kurtosis is the fourth central moment divided by ``variance ** 2``
        (non-excess form). Both are 0 when the variance is 0.

    Raises
    ------
    ValueError
        If ``data`` contains no samples.
    """
    samples = np.asarray(data, dtype=float)
    if samples.size == 0:
        raise ValueError("data must contain at least one sample")

    mean = np.mean(samples)
    rms = np.sqrt(np.mean(samples**2))
    standard_deviation = np.std(samples)
    maximum = np.max(samples)

    variance = np.var(samples)
    if variance > 0:
        # Central moments are computed directly with numpy; the previous code
        # called `moment`, which was never imported in this notebook.
        centered = samples - mean
        skewness = np.mean(centered**3) / np.power(variance, 3/2)
        kurtosis = np.mean(centered**4) / np.power(variance, 2)
    else:
        # A constant signal has no spread: report 0 instead of dividing by zero.
        skewness = 0.0
        kurtosis = 0.0

    return mean, rms, standard_deviation, skewness, kurtosis, maximum


In [None]:
#save the statistical features of one split signal chunk
def process_chunk(chunk_data, output_path):
    """Compute the statistical features of ``chunk_data`` and write them to CSV.

    The features come from ``calculate_statistical_features`` and are stored
    as a single-row table with one named column per feature. The file at
    ``output_path`` is written without the index column.
    """
    mean, rms, std, skew, kurt, peak = calculate_statistical_features(chunk_data)

    # One-element lists so that each feature becomes a column of a one-row frame.
    columns = {
        'Mean': [mean],
        'RMS': [rms],
        'Standard Deviation': [std],
        'Skewness': [skew],
        'Kurtosis': [kurt],
        'Maximum': [peak],
    }

    pd.DataFrame(columns).to_csv(output_path, index=False)


In [None]:
#split the signal into overlapping chunks (default: 4800 datapoints, 50% overlap)
def process_dataset(input_file, expected_freq, output_directory, chunk_size=4800, overlap=0.5):
    """Split one recorded signal into overlapping chunks and save per-chunk statistics.

    Parameters
    ----------
    input_file : str
        Path of a header-less CSV file; only its first column is used.
    expected_freq : any
        Not used by this function; kept so existing callers keep working.
    output_directory : str
        Folder that receives one CSV per chunk. Created if it does not exist.
    chunk_size : int, default 4800
        Number of samples per chunk.
    overlap : float, default 0.5
        Fraction of each chunk shared with the next one, ``0 <= overlap < 1``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive or ``overlap`` is outside ``[0, 1)``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < 1:
        raise ValueError("overlap must satisfy 0 <= overlap < 1")

    dataset = pd.read_csv(input_file, header=None)
    data = dataset.iloc[:, 0].values

    nsamples = len(data)
    # Hop between chunk starts = chunk length minus the overlapping part.
    # (Using the overlap length itself as the hop is only right for overlap=0.5
    # and divides by zero for overlap=0.)
    step = chunk_size - int(chunk_size * overlap)
    # Only full-length chunks are produced; a signal shorter than one chunk yields none.
    nchunks = max(0, (nsamples - chunk_size) // step + 1)

    os.makedirs(output_directory, exist_ok=True)

    for j in range(nchunks):
        startid = j * step
        endid = startid + chunk_size
        chunk_data = data[startid:endid]

        # The file holds CSV statistics, so it gets a .csv name rather than the
        # previous "..._polar_spectrum.png" name.
        file_name = f"chunk{j + 1}_stats.csv"
        chunk_csv_path = os.path.join(output_directory, file_name)

        # create stats csv
        process_chunk(chunk_data, chunk_csv_path)

In [None]:
# Inputs and output folders are paired by position; zip() would silently drop
# the unmatched tail, so a length mismatch is reported up front.
if len(input_files) != len(output_directories):
    raise ValueError("input_files and output_directories must have the same length")

for input_file, output_directory in zip(input_files, output_directories):
    # process_dataset takes `expected_freq` as its second parameter. Passing the
    # folder positionally bound it to `expected_freq` and left `output_directory`
    # missing (TypeError), so every argument is given by keyword here.
    process_dataset(
        input_file,
        expected_freq=None,  # not used by process_dataset
        output_directory=output_directory,
        chunk_size=4800,
        overlap=0.5,
    )