In [6]:
import numpy as np
import pandas as pd
import os
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [7]:
def compute_time_domain_features(signal):
    features = []
    features.append(np.mean(signal))  # mean
    features.append(np.std(signal))  # std_dev
    features.append(np.var(signal))  # variance
    features.append(np.max(signal))  # max
    features.append(np.min(signal))  # min
    features.append(np.max(signal) - np.min(signal))  # range
    features.append(np.sqrt(np.mean(signal**2)))  # rms
    features.append(pd.Series(signal).kurtosis())  # kurtosis
    features.append(pd.Series(signal).skew())  # skewness
    features.append((np.diff(np.sign(signal)) != 0).sum())  # zero_crossings
    return features

def process_files_in_directory(directory):
    features_list = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)
                data = pd.read_csv(file_path)
                for column in data.columns:
                    signal = data[column].values
                    features = compute_time_domain_features(signal)
                    features_list.append(features)
    return features_list

def main(data_directory):
    all_features = []
    labels = []
    class_label = 0
    for subdir in sorted(os.listdir(data_directory)):
        subdir_path = os.path.join(data_directory, subdir)
        if os.path.isdir(subdir_path):
            class_features = process_files_in_directory(subdir_path)
            all_features.extend(class_features)
            labels.extend([class_label] * len(class_features))
            class_label += 1
    
    column_names = [
        'mean', 'std_dev', 'variance', 'max', 'min', 'range',
        'rms', 'kurtosis', 'skewness', 'zero_crossings'
    ]
    features_df = pd.DataFrame(all_features, columns=column_names)
    labels_df = pd.DataFrame(labels, columns=['class_label'])
    
    return features_df, labels_df

In [8]:
features_df, labels_df = main('data')

X = features_df.values
y = labels_df.values.flatten()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
clf = svm.SVC()
clf.fit(X_train, y_train)

In [10]:
y_pred = clf.predict(X_test)

In [11]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Процент правильних відповідей: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")

Процент правильних відповідей: 91.16%
Precision: 91.56%
Recall: 91.16%
F1 Score: 90.11%


In [12]:
clf_rf = RandomForestClassifier(random_state=42)
clf_rf.fit(X_train, y_train)

In [13]:
y_pred_rf = clf_rf.predict(X_test)

In [14]:
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted', zero_division=1)
recall_rf = recall_score(y_test, y_pred_rf, average='weighted', zero_division=1)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

print(f"Процент правильних відповідей (Random Forest): {accuracy_rf * 100:.2f}%")
print(f"Precision (Random Forest): {precision_rf * 100:.2f}%")
print(f"Recall (Random Forest): {recall_rf * 100:.2f}%")
print(f"F1 Score (Random Forest): {f1_rf * 100:.2f}%")

Процент правильних відповідей (Random Forest): 98.68%
Precision (Random Forest): 98.69%
Recall (Random Forest): 98.68%
F1 Score (Random Forest): 98.67%
