In [14]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [1]:
# Normalise the raw training file: every run of whitespace on a line is
# collapsed into a single space and leading/trailing blanks are removed.
# The cleaned copy is written next to the original, one record per line.
with open("data/training_data.txt", "r") as raw_file, open("data/training_data_new.txt", "w") as clean_file:
    clean_file.writelines(
        # str.split() with no argument already ignores surrounding whitespace
        ' '.join(raw_line.split()) + "\n"
        for raw_line in raw_file
    )

In [19]:
def _cap_activity(label):
    """Return ``label`` unchanged when it is at most 7, otherwise 7."""
    return label if label <= 7 else 7


# Load the whitespace-normalised training data (single-space delimited).
data = pd.read_csv('data/training_data_new.txt', sep=' ')

# Discard the leading column; it is not used by the rest of the notebook.
data = data.drop(columns=data.columns[0])

# Make sure the labels are numeric, then fold every label above 7 into class 7.
data['activity'] = pd.to_numeric(data['activity'])
data['activity'] = data['activity'].apply(_cap_activity)

# Show which class labels remain after the capping step.
print(data['activity'].unique())

[5 7 4 6 1 3 2]


In [20]:
# Separate the target from the features: column 0 is used as the label,
# every remaining column as an input feature.
y = data.iloc[:, 0]
X = data.iloc[:, 1:]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Build a stacked ensemble (four base learners feeding a logistic-regression
# meta learner) and tune it with an exhaustive, 5-fold cross-validated grid
# search. Produces `grid_search` (the fitted search) and `y_pred` (predictions
# for the held-out split).

# One seed for every stochastic estimator, matching the seed used for the
# train/test split, so that the selected hyper-parameters and the reported
# scores are reproducible when the notebook is re-run.
RANDOM_STATE = 42

# define base learners
base_learners = [
    ('knn', KNeighborsClassifier()),
    ('dt', DecisionTreeClassifier(random_state=RANDOM_STATE)),
    # probability=True makes SVC run an internal, shuffled cross-validation to
    # calibrate probabilities, so it needs a seed to be deterministic as well.
    ('svm', SVC(kernel='linear', probability=True, random_state=RANDOM_STATE)),
    ('rf', RandomForestClassifier(random_state=RANDOM_STATE)),
]

# define meta learner
meta_learner = LogisticRegression()

# create the stacking classifier (it is fitted by the grid search below)
stacking_clf = StackingClassifier(estimators=base_learners, final_estimator=meta_learner)

# define parameter grid; the "<name>__<param>" keys address the base learners
# by the names given in `base_learners`
param_grid = {
    'rf__n_estimators': [10, 50, 100],
    'rf__max_depth': [None, 10, 20],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['linear', 'rbf']
}

# using grid search to find the best parameters
grid_search = GridSearchCV(estimator=stacking_clf, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# print the best parameters
print(f'Best parameters: {grid_search.best_params_}')

# use the best parameters to predict on the held-out split
y_pred = grid_search.predict(X_test)


Best parameters: {'rf__max_depth': None, 'rf__n_estimators': 100, 'svm__C': 10, 'svm__kernel': 'linear'}


In [22]:
# Fraction of held-out samples whose predicted label matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Confusion matrix of true versus predicted labels on the held-out split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("confusion matrix:", cm, sep="\n")

Accuracy: 0.99
confusion matrix:
[[251   0   0   0   0   0   0]
 [  0 205   0   0   0   0   0]
 [  0   0 216   0   0   0   0]
 [  0   0   0 262   6   0   1]
 [  0   0   0   5 280   0   0]
 [  0   0   0   0   0 266   1]
 [  1   0   0   0   0   0  60]]


In [23]:
# Normalise the raw test file: every run of whitespace on a line is collapsed
# into a single space and leading/trailing blanks are removed. The cleaned
# copy is written next to the original, one record per line.
with open("data/test_data.txt", "r") as raw_file, open("data/test_data_new.txt", "w") as clean_file:
    clean_file.writelines(
        # str.split() with no argument already ignores surrounding whitespace
        ' '.join(raw_line.split()) + "\n"
        for raw_line in raw_file
    )

In [24]:
# Load the whitespace-normalised test data (single-space delimited).
dt = pd.read_csv('data/test_data_new.txt', sep=' ')

# Discard the leading column so only the remaining columns are kept.
dt = dt.drop(columns=dt.columns[0])

# Preview the first five rows.
print(dt.head())

         F1        F2        F3        F4        F5        F6        F7  \
0  0.030914 -0.008927  0.040382 -0.938504 -0.944626 -0.759334 -0.952398   
1  0.042548  0.001079 -0.026236 -0.975516 -0.977502 -0.960146 -0.986694   
2  0.038297 -0.011660 -0.025643 -0.993922 -0.979215 -0.973030 -0.994298   
3  0.036205 -0.018148 -0.025240 -0.994845 -0.981534 -0.976175 -0.995169   
4  0.038034 -0.013437 -0.032899 -0.993955 -0.977493 -0.984290 -0.994006   

         F8        F9       F10  ...      F552      F553      F554      F555  \
0 -0.950281 -0.802483 -0.757099  ...  0.122830 -0.345684 -0.709087  0.006462   
1 -0.978983 -0.966820 -0.757099  ... -0.314688 -0.142804 -0.600867 -0.083495   
2 -0.980535 -0.977508 -0.799005  ...  0.114730 -0.209525 -0.645500 -0.034956   
3 -0.983020 -0.980785 -0.798401  ...  0.164515 -0.359352 -0.738474 -0.017067   
4 -0.977354 -0.985899 -0.798401  ... -0.056085 -0.544467 -0.846150 -0.002223   

       F556      F557      F558      F559      F560      F561  
0  0

In [29]:
# Label the test rows with the refitted best estimator found by the grid search.
y_new_pred = grid_search.predict(X=dt)

numpy.ndarray

In [30]:
# Persist the predicted labels as integers, one per line.
output_path = Path("results/multiclass_re0.txt")
# np.savetxt does not create missing directories, so make sure the target
# folder exists; otherwise a fresh checkout fails with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
np.savetxt(output_path, y_new_pred, fmt='%d')

In [3]:
import platform
import sys
import importlib

# Record the runtime environment: the platform string and the full Python
# version string of the interpreter running this notebook.
os_info = platform.platform()
python_version = sys.version

print(
    f"OS info: {os_info}",
    f"Python version: {python_version}",
    sep="\n",
)

OS info: Windows-10-10.0.22000-SP0
Python version: 3.10.9 | packaged by conda-forge | (main, Jan 11 2023, 15:15:40) [MSC v.1916 64 bit (AMD64)]
Installed packages:
asttokens==2.2.1
backcall==0.2.0
brotlipy==0.7.0
certifi @ file:///C:/b/abs_85o_6fm0se/croot/certifi_1671487778835/work/certifi
cffi @ file:///C:/b/abs_49n3v2hyhr/croot/cffi_1670423218144/work
charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
colorama @ file:///C:/b/abs_a9ozq0l032/croot/colorama_1672387194846/work
comm==0.1.2
conda==23.1.0
conda-content-trust @ file:///C:/Windows/TEMP/abs_4589313d-fc62-4ccc-81c0-b801b4449e833j1ajrwu/croots/recipe/conda-content-trust_1658126379362/work
conda-package-handling @ file:///C:/b/abs_fcga8w0uem/croot/conda-package-handling_1672865024290/work
conda_package_streaming @ file:///C:/b/abs_0e5n5hdal3/croot/conda-package-streaming_1670508162902/work
cryptography @ file:///C:/b/abs_b7d7drzbky/croot/cryptography_1673298763653/work
debugpy==1.6.6
decorator=