In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split

np.random.seed(0)

In [2]:
data_dir = Path('./Data_Files')

In [3]:
df1 = pd.read_csv(data_dir / "air_Ambient.csv")
df2 = pd.read_csv(data_dir / "Noair_NoAmbient.csv")
df3 = pd.read_csv(data_dir / "Noair_Ambient.csv")
df4 = pd.read_csv(data_dir / "air_NoAmbient.csv")

In [4]:
def convert(df: pd.DataFrame, frame_size=128):
    x = np.asarray(df.iloc[:, 0]).reshape(-1, frame_size)
    x = x[x.shape[0]//10:-x.shape[0]//10]
    return x, np.zeros(x.shape[0], dtype=int)

In [5]:
X1, y1 = convert(df1)
X2, y2 = convert(df2)
X3, y3 = convert(df3)
X4, y4 = convert(df4)

y1 = y1 + 1
y4 = y4 + 1

In [6]:
X = np.concatenate((X1, X2, X3, X4), axis=0)
y = np.concatenate((y1, y2, y3, y4), axis=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

print(X.shape)
print(y.shape)
classifier = LogisticRegression(max_iter=10000).fit(X_train, y_train)

(226, 128)
(226,)


In [7]:
classifier.predict(X[0:1])

array([1])

In [8]:
classifier.predict_proba(X[0:1])

array([[0., 1.]])

In [9]:
y_pred = classifier.predict(X_test)

In [10]:
print("Accuracy percentage: ", 100*np.mean(y_pred == y_test))

Accuracy percentage:  100.0


In [11]:
print(np.hstack((classifier.intercept_[:,None], classifier.coef_)))

[[-7.19619184e+01  1.15702212e-03  8.24275669e-03  1.40712255e-02
   5.98322305e-03  1.07759710e-02 -4.71157753e-03 -1.43750940e-03
   6.90747562e-03 -1.80718460e-03 -5.02313115e-03 -1.83214160e-03
   1.35365263e-03 -3.58612915e-04 -2.50431520e-03  1.26071142e-04
  -4.82416495e-04  9.81366415e-05  5.35582702e-04 -3.20204849e-04
   7.32020595e-04 -9.66206477e-05  3.00147616e-04 -4.26213586e-04
   2.87308376e-04  3.08072640e-04  5.78075586e-04  4.17600054e-04
  -2.92883711e-04  5.36949695e-04 -7.81274796e-06  7.01706539e-04
   3.88033250e-05  8.58381112e-04 -2.03413665e-05 -8.39895860e-05
  -2.63793086e-04  1.02961176e-04 -1.97085578e-04 -3.85708356e-04
  -3.71554910e-04  7.52171270e-04  2.30205594e-04  2.20566018e-04
   4.12557100e-05  1.43635471e-04  1.89159565e-04  3.08086016e-04
   3.10510338e-04 -3.71779917e-05  6.65191449e-04  5.37045820e-04
   6.75459512e-04  3.23462262e-04  2.90311488e-04  3.75885284e-04
   6.10743826e-04  4.29006792e-05  3.54109089e-04  3.66908123e-04
   3.30759

In [12]:
from sklearn.metrics import mean_squared_error
print("MSE: ", mean_squared_error(y_test, y_pred))

MSE:  0.0


In [13]:
# Confusion Matrix
print("Confusion Matrix: ",confusion_matrix(y_test, y_pred))

# Accuracy
print("Accuracy Score: ", accuracy_score(y_test, y_pred))

# Recall
print("Recall: ", recall_score(y_test, y_pred, average=None))

# Precision
print("Precision: ",precision_score(y_test, y_pred, average=None))

#F1
print("F1 Score: ",f1_score(y_test, y_pred, average=None))

Confusion Matrix:  [[28  0]
 [ 0 18]]
Accuracy Score:  1.0
Recall:  [1. 1.]
Precision:  [1. 1.]
F1 Score:  [1. 1.]
