<a href="https://colab.research.google.com/github/yohancsx/SmartMask/blob/main/micro_ml/BLE_Sense_SVM_Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get -qq install xxd
!pip install pandas numpy matplotlib
!pip install scikit-learn
!pip install micromlgen


In [None]:
#imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from micromlgen import port
from sklearn.ensemble import RandomForestClassifier

In [None]:
#load all the data into separate dataframes

#the directory the csv recordings are read from
DATA_DIR = "/content/"

#the name for the breathing data
breathingFile = "regular_breathing.csv"

#the name for the coughing data
coughingFile = "coughing.csv"

#the filename for the talking data
talkingFile = "talking.csv"

#the filename for the deep breathing data
deepBreathingFile = "deep_breathing.csv"

#the filename for the combined data
combinedFile = "mixed.csv"


def load_pressure_csv(filename):
  """Read one header-less recording from DATA_DIR into a dataframe with a single "pressure" column.

  filename: name of the csv file inside DATA_DIR.
  Returns the dataframe after dropping every column that contains a missing value.
  """
  frame = pd.read_csv(DATA_DIR + filename, index_col=False, header=None, names=["pressure"])
  #NOTE(review): axis=1 drops whole columns, so a single NaN would remove "pressure" itself;
  #confirm that dropping rows (axis=0) was not what was intended
  return frame.dropna(axis=1)


#read every recording
dfTalk = load_pressure_csv(talkingFile)
dfBreathe = load_pressure_csv(breathingFile)
dfCough = load_pressure_csv(coughingFile)
dfDeepBreathe = load_pressure_csv(deepBreathingFile)
#the mixed recording comes from its own file (it was previously read from the talking file)
dfMixed = load_pressure_csv(combinedFile)

In [None]:
#plot some of the sample data
plt.rcParams["figure.figsize"] = (5,2)

#the number of leading samples shown for each recording
PLOT_SAMPLES = 200


def plot_pressure_preview(df, title, color):
  """Plot the first PLOT_SAMPLES pressure readings of one recording in its own figure.

  df: dataframe with a "pressure" column.
  title: text shown above the plot.
  color: matplotlib color used for the line.
  """
  preview = df["pressure"].head(PLOT_SAMPLES)
  #size the x axis from the data so recordings shorter than PLOT_SAMPLES still plot
  sample_numbers = range(len(preview))
  #the line style and marker are given once, as keywords, instead of also in a format string
  plt.plot(sample_numbers, preview, color=color, linestyle='solid', marker=',', label='pressure')
  plt.title(title)
  plt.xlabel("Sample #")
  plt.ylabel("Pressure")
  plt.legend()
  plt.show()


plot_pressure_preview(dfCough, "Coughing Data", 'g')
plot_pressure_preview(dfBreathe, "Breathing Data", 'r')
plot_pressure_preview(dfDeepBreathe, "Deep Breathing Data", 'b')
plot_pressure_preview(dfTalk, "Talking Data", 'y')



In [None]:
#prepare the datasets

#set the random seed (only numpy is seeded: tensorflow is neither imported nor used in this notebook)
SEED = 1337
np.random.seed(SEED)

#the list of breathing types to classify
BREATHING_TYPES = [
    "coughing",
    "breathing",
    "deepBreathing",
    "talking"
]

#the list of dataframes of the breathing types, in the same order as BREATHING_TYPES
BREATHING_DATA_LIST = [dfCough,dfBreathe,dfDeepBreathe,dfTalk]

#the raw number of samples per period
SAMPLES_PER_PERIOD = 200

NUM_TYPES = len(BREATHING_TYPES)

#create a one-hot encoded matrix that is used in the output (row i encodes BREATHING_TYPES[i])
ONE_HOT_ENCODED_TYPES = np.eye(NUM_TYPES)


#input and output lists
inputs = []
outputs = []

#set to a number far greater than the maximum number of samples (or infinity)
minsamples = np.inf

#the value the pressure readings are divided by, chosen so that data is between 0 and 1
maxpressure = 3000

#find the smallest usable number of samples over all recordings
for type_data in BREATHING_DATA_LIST:
  num_rows = type_data.shape[0]
  display(num_rows)
  #half a period is held back from each recording; integer division keeps the count an int
  minsamples = min(minsamples, num_rows - SAMPLES_PER_PERIOD // 2)

display("minimum samples:")
display(minsamples)

In [None]:
#Option 1: split the dataset into overlapping portions
distance_between_samples = 20

#start from empty lists so that re-running this cell neither fails nor duplicates windows
inputs = []
outputs = []

#integer half period so that every window index is an int
half_period = SAMPLES_PER_PERIOD // 2

for type_index in range(NUM_TYPES):
  #get the pressure readings of this type as a plain array
  pressure = BREATHING_DATA_LIST[type_index]['pressure'].to_numpy()
  #slide a window of SAMPLES_PER_PERIOD readings over the recording, from the first period to the last
  for i in range(half_period, int(minsamples), int(distance_between_samples)):
    window_start = i - half_period
    #scale the window by maxpressure
    window = pressure[window_start:window_start + SAMPLES_PER_PERIOD] / maxpressure

    #append the data together with the one-hot row of its type
    inputs.append(window)
    outputs.append(ONE_HOT_ENCODED_TYPES[type_index])

inputs = np.array(inputs)
outputs = np.array(outputs)

display("input shape:")
display(inputs.shape)

display("output shape:")
display(outputs.shape)

In [None]:
#randomize and split inputs and outputs into training, test and validation sets
num_inputs = len(inputs)
randomize = np.arange(num_inputs)
np.random.shuffle(randomize)

#swap the consecutive indexes (0, 1, 2, etc) with the randomized indexes
#the same permutation is applied to both arrays so every input keeps its output
inputs = inputs[randomize]
outputs = outputs[randomize]

#split the recordings (group of samples) into three sets: training (60%), testing (20%) and validation (20%)
TRAIN_SPLIT = int(0.6 * num_inputs)
TEST_SPLIT = int(0.2 * num_inputs + TRAIN_SPLIT)

#two split points are needed to get three pieces
inputs_train, inputs_test, inputs_validate = np.split(inputs, [TRAIN_SPLIT, TEST_SPLIT])
outputs_train, outputs_test, outputs_validate = np.split(outputs, [TRAIN_SPLIT, TEST_SPLIT])

In [None]:
#train

#the outputs are one-hot rows; collapse them to integer class indices so that the forest is
#fitted as one multi-class model instead of one output per column
if outputs_train.ndim > 1:
  labels_train = np.argmax(outputs_train, axis=1)
else:
  labels_train = outputs_train

#30 trees of depth at most 10; random_state makes the fit independent of the global numpy state
classifier = RandomForestClassifier(n_estimators=30, max_depth=10, random_state=SEED).fit(inputs_train, labels_train)


In [None]:
#determine accuracy

In [None]:
#export

#map every class index to its name; index i is BREATHING_TYPES[i], the order used for the one-hot rows
classmap = dict(enumerate(BREATHING_TYPES))

#generate the c code of the trained classifier and show it
c_code = port(classifier, classmap=classmap)
print(c_code)