In [129]:
import pandas as pd
import numpy as np

In [130]:
csv_path = './content/gams_indoor.csv'
preprocessed_dataset_path = './content/preprocessed_dataset.csv'
labelled_dataset_path = "./content/labelled_dataset.csv"
df = pd.read_csv(preprocessed_dataset_path)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,voc
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,0.062
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,0.062
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,0.062
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,0.062
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,0.062


In [131]:
labelsToDrop = ['voc']
df.drop(labelsToDrop, axis=1, inplace=True)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49


### Temperature labeling 

We expect that our operating room temperature conditions are ***comprised between 19°C and 26°C***

In [132]:
minTempC = 19
maxTempC = 26
df['temperature_label'] = df['temperature'].apply(lambda x : True if (x >= minTempC) & (x <= maxTempC) else False)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,temperature_label
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,True
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,True
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,True
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,True
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,True


### Humidity labeling

We expect that our operating room humidity conditions are ***comprised between 45% and 65%***.

In [133]:
min_hum = 45
max_hum = 65
df['humidity_label'] = df['humidity'].apply(lambda x : True if (x >= min_hum) & (x <= max_hum) else False)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,temperature_label,humidity_label
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,True,False
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,True,False
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,True,True
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,True,True
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,True,True


### Micro particles labeling 

We expect that our operating room micro particles conditions should be smaller than 0.56 for particles of 2.5μm and smaller than 1.76 for particles of 10μm.

In [134]:
max_pm25 = 0.56
max_pm10 = 1.76

df['pm25_label'] = df['pm25'].apply(lambda x : True if (x <= max_pm25) else False)
df['pm10_label'] = df['pm10'].apply(lambda x : True if (x <= max_pm10) else False)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,temperature_label,humidity_label,pm25_label,pm10_label
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,True,False,True,True
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,True,False,True,True
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,True,True,True,True
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,True,True,True,True
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,True,True,True,True


In [135]:
min_co2 = 300
max_co2 = 380
df['co2_label'] = df['co2'].apply(lambda x : True if (x >= min_co2) & (x <= max_co2) else False)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,temperature_label,humidity_label,pm25_label,pm10_label,co2_label
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,True,False,True,True,True
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,True,False,True,True,True
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,True,True,True,True,True
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,True,True,True,True,True
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,True,True,True,True,True


In [136]:
isISO5 = (df['temperature_label'] == True) & (df['humidity_label'] == True) & (df['pm10_label'] == True) & (df['pm25_label'] == True) & (df['co2_label'] == True)
df['ISO5'] = np.where(isISO5, True, False)
df.head()

Unnamed: 0.1,Unnamed: 0,ts,co2,humidity,pm10,pm25,temperature,temperature_label,humidity_label,pm25_label,pm10_label,co2_label,ISO5
0,0,2016-11-21 00:47:03,358.0,66.0825,1.5086,0.105361,20.83,True,False,True,True,True,False
1,1,2016-11-21 00:48:03,355.666667,65.0375,1.537426,0.00995,21.01,True,False,True,True,True,False
2,2,2016-11-21 00:49:03,355.5,63.36,1.5086,0.01005,21.2,True,True,True,True,True,True
3,3,2016-11-21 00:50:03,355.333333,63.094167,1.482271,0.040822,21.37,True,True,True,True,True,True
4,4,2016-11-21 00:51:03,355.0,62.883333,1.473128,0.174353,21.49,True,True,True,True,True,True


In [137]:
print(df[df['pm10_label'] == True].sum())

Unnamed: 0                                                  5652858057
ts                   2016-11-21 00:47:032016-11-21 00:48:032016-11-...
co2                                                    31196321.166667
humidity                                                4341611.949167
pm10                                                     124628.446106
pm25                                                      37054.165835
temperature                                                  1991290.5
temperature_label                                                77849
humidity_label                                                   69828
pm25_label                                                       63342
pm10_label                                                       85771
co2_label                                                        58928
ISO5                                                             36003
dtype: object


In [138]:
print(df[df['ISO5'] == True].sum())

Unnamed: 0                                                  2463723505
ts                   2016-11-21 00:49:032016-11-21 00:50:032016-11-...
co2                                                         11562981.5
humidity                                                1889265.883333
pm10                                                      56251.425186
pm25                                                       8772.348932
temperature                                                  801040.75
temperature_label                                                36003
humidity_label                                                   36003
pm25_label                                                       36003
pm10_label                                                       36003
co2_label                                                        36003
ISO5                                                             36003
dtype: object


In [None]:
df.to_csv(labelled_dataset_path)