In [40]:
import pandas as pd
import sys

sys.path.append('../../')
from src.transform import load_json
from src.plot_helpers import plot_experiment_range

indoors_cols = load_json("../../json/locations.json")["wings_indoors"]

## Load Chromatography data

In [102]:
df_c = pd.read_csv('../../data/clean_data/hplc.csv',index_col=0,parse_dates=True)
df_c.rename(columns={'tvoc': 'hplc'}, inplace=True)

# in mg m3
df_c = df_c/1000

## Load sensor data

### ref sensor


In [103]:
df = pd.read_csv('../../data/clean_data/all_sensors.csv',index_col=0,parse_dates=True)

tvoc_full_mg = df[["tvoc_mg","sensor"]]
tvoc_full_mg.index.name = 'datetime'
tvoc_full_mg


Columns (6,25) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0_level_0,tvoc_mg,sensor
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-10-11 13:24:18,,inbiot
2024-10-11 13:33:57,,inbiot
2024-10-11 13:43:57,,inbiot
2024-10-11 13:44:34,,inbiot
2024-10-11 13:54:27,,inbiot
...,...,...
2024-11-05 16:17:00,0.06,ref_sensor
2024-11-05 16:18:00,0.09,ref_sensor
2024-11-05 16:19:00,0.09,ref_sensor
2024-11-05 16:20:00,0.07,ref_sensor


In [104]:
pivot_full = pd.DataFrame()

for col in tvoc_full_mg["sensor"].unique():
    
    pivot_full = pd.concat([pivot_full,tvoc_full_mg.loc[tvoc_full_mg['sensor'] == col].pivot_table(index='datetime', columns='sensor', values='tvoc_mg')],axis=1)

pivot_full.resample('240s').mean()['2024-10-16 05:00:43':'2024-10-20 23:56:17']

sensor,ref_sensor
datetime,Unnamed: 1_level_1
2024-10-16 05:04:00,7.4425
2024-10-16 05:08:00,7.4150
2024-10-16 05:12:00,7.4200
2024-10-16 05:16:00,7.4475
2024-10-16 05:20:00,7.4300
...,...
2024-10-20 23:40:00,8.3050
2024-10-20 23:44:00,8.3925
2024-10-20 23:48:00,8.3975
2024-10-20 23:52:00,8.5175


In [105]:
ref_30_mean = pivot_full.resample('30min').mean()

## Load thinnect

In [106]:
df_thinnect = pd.read_csv('../../data/thinnect_full.csv',index_col=0,parse_dates=True,low_memory=False)
df_thinnect.drop('sensor',axis=1,inplace=True)

df_thinnect.index = pd.to_datetime(df_thinnect.index)
df_thinnect = df_thinnect.sort_index()

df_thinnect.index.name = 'datetime'

df_thinnect.rename(columns={"tvoc_mg":"thinnect"},inplace=True)

df_thinnect['thinnect']['2024-10-29 11:13:00':'2024-10-29 14:58:00']


datetime
2024-10-29 11:15:00    0.031667
2024-10-29 11:30:00    0.027000
2024-10-29 11:45:00    0.043000
2024-10-29 12:00:00    0.034333
2024-10-29 12:15:00    0.049333
2024-10-29 12:30:00    0.051333
2024-10-29 12:45:00    0.096667
2024-10-29 13:00:00    0.100250
2024-10-29 13:15:00    0.138333
2024-10-29 13:30:00    0.144000
2024-10-29 13:45:00    0.163333
2024-10-29 14:00:00    0.133667
2024-10-29 14:15:00    0.120500
2024-10-29 14:30:00    0.098333
2024-10-29 14:45:00    0.128000
Name: thinnect, dtype: float64

## Analysis

In [107]:
sensors = pd.concat([ref_30_mean['2024-10-29 11:13:00':'2024-10-29 14:58:00'],df_thinnect['2024-10-29 11:13:00':'2024-10-29 14:58:00']["thinnect"]],axis=1)
full_df = pd.concat([sensors,df_c],axis=1)

In [108]:
full_df

Unnamed: 0_level_0,ref_sensor,thinnect,hplc
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-29 11:13:00,,,0.046236
2024-10-29 11:14:00,,,0.043901
2024-10-29 11:15:00,,0.031667,
2024-10-29 11:30:00,1.537667,0.027,
2024-10-29 11:45:00,,0.043,
2024-10-29 11:47:00,,,0.209815
2024-10-29 11:55:00,,,0.069578
2024-10-29 12:00:00,1.903,0.034333,
2024-10-29 12:15:00,,0.049333,
2024-10-29 12:30:00,3.290667,0.051333,


In [111]:
plot_experiment_range(df=full_df['2024-10-29 11:13:00':'2024-10-29 14:58:00'].resample('30min').mean(),col_names=['ref_sensor','thinnect','hplc'],start_date='2024-10-29 11:13:00',end_date='2024-10-29 14:58:00')