In [11]:
import seaborn as sns
import matplotlib.pyplot as plt
import dask.dataframe as dd
import pandas as pd
import smps
import plotly.express as px
import plotly.graph_objects as go
from matplotlib import pyplot
from smps.fit import LogNormal
import numpy as np
import plotly as py
sns.set("notebook", "ticks", palette='colorblind') 
%matplotlib inline
import statsmodels.api as sm

### Load MODULAIR-PM 1 min data
Resampled to a 6 min time base.

In [2]:
# Final file (PM reported)
# Comparison window: the start/stop bounds used to slice the data.
start = "2020-11-05 00:00"
stop = "2021-01-01 23:59"

folder = input_raw = '/Users/zahrashivji/Dropbox (GaTech)/Shivji/Final Data/Outdoor Files/'

# Read the CSV through dask, materialise it as a pandas frame, index it by
# `timestamp`, and average onto a 6-minute time base.
df = (
    dd.read_csv(
        folder + 'EST_Roof_Final.csv',
        parse_dates=['timestamp', 'timestamp_local'],
    )
    .compute()
    .set_index("timestamp")
    .resample("6min")
    .mean()
)

# Express the index in US/Eastern (tz_convert handles daylight-saving
# transitions), then move every stamp forward by one hour.
# NOTE(review): the reason for the extra +1 h offset is not documented in this
# notebook -- confirm it is still intended.
df.index = df.index.tz_convert('US/Eastern') + pd.Timedelta(hours=1)

# Keep only data between start and stop
df = df[start:stop]

In [3]:
# Raw file (PM1 integrated)
# Load through dask, index by `timestamp`, keep only un-flagged rows
# (flag == 0) and average onto a 6-minute time base.
df1 = (
    dd.read_csv(
        folder + 'EST_Roof_Raw.csv',
        parse_dates=['timestamp', 'timestamp_local'],
    )
    .compute()
    .set_index("timestamp")
    .query("flag == 0")
    .resample("6min")
    .mean()
)

# Express the index in US/Eastern (tz_convert handles daylight-saving
# transitions), then move every stamp forward by one hour.
# NOTE(review): the reason for the extra +1 h offset is not documented in this
# notebook -- confirm it is still intended.
df1.index = df1.index.tz_convert('US/Eastern') + pd.Timedelta(hours=1)

# Keep only data between start and stop
df1 = df1[start:stop]

# Wrap a copy of the frame in an SMPS object; the size-bin columns are
# addressed as bin0 ... bin23.
opc_bin_labels = ["bin{}".format(i) for i in range(24)]
mod_pm = smps.AlphasenseOPCN3(data=df1.copy(), bin_labels=opc_bin_labels)

### Load SMPS data
Create SMPS object

In [4]:
# Read the SMPS number-distribution export. The first 23 lines of the file are
# skipped (presumably the instrument's metadata header -- TODO confirm).
smps_csv_path = folder + '20210225_SMPS_NumberDistributions_JCR.csv'
df2 = pd.read_csv(smps_csv_path, skiprows=23)

def isfloat(str):
    """Return True if ``str`` can be converted with ``float()``, else False.

    Used to tell size-bin columns (whose headers are numeric diameters) apart
    from the descriptive columns of the SMPS export.

    Parameters
    ----------
    str : object
        Value to test, typically a column label. The name shadows the builtin
        ``str``; it is kept unchanged so existing callers are unaffected.

    Returns
    -------
    bool
        True when ``float(str)`` succeeds. Note that ``float()`` also accepts
        strings such as ``"nan"`` and ``"inf"``.
    """
    try:
        float(str)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: objects float() cannot handle at all (None, lists, ...).
        return False
    return True

# Size-bin columns are the ones whose header parses as a float; evaluate that
# once and reuse it for both the count and the position of the first channel.
is_channel = [isfloat(x) for x in df2.columns]

# Determine the total number of channels
n_channels = sum(is_channel)
if n_channels == 0:
    raise ValueError("No size-bin columns (float-like headers) found in the SMPS file")

# Next determine the index of the first channel
channel0_idx = is_channel.index(True)

# Build the timestamp from the 'Date' and 'Start Time' columns in one
# vectorised parse instead of formatting and parsing row by row.
df2['timestamp'] = pd.to_datetime(
    df2['Date'].astype(str) + ' ' + df2['Start Time'].astype(str)
)

# Grab the bin diameters (the column labels of the channel block; this assumes
# the channel columns are contiguous, starting at channel0_idx)
midpoints = df2.columns[channel0_idx:channel0_idx+n_channels]
binlabels = ['bin{}'.format(i) for i in range(n_channels)]

# Rename the columns to bin<X>
df2.rename(columns=dict(zip(midpoints, binlabels)), inplace=True)

# Set the index for the data dataframe
df2.set_index('timestamp', inplace=True)

# The parsed timestamps are naive; declare them as US/Eastern wall-clock time
df2.index = df2.index.tz_localize('US/Eastern')

df2 = df2[start:stop].copy()

# Resample to a 6 min timebase. Only numeric columns are averaged: the text
# columns ('Date', 'Start Time', ...) cannot be averaged and make
# .resample().mean() raise on pandas >= 2.0.
df2 = df2.select_dtypes(include='number').resample('6min').mean()

# Build out a nx3 array of the bin boundaries.
# .iloc[0] takes the first row by position; plain [0] on a datetime-indexed
# Series relies on a positional fallback that pandas has deprecated.
bins = smps.utils.make_bins(
    midpoints=midpoints,
    lb=df2['Lower Size (nm)'].iloc[0],
    ub=df2['Upper Size (nm)'].iloc[0],
    channels_per_decade=64,
)

# Build a generic Particle Sizer Object
obj = smps.GenericParticleSizer(
    data=df2.copy(),
    bins=bins,
    fmt='dn',
    dp_units='nm',
    bin_labels=binlabels
)

### Load Nephelometer 1 min data

In [5]:
# Load the data used for the nephelometer PM1 channel, index it by
# `timestamp`, keep only un-flagged rows (flag == 0) and average onto a
# 6-minute time base.
# NOTE(review): this reads the same file (EST_Roof_Raw.csv) and applies the
# same steps as the `df1` cell -- confirm this is the intended source.
df3 = (
    dd.read_csv(
        folder + 'EST_Roof_Raw.csv',
        parse_dates=['timestamp', 'timestamp_local'],
    )
    .compute()
    .set_index("timestamp")
    .query("flag == 0")
    .resample("6min")
    .mean()
)

# Express the index in US/Eastern (tz_convert handles daylight-saving
# transitions), then move every stamp forward by one hour.
# NOTE(review): the reason for the extra +1 h offset is not documented in this
# notebook -- confirm it is still intended.
df3.index = df3.index.tz_convert('US/Eastern') + pd.Timedelta(hours=1)

# Keep only data between start and stop
df3 = df3[start:stop]

In [14]:
# Gather the SMPS summary statistics next to the three PM1 estimates
# (SMPS-integrated mass, MODULAIR-PM reported `pm1`, and `pm1_env`).
summary_cols = ["Median (nm)", "Mean (nm)", "Mode (nm)", "Geo. Mean (nm)"]
tmp = obj.data[summary_cols].copy()
tmp["PM1_SMPS"] = obj.integrate(dmin=0, dmax=1., weight='mass', rho=1.65)
tmp["PM1_modpm"] = df['pm1']
tmp["PM1_neph"] = df3['pm1_env']

# Keep only complete rows whose SMPS-derived PM1 is below 1: rows failing the
# condition are blanked by `where` and then removed, together with any row that
# has a missing value, by `dropna`.
msk = tmp.where(tmp["PM1_SMPS"] < 1).dropna()

# Standard deviation of each sensor over the retained rows (computed once).
spread = msk.std()
pms_std = spread['PM1_neph']
modpm_std = spread['PM1_modpm']

# Regression slopes obtained externally (from Igor). An in-notebook
# alternative -- a statsmodels OLS of each sensor against PM1_SMPS -- was
# sketched here previously but left disabled.
pms_slope = 0.86377
modpm_slope = 0.92358

# Limit of detection: three standard deviations divided by the slope.
pms_LOD = 3 * pms_std / pms_slope
modpm_LOD = 3 * modpm_std / modpm_slope

modpm_LOD

1.0812588410038795

In [None]:
# Recorded limit-of-detection results from the cell above:
# pms_LOD   = 0.7619244072194026
# modpm_LOD = 1.0812588410038795