In [1]:
import os

from functions import *

In [2]:
# Load the notebook parameters; the call also echoes them (see the "parameters:"
# listing in this cell's output).
params = read_parameters()
# Index of the raw UF file to process; passed to get_input_file() in the next cell.
file_idx = params['file_idx']

parameters:
x_axis: time [m]
file_idx_uppaal: 0
file_idx: 0
tmp_idx: 0
log: True
reset_columns_when_OFF: True
drop_outliers: True
plot_scatterplot_matrix: False
use_default_arima_params: True
default_arima_params: [1, 1, 0]
include_arima_simulations_in_analysis: True
include_outliers: True
mark_outliers: True


In [3]:
# Resolve the raw input file selected by `file_idx` (log=True makes the helper
# list the candidate files and the chosen one in the cell output).
in_folder = PATH_SENSORS_DATA_RAW_UF
cur_file, file_path = get_input_file(in_folder=in_folder, in_file_idx=file_idx, log=True)

# The extended dataset keeps the input file name and goes to the v1 output folder.
# os.path.join inserts the separator only when it is missing, so the result no
# longer depends on the folder constant ending with "/".
out_folder = PATH_SENSORS_DATA_EXT_UF_V1
out_file_path = os.path.join(out_folder, cur_file)
print(f"\noutput file:\n{out_file_path}")

file list:
0 /app/data/from_sensors/0_raw/UF/2024-07-11 tap water - reflux valve closed.csv
1 /app/data/from_sensors/0_raw/UF/2024-07-12 tap water - reflux valve open.csv
2 /app/data/from_sensors/0_raw/UF/2024-07-15 wastewater 0.500 gL - reflux valve open.csv
3 /app/data/from_sensors/0_raw/UF/2024-07-16 wastewater 0.125 gL - reflux valve open.csv
4 /app/data/from_sensors/0_raw/UF/2024-07-16 wastewater 0.250 gL - reflux valve open.csv

input file:
/app/data/from_sensors/0_raw/UF/2024-07-11 tap water - reflux valve closed.csv

output file:
/app/data/from_sensors/1_extended/UF/v1/2024-07-11 tap water - reflux valve closed.csv


In [4]:
# Fetch the metadata of the experiment matching the selected file, then the
# metadata of the membrane used in that experiment (both are project helpers).
# Later cells read single values from them via ['<column>'].values[0].
cur_experiment = get_experiment_data(cur_file)
cur_membrane   = get_membrane_data(cur_experiment)

In [5]:
# Load the raw sensor log and rename its columns through the UF_COLUMNS mapping.
df = pd.read_csv(file_path).rename(columns=UF_COLUMNS)

# Merge the separate date and time text columns into a single timestamp.
df['datetime'] = pd.to_datetime(df["date"] + " " + df["time"], format='%Y/%m/%d %H:%M:%S')

# Seconds elapsed since the previous sample. Gaps of 58-62 s are snapped to
# exactly 60 s, which removes 1-2 s logging errors.
elapsed_s = df['datetime'].diff().dt.total_seconds()
df['time span [s]'] = elapsed_s.mask(elapsed_s.between(58, 62), 60)
# The first sample has no predecessor (diff yields NaN): its span is set to 0.
df.loc[0, 'time span [s]'] = 0.0

# Calendar date of the first sample, in ISO format.
DATE = df.loc[0, 'datetime'].date().isoformat()

# Reorder columns one at a time; each call starts from the column order left by
# the previous one. change_column_index is a project helper (presumably it
# returns the column list with the named column moved to the given position).
for col_name, new_position in (('TMP [bar]', 7), ('datetime', 0), ('time span [s]', 1)):
    df = df[change_column_index(df.columns.tolist(), col_name, new_position)]

# Tank volume in litres is currently disabled:
# df['tank liters [L]'] = df['tank liters [%]'] * FEED_TANK_CAPACITY_LITERS / 100
# df = df[change_column_index(df.columns.tolist(), 'tank liters [L]', 11)]

# Apply the project's unit conversion, then discard the raw time fields that
# are now redundant with 'datetime'.
df = change_unit_measure(df)
df = df.drop(columns=['date', 'time', 'millisecond [ms]'])

In [6]:
# Store the experiment's feed concentration (first entry of the metadata column)
# in a new column of the dataset.
df['initial feed concentration [g/L]'] = cur_experiment['feed concentration g/L'].values[0]

In [7]:
# Total filtration area = area of one membrane * number of membranes in use.
num_membrane_used = cur_experiment['number of membranes used'].values[0]
membrane_area = cur_membrane['area [m2]'].values[0]
A = membrane_area * num_membrane_used # m^2
# Disabled debug print (NOTE(review): `membrane_model` is not defined in the visible cells):
# print(f"membrane: {cur_membrane['manufacturer'].values[0]}-{membrane_model}, number of membranes used: {num_membrane_used}, total area: {A} [m^2]")
# Record the total area in the dataset; `A` is reused below to derive the flux.
df['area tot [m^2]'] = A

In [8]:
# Permeate viscosity, evaluated row by row at the measured temperature and at
# the permeate pressure plus PRS_ATM_kpa.
abs_pressure_col = 'prs permeate + ATM [Pa]'
viscosity_col = 'viscosity permeate [Pa s]'

# Scratch column: summed pressure converted from kPa to Pa.
df[abs_pressure_col] = 1000 * (PRS_ATM_kpa + df['prs permeate [kPa]'])


def _permeate_viscosity(row):
    """Return calc_viscosity() for one row's temperature and summed pressure."""
    return calc_viscosity(row['temperature [°C]'], pressure_Pa=row[abs_pressure_col], element='Water')


df[viscosity_col] = df.apply(_permeate_viscosity, axis=1)
df = df[change_column_index(df.columns.tolist(), viscosity_col, 8)]
# The scratch pressure column was only needed for the viscosity call.
df = df.drop(columns=[abs_pressure_col])

In [9]:
# Permeate flux from permeate flow and total membrane area:
#   flow [L/h] = area [m^2] * flux [L/(m^2 h)]   ==>   flux = flow / area
df['flux [L/m^2h]'] = df['flow permeate [L/h]'].div(A)

In [10]:
# The input pressure is a hard-coded constant of 10 kPa (see the TODO below).
# Hydrostatic estimate given for reference:
#   PRS_IN = density [kg/m^3] * gravity [m/s^2] * height [m]
#          = 997 * 9.81 * 1.0 = 9780.57 Pa = 9.78 kPa
df['prs input [kPa]'] = 10.0 #TODO add this data into machines.xlsx
# Reposition the new column via the project helper (target index 2).
df = df[change_column_index(df.columns.tolist(), 'prs input [kPa]', 2)]

In [11]:
# Total resistance (res_tot = res_mbn + res_conc_pol + res_fouling), from
#   flux = TMP / (viscosity * res_tot)   ==>   res_tot = TMP / (viscosity * flux)
# Units: [1/m] = [kPa] / ([m^3 / m^2 s] * [kPa s])
# NOTE(review): rows where the flux is 0 divide by zero here — confirm how
# downstream steps treat the resulting values.
flux_m_per_s = df['flux [L/m^2h]'] / (1000.0 * 3600.0)        # L/(m^2 h) -> m^3/(m^2 s)
viscosity_kPa_s = df['viscosity permeate [Pa s]'] / 1000.0    # Pa s -> kPa s
df['res tot [1/m]'] = df['TMP [kPa]'] / (flux_m_per_s * viscosity_kPa_s)

In [12]:
def _permeate_viscosity_at_20C(row):
    """Return calc_viscosity() at 20 °C for one row's permeate pressure plus PRS_ATM_kpa, in Pa."""
    return calc_viscosity(20, pressure_Pa=1000*(PRS_ATM_kpa+row['prs permeate [kPa]']), element='Water')


# Reference viscosity at 20 °C, at the same pressure as the measurement.
df['viscosity permeate at 20°C [Pa s]'] = df.apply(_permeate_viscosity_at_20C, axis=1)

# Temperature-corrected flux: flux * viscosity(T) / viscosity(20 °C).
measured_viscosity = df['viscosity permeate [Pa s]']
reference_viscosity = df['viscosity permeate at 20°C [Pa s]']
df['flux at 20° [L/m^2h]'] = df['flux [L/m^2h]'].mul(measured_viscosity).div(reference_viscosity)

In [13]:
# TMP [kPa] at or above which the machine is considered ON. Named so the
# threshold is stated once instead of being repeated as a bare literal.
TMP_ON_THRESHOLD_KPA = 5

# add bool flag when machine is ON
df['is_ON'] = df['TMP [kPa]'] >= TMP_ON_THRESHOLD_KPA
# (disabled) reset to zero flux, flow, and resistance columns when the machine
# is OFF (TMP < TMP_ON_THRESHOLD_KPA)
# if reset_columns_when_OFF:
#   for c in df.columns :
#         if re.match("^(flow)|(flux)|(res).*", c) :
#             #print(f" - {c}")
#           df[c] = df.apply(reset_cols_if_is_OFF, axis=1, args=('is_ON', c))

In [14]:
# Elapsed time in minutes: running total of the per-sample spans (seconds) / 60.
df['time [m]'] = df['time span [s]'].cumsum().div(60)

In [15]:
# Normalize the 20 °C flux to the constant pressure CONST_TMP by scaling it
# with the ratio CONST_TMP / TMP.
# NOTE(review): rows with TMP == 0 divide by zero here — confirm how downstream
# steps treat the resulting values.
normalized_flux_col = f'flux at 20° TMP={CONST_TMP} [L/m^2h]'
tmp_scale = CONST_TMP / df['TMP [kPa]']
df[normalized_flux_col] = df['flux at 20° [L/m^2h]'] * tmp_scale

In [16]:
# TODO: disabled draft — the `if False` guard keeps this block from running.
# It numbers the rows into 'TMP group' values: the first row gets group 0 and
# the counter grows by one on every later row whose 'changed TMP' is non-zero.
# NOTE(review): no visible cell creates the 'changed TMP' column; it has to
# exist before this block can be enabled.
if False :
    df.loc[0, 'TMP group'] = 0
    for i in range(1, len(df)) :
        # carry the previous row's group forward, bumping it when TMP changed on this row
        cur_group = df.loc[i-1, 'TMP group'] + (0 if df.loc[i, 'changed TMP'] == 0 else 1)
        df.loc[i, 'TMP group'] = cur_group
    print('added TMP groups')

In [17]:
# start from t=1
# df['time [m]'] = df['time [m]'] - df.loc[0, 'time [m]'] + 1

In [18]:
# Export the extended dataset; the row index is written as a column named 'index'.
# to_csv does not create missing folders, so make sure the target directory
# exists first (skipped when the path has no directory component).
out_dir = os.path.dirname(out_file_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
df.to_csv(out_file_path,index_label='index')

In [19]:
#############################
# EXPORT
# print(f"extended dataset exported as csv: {out_file}")
# df.to_csv(out_file, index=False)