# Data Exploration

### Visualization of relationships between variables 

To Do list:
- .csv for February train 2 pressure data
- .csv for August train 1 flow, pressure, and power data
- Add streamlit and plotly to the flex_desal environment
- Add drop-downs for dates and variables that create a plot and display the average value of the selected variables over the chosen time period


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [None]:
# Load Data
# Both CSV files are read from the current working directory; each one
# becomes its own DataFrame.
flow_csv = "WRD_flow_data_feb_2021.csv"
power_csv = "WRD_power_data_feb_2021.csv"
Flow_data = pd.read_csv(flow_csv)
Power_data = pd.read_csv(power_csv)

In [None]:
# List every column name that can be chosen as an x or y series below.
divider = '-' * 30
print(divider)
print("Valid X and Y data value Choices")
print(divider)

# Same banner + column listing for each source table.
for heading, frame in (("Flow Data", Flow_data), ("Power Data", Power_data)):
    print(divider)
    print(heading)
    print(divider)
    for col in frame.columns:
        print(col)

# Merge Data into one dataframe for easy plotting
# No `on=` is given, so pandas joins on every column name the two frames
# share; the outer join keeps rows that appear in only one of them.
Data = Flow_data.merge(Power_data, how='outer')

# Plotting

In [None]:
# Plot configuration (user input): which columns to plot and the title.
# The title is also used as the file name when the figure is exported.
plot_title = "Total Permeate Flow vs. Total UV Flow"
y_data_name_list = ["UV_gpm"]  # one plotted series per entry
x_data_name = "RO_total_permeate_flow_gpm"

### Adding Derived Quantities to plot

In [None]:
# Combined flowrate to both UV units, converted from million gallons per day
# (the "_mgd" columns) to gallons per minute.
GALLONS_PER_MILLION_GALLONS = 1e6
HOURS_PER_DAY = 24
MINUTES_PER_HOUR = 60

uv_total_mgd = Data["UV1_mgd"] + Data["UV2_mgd"]
Data["UV_gpm"] = (
    uv_total_mgd * GALLONS_PER_MILLION_GALLONS / HOURS_PER_DAY / MINUTES_PER_HOUR
)

In [None]:
# Create Plot
# Start a fresh figure so this plot never draws on top of a figure left open
# by earlier code (the later plotting cells in this notebook do the same).
figure, ax = plt.subplots()
x_data = Data[x_data_name]
for y_data_name in y_data_name_list:
    y_data = Data[y_data_name]
    ax.plot(x_data, y_data, 'o', label=y_data_name)
ax.set_title(plot_title)
ax.set_xlabel(x_data_name)
ax.set_ylabel("UV_total_flow_gpm")

# Reference line y = x: points lying on it have equal x and y values.
x = np.linspace(0, 10000)
ax.plot(x, x, "-k", label="y=x")
# Adjust Axes as needed
# ax.set_xlim(1400, 2700)
# ax.set_ylim(0, 1)
ax.legend(loc='upper left')
# Export Figure (written to the working directory, named after the title)
figure.savefig(plot_title + ".png")


## Next plot: Specific Energy Consumption (SEC) of one RO train

In [None]:
# Specific Energy of RO

# User input to define x and y data
plot_title = "SEC vs. Permeate Flow - Train 1"
x_data_name = "RO_train1_permeate_flow_gpm"
y_data_name_list = ["SEC_kWh/m3"]

# Train 1 permeate flow: sum of the s1, s2 and s3 permeate flow columns.
permeate_s1 = Data['RO_train1_s1_permeate_flow_gpm']
permeate_s2 = Data['RO_train1_s2_permeate_flow_gpm']
permeate_s3 = Data['RO_train1_s3_permeate_flow_gpm']
Data['RO_train1_permeate_flow_gpm'] = permeate_s1 + permeate_s2 + permeate_s3

# Power from the three pumps / volumetric flowrate of the permeate
pump_power_kW = (
    Data["PRO_feed_pump1_kW"]
    + Data["PRO_interstage_pump1_kW"]
    + Data["TSRO_feed_pump1_kW"]
)
Data["SEC_kW/gpm"] = pump_power_kW / Data['RO_train1_permeate_flow_gpm']

# Unit conversion kW/gpm -> kWh/m3: multiply by gallons per cubic metre
# (rounded to 264 here) and divide by minutes per hour.
GALLONS_PER_M3 = 264
MINUTES_PER_HOUR = 60
Data["SEC_kWh/m3"] = Data["SEC_kW/gpm"] * GALLONS_PER_M3 / MINUTES_PER_HOUR

In [None]:
# Create Plot
# Start a fresh figure so this plot never draws on top of a figure left open
# by earlier code (the later plotting cells in this notebook do the same).
figure, ax = plt.subplots()
x_data = Data[x_data_name]
for y_data_name in y_data_name_list:
    y_data = Data[y_data_name]
    ax.plot(x_data, y_data, 'o', label=y_data_name)
ax.set_title(plot_title)
ax.set_xlabel(x_data_name)
ax.set_ylabel("Specific Energy Consumption (kWh/m^3)")

# Adjust Axes as needed
ax.set_xlim(1400, 2700)
ax.set_ylim(0, 1)
ax.legend(loc='upper left')
# Export Figure (disabled; uncomment to write "<plot_title>.png")
# figure.savefig(plot_title + ".png")


### Feed flow over time

In [None]:
# Plot configuration (user input): which columns to plot and the title.
# The title is also used as the file name when the figure is exported.
plot_title = "Plant Feed vs. Time"
y_data_name_list = ["RO_total_feed_flow_gpm"]  # one plotted series per entry
x_data_name = "Date Time"

# No derived quantities

In [None]:
# Create Plot
# The x axis is the row position (0, 1, 2, ...), not the values stored in the
# x_data_name column; that column is only used for its length.
x_data = np.arange(0, len(Data[x_data_name]))
figure, ax = plt.subplots()
for y_data_name in y_data_name_list:
    y_data = Data[y_data_name]
    ax.plot(x_data, y_data, 'o', label=y_data_name)
ax.set_title(plot_title)
ax.set_xlabel("Minute of Feb. 2021")
# ax.set_xticklabels([])
# figure.autofmt_xdate()
ax.set_ylabel(y_data_name_list[0])
# Adjust Axes as needed
# ax.set_xlim(1400, 2700)
# ax.set_ylim(0, 1)
# ax.legend(loc='upper left')
# Export Figure (written to the working directory, named after the title)
figure.savefig(plot_title + ".png")


### Total Power vs. Flowrate

In [None]:
# Plot configuration (user input): which columns to plot and the title.
# The title is also used as the file name when the figure is exported.
x_data_name = "RO_total_feed_flow_gpm"
y_data_name_list = ["plant_power_demand_total_kW"]  # one series per entry
plot_title = "Plant Power vs. Feed Flowrate"

# No derived quantities

In [None]:
# Create Plot
# One scatter series per name in y_data_name_list, all against the same x.
figure, ax = plt.subplots()
x_data = Data[x_data_name]
for y_data_name in y_data_name_list:
    y_data = Data[y_data_name]
    ax.plot(x_data, y_data, 'o', label=y_data_name)
ax.set_title(plot_title)
ax.set_xlabel(x_data_name)
ax.set_ylabel(y_data_name_list[0])  # y label comes from the first series
# Adjust Axes as needed
# ax.set_xlim(1400, 2700)
# ax.set_ylim(0, 1)
# ax.legend(loc='upper left')
# Export Figure (written to the working directory, named after the title)
figure.savefig(plot_title + ".png")

### Pump Efficiency

In [None]:
# Plot configuration (user input): which columns to plot and the title.
# NOTE(review): the title says "Efficiency vs. Feed Flowrate", but x is the
# efficiency column and y is total plant power -- confirm the intended axes.
# NOTE(review): "effeciency" is presumably spelled this way to match the CSV
# header -- verify against the data file before correcting it.
x_data_name = "PRO_feed_pump_effeciency_train1"
y_data_name_list = ["plant_power_demand_total_kW"]
plot_title = "PRO Feed Pump Efficiency vs. Feed Flowrate"

# No derived quantities