# GOOGLE COLAB VERSION

# IMPORT DATA

In [None]:
#@title Input field test information and click the play button {display-mode: "form"}

#Colab form cell: every "#@param" annotation below turns the assignment on its line
#into an editable form field. All values are plain strings.

#A short nickname for the field test (also used to build the trimmed file name)
fieldTestLabel = "AKA Ocean City" #@param {type: "string"}

#Name of the location of the field test
fieldTestLocation = "Princess Royale Oceanfront Beach" #@param {type: "string"}

#The date of the field test in ISO format (YYYY-MM-DD); parsed with date.fromisoformat further down
fieldTestDate = "2022-10-11" #@param {type: "date"}

#A short description of the overall field test purpose
fieldTestDescription = "Flying two kestrels together" #@param {type: "string"}

#Initials for members of the field test
fieldTestTeam = "Rich, Geoff, DJ" #@param {type: "string"}

#Initials of person/people analyzing the data
dataAnalyst = "DJ" #@param {type: "string"}

#Name of the Kestrel Device
deviceName = "Drexel Kestrel 5500 A" #@param {type: "string"}

#Nickname to identify the device on the plots
deviceNickName = "Aeropod" #@param {type: "string"}

#Name of folder to store the trimmed data (created in the current working directory)
trimmedDataFolderName = "trimmed_data" #@param {type: "string"}

#Name of folder to store the preprocessed data (created in the current working directory)
preprocessedDataFolderName = "preprocessed_data" #@param {type: "string"}

#Name of folder to store the generated plots (created in the current working directory)
plotsFolderName = "plots" #@param {type: "string"}

###########################################################################################
import time as tm
from datetime import date
import os
from pathlib import Path

#Running log of every change made to the data; written to the Excel export later
change_history = []

#Turn the ISO date string from the form into a long, human-readable date string.
#If the string cannot be parsed it is left untouched and the parse error is shown.
try:
    fieldTestDate = date.fromisoformat(fieldTestDate).strftime("%A, %B %d, %Y")
except ValueError as parse_error:
    print(parse_error)
except Exception:
    print("Something went wrong. Restart the notebook")

#Collect the field test parameters under their display labels
fieldTestParameters = dict([
    ("Field Test Label", fieldTestLabel),
    ("Field Test Location", fieldTestLocation),
    ("Field Test Date", fieldTestDate),
    ("Field Test Description", fieldTestDescription),
    ("Field Test Team", fieldTestTeam),
    ("Data Analyst", dataAnalyst),
    ("Device Name", deviceName),
    ("Device Nickname", deviceNickName),
])

#Echo the parameters so the user can confirm them, and record each one in the change history
divider = "-"*50
print("PLEASE CONFIRM THE FOLLOWING FIELD TEST PARAMETERS")
print(divider)

for param_label, param_value in fieldTestParameters.items():
    param_entry = f"{param_label}: {param_value}"
    print(param_entry)
    change_history.append(param_entry)

print(divider)

#Get the directory that the notebook is currently in
cwd = os.getcwd()


def _create_output_folder(folder_name):
    """Create ``folder_name`` relative to the current working directory.

    A successful creation is printed and recorded in ``change_history``.
    An already existing folder is reported and left as it is. Any other
    failure is reported together with the underlying error instead of being
    hidden behind a generic message.

    Parameters:
        folder_name: name of the folder to create (string from the form above)

    Returns:
        The ``Path`` of the folder, or ``None`` if the path could not be built.
    """
    folder = None
    try:
        folder = Path(folder_name)
        folder.mkdir()
    except FileExistsError:
        if folder.is_dir():
            print(f"The folder, {folder_name}, already exists")
        else:
            #mkdir() also raises FileExistsError when a regular file has this name
            print(f"A file named {folder_name} already exists. Choose a different folder name")
    except Exception as e:
        print("Something went wrong. Restart the notebook")
        print(e)
    else:
        change_history.append(f"{folder_name} folder was created in {cwd}")
        print(f"{folder_name} has been created in {cwd}")
    return folder


#Create trimmed data directory
trimmed_data_dir = _create_output_folder(trimmedDataFolderName)

#Create preprocessed data directory
preprocessed_data_dir = _create_output_folder(preprocessedDataFolderName)

#Create plots directory
plots_dir = _create_output_folder(plotsFolderName)



#Parameters to tell user to run certain cells before others
generate_plots = False
customize_plots = False
time_series_superimposed_step = False
customize_plots_message = "Please FIRST run the Customize Plots cell before generating plots"

In [None]:
#@title Click the play button to upload a file {display-mode: "form"}
from google.colab import files

#Opens the Colab upload dialog; the result is keyed by the uploaded file names
uploaded = files.upload()

#Guard against an empty result (e.g. the upload dialog was dismissed) so the user
#gets an actionable message instead of an IndexError
if not uploaded:
    raise RuntimeError("No file was uploaded. Run this cell again and choose a data file")

#Only the first uploaded file is analyzed
filename = next(iter(uploaded))

if len(uploaded) > 1:
    print(f"{len(uploaded)} files were uploaded; only \"{filename}\" will be analyzed")


In [None]:
#@title Click the play button to open the file {display-mode: "form"}
import pandas as pd
import ipywidgets as widgets
from ipywidgets import Layout, Button, Box


#Maps a unit name to a labelled quantity string such as "Wind Speed (m/s)".
#NOTE(review): not referenced in the visible part of the notebook; presumably used
#as axis labels by the later plotting cells -- confirm.
plot_units = {
    "Meters per second": "Wind Speed (m/s)",
    "Feet per second": "Wind Speed (ft/s)",
    "Miles per hour": "Wind Speed (mph)",
    "Knots": "Wind Speed (kt)",
    "Beaufort": "Wind Speed (Bft)",
    "Meters": "AOG (Meters)",
    "Feet": "AOG (Feet)",
    "Celsius": "Temp (Celsius)",
    "Fahrenheit": "Temp (Fahrenheit)",
}

#Used to preselect the fields in the field selection widget
#Keys are the header names as they appear in the data file; values are the
#"<header> (<unit>)" column labels the dataframe gets after the units are appended.
#The key order is also the standardized column order used when the fields are confirmed.
standard_fields = {
    "Time": "Time (yyyy-MM-dd hh:mm:ss)",
    "Altitude": "Altitude (Meters)",
    "Wind Speed": "Wind Speed (m/s)",
    "Mag. Dir.": "Mag. Dir. (Degrees)",        
    "Temp": "Temp (Celsius)",
    "Rel. Hum.": "Rel. Hum. (%)",
    #"Baro.": "Baro. (mb)",
}

#Maps a field name to a marker name.
#NOTE(review): not referenced in the visible part of the notebook; the values look
#like Bokeh marker types used by later plotting cells -- confirm.
glyphs = {
    "Altitude": "square",
    "Temp" : "triangle",
    "Wet Bulb Temp.": "hex",
    "Wind Speed": "diamond_cross",
    "Rel. Hum.": "circle",
    "Mag. Dir.": "cross",
    "Baro.": "star"
}

#Check if there's anything wrong with the file in the first place
#file_loaded becomes True only after the prologue, headers, units and measurements
#have all been read, so nothing below is displayed from a half-read file
file_loaded = False

try:
    print(f"Opening \"{filename}\"...")

    f = open(filename, "r")
    print("File opened successfully")

except Exception as e:
    print("Something went wrong")
    print(e)

else:
    #READ PROLOGUE
    ####################################################################################

    prologue_length = 8 #Expected number of prologue lines; only used for the sanity message below

    ####################################################################################
    try:
        #Iterate over prologue: it ends at the first blank line, which is kept in the list
        print("\nReading prologue")
        pl = 0
        prologue = []

        while True:
            l = f.readline()
            if l == "":
                #readline() returns "" at end of file; without this check a file
                #without a blank line would keep this loop running forever
                raise ValueError("Reached the end of the file before finding the blank line that ends the prologue")
            prologue.append(l)
            if l == "\n":
                break
            pl += 1
        print("Prologue read successfully")

        if pl != prologue_length:
            print(f"Prologue was {pl} lines instead of {prologue_length}")
        else:
            print(f"Prologue is {prologue_length} lines")

        #READ HEADERS, UNITS, AND DATA
        ################################################################################

        #Obtaining headers and units
        #The last comma-separated token of each line is discarded
        #NOTE(review): presumably each line ends with a trailing comma so that token is
        #only the newline -- confirm against the export format
        print("\nObtaining headers")
        headers = f.readline().split(",")
        headers.pop(-1)
        print("Headers obtained successfully")
        print("\nObtaining units")

        units = f.readline().split(",")
        units.pop(-1)
        print("Units obtained successfully")

        #Store the data values into a pandas dataframe
        print("\nObtaining measurements")

        raw_df = pd.read_csv(f, names=headers)

        #TODO: keep only the standardized columns and convert non-standard units

        #columns:   header -> "header (unit)" label used in the dataframe
        #raw_units: header -> unit string
        columns = {}
        raw_units = {}
        for header, unit in zip(headers, units):
            columns[header] = header + " (" + unit + ")"
            raw_units[header] = unit
        raw_df = raw_df.rename(columns=columns)

        df_rows = raw_df.shape[0]
        print("Data obtained successfully")

        print(f"\n{df_rows} rows in file")

        file_loaded = True

    except Exception as e:
        print("Something went wrong")
        print(e)

    finally:
        #Always release the file handle, including when reading failed part-way
        f.close()

    if file_loaded:
        #Variables to prevent cells from accidentally being run again, and potentially messing up the workflow
        timedeltas_read = False

        display(raw_df)

        field_select = widgets.SelectMultiple(
            options=headers[1:], #This indexing assumes that Time will always be in the first column
            value=[field for field in headers if (field in standard_fields and field != "Time")],
            rows=len(headers),
            #description='Select the fields to analyze',
            disabled=False
        )
        print(f"Ctrl+click (command+click for Mac) to select the fields to analyze")
        display(field_select)

In [None]:
#@title Click the play button to confirm field selection {display-mode: "form"}

#This step arranges the selected columns in the standardized order:
#  1. "Time", which is always kept (it is not offered in the selection widget)
#  2. the selected standard fields, in the order of standard_fields
#  3. the selected non-standard fields, in the order they appear in the selection

selected_fields = list(field_select.value)
fields_not_selected = [column for column in field_select.options if column not in selected_fields]

#Build the list by filtering instead of removing items from a list while iterating
#over it: removing during iteration skips the element after each removal, which left
#some unselected standard fields in place. Filtering on the selection also drops
#standard fields that do not exist in this file, which used to raise a KeyError below.
fields = [field for field in standard_fields if field == "Time" or field in selected_fields]

#Appends the non-standard columns
for field in selected_fields:
    if field != "Time" and field not in fields:
        fields.append(field)

#Units of the kept fields, keyed by the plain header name
selected_units = {key: raw_units[key] for key in fields}

#Translate header names into the "header (unit)" labels used by the dataframe
fields = [columns[field] for field in fields]

#Work on an independent copy so the later in-place edits never touch raw_df and do
#not trigger pandas' SettingWithCopyWarning
df = raw_df[fields].copy()

###############################################################################

#Automatic error checking######################################################

#LOOK FOR ANY CORRUPT FIELDS, NAN, ETC
#ERROR CHECKING ON UNITS

#Boolean masks with the same shape as df
asterisks = df.isin(["***"])
nulls = df.isnull()

#Columns containing at least one *** entry / at least one NaN entry
invalidcols = [col for col in asterisks.columns if asterisks[col].values.any()]
nullcols = [col for col in nulls.columns if nulls[col].values.any()]

asterisks_bool = len(invalidcols) != 0
nulls_bool = len(nullcols) != 0

#Row positions that contain a *** or NaN entry, found in one vectorized pass
#instead of inspecting every row with iloc
bad_rows = (asterisks | nulls).any(axis=1)
indices = {position for position, is_bad in enumerate(bad_rows) if is_bad}

if len(indices) == 0:
    print("No errors have been detected")
    print("Proceed to TRIM DATA step")

if asterisks_bool:
    print("*** entries have been detected in the following columns")
    print(*invalidcols, sep="\n")

if nulls_bool:
    print("NaN entries have been detected in the following columns")
    print(*nullcols, sep = "\n")

#check nulls reference https://www.geeksforgeeks.org/check-for-nan-in-pandas-dataframe/

if asterisks_bool or nulls_bool:

    #Replace the *** entries with a value
    error_value = 500 #Arbitrary number chosen after HTTP 500 Error code

    ##################################################################################

    #where() keeps the entries for which the mask is True and replaces the others
    asterisks = df != "***"
    df = df.where(asterisks, error_value)
    for col in invalidcols:
        change_history.append(f"\nAll *** values in the {col} column were replaced with a {error_value}\n")
        print(change_history[-1])


    #Convert header data types to expected data types
    #The first column is the time column
    df[df.columns[0]] = pd.to_datetime(df[df.columns[0]])
    change_history.append(f"The {df.columns[0]} column was converted to type datetime\n")
    print(f"The {df.columns[0]} column was converted to type datetime\n")

    expected_dtypes = {
        "Crosswind (m/s)": "float",
        "Headwind (m/s)": "float",
        "Mag. Dir. (Degrees)": "int",
        "True Dir. (Degrees)": "int"
    }

    all_converted = True
    for col in expected_dtypes:
        if col in df:
            try:
                df[col] = df[col].astype(expected_dtypes[col])
            except (ValueError, TypeError) as e:
                #e.g. a column that still holds NaN cannot be cast to int; report it
                #and keep the column as it is instead of aborting the whole cell
                all_converted = False
                print(f"The {col} column could not be converted to type {expected_dtypes[col]}: {e}\n")
                continue
            change_history.append(f"The {col} column was converted to type {expected_dtypes[col]}\n")
            print(change_history[-1])

    #Length of the change history at this point; the save cell truncates back to a
    #bound of this name before appending its own entries
    ch_bound_1 = len(change_history)

    if all_converted:
        print("Data types have been successfully changed")
    else:
        print("Some columns could not be converted; review the messages above")
    print("\n")
    print("Proceed to TRIM DATA step")

df

***END OF IMPORT DATA STAGE***
***

# TRIM DATA

## Use interactive plots to help identify where to trim

1. Execute the cell below
2. The generated charts are intended to help identify the indices corresponding to your field test/data of interest
    - Example: a field test on October 11, 2022 from 4PM to 5PM corresponds to indices 529-1370
3. Use the first index as `start_index` and the last index as `end_index`
    - Example: 
    `start_index` = 529, `end_index` = 1370

In [None]:
#@title Click the play button to use interactive plots to identify field test {display-mode: "form"}
#Calculating Time Deltas###########################################################
if not timedeltas_read:
    #Name of the time column as a string.
    #NOTE: this rebinds `date`, which the first cell imported as the datetime.date
    #class; the trimming cell indexes df with this name, so it is kept as is.
    date = columns["Time"]

    try:
        #Also here if the error checking steps were skipped
        if df[date].dtype != "<M8[ns]":
            #Replace the whole column instead of assigning through df.loc[:, date]:
            #pandas >= 2.0 writes .loc[:, col] values into the existing object column
            #in place, so the column would never become datetime64
            df[date] = pd.to_datetime(df[date])
            change_history.append(f"The \"{date}\" column was converted to type datetime\n")
            print(f"The \"{date}\" column was converted to type datetime\n")

        #Total number of data entry rows
        rows = len(df)

        #A delta needs two consecutive rows. Failing early also replaces the former
        #endless loop on an empty dataframe and the min() error on a single row.
        if rows < 2:
            raise ValueError(f"At least two rows are needed to calculate time deltas, but the data has {rows}")

        #Time delta between every row and the row after it (rows-1 values)
        deltas = [df.loc[i+1, date] - df.loc[i, date] for i in range(rows - 1)]
        td_min = min(deltas)
        td_max = max(deltas)
        deltas.append("LAST TIME ENTRY") #Helper text: the last row has no following row

        #Convert list to series
        deltas = pd.Series(deltas, name="Time Delta")

        #Store the sampling interval of 2s, or whatever sampling interval was chosen (which should be the most common)
        mode = deltas.mode()

        #Create separate series of time deltas in seconds
        td_seconds = []
        for td in deltas:
            if isinstance(td, str): #Necessary for entries with helper text
                td_seconds.append(td)
            else:
                td_seconds.append(td.total_seconds())
        td_seconds = pd.Series(td_seconds, name="Time Delta (seconds)")

        #Initialize dataframe with datetime columns
        times = pd.DataFrame(df[date]).rename(columns={date:"Datetime"})

        #Create datetime + 1 and datetime - 1 series to be added into times df
        dtplusone = pd.Series(index=range(rows), name="Datetime_i+1", dtype="object")
        dtminusone = pd.Series(index=range(rows), name="Datetime_i-1", dtype="object")

        #Append necessary values
        dtplusone[0:-1] = df.loc[1:, date]
        dtplusone[rows-1] = "LAST TIMESTAMP" #Helper text---is it necessary?

        dtminusone[1:] = df.loc[:rows-2, date]
        dtminusone[0] = "FIRST TIMESTAMP" #Helper text---is it necessary?

        times = times.join([deltas, td_seconds, dtminusone, dtplusone])

        #Reorder columns to desired order
        times = times[["Datetime", "Datetime_i+1", "Time Delta", "Time Delta (seconds)", "Datetime_i-1"]]

        #Insert the delta columns right after the time column
        df.insert(loc = 1,
          column = 'Time Delta',
          value = deltas,
          allow_duplicates=True)

        df.insert(loc = 2,
          column = 'Time Delta (seconds)',
          value = td_seconds,
          allow_duplicates=True)

        print("Time Deltas have been calculated")
        print(f"Most common sampling time in datafile is {mode[0].seconds} seconds")
        display(df[df.columns[1:3]])

        #Guards against inserting the delta columns a second time when the cell is rerun
        timedeltas_read = True
    except Exception as e:
        print(e)

    ch_bound_1 = len(change_history)
else:
    print("Time Deltas have already been calculated")
    print(f"Most common sampling time in datafile is {mode[0].seconds} seconds")
    display(df[df.columns[1:3]])
    ch_bound_1 = len(change_history)

################################################################################



#Calculating time delta outliers################################################
#A time delta is an outlier when it equals none of the most common sampling
#intervals held in `mode`. The helper text of the last row never matches, so it is
#always listed.
outlier_pairs = [
    (position, delta)
    for position, delta in enumerate(deltas)
    if not (delta == mode).any()
]
outliers_index = [position for position, _ in outlier_pairs]
outliers = pd.Series([delta for _, delta in outlier_pairs], name="Time Deltas != sampling interval")

#Time deltas in seconds (helper text entries are passed through unchanged)
outliers_seconds = pd.Series(
    [delta if isinstance(delta, str) else delta.total_seconds() for delta in outliers],
    name="Time Delta (seconds)",
)

print("Time delta outliers successfully calculated")

#Initialize time delta != sampling interval comparison chart
#REARRANGE COLUMNS HERE TO DESIRED LAYOUT
columnnames = ["Datetime", "Datetime_i+1", "Time Delta", "Time Delta (Seconds)", "Datetime_i-1"]

#Positional column numbers, looked up by name so the layout above can be changed freely
td = columnnames.index("Time Delta")
tds = columnnames.index("Time Delta (Seconds)")
dt = columnnames.index("Datetime")
dtmin = columnnames.index("Datetime_i-1")
dtplus = columnnames.index("Datetime_i+1")

outliers_df = pd.DataFrame(index=outliers_index, columns=columnnames)

#Append Time Deltas
for chart_row, (delta, seconds) in enumerate(zip(outliers, outliers_seconds)):
    outliers_df.iloc[chart_row, td] = delta
    outliers_df.iloc[chart_row, tds] = seconds

#Append Datetimes: the timestamp of the outlier row and of its two neighbours
for chart_row, data_row in enumerate(outliers_index):
    is_first = data_row == 0
    #The first-row case takes precedence, as in an if/elif chain
    is_last = (not is_first) and data_row == rows-1

    outliers_df.iloc[chart_row, dt] = df.loc[data_row, date] #Datetime
    outliers_df.iloc[chart_row, dtmin] = "FIRST ENTRY" if is_first else df.loc[data_row-1, date] #Datetime_i-1
    outliers_df.iloc[chart_row, dtplus] = "NO ENTRY" if is_last else df.loc[data_row+1, date] #Datetime_i+1

print("Time delta outliers chart successfully created")
print(f"\nMost common sampling time in datafile is {mode[0].seconds} seconds")
display(outliers_df)

#################################################################################

#INTERACTIVE PLOTS FOR TRIMMING###################################################
#Just time series for faster performance
#This section sets up the Bokeh data sources and the shared formatting options
#used by the plots below.

#NOTE(review): Bokeh 3.0 renamed Panel to TabPanel and replaced the plot_width /
#plot_height figure arguments with width / height -- confirm which Bokeh version
#this notebook runs against.
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, BoxSelectTool, DataTable, TableColumn, CDSView, IndexFilter, DateFormatter, DatetimeTickFormatter, NumeralTickFormatter, CustomJS, Panel, Tabs, LinearAxis, Range1d, Paragraph, DatePicker, Div, BoxAnnotation
from bokeh.layouts import gridplot, column, row
from bokeh.io import output_notebook
output_notebook()

#Data
#Column Names
#These are the "header (unit)" labels of the standard fields.
#NOTE(review): df only contains the fields confirmed in the field selection cell, so
#the df[...] lookups below raise KeyError if Temp, Altitude or Wind Speed was deselected.
time = standard_fields["Time"]
tdseconds = "Time Delta (seconds)"
temp = standard_fields["Temp"]
alt = standard_fields["Altitude"]
windspeed = standard_fields["Wind Speed"]


#Full dataset; selections made on this source drive the JavaScript callback below
source = ColumnDataSource(data=dict(
    index=df.index, 
    datetime=df[time], 
    timedelta=df[tdseconds], 
    temp=df[temp],
    alt=df[alt],
    windspeed=df[windspeed],
    )
)


#Empty x/y sources (collected in `sources` below; not referenced again in the visible part of this cell)
tempvstime = ColumnDataSource(data=dict(x=[], y=[]))
altvstime = ColumnDataSource(data=dict(x=[], y=[]))
windspeedvstime = ColumnDataSource(data=dict(x=[], y=[]))

#Starts empty; the selection callback fills it with the selected rows of `source`
trimmedvalues = ColumnDataSource(data=dict(
    index = [],
    time = [],
    temp = [],
    alt = [],
    windspeed = [],
))

sources = [tempvstime, altvstime, windspeedvstime]

#Formatting
datefmt = DateFormatter(format="%F %I:%M:%S %p") #Format API reference: https://docs.bokeh.org/en/latest/docs/reference/models/widgets/tables.html?highlight=datatable#bokeh.models.DataTable
width = 1000
height = 300
hovercolor = "black"
barocolor = "orange"

#Hover tooltip for the two index-based plots: row index, timestamp and time delta
datetimevsindexhover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    tooltips=[
        ("Index", "$index"),
        ("Date", "@datetime{%F %I:%M:%S %p}"),
        ("Time Delta", "@{timedelta} seconds")
    ],

    formatters={
        "@datetime" : "datetime",
        #"@y1" : "numeral"
    },
    #mode = "vline"
)

#Keyword arguments shared by the two index-based figures
options = dict(x_axis_label = "Row Index", tools=[datetimevsindexhover, "pan, wheel_zoom, xwheel_pan, ywheel_pan, box_select, box_zoom, reset"], plot_width=700, plot_height=300)
#`links` is not referenced again in the visible part of this cell
links = dict(width=width, height=height, x_axis_type="datetime")
#view = CDSView(source=source, filters=[IndexFilter(x)])
#Marker size for the circle glyphs
sz = 5

#INITIALIZING PLOTS************************************************************************************************
#Two index-based plots and a table, all backed by `source`, so a selection made in
#any of them is a selection on the same data source.

#Datetime vs. Index
f1 = figure(title = "Datetime vs. Index", y_axis_label = "Date", y_axis_type="datetime", **options)
f1.line("index", "datetime", hover_color="red", source=source)
f1.circle("index", "datetime", size=sz, hover_color="red", source=source, selection_color = "firebrick",) #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/styling/plots.html#selected-and-unselected-glyphs


#Time Delta vs. Index
#Shares f1's x range so both plots pan and zoom together along the row index.
#The y axis is logarithmic, which is why zero deltas cannot be shown (see disclaimer_msg).
f2 = figure(title = "Time Delta vs. Index", y_axis_label="Time Delta (s)", y_axis_type="log", x_range=f1.x_range, **options)
#f2.yaxis.formatter = DatetimeTickFormatter(seconds=["%S"])
f2.line("index", "timedelta", hover_color="red", source=source)
f2.circle("index", "timedelta", size=sz, hover_color="red", source=source, selection_color = "firebrick")

#Columns of the data table: formatted timestamp and time delta in seconds
timecolumns = [
    TableColumn(field="datetime", title="Datetime", formatter= datefmt), #Reference: https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter
    TableColumn(field="timedelta", title="Change in time (seconds)")
]

dt1 = DataTable(background = "red", source=source, columns=timecolumns)

disclaimer_msg = Paragraph(text="""*Time Delta values equal to zero will not be plotted*""")

#TIME SERIES PLOTS*************************************************************************************************
#Plot title -> colour used for that plot's title, y axis and glyphs
timegraphs = {
    "Altitude vs. Time": "#0eade1", 
    "Wind Speed vs. Time": "#500eec", 
}

#Hover tooltip for the time series plots; the @fields are keys of `trimmedvalues`
hover_timeseries = HoverTool(
    tooltips=[
        ("Index", "@index"),
        ("Time", "@time{%F %I:%M:%S %p}"),       
        ("Altitude", "@alt"),
        ("Windspeed", "@windspeed"),
    ],

    formatters={
        "@time" : "datetime",
    },
    #mode = "vline"
)

#TIME SERIES PLOTS SEPARATE******************************************************************************
#The three sequences below are zipped with `timegraphs`, so their order must match:
#y axis label, and key of the y values in `trimmedvalues`
timeylabels = [alt, windspeed]
timeysourceskeys = ["alt", "windspeed"]
time_series_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)
#Placeholder figures; each one is replaced by the real figure inside the loop
p1, p2 = figure(), figure()
timefigures = [p1, p2]

#NOTE: the loop variable `f` rebinds the name that held the (already closed) data
#file handle in the file opening cell, and `i` is reused as well.
for f, g, l, key in zip(timefigures, timegraphs, timeylabels, timeysourceskeys):
    i = timefigures.index(f)
    #Every figure takes the x range of timefigures[0]: the placeholder on the first
    #pass, and the first real figure (which carries that same range) afterwards
    f = figure(title=g, x_range=timefigures[0].x_range, x_axis_label = "Time", y_axis_label=l, x_axis_type = "datetime", **time_series_options)
    f.title.text_color = timegraphs[g]
    f.yaxis.axis_label_text_color = timegraphs[g]
    f.yaxis.major_label_text_color = timegraphs[g]
    f.yaxis.axis_line_color = timegraphs[g]
    f.xaxis.formatter=DatetimeTickFormatter(
        hours="%I:%M:%S %p",
        minutes="%I:%M:%S %p")
    #f.background_fill_color = (204, 255, 255)
    #Swap the placeholder for the configured figure
    timefigures[i] = f
    #The glyphs read from `trimmedvalues`, so they stay empty until rows are selected
    f.line("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues)
    f.circle("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues)
    
tab1 = Panel(child=column(timefigures[0:2]), title="Time Series Plots")


#RANGE INDICATOR*********************************************************************
#Text element whose content is rewritten by the selection callback
ranges = Paragraph(text="""SELECTED INDICES: """)

#FIELD TEST INFO FOR PLOTS****************************************************************
#Static HTML summary of the field test shown next to the time series plots
fieldtestinfo = Div(text=
f"""
<p>FIELD TEST: <b>{fieldTestParameters["Field Test Label"]}</b></p>
<p>LOCATION: <b>{fieldTestParameters["Field Test Location"]}</b></p>
<p>DATE: <b>{fieldTestParameters["Field Test Date"]}</b></p>
<p>DEVICE: <b>{fieldTestParameters["Device Nickname"]}</b></p>
"""
)

#TODO DATE PICKER*************************************************************************
#Filters to first entry with selected date
#The date picker below is disabled: it sits inside a string literal and is never executed
"""
start_date = df[date].min().date()
end_date = df[date].max().date()

date_picker = DatePicker(title="Select Date of Field Test", value=start_date, min_date=start_date, max_date=end_date)"""

#PLOT GENERATION**************************************************************************************************************************************************************
#Reference for array performance https://github.com/bokeh/bokeh/blob/main/examples/interaction/js_callbacks/js_on_change.py
#Browser-side callback that runs whenever the selected indices of `source` change.
#It sorts the selected indices, rebuilds every column of `trimmedvalues` from the
#matching rows of `source`, and writes the first and last selected index into the
#`ranges` paragraph. No Python code runs when the selection changes.
source.selected.js_on_change("indices", CustomJS(args=dict(
    origin=source, 
    trimmedvalues=trimmedvalues,
    ranges=ranges
), 
code="""
    const inds = cb_obj.indices; //Gets unsorted if you do a shift click selection in datatable

    const d1 = origin.data;
    const d2 = trimmedvalues.data;

    const cols = ["temp", "alt", "windspeed"];

    inds.sort(function(a, b){return a - b});

    //To clear for every box select
    d2["time"] = [];
    d2["index"] = [];

    for (let x in cols)
    {
        d2[cols[x]] = []
    }

    //Generate the plots
    for (let i = 0; i < inds.length; i++) 
    {
        d2["time"].push(d1["datetime"][inds[i]]);
        
        d2["index"].push(inds[i]);

        for (let x in cols)
        {
            const label = cols[x]
            d2[label].push(d1[label][inds[i]]);
        } 
    }

    //Display the range selection
    ranges.text = "SELECTED INDICES: " + inds[0] + " - " + inds[inds.length-1]

    //Refresh
    trimmedvalues.change.emit()

"""
    )
)

#ORGANIZING PLOTS INTO TABS********************************************
#tab1 = Panel(child=f3, title="Temp")
#Displaying the data
#layout1: the index-based plots with the disclaimer, next to the data table
#layout2: the tabbed time series plots with the field test info
#NOTE(review): the same `ranges` object is placed in both layouts -- confirm that
#Bokeh renders a model that appears twice in one document as intended.
layout1 = row(column(children=[f1, f2, disclaimer_msg]), column(children=[dt1, ranges]))
layout2 = column(row(Tabs(tabs=[tab1]), fieldtestinfo), ranges)

show(column(layout1, layout2))

## Manually input the index range of interest (also indicated by SELECTED INDICES)

In [None]:
#@title Click play after inputting the index range {display-mode: "form"}

start_index = 0 #@param {type: "integer"}
end_index = 1767 #@param {type: "integer"}

#############################################################################

#Validate the requested range before trimming. The messages report the first and
#last VALID index: the previous wording printed RangeIndex.stop, which is one past
#the last row and therefore not a usable end index.
df_indices = df.index

if len(df_indices) == 0:
    print("There is no data to trim")

elif start_index not in df_indices:
    print(f"Specified start index, {start_index}, is not in the index range of {df_indices[0]} and {df_indices[-1]}")

elif end_index not in df_indices:
    print(f"Specified end index, {end_index}, is not in the index range of {df_indices[0]} and {df_indices[-1]}")

elif end_index < start_index:
    print(f"Specified END index, {end_index}, is less than the specified START index, {start_index}")

else:
    #Human-readable timestamps of the first and last kept row (also used by the save cell)
    trim_date_start = df.loc[start_index, date].strftime("%A, %B %d, %Y, %I:%M:%S %p")
    trim_date_end = df.loc[end_index, date].strftime("%A, %B %d, %Y, %I:%M:%S %p")
    print(f"TRIMMING FROM INDEX {start_index} to INDEX {end_index}")
    print("-"*50, trim_date_start, "to", trim_date_end,"-"*50, sep="\n")

    #Label-based slice: both start_index and end_index are included
    df_trim = df.loc[start_index:end_index]
    display(df_trim)

    #TODO PUT INTO A FUNCTION
    #Preview of the trimmed range, using the same plot settings as the interactive
    #time series plots
    trimmedvalues2 = ColumnDataSource(data=dict(
        index = df_trim.index,
        time = df_trim[date],
        alt = df_trim[alt],
        windspeed = df_trim[windspeed],
    ))

    timeylabels = [alt, windspeed]
    timeysourceskeys = ["alt", "windspeed"]
    time_series_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)

    #Placeholder figures; each one is replaced by the configured figure in the loop
    p1, p2 = figure(), figure()
    timefigures = [p1, p2]

    for f, g, l, key in zip(timefigures, timegraphs, timeylabels, timeysourceskeys):
        i = timefigures.index(f)
        #All figures share the x range of timefigures[0] so they pan and zoom together
        f = figure(title=g, x_range=timefigures[0].x_range, x_axis_label = "Time", y_axis_label=l, x_axis_type = "datetime", **time_series_options)
        f.title.text_color = timegraphs[g]
        f.yaxis.axis_label_text_color = timegraphs[g]
        f.yaxis.major_label_text_color = timegraphs[g]
        f.yaxis.axis_line_color = timegraphs[g]
        f.xaxis.formatter=DatetimeTickFormatter(
            hours="%I:%M:%S %p",
            minutes="%I:%M:%S %p")
        timefigures[i] = f
        f.line("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues2)
        f.circle("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues2)

    show(column(timefigures[0:2]))

In [None]:
#@title Save the trimmed data as a .csv and .xlsx {display-mode: "form"}

# Trimmed data corresponds to just the field test

#Name your trimmed file
trimmed_file_name = f"{fieldTestLabel} - {fieldTestDate} - TRIM"

###############################################################################

#Drop everything this cell appended on a previous run so rerunning it does not
#duplicate entries, then record the trim itself
change_history = change_history[:ch_bound_1]
change_history.append(f"Data was trimmed from {trim_date_start} to {trim_date_end}")
#change_history.append(f"Data was trimmed from indices {start_index + 2} and {end_index + 2}") #Plus two for excel indexing

#Remove the Time Delta column, renumber the rows from zero and rename the
#Time Delta (seconds) column to Sampling Interval (seconds)
trimmed_file = (
    df_trim
    .drop(columns="Time Delta")
    .reset_index(drop=True)
    .rename(columns={"Time Delta (seconds)": "Sampling Interval (seconds)"})
)
change_history.append("Time Delta (seconds) was renamed to Sampling Interval (seconds)")

#Create elapsed time column (Reference: https://chris35wills.github.io/time_elapsed_pandas/)
#Elapsed time is measured from the first row of the trimmed data
time_position = trimmed_file.columns.get_loc(time)
time_values = trimmed_file.iloc[:, time_position]
elapsed_time = time_values - time_values.iloc[0]
trimmed_file.insert(1, "Elapsed Time (seconds)", elapsed_time.dt.total_seconds(), allow_duplicates=True)
change_history.append("Elapsed Time (seconds) column was added")

#Extra sheets for the Excel export
prologuepd = pd.Series(prologue)
changehistory = pd.Series(change_history)

df_trim_path_csv = f"{cwd}/{trimmedDataFolderName}/{trimmed_file_name}.csv"
df_trim_path_excel = f"{cwd}/{trimmedDataFolderName}/{trimmed_file_name}.xlsx"

#CSV export: field test data only
try:
    print(f"Saving file as {trimmed_file_name}.csv")
    trimmed_file.to_csv(df_trim_path_csv, index=False)
    print(f"{trimmed_file_name}.csv was saved to {df_trim_path_csv}")
except Exception as save_error:
    print(save_error)
    print(f"Is {trimmed_file_name}.csv currently open on your computer?")

#Excel export: prologue, change history and field test data on separate sheets
try:
    print(f"\nSaving file as {trimmed_file_name}.xlsx")
    with pd.ExcelWriter(df_trim_path_excel) as writer:
        for sheet_series, sheet_title in ((prologuepd, "Kestrel Info"), (changehistory, "Data Analysis Record")):
            sheet_series.to_excel(writer, sheet_name=sheet_title, index = False, header = False)
        trimmed_file.to_excel(writer, sheet_name="Field Test Data", index = False)
    print(f"{trimmed_file_name}.xlsx was saved to {df_trim_path_excel}")
except Exception as save_error:
    print(save_error)
    print(f"Is {trimmed_file_name}.xlsx currently open on your computer?")

#Length of the change history after the trimming step
ch_bound_2 = len(change_history)

# DEFINING REFERENCE ALTITUDE

In [None]:
#@title Click play to use an interactive plot to identify baseline ranges {display-mode: "form"}

#Run this cell once

###############################################################################
#Data behind the interactive plot: row index, timestamp and raw altitude of the trimmed data
baseline = ColumnDataSource(data=dict(
    index = trimmed_file.index, 
    time = trimmed_file[time], 
    alt = trimmed_file[alt]
    )
)

altvstimehover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    tooltips=[
        ("Index", "$index"),
        ("Date", "@time{%F %I:%M:%S %p}"),
        ("Value", "$y"),
    ],

    formatters={
        "@time" : "datetime",
        #"@y1" : "numeral"
    },
    #mode = "vline"
)

options = dict(x_axis_label = "Time", tools=[altvstimehover, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=400)

f = figure(title = "Altitude vs. Time", y_axis_label = alt, **options)
f.xaxis.formatter = DatetimeTickFormatter(
    seconds=["%I:%M:%S %p"],
    minutes=["%I:%M:%S %p"],
    hours=["%I:%M:%S %p"]
)
f.line("time", "alt", source=baseline, color = "blue")
f.circle("time", "alt", source=baseline, color= "blue", size = 4, selection_color="firebrick")

#Text under the plot that reports the index range of the current selection
ranges = Paragraph(text="""SELECTED INDICES: """)

####################################################################
#Runs in the browser whenever the selection on the plot changes
baseline.selected.js_on_change("indices", CustomJS(args=dict(ranges=ranges),
code="""
    
    const inds = cb_obj.indices; //Gets unsorted if you do a shift click selection in datatable

    if (inds.length === 0) {
        //Nothing is selected (e.g. the selection was cleared): show the bare label
        ranges.text = "SELECTED INDICES: "
    } else {
        //The indices are not guaranteed to be sorted, so look for the bounds explicitly
        let first = inds[0]
        let last = inds[0]
        for (const i of inds) {
            if (i < first) { first = i }
            if (i > last) { last = i }
        }

        //Display the range selection
        ranges.text = "SELECTED INDICES: " + first + " - " + last
    }

"""
    )
)                             
####################################################################
show(column(f, ranges))


## Baseline the data
- Input the index ranges (pairs of start and end indices, as reported under "SELECTED INDICES" in the plot above) of the altitude values to use as the baseline

***Example 1***

`baseline_ranges = 0, 40, 489, 520`
- The altitude values from indices 0-40 and 489-520 will be used to baseline the altitude

***Example 2***

`baseline_ranges = 100, 250`
- The altitude values from indices 100-250 will be used to baseline the altitude

***Afterwards,*** select the desired baseline method
- Options are "LINEAR", "AVERAGE", or "CONSTANT"

In [None]:
#@title Click play to baseline data after specifying information {display-mode: "form"}

import numpy as np
from sklearn import datasets, linear_model

#####################################################################

#Set to True at the end of this cell once the data has been baselined successfully
baseline_data_step = False

#Input the index ranges here
baseline_ranges = "0, 882" #@param {type: "string"}


def parse_baseline_ranges(text):
    """Split the user's baseline range text into a flat list of number strings.

    Commas and any whitespace both act as separators, so "0, 40", "0,40" and
    "0 40" all give ["0", "40"]. Empty text gives an empty list. The pieces are
    returned as strings; converting them to int is left to the caller.
    """
    #Turn commas into spaces (rather than deleting them) so "0,40" is not fused into "040"
    return text.replace(",", " ").split()


#Method to get the format into [[1,2], [3,4], [5, 6], ...]
baseline_ranges_cleaned = parse_baseline_ranges(baseline_ranges)

#Check to see if the user input an even number of ranges
if len(baseline_ranges_cleaned) % 2 == 0:
  #Turn every entry into an integer (non-numeric input raises ValueError here)
  baseline_ranges_cleaned = [int(entry) for entry in baseline_ranges_cleaned]

  #Group consecutive values two by two: [a, b, c, d] -> [[a, b], [c, d]]
  baseline_ranges = [
      [range_start, range_end]
      for range_start, range_end in zip(baseline_ranges_cleaned[0::2], baseline_ranges_cleaned[1::2])
  ]

  #Baseline method picked from the form dropdown
  baseline_method = "CONSTANT" #@param ["LINEAR", "AVERAGE", "CONSTANT"]

  #Value subtracted from every altitude when the CONSTANT method is picked
  baseline_val_constant = -123 #@param {type:"integer"}

  ######################################################################
  #Drop the entries a previous run of this cell appended to the change history
  change_history = change_history[:ch_bound_2]

  def baseline(indices, data = trimmed_file, values = trimmed_file[alt], method = "LINEAR", baseline_val = 0.):
      """Estimate a baseline for `values` and return it together with the values above it.

      Parameters
      ----------
      indices : list of [start, end] pairs
          Row indices delimiting the samples that define the baseline. Both
          ends are included and a pair may be given in either order. The same
          numbers are used as row labels (for the timestamps) and as row
          positions (for the selection), so `data` is assumed to be numbered
          0..n-1 -- NOTE(review): true for trimmed_file, which is built with
          reset_index(drop=True); confirm before passing other frames.
      data : DataFrame
          Must contain "Elapsed Time (seconds)" and the timestamp column `time`.
      values : Series
          The measurements to baseline, one per row of `data`.
      method : str
          "LINEAR" (least-squares line through the selected samples),
          "AVERAGE" (mean of the selected samples) or "CONSTANT"
          (`baseline_val` everywhere). Case-insensitive.
      baseline_val : number
          The constant used by the "CONSTANT" method.

      Returns
      -------
      (baseline_est, values_above_baseline) : tuple of Series
          Named "Altitude Baseline (Meters)" and "AOG (Meters)", both indexed
          like `values`.

      Raises
      ------
      ValueError
          If `method` is not one of the valid methods, or if "LINEAR"/"AVERAGE"
          is requested without any baseline sample.

      Prints progress messages and appends a record of the procedure to the
      global `change_history`.
      """
      validmethods = ["LINEAR", "CONSTANT", "AVERAGE"]
      method = method.upper()
      if method not in validmethods:
          raise ValueError(f"'{method}' is an invalid method for baseline estimation. Valid methods are: LINEAR, AVERAGE, CONSTANT")

      #Elapsed time truncated to whole seconds; this is the regressor of the LINEAR fit
      time_series = data["Elapsed Time (seconds)"].astype("int")
      values_series = values

      #Positional mask: True for every row that belongs to one of the baseline ranges
      time_filter = np.zeros(len(time_series), dtype=bool)

      for ranges in indices:
          start, end = min(ranges), max(ranges)
          baseline_starttime = data.loc[start, time].strftime("%I:%M:%S %p")
          baseline_endtime = data.loc[end, time].strftime("%I:%M:%S %p")
          print(f"Baselining from {baseline_starttime} at index {start} to {baseline_endtime} at index {end} using baselining method: {method}")
          change_history.append(f"Data baselined from {baseline_starttime} to {baseline_endtime}")
          #end + 1 so the sample at `end` is used, matching the reported end time
          time_filter[start:end + 1] = True

      #Column vectors, the shape LinearRegression expects
      time_baseline = time_series.values[time_filter].reshape(-1, 1)
      values_baseline = values_series.values[time_filter].reshape(-1, 1)

      if method != "CONSTANT" and len(values_baseline) == 0:
          raise ValueError(f"The {method} baseline method needs at least one baseline range, e.g. baseline_ranges = 0, 40")

      if method == "AVERAGE":
          print(f"\nPerforming {method} baseline procedure")
          baseline_avg = np.average(values_baseline)
          print(f"Baseline average: {baseline_avg}")
          baseline_array = np.full((len(time_series), 1), baseline_avg)
          change_history.append(f"Baseline procedure used: {method}. Baseline average: {baseline_avg}")

      elif method == "CONSTANT":
          print(f"\nPerforming {method} baseline procedure")
          print(f"Baseline constant used: {baseline_val}")
          #Use the argument rather than the notebook-level form value so the function honours its own parameter
          baseline_array = np.full((len(time_series), 1), baseline_val)
          change_history.append(f"Baseline procedure used: {method}. Baseline constant used: {baseline_val}")

      elif method == "LINEAR":
          change_history.append(f"Baseline procedure used: {method}")
          print(f"\nPerforming {method} baseline procedure")
          #Fit on the selected samples only, then evaluate the line at every sample
          regr = linear_model.LinearRegression()
          regr.fit(time_baseline, values_baseline)
          time_array = time_series.values.reshape(-1, 1)
          baseline_array = regr.predict(time_array)

          #Slope
          print("Slope =", regr.coef_)

          #Intercept
          print("Intercept =", regr.intercept_)

          #R^2
          r2 = regr.score(time_baseline, values_baseline)
          print("R^2 =", r2)

      print(f"\n{method} baseline procedure completed successfully")
      #Index like `values` so the subtraction below pairs the rows up explicitly
      baseline_est = pd.Series(baseline_array[:,0], index = values_series.index, name = "Altitude Baseline (Meters)")

      #Obtain the baselined values
      values_above_baseline = (values_series - baseline_est).rename("AOG (Meters)")

      return(baseline_est, values_above_baseline)

  #Check for valid index range (reference: https://datascienceparichay.com/article/python-flatten-a-list-of-lists-to-a-single-list/)
  #Flatten [[start, end], ...] into one list so every bound can be checked against the index
  validranges = [index for sublist in baseline_ranges for index in sublist]
  validrange = True
  for i in validranges:
      if i not in trimmed_file.index:
          validrange = False
          
  if validrange:
      #Estimate the baseline and the altitude above it with the method chosen in the form
      baseline_series, AOG_series = baseline(baseline_ranges, data = trimmed_file, values = trimmed_file[alt], method = baseline_method, baseline_val = baseline_val_constant)
      
      #Work on a copy so trimmed_file itself stays without the baseline columns
      trimmed_file_baselined = trimmed_file.copy(deep=True)

      #Reorder columns
      
      #Index location of the original altitude column
      loc_for_aog = trimmed_file_baselined.columns.get_loc(alt)
      
      #Insert the Baselined altitude values
      trimmed_file_baselined.insert(loc = loc_for_aog + 1,
        column = baseline_series.name,
        value = baseline_series,
        allow_duplicates=True)    
      
      #Insert the AOG values
      trimmed_file_baselined.insert(loc = loc_for_aog + 2,
        column = AOG_series.name,
        value = AOG_series,
        allow_duplicates=True)    
      
  #def review_baseline():##################################################################################################
      #Review plots: raw altitude with the estimated baseline on top, and the altitude above ground below
      dot_size = 0.5    
      
      #NOTE: this rebinds altvstimehover, which the baseline-selection cell also defines
      altvstimehover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
          tooltips=[
              ("Index", "$index"),
              ("Date", "@time{%F %I:%M:%S %p}"),
              ("Altitude before baseline", "@alt"),
              ("Altitude after baseline", "@aog"),
          ],

          formatters={
              "@time" : "datetime",
              #"@y1" : "numeral"
          },
          mode = "vline"
      )

      #Columns shared by both review plots: raw altitude (alt), baseline estimate (ab) and altitude above ground (aog)
      source = ColumnDataSource(data=dict(
          index = trimmed_file_baselined.index, 
          time = trimmed_file_baselined[time],
          alt = trimmed_file_baselined[alt],
          ab = trimmed_file_baselined[baseline_series.name],
          aog = trimmed_file_baselined[AOG_series.name]
          )
      )

      options = dict(x_axis_label = "Time", tools=[altvstimehover, "pan, wheel_zoom, box_zoom, reset"], plot_width=600, plot_height=400)
      ##############################################################################
      #Altitude Baseline Plot
      ab = figure(title="Altitude Baseline (shaded areas indicate values used as baseline)", y_axis_label = alt, **options)
      ab.xaxis.formatter = DatetimeTickFormatter(
          seconds=["%I:%M:%S %p"],
          minutes=["%I:%M:%S %p"],
          hours=["%I:%M:%S %p"]
      )
      ab.line("time", "alt", source=source, color="orange", legend_label = "Barometric Altitude")
      ab.circle("time", "alt", source=source, size = dot_size, color="orange")
      
      ab.line("time", "ab", source=source, color="green", line_width=2, legend_label = "Baseline Altitude")

      #Highlight the selected ranges
      #Each box spans from the timestamp at the range's first index to the timestamp at its second index
      for period in baseline_ranges:
          leftbound = trimmed_file_baselined.loc[period[0], time]
          rightbound = trimmed_file_baselined.loc[period[1], time]
          baseline_box = BoxAnnotation(left=leftbound, right=rightbound, fill_alpha=0.2, fill_color="green")
          ab.add_layout(baseline_box)

      ##############################################################################
      #Altitude Above Ground Plot
      #Uses the x_range object of the baseline plot, so both plots show the same time window
      aog = figure(title = "Altitude Above Ground", x_range = ab.x_range, y_axis_label = alt, **options)
      aog.xaxis.formatter = DatetimeTickFormatter(
          seconds=["%I:%M:%S %p"],
          minutes=["%I:%M:%S %p"],
          hours=["%I:%M:%S %p"]
      )
      aog.line("time", "aog", source=source)
      aog.circle("time", "aog", source=source, size = dot_size)

      show(column(ab, aog))

      #Tells the save cell that trimmed_file_baselined now holds the baseline columns
      baseline_data_step = True

      display(trimmed_file_baselined)
  ###########################################################################################################################
  else:
      #NOTE(review): .start/.stop are RangeIndex attributes (trimmed_file is built with reset_index(drop=True)); .stop is one past the last valid index
      print(f"Specified baseline ranges,{validranges}, do not fall within {trimmed_file.index.start} and {trimmed_file.index.stop}")

elif len(baseline_ranges_cleaned) == 0:
  print("Please specify two numbers for baseline_ranges")

else:
  print("Please input an EVEN number of baseline ranges")

In [None]:
#@title Save the preprocessed data to a .csv and .xlsx {display-mode: "form"}

# Preprocessed data corresponds to just the field test with baselined altitude values

#If the data was not baselined, save the trimmed data unchanged
if not baseline_data_step:
  trimmed_file_baselined = trimmed_file.copy(deep=True)

#Name your preprocessed data file
preprocessed_data_name = f"{fieldTestLabel} - {fieldTestDate} - {deviceNickName} - PREPROCESSED"

############################################################################### 

#The change log as a single column, one entry per spreadsheet row
changehistory = pd.Series(change_history)

preprocessed_data_dir = cwd + "/" + preprocessedDataFolderName
preprocessed_data_path_csv = preprocessed_data_dir + "/" + preprocessed_data_name + ".csv"
preprocessed_data_path_excel = preprocessed_data_dir + "/" + preprocessed_data_name + ".xlsx"

try:
    print(f"Saving file as {preprocessed_data_name}.csv")
    #Create the output folder if it is missing so that case is not reported as a locked file
    os.makedirs(preprocessed_data_dir, exist_ok=True)
    trimmed_file_baselined.to_csv(preprocessed_data_path_csv, index=False)
    print(f"{preprocessed_data_name}.csv was saved to {preprocessed_data_path_csv}")
except Exception as e:
    #Best effort: report the problem and carry on so the .xlsx save is still attempted
    print(e)
    print(f"Is {preprocessed_data_name}.csv currently open on your computer?")

try:
    print(f"\nSaving file as {preprocessed_data_name}.xlsx")
    os.makedirs(preprocessed_data_dir, exist_ok=True)
    #One workbook with three sheets: device info, analysis record, and the data itself
    with pd.ExcelWriter(preprocessed_data_path_excel) as writer:
        prologuepd.to_excel(writer, sheet_name="Kestrel Info", index = False, header = False)
        changehistory.to_excel(writer, sheet_name="Data Analysis Record", index = False, header = False)
        trimmed_file_baselined.to_excel(writer, sheet_name="Field Test Data with Baseline", index = False)
    print(f"{preprocessed_data_name}.xlsx was saved to {preprocessed_data_path_excel}")
except Exception as e:
    print(e)
    print(f"Is {preprocessed_data_name}.xlsx currently open on your computer?")

#Unlocks the plotting cells below
generate_plots = True

***END OF PREPROCESSING STAGE***
***

# GENERATE STANDARDIZED SET OF PLOTS

In [None]:
#@title Customize plots {display-mode: "form"}

if not generate_plots:
  print("Please first save the preprocessed data before generating plots")

else:

  def initialize_plots():
    """Build the data source and hover tool shared by the standardized plots.

    Reads the unit chosen on each toggle button in `unit_select`, stores the
    matching `plot_units` entry in the global `supported_units` and `columns`
    dictionaries, then copies one column of `df_pp` per key of `selected_units`.

    Returns
    -------
    (source, hover) : the ColumnDataSource holding the copied columns plus the
        row index, and a HoverTool with one tooltip row per column.
    """
    #Resolve the unit picked for each field into its entry of plot_units
    for field_name, toggle in zip(supported_units, unit_select):
        supported_units[field_name] = plot_units[toggle.value]

    #Make the column lookup point at the resolved entries
    for field_name, resolved in supported_units.items():
        columns[field_name] = resolved

    #Data
    plot_data = {"index": df_pp.index}
    plot_data.update((key, df_pp[columns[key]].copy(deep=True)) for key in selected_units)
    source = ColumnDataSource(data=plot_data)

    #Index and time come first, followed by one row per remaining column;
    #the name is wrapped in braces so names containing spaces or dots are read as one field
    tooltips = [("Index", "$index"), ("Time", "@Time{%F %I:%M:%S %p}")]
    tooltips.extend(
        (name, "@{" + name + "}")
        for name in source.data.keys()
        if name not in ("Time", "index")
    )

    hover = HoverTool(tooltips=tooltips, formatters={"@Time": "datetime"})

    return source, hover

  def kt_to_bft(knots):
      """Convert wind speeds in knots to Beaufort force numbers.

      Using this chart as a reference https://www.weather.gov/mfl/beaufort

      Parameters
      ----------
      knots : iterable of numbers
          Wind speeds in knots.

      Returns
      -------
      list
          One entry per input value, in the same order: the Beaufort force
          (0-12) for speeds of 0 kt and above, with everything from 63 kt up
          mapped to force 12. Negative or NaN speeds give NaN, so the result
          always has the same length as the input and can be used as a
          DataFrame column.
      """
      #Lowest speed (in knots) that belongs to Beaufort forces 1 through 12
      lower_bounds = (1, 3, 6, 10, 16, 21, 27, 33, 40, 47, 55, 63)

      Bft = []

      for speed in knots:
          if speed >= 0:
              #The force equals the number of lower bounds the speed has reached
              Bft.append(sum(1 for bound in lower_bounds if speed >= bound))
          else:
              #Negative or NaN reading: keep a placeholder instead of dropping the sample
              Bft.append(float("nan"))

      return Bft

  #df_pp naming convention comes from dataframe_preprocessed
  df_pp = trimmed_file_baselined.copy(deep=True)

  #Temperature: add the other unit right after the measured temperature column
  if selected_units["Temp"] == "Celsius":
      temp_f = ((9/5) * df_pp[temp]) + 32
      df_pp.insert(loc = df_pp.columns.get_loc(temp) + 1,
        column = "Temp (Fahrenheit)",
        value = temp_f,
        allow_duplicates=True)
  elif selected_units["Temp"] == "Fahrenheit":
      temp_c = (df_pp[temp] - 32) * (5/9)
      df_pp.insert(loc = df_pp.columns.get_loc(temp) + 1,
        column = "Temp (Celsius)",
        value = temp_c,
        allow_duplicates=True)


  #AOG
  #NOTE(review): AOG_series only exists after the baseline cell has run successfully, and
  #baseline() always names it "AOG (Meters)", so the "Feet" branch below adds a second column
  #with that same name -- confirm the intended behaviour for data recorded in feet
  if selected_units["Altitude"] == "Meters": #Can refer to it using the raw altitude name since AOG will be in the same units

      #Meters to Feet
      ft = df_pp[AOG_series.name] * 3.2808
      df_pp.insert(loc = df_pp.columns.get_loc(AOG_series.name) + 1,
        column = "AOG (Feet)",
        value = ft,
        allow_duplicates=True)

  elif selected_units["Altitude"] == "Feet":

      #Feet to Meters
      meters = df_pp[AOG_series.name] / 3.2808
      df_pp.insert(loc = df_pp.columns.get_loc(AOG_series.name) + 1,
        column = "AOG (Meters)",
        value = meters,
        allow_duplicates=True)

  #Wind Speed
  loc_for_ws = df_pp.columns.get_loc(windspeed)

  #How many of each unit make up 1 m/s; the order is the order in which the new columns are added
  ws_units_per_mps = {"m/s": 1, "ft/s": 3.2808, "mph": 2.2369, "kt": 1.9438}
  ws_measured_unit = selected_units["Wind Speed"]

  #Not sure if converting FROM Bft is really necessary, so any other unit adds no columns
  if ws_measured_unit in ws_units_per_mps:
      ws_measured = df_pp[windspeed]

      #Go through m/s so that every unit needs a single factor
      if ws_measured_unit == "m/s":
          m_per_s = ws_measured
      else:
          m_per_s = ws_measured / ws_units_per_mps[ws_measured_unit]

      #The measured unit keeps the raw readings; every other unit is derived from m/s
      ws_by_unit = {
          unit: ws_measured if unit == ws_measured_unit else m_per_s * factor
          for unit, factor in ws_units_per_mps.items()
      }

      #One new column per unit that was not measured, followed by the Beaufort force
      new_ws_columns = [
          (f"Wind Speed ({unit})", unit_values)
          for unit, unit_values in ws_by_unit.items()
          if unit != ws_measured_unit
      ]
      new_ws_columns.append(("Wind Speed (Bft)", kt_to_bft(ws_by_unit["kt"])))

      #Place the new columns directly after the measured wind speed column
      for offset, (ws_column, ws_values) in enumerate(new_ws_columns, start=1):
          df_pp.insert(loc = loc_for_ws + offset,
            column = ws_column,
            value = ws_values,
            allow_duplicates=True)

  #Not sure if converting FROM Bft is really necessary    
  #elif selected_units["Wind Speed"] == "Bft":

  import bokeh.palettes as palettes
  #https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Layout.html#natural-sizes-and-arrangements-using-hbox-and-vbox

  #Every plotted variable except the time axis gets a color
  selected_units_no_time = selected_units.copy()
  selected_units_no_time.pop("Time")
  colors_length = len(selected_units_no_time)

  if colors_length <= 8:
    #Colorblind palette has a maximum of 8 colors, which should be enough https://docs.bokeh.org/en/latest/docs/reference/palettes.html#d3-palettes
    #Its smallest size is 3, so request at least 3 colors and keep only as many as needed
    colors = palettes.Colorblind[max(colors_length, 3)][:colors_length]

  else:
    colors = palettes.Category20[colors_length]

  #Unit selection
  temp_units = ["Celsius", "Fahrenheit"]
  alt_units = ["Meters", "Feet"]
  ws_units = ["Meters per second", "Feet per second", "Miles per hour", "Knots", "Beaufort"]

  supported_units = {
      "Temp": temp_units,
      "Altitude": alt_units,
      "Wind Speed": ws_units,
  }

  #One row of toggle buttons per field that can be shown in several units
  unit_select = []
  for field in supported_units:
      unit_select.append(widgets.ToggleButtons(
          options=[i for i in supported_units[field]],
          description=field,
          disabled=False,
          button_style='', # 'success', 'info', 'warning', 'danger' or ''
      )
      )

  #Color selection
  #Preferred default colors; variables without an entry get a palette color instead
  color_selections = {
      "Altitude": "#0eade1",
      "Temp": "#e01b9a",
      "Wind Speed": "#500eec",
      "Mag. Dir.": "#08a108",
      "Rel. Hum.": "#12cab4"
      }
  #Replace each selected variable's entry with a color picker preset to its default color
  for unit, color in zip(selected_units_no_time, colors):
    color_selections[unit] = widgets.ColorPicker(
                            concise=False,
                            description=unit,
                            value=color_selections.get(unit, color),
                            disabled=False
    )

  items_layout = Layout(width='auto')     # override the default width of the button to 'auto' to let the button grow

  unit_layout = Layout(display='flex',
                      flex_flow='row', 
                      align_items='stretch', 
                      #border='solid',
                      width='100%')

  colors_layout = Layout(display='flex',
                      flex_flow='column', 
                      align_items='stretch', 
                      #border='solid',
                      width='100%')

  box = Box(children=unit_select, layout=unit_layout)

  #Defaults of variables that are not plotted are still plain strings; only the pickers can be displayed
  box2 = Box(children=[picker for picker in color_selections.values() if isinstance(picker, widgets.ColorPicker)], layout=colors_layout)

  children = [box, box2]
  tab = widgets.Tab()
  tab.children = children
  #NOTE(review): assumes the installed ipywidgets version exposes Tab titles as a `titles` attribute -- confirm
  tab.titles = ["Units", "Colors"]

  #Unlocks the plotting cells below
  customize_plots = True

  display(tab)

  

## Time Series Separate

In [None]:
#@title Click play to generate time series plots {display-mode: "form"}
#from bokeh.models import ColorPicker

if not customize_plots:
  print(customize_plots_message)

else:
  from bokeh.io import output_file
  from bokeh.models import Select

  time_series_separate_path = f"TIME SERIES SEPARATE - {fieldTestLabel} - {fieldTestDate}.html"

  #Send the plots shown below to an HTML file in the plots folder
  output_file(f"{plotsFolderName}/{time_series_separate_path}")

  #Formatting
  datefmt = DateFormatter(format="%F %I:%M:%S %p") #Format API reference: https://docs.bokeh.org/en/latest/docs/reference/models/widgets/tables.html?highlight=datatable#bokeh.models.DataTable
  width = 900
  height = 300

  #view = CDSView(source=source, filters=[IndexFilter(x)])
  hovercolor = "black"
  sz = 3

  #FIELD TEST INFO FOR PLOTS****************************************************************
  fieldtestinfo = Div(text=
  f"""
  <p>FIELD TEST: <b>{fieldTestParameters["Field Test Label"]}</b></p>
  <p>LOCATION: <b>{fieldTestParameters["Field Test Location"]}</b></p>
  <p>DATE: <b>{fieldTestParameters["Field Test Date"]}</b></p>
  <p>DEVICE: <b>{fieldTestParameters["Device Nickname"]}</b></p>
  """
  )

  #TIME SERIES PLOTS*************************************************************************************************

  source, hover = initialize_plots()

  #Unit selection inside the plots (Select widgets per field) is currently not used;
  #the units come from the toggle buttons of the "Customize plots" cell

  #TIME SERIES PLOTS SEPARATE******************************************************************************
  time_series_options = dict(tools=[hover, "pan, wheel_zoom, box_select, tap, reset"], plot_width=width, plot_height=height)
  timefigures = []
  #In-plot ColorPicker widgets linked to the glyph colors are disabled for now to optimize performance
  line_color = []
  circle_color = []
  plot_layout = []

  for title in selected_units_no_time:
      #Look the color up by variable name: the two dictionaries are not in the same order,
      #so pairing them by position would give a plot another variable's color
      color = color_selections[title].value

      if title == "Mag. Dir.":
          #Fixed y-range; this variable is drawn with markers only (no connecting line)
          timefigures.append(figure(title=f"{columns[title]} vs. Time", y_range=(-40, 400), x_axis_label = "Time", y_axis_label=columns[title], x_axis_type = "datetime", **time_series_options))
      else:
          timefigures.append(figure(title=f"{columns[title]} vs. Time", x_axis_label = "Time", y_axis_label=columns[title], x_axis_type = "datetime", **time_series_options))
          line_color.append(timefigures[-1].line("Time", title, color=color, hover_color=hovercolor, source=source))
      timefigures[-1].title.text_color = color
      timefigures[-1].xaxis.formatter=DatetimeTickFormatter(
          hours="%I:%M:%S %p",
          minutes="%I:%M:%S %p")
      #Every figure uses the x-range object of the first one, so they all show the same time window
      timefigures[-1].x_range = timefigures[0].x_range
      circle_color.append(timefigures[-1].circle("Time", title, color=color, size=sz, hover_color=hovercolor, source=source))

      plot_layout.append(row(timefigures[-1]))

  tab1 = Panel(child=column(fieldtestinfo, column(plot_layout)), title="Time Series Plots")

  layout1 = Tabs(tabs=[tab1])

  show(column(layout1))

  print(f"\n This plot was saved to {plotsFolderName} as {time_series_separate_path}")


 This plot was saved to plots as TIME SERIES SEPARATE - AKA Ocean City - Tuesday, October 11, 2022.html


## Time Series Superimposed

In [None]:
#@title Click play to choose the variables to view on one time series plot {display-mode: "form"}

if not customize_plots:
  print(customize_plots_message)

else:
  #The variables that can be superimposed: everything that is plotted except the time axis
  superimposed_options = [field for field in selected_units_no_time]

  #Two identical rows of toggle buttons, one per variable of the superimposed plot
  superimposed_variables = [
      widgets.ToggleButtons(
          options=superimposed_options,
          description=description,
          disabled=False,
          button_style='', # 'success', 'info', 'warning', 'danger' or ''
          #One tooltip per button, naming the variable the button selects
          tooltips=[f"Show {field} on the superimposed plot" for field in superimposed_options],
      )
      for description in ('Variable 1', 'Variable 2')
  ]
  variable_1, variable_2 = superimposed_variables

  box_superimposed = Box(children=superimposed_variables, layout=unit_layout)

  #Unlocks the cell below that draws the superimposed plot
  time_series_superimposed_step = True

  display(box_superimposed)

Box(children=(ToggleButtons(description='Variable 1', options=('Altitude', 'Wind Speed', 'Mag. Dir.', 'Temp', …

In [None]:
#@title Click play to generate the superimposed Time Series plot {display-mode: "form"}

#Draws the two variables picked in the cell above on one time series plot with two y-axes
if not time_series_superimposed_step:
  print("Please FIRST run the ABOVE cell to select the variables to view on a time series plot")

else:
  from bokeh.io import output_file

  time_series_superimposed_path = f"{variable_1.value} VS. {variable_2.value} - TIME SERIES - {fieldTestLabel} - {fieldTestDate}.html"

  #Send the plot shown below to an HTML file in the plots folder
  output_file(f"{plotsFolderName}/{time_series_superimposed_path}")

  #TIME SERIES PLOTS SUPERIMPOSED***************************************************************************************************************************************

  source, hover = initialize_plots()

  #Names of the two variables picked on the toggle buttons
  var1 = variable_1.value
  var2 = variable_2.value

  #Their column names in df_pp, also used as axis and legend labels
  var1_label = columns[var1]
  var2_label = columns[var2]

  #Each y-axis spans exactly the observed range of its variable
  var1_min = df_pp[var1_label].min()
  var1_max = df_pp[var1_label].max()

  var2_min = df_pp[var2_label].min()
  var2_max = df_pp[var2_label].max()

  #Colors chosen in the "Customize plots" cell
  var1_color = color_selections[var1].value
  var2_color = color_selections[var2].value

  width_superimposed = 1000
  height_superimposed = 500
  glyph_size = 5

  superimposed1_options = dict(tools=[hover, "pan, wheel_zoom, box_select, tap, reset"], plot_width=width_superimposed, plot_height=height_superimposed)

  #The figure's own (left) y-axis belongs to variable 1
  superimposed1 = figure(y_range = (var1_min, var1_max), x_axis_label = "Time", y_axis_label = var1_label,
                        x_axis_type="datetime", **superimposed1_options)
  superimposed1.xaxis.formatter=DatetimeTickFormatter(
      hours="%I:%M:%S %p",
      minutes="%I:%M:%S %p"
  )

  superimposed1.yaxis.axis_label_text_color = var1_color
  superimposed1.yaxis.major_label_text_color = var1_color
  superimposed1.yaxis.axis_line_color = var1_color

  #Line and markers share a legend label, so they form a single legend entry
  superimposed1.line("Time", var1, color=var1_color, hover_color="red", source=source, legend_label = var1_label)
  superimposed1.scatter("Time", var1, marker = glyphs[var1], size = glyph_size, color=var1_color, hover_color="red", source=source, legend_label = var1_label)

  #Second y-range for variable 2; its glyphs are attached to it through y_range_name
  superimposed1.extra_y_ranges["var2"] = Range1d(start=var2_min, end=var2_max)

  superimposed1.line("Time", var2, color=var2_color, hover_color="red", source=source, legend_label = var2_label, y_range_name ="var2")
  superimposed1.scatter("Time", var2, marker = glyphs[var2], size = glyph_size, color=var2_color, hover_color="red", source=source, legend_label = var2_label, y_range_name = "var2")

  #Axis for variable 2, drawn on the right-hand side in that variable's color
  ax2 = LinearAxis(y_range_name="var2", axis_label = var2_label, 
                  major_label_text_color = color_selections[var2].value, 
                  axis_label_text_color = color_selections[var2].value, 
                  axis_line_color= color_selections[var2].value)

  superimposed1.add_layout(ax2, "right") 

  #Clicking a legend entry hides or shows that variable
  superimposed1.legend.click_policy= "hide"

  tab2 = Panel(child=column(fieldtestinfo, superimposed1), title="Time Series Superimposed")

  layout2 = Tabs(tabs=[tab2])

  show(layout2)

  print(f"\nThis plot was saved to {plotsFolderName} as {time_series_superimposed_path}")


This plot was saved to plots as Altitude VS. Temp - TIME SERIES - AKA Ocean City - Tuesday, October 11, 2022.html


## Altitude Profiles

In [None]:
#@title Click play to generate a series of altitude profiles {display-mode: "form"}

# Builds one scatter plot per selected variable (altitude itself excluded),
# each with altitude on the x-axis, stacks them in a single tab and shows the
# result. Bokeh output is directed to an HTML file inside the plots folder.

if customize_plots:
  from bokeh.io import output_file

  alt_profiles_separate_path = f"ALTITUDE PROFILES SEPARATE - {fieldTestLabel} - {fieldTestDate}.html"

  output_file(f"{plotsFolderName}/{alt_profiles_separate_path}")

  #ALTITUDE PROFILES*********************************************************

  # initialize_plots() is defined elsewhere in the notebook.
  source, hover = initialize_plots()
  glyph_size = 5

  alt_profiles_options = dict(tools=[hover, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)
  altprofiles = [unit for unit in selected_units_no_time if unit != "Altitude"]
  altprofiles_plots = []

  for title in altprofiles:
    alt_plot_color = color_selections[title].value
    label = columns[title]

    # Every profile shares these figure settings; only the magnetic direction
    # plot additionally pins its y-range to (-40, 400).
    alt_figure_kwargs = dict(title=label, x_axis_label = columns["Altitude"], y_axis_label=label, **alt_profiles_options)
    if title == "Mag. Dir.":
      alt_figure_kwargs["y_range"] = (-40, 400)

    alt_profile_figure = figure(**alt_figure_kwargs)
    altprofiles_plots.append(alt_profile_figure)

    alt_profile_figure.scatter("Altitude", title, marker = glyphs[title], size = glyph_size, color=alt_plot_color, hover_color=hovercolor, source=source)

    # Title and y-axis take the colour chosen for this variable.
    alt_profile_figure.title.text_color = alt_plot_color
    alt_profile_figure.yaxis.axis_label_text_color = alt_plot_color
    alt_profile_figure.yaxis.major_label_text_color = alt_plot_color
    alt_profile_figure.yaxis.axis_line_color = alt_plot_color

    # Share the first plot's x-range so all profiles pan and zoom together.
    alt_profile_figure.x_range = altprofiles_plots[0].x_range

  # fieldtestinfo is built elsewhere in the notebook and stacked above the plots.
  tab3 = Panel(child=column(fieldtestinfo, column(altprofiles_plots)), title="Altitude Profiles")

  layout3 = Tabs(tabs=[tab3])

  show(layout3)

  print(f"\nThis plot was saved to {plotsFolderName} as {alt_profiles_separate_path}")

else:
  print(customize_plots_message)


This plot was saved to plots as ALTITUDE PROFILES SEPARATE - AKA Ocean City - Tuesday, October 11, 2022.html


## Altitude Profiles Superimposed

In [None]:
#@title Click play to choose the variables to view on a single altitude plot {display-mode: "form"}

# Shows two rows of toggle buttons for picking which variables the next cell
# plots against altitude.

# Flag read by the plotting cell below. It is reset before any guard so it is
# always defined once this cell has run, and it only becomes True after both
# selectors were built successfully.
alt_profile_superimposed_step = False

if not customize_plots:
  print(customize_plots_message)

else:
  # Every selected variable except altitude itself, which is the shared x-axis.
  altprofiles = [alt for alt in selected_units_no_time if alt != "Altitude"]

  # One tooltip per button: the full column label behind the short button name.
  alt_variable_tooltips = [str(columns[field]) for field in altprofiles]

  alt_variable_1 = widgets.ToggleButtons(
      options=list(altprofiles),
      description='Variable 1',
      disabled=False,
      button_style='', # 'success', 'info', 'warning', 'danger' or ''
      tooltips=alt_variable_tooltips,
  )

  alt_variable_2 = widgets.ToggleButtons(
      options=list(altprofiles),
      description='Variable 2',
      disabled=False,
      button_style='', # 'success', 'info', 'warning', 'danger' or ''
      tooltips=alt_variable_tooltips,
  )

  alt_superimposed_variables = [alt_variable_1, alt_variable_2]
  # unit_layout is defined elsewhere in the notebook.
  box_alt_superimposed = Box(children=alt_superimposed_variables, layout=unit_layout)

  alt_profile_superimposed_step = True

  display(box_alt_superimposed)

Box(children=(ToggleButtons(description='Variable 1', options=('Wind Speed', 'Mag. Dir.', 'Temp', 'Rel. Hum.')…

In [None]:
#@title Click play to generate the superimposed altitude profile plot {display-mode: "form"}

# Draws the two variables chosen in the selection cell above against altitude
# on a single figure: variable 1 is scaled to the left y-axis, variable 2 to an
# extra y-axis on the right. Bokeh output is directed to an HTML file inside
# the plots folder before the figure is shown.

# The flag is only created by the selection cell above. Look it up defensively
# so that running this cell first prints the reminder instead of raising a
# NameError.
if not globals().get("alt_profile_superimposed_step", False):
  print("Please FIRST run the ABOVE cell to select the variables to view on the altitude plot")

else:
  from bokeh.io import output_file

  # Short names of the two variables currently selected on the toggle buttons.
  var1 = alt_variable_1.value
  var2 = alt_variable_2.value

  alt_profile_superimposed_path = f"{var1} VS. {var2} - ALTITUDE PROFILE - {fieldTestLabel} - {fieldTestDate}.html"

  output_file(f"{plotsFolderName}/{alt_profile_superimposed_path}")

  #ALTITUDE PROFILES SUPERIMPOSED*********************************************

  # initialize_plots() is defined elsewhere in the notebook; presumably it
  # returns a fresh data source and hover tool -- confirm against its definition.
  source, hover = initialize_plots()

  # Column labels in the preprocessed dataframe for the two selections.
  var1_label = columns[var1]
  var2_label = columns[var2]

  # Each y-axis spans exactly the observed range of its own variable.
  var1_min = df_pp[var1_label].min()
  var1_max = df_pp[var1_label].max()

  var2_min = df_pp[var2_label].min()
  var2_max = df_pp[var2_label].max()

  var1_color = color_selections[var1].value
  var2_color = color_selections[var2].value

  width_superimposed = 1000
  height_superimposed = 500
  glyph_size = 5

  altsuperimposed_options = dict(tools=[hover, "pan, wheel_zoom, box_select, reset"], plot_width=width_superimposed, plot_height=height_superimposed)

  altsuperimposed = figure(y_range = (var1_min, var1_max), x_axis_label=columns["Altitude"], y_axis_label=var1_label, **altsuperimposed_options)

  # Colour the left axis like the variable 1 glyphs so the pairing is obvious.
  altsuperimposed.yaxis.axis_label_text_color = var1_color
  altsuperimposed.yaxis.major_label_text_color = var1_color
  altsuperimposed.yaxis.axis_line_color = var1_color

  altsuperimposed.scatter("Altitude", var1, marker = glyphs[var1], color=var1_color, size = glyph_size, hover_color="red", source=source, legend_label = var1_label)

  # Variable 2 gets its own named range so its scale is independent of variable 1.
  altsuperimposed.extra_y_ranges["var2"] = Range1d(start=var2_min, end=var2_max)

  altsuperimposed.scatter("Altitude", var2, marker = glyphs[var2], color=var2_color, size = glyph_size, hover_color="red", source=source, legend_label = var2_label, y_range_name = "var2")

  # Right-hand axis bound to the "var2" range, coloured like the variable 2 glyphs.
  ax2 = LinearAxis(y_range_name="var2", axis_label = var2_label,
                  major_label_text_color = var2_color,
                  axis_label_text_color = var2_color,
                  axis_line_color = var2_color)

  altsuperimposed.add_layout(ax2, "right")

  # Clicking a legend entry toggles the visibility of its glyphs.
  altsuperimposed.legend.click_policy= "hide"

  # fieldtestinfo is built elsewhere in the notebook and stacked above the plot.
  tab4 = Panel(child=column(fieldtestinfo, altsuperimposed), title="Altitude Profiles Superimposed")

  layout4 = Tabs(tabs=[tab4])

  show(layout4)

  print(f"\nThis plot was saved to {plotsFolderName} as {alt_profile_superimposed_path}")



This plot was saved to plots as Wind Speed VS. Temp - ALTITUDE PROFILE - AKA Ocean City - Tuesday, October 11, 2022.html


## A.R.T.S. Plot (**A**lt. vs. **R**el. hum., **T**emp., wind **S**peed)

In [None]:
#@title Click play to generate the plot {display-mode: "form"}

# A.R.T.S plot: temperature, wind speed and relative humidity are each drawn
# as a scatter series on a shared x-axis with altitude on the y-axis. Bokeh
# output is directed to an HTML file inside the plots folder before the figure
# is shown.

if not customize_plots:
  print(customize_plots_message)

else:
  from bokeh.io import output_file

  geoff_plot_path = f"A.R.T.S Plot - {fieldTestLabel} - {fieldTestDate}.html"

  output_file(f"{plotsFolderName}/{geoff_plot_path}")

  # initialize_plots() is defined elsewhere in the notebook.
  source, hover = initialize_plots()

  #The Geoff Exclusive********************************************************************************
  glyph_size = 5
  # The three series to plot and the fixed colour used for each, in matching order.
  geoffsourcekeys = ["Temp", "Wind Speed", "Rel. Hum."]
  geoff_options = dict(tools=[hover, "pan, wheel_zoom, box_select, reset"], plot_width=1000, plot_height=500)
  geoffcolors = ["#e01b9a", "#500eec", "#08d4c6"]

  # The x-axis carries three different quantities, so label it with all of
  # their column labels; previously the axis had no label at all.
  geoff_x_axis_label = " / ".join(str(columns[key]) for key in geoffsourcekeys)
  geoffp = figure(x_axis_label = geoff_x_axis_label, y_axis_label = columns["Altitude"], **geoff_options)

  for key, c in zip(geoffsourcekeys, geoffcolors):
    geoffp.scatter(key, "Altitude", marker = glyphs[key], size = glyph_size, color=c, hover_color=hovercolor, source=source, legend_label=columns[key])
  # Clicking a legend entry toggles the visibility of its series.
  geoffp.legend.click_policy="hide"

  # fieldtestinfo is built elsewhere in the notebook and stacked above the plot.
  tab5 = Panel(child=column(fieldtestinfo, geoffp), title="Geoff Exclusive")

  layout5 = Tabs(tabs=[tab5])

  show(layout5)

  print(f"\n This plot was saved to {plotsFolderName} as {geoff_plot_path}")


 This plot was saved to plots as A.R.T.S Plot - AKA Ocean City - Tuesday, October 11, 2022.html
