# ON BRANCH MULTIPLE-SENSORS

# IMPORT DATA

## STEP 0: Input Field Test Information
- Create folders to store:
    - Trimmed data (data corresponding only to the field test)
    - Preprocessed data (the field test data that's been baselined)



In [1]:
fieldTestLabel = "AREN CBEC" #a short nickname for the field test

fieldTestLocation = "Chesapeake Bay Environmental Center" #name of the location of the field test

fieldTestDate = "2022-06-21" #the date of the field test in this format (YYYY-MM-DD)

fieldTestDescription = "Flying a Kestrel at CBEC" #a short description of the overall field test purpose

fieldTestTeam = ["RAC"] #initials for members of the field test

dataAnalyst = ["RAC"] #initials of person/people analyzing the data

deviceName = "Drexel Kestrel 5500 A" #name of the Kestrel Device

deviceNickName = "Aeropod" #nickname to identify the device on the plots

trimmedDataFolderName = "trimmed_data" #name of folder to store the trimmed data

preprocessedDataFolderName = "preprocessed_data" #name of folder to store the preprocessed data

###########################################################################################
import time as tm
from datetime import date
import os
from pathlib import Path

#Information to indicate the history of changes made
change_history = []

#Convert fieldTestDate from "YYYY-MM-DD" to a long display form, e.g. "Tuesday, June 21, 2022"
#If the conversion fails, fieldTestDate keeps the value typed in above
try:
    fieldTestDate = date.fromisoformat(fieldTestDate).strftime("%A, %B %d, %Y")
except ValueError as e:
    print(e)
except Exception as e:
    #Show the underlying error too; the generic message alone gives no hint of the cause
    print("Something went wrong. Restart the notebook")
    print(e)

#Field test parameters into a dictionary
fieldTestParameters = {
    "Field Test Label": fieldTestLabel,
    "Field Test Location": fieldTestLocation,
    "Field Test Date": fieldTestDate,
    "Field Test Description": fieldTestDescription,
    "Field Test Team": fieldTestTeam,
    "Data Analyst": dataAnalyst,
    "Device Name": deviceName,
    "Device Nickname": deviceNickName
}

#Echo field test parameters for user to confirm and change if necessary
#Each echoed line is also recorded as the first entries of change_history
print("PLEASE CONFIRM THE FOLLOWING FIELD TEST PARAMETERS")
print("-"*50)

for parameter in fieldTestParameters:
    print(f"{parameter}: {fieldTestParameters[parameter]}")
    change_history.append(f"{parameter}: {fieldTestParameters[parameter]}")
    
print("-"*50)

#Get the directory that the notebook is currently in
cwd = os.getcwd()

#Create trimmed data directory (relative to the current working directory)
try:
    trimmed_data_dir = Path(trimmedDataFolderName)
    trimmed_data_dir.mkdir()
    change_history.append(f"{trimmedDataFolderName} folder was created in {cwd}")
except FileExistsError:
    #mkdir() raises FileExistsError for ANY existing path, including a regular file
    if trimmed_data_dir.is_dir():
        print(f"The folder, {trimmedDataFolderName}, already exists")
    else:
        print(f"{trimmedDataFolderName} already exists in {cwd} but is not a folder. Rename it or choose a different folder name")
except Exception as e:
    print("Something went wrong. Restart the notebook")
    print(e)
else:
    print(f"{trimmedDataFolderName} has been created in {cwd}")

#Create preprocessed data directory (relative to the current working directory)
try:
    preprocessed_data_dir = Path(preprocessedDataFolderName)
    preprocessed_data_dir.mkdir()
    change_history.append(f"{preprocessedDataFolderName} folder was created in {cwd}")
except FileExistsError:
    #Only send the user on to STEP 1 when the existing path really is a folder
    if preprocessed_data_dir.is_dir():
        print(f"The folder, {preprocessedDataFolderName}, already exists")
        print("Proceed to STEP 1")
    else:
        print(f"{preprocessedDataFolderName} already exists in {cwd} but is not a folder. Rename it or choose a different folder name")
except Exception as e:
    print("Something went wrong. Restart the notebook")
    print(e)
else:
    print(f"{preprocessedDataFolderName} has been created in {cwd}")
    print("Proceed to STEP 1")

PLEASE CONFIRM THE FOLLOWING FIELD TEST PARAMETERS
--------------------------------------------------
Field Test Label: AREN CBEC
Field Test Location: Chesapeake Bay Environmental Center
Field Test Date: Tuesday, June 21, 2022
Field Test Description: Flying a Kestrel at CBEC
Field Test Team: ['RAC']
Data Analyst: ['RAC']
Device Name: Drexel Kestrel 5500 A
Device Nickname: Aeropod
--------------------------------------------------
The folder, trimmed_data, already exists
The folder, preprocessed_data, already exists
Proceed to STEP 1


## STEP 1: Type in/paste the name of the file `"examplename.csv"`
- Make sure the file is in the same directory as this notebook

In [2]:
#TYPE IN FILE NAME HERE
filename = "WEATHER - 2481048_2022-06-27 15_31_52.csv"

#################################################################
delay_time = 0.1 #pause (in seconds) between progress messages

#Announce the search, then print three progress dots with a short pause before each
print(f"Locating \"{filename}\"", end="")
for _ in range(3):
    tm.sleep(delay_time)
    print(".", end="")
print()
tm.sleep(delay_time)

#The file must be listed, under exactly this name, in the current working directory
file_found = filename in os.listdir()

if file_found:
    print(f"\n\"{filename}\" has been located")
    print("\nProceed to STEP 2")
else:
    print(f"\"{filename}\" does not exist in the current directory: {cwd}")
    print("\nDid you type in the name correctly?")
    print("Did you move the file to the wrong directory?")

Locating "WEATHER - 2481048_2022-06-27 15_31_52.csv"...

"WEATHER - 2481048_2022-06-27 15_31_52.csv" has been located

Proceed to STEP 2


## STEP 2: Open the file
**Note:** This step currently assumes the user will always upload the raw file, which will always have a prologue.<br>
**Future work:** report "This file doesn't appear to have a prologue" when the prologue is missing (regular expressions could be used to detect this).

In [36]:
#Unit strings that appear in more than one entry below
_TIME_FORMAT = "yyyy-MM-dd hh:mm:ss"
_CELSIUS = "Celsius"
_METERS = "Meters"
_DEGREES = "Degrees"

#Kestrel column header -> the unit string expected for that column
#STEP 2 keeps a column only if its header is a key here, and renames it to
#"<header> (<unit>)" when the unit read from the file matches the value
kestrel_units = {
    "Time": _TIME_FORMAT,
    "Temp": _CELSIUS,
    "Wet Bulb Temp.": _CELSIUS,
    "Rel. Hum.": "%",
    "Baro.": "mb",
    "Altitude": _METERS,
    "Wind Speed": "m/s",
    "Mag. Dir.": _DEGREES,
    "True Dir.": _DEGREES,
}

#Short key -> full "<header> (<unit>)" column label
#NOTE(review): not referenced in the visible part of this notebook; presumably used by later steps
standardized_units = {
    "time": f"Time ({_TIME_FORMAT})",
    "alt": f"Altitude ({_METERS})",
    "temp": f"Temp ({_CELSIUS})",
    "wetbulbtemp": f"Wet Bulb Temp. ({_CELSIUS})",
    "windspeed": "Wind Speed (m/s)",
    "rh": "Rel. Hum. (%)",
    "baro": "Baro. (mb)",
    "magdir": f"Mag. Dir. ({_DEGREES})",
    "alt_baselined": f"AOG ({_METERS})",
}

In [58]:
#Run this cell once
import pandas as pd

#Open the file first, so a wrong name or permission problem is reported before any parsing starts
try:
    print(f"Opening \"{filename}\"", end="")
    
    for i in range(2):
        tm.sleep(delay_time)
        print(".", end="")
    tm.sleep(delay_time)
    print(".")
    tm.sleep(delay_time)
    
    f = open(filename, "r")
    print("File opened successfully")

except Exception as e:
    print("Something went wrong")
    print(e)
    
else:
    #Becomes True only after the measurements have been loaded into df
    data_loaded = False

    #The finally clause below closes the file even when reading fails part way through
    try:
        #READ PROLOGUE   
        ####################################################################################

        prologue_length = 8 #replace this comment with the length of your prologue (optional for user to interact or get rid of?)

        ####################################################################################
        try:
            #Iterate over prologue: every line up to and including the first blank line
            print("\nReading prologue")
            print("-"*50)
            pl = 0 #number of non-blank prologue lines read so far
            prologue = []

            while True:
                l = f.readline()
                if l == "":
                    #readline() returns "" at end of file; without this check a file
                    #that has no blank line after the prologue would loop forever
                    raise ValueError("Reached the end of the file before the blank line that ends the prologue")
                prologue.append(l)
                print(l)
                tm.sleep(delay_time)
                if l == "\n":
                    break
                else:
                    pl += 1
            print("-"*50)        
            print("Prologue read successfully")

            if pl != prologue_length:
                print(f"Prologue was {pl} lines instead of {prologue_length}")
            else:
                print(f"Prologue is {prologue_length} lines")

        except Exception as e:
            print("Something went wrong")
            print(e)
            print("Try rerunning STEP 2")

        #READ HEADERS, UNITS, AND DATA
        ####################################################################################

        #Run this cell once

        try:
            #Obtaining headers and units
            #The last comma-separated field of each of these two lines is discarded
            print("\nObtaining headers")
            tm.sleep(delay_time)
            headers = f.readline().split(",")
            headers.pop(-1)
            print("Headers obtained successfully")
            tm.sleep(delay_time)

            print("\nObtaining units")
            tm.sleep(delay_time)
            units = f.readline().split(",")
            units.pop(-1)
            print("Units obtained successfully")
            tm.sleep(delay_time)

            #Store the data values into a pandas dataframe (reads the rest of the open file)
            print("\nObtaining measurements")
            tm.sleep(delay_time)        
            df = pd.read_csv(f, names=headers)
            
            #Keep the standardized columns
            #A column listed in kestrel_units is renamed to "<header> (<unit>)" when its unit matches;
            #a column not listed in kestrel_units is dropped
            for i, n in zip(headers, units):
                if i in kestrel_units:
                    if kestrel_units[i] == n:
                        new_label = i + " (" + n + ")"
                        df.rename(columns = {i: new_label}, inplace = True)
                    else:
                        #TODO
                        print(f"Need to convert {n} to {kestrel_units[i]}")
                else:
                    df.drop(columns=i, inplace=True)
            
            df_rows = df.shape[0]
            data_loaded = True
            print("Data obtained successfully")
            tm.sleep(delay_time)
            print(f"\n{df_rows} rows in file")
            print("\nReview data and proceed to STEP 3")

        except Exception as e:
            print("Something went wrong")
            print(e)

    finally:
        f.close()
        
    #Variables to prevent cells from accidentally being run again, and potentially messing up the workflow
    timedeltas_read = False

    #Only show the dataframe when it was loaded by this run
    #(otherwise df is either undefined or left over from an earlier run)
    if data_loaded:
        display(df)

Opening "WEATHER - 2481048_2022-06-27 15_31_52.csv"...
File opened successfully

Reading prologue
--------------------------------------------------
Device Information:

Name:,WEATHER - 2481048

Model:,5500L

Serial:,2481048

Firmware:,1.31

Profile Version:,0.06

Hardware Version:,Rev 13C

LiNK Version:,1.04.04



--------------------------------------------------
Prologue read successfully
Prologue is 8 lines

Obtaining headers
Headers obtained successfully

Obtaining units
Units obtained successfully

Obtaining measurements
Data obtained successfully

7021 rows in file

Review data and proceed to STEP 3


Unnamed: 0,Time (yyyy-MM-dd hh:mm:ss),Temp (Celsius),Wet Bulb Temp. (Celsius),Rel. Hum. (%),Baro. (mb),Altitude (Meters),Wind Speed (m/s),Mag. Dir. (Degrees),True Dir. (Degrees)
0,2022-02-25 14:46:56,9.8,9.5,96.9,1018.7,-48,1.4,341,341
1,2022-02-25 14:46:58,9.7,9.5,97.7,1018.5,-47,1.2,288,288
2,2022-02-25 14:47:00,9.8,9.8,98.3,1018.7,-48,1.4,275,276
3,2022-02-25 14:47:02,9.8,9.7,98.5,1018.9,-49,2.0,291,292
4,2022-02-25 14:47:04,9.7,9.5,98.1,1019.0,-49,2.4,285,286
...,...,...,...,...,...,...,...,...,...
7016,2022-06-22 15:19:46,24.1,16.8,47.9,1012.2,6,0.0,359,0
7017,2022-06-22 15:19:48,24.1,16.8,47.9,1012.2,6,0.0,359,0
7018,2022-06-22 15:19:50,24.1,16.8,47.9,1012.2,6,0.0,359,0
7019,2022-06-22 15:19:52,24.1,16.8,47.9,1012.2,6,0.0,359,0


***END OF IMPORT DATA STAGE***
***

# CLEAN DATA

## STEP 3: ERROR CHECKING
### Check the data types of the headers

<table>
  <tr>
    <th>Measurement</th>
    <th>Units</th>
    <th>Expected Data Type</th>
  </tr>
  <tr>
    <td>Time</td>
    <td>(yyyy-MM-dd hh:mm:ss)</td>
    <td style="font-family: monospace">datetime64[ns]</td>
  </tr>
  <tr>
    <td>Temp</td>
    <td>Fahrenheit | Celsius</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Wet Bulb Temp.</td>
    <td>Fahrenheit | Celsius</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Rel. Hum.</td>
    <td>%</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Baro.</td>
    <td>inHg | hPa | psi | mb</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Altitude</td>
    <td>Meters | Feet</td>
    <td style="font-family: monospace">int64</td>
  </tr>
  <tr>
    <td>Station P.</td>
    <td>inHg | hPa | psi | mb</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Wind Speed</td>
    <td>mph | fpm | Bft | m/s | km/h | kt</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Heat Index</td>
    <td>Fahrenheit | Celsius</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Dew Point</td>
    <td>Fahrenheit | Celsius</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Dens. Alt.</td>
    <td>Meters | Feet</td>
    <td style="font-family: monospace">int64</td>
  </tr>
  <tr>
    <td>Crosswind</td>
    <td>mph | fpm | Bft | m/s | km/h | kt</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Headwind</td>
    <td>mph | fpm | Bft | m/s | km/h | kt</td>
    <td style="font-family: monospace">float64</td>
  </tr>
  <tr>
    <td>Mag. Dir.</td>
    <td>Cardinal Points | Degrees</td>
    <td style="font-family: monospace">int64</td>
  </tr>
  <tr>
    <td>True Dir.</td>
    <td>Cardinal Points | Degrees</td>
    <td style="font-family: monospace">int64</td>
  </tr>
  <tr>
    <td>Wind Chill</td>
    <td>Fahrenheit | Celsius</td>
    <td style="font-family: monospace">float64</td>
  </tr>

</table>

In [53]:
#Run this cell once to view the datatypes
#Shows the dtype pandas assigned to each column of df, for comparison with the expected types in the table above
df.dtypes

Time (yyyy-MM-dd hh:mm:ss)     object
Temp (Celsius)                float64
Rel. Hum. (%)                 float64
Altitude (Meters)               int64
Wind Speed (m/s)              float64
Mag. Dir. (Degrees)             int64
True Dir. (Degrees)             int64
dtype: object

### Check for any invalid values, corrupt values, etc
- These invalid/corrupt values are most likely the reason why a column's data type differs from the expected one

In [54]:
#Run this cell once

###############################################################################
#LOOK FOR ANY CORRUPT FIELDS, NAN, ETC
#ERROR CHECKING ON UNITS 

#Boolean masks with the same shape as df:
#asterisks is True where a cell equals "***", nulls is True where a cell is NaN/null
asterisks = df.isin(["***"])
nulls = df.isnull()

#Columns that contain at least one *** entry
invalidcols = [col for col in asterisks.columns if asterisks[col].values.any()]

#Columns that contain at least one NaN entry
nullcols = [col for col in nulls.columns if nulls[col].values.any()]

asterisks_bool = len(invalidcols) != 0
nulls_bool = len(nullcols) != 0

#Find the specific rows in the entire dataframe
#A row is flagged when any of its cells is "***" or NaN. The masks computed above are
#reused here instead of re-scanning df one row at a time
#indices holds 0-based row POSITIONS (a set, so each row appears once)
bad_rows = (asterisks.values | nulls.values).any(axis=1)
indices = {position for position, is_bad in enumerate(bad_rows) if is_bad}

if len(indices) == 0:
    print("No errors have been detected")
    print("Proceed to STEP 4")

if asterisks_bool:
    print("*** entries have been detected in the following columns")
    print(*invalidcols, sep="\n")

if nulls_bool:
    print("NaN entries have been detected in the following columns")
    print(*nullcols, sep = "\n")

if asterisks_bool or nulls_bool:
    print("\nProceed to STEP 3A to identify them and STEP 3B for any further action")
    
#check nulls reference https://www.geeksforgeeks.org/check-for-nan-in-pandas-dataframe/

No errors have been detected
Proceed to STEP 4


### STEP 3A: Review the data with error values

In [None]:
#Run this cell once

#View the rows of the dataframe that contain invalid values
#indices is an unordered set of 0-based row positions collected in STEP 3, so the rows are
#selected by position (iloc) and sorted to show them in their original order
df.iloc[sorted(indices)]

### STEP 3B: Determine whether to ignore the entire corrupt fields or replace the individual corrupt values with a null/error indicating value
- Ignore the entire corrupt fields if they are not of interest, and proceed to STEP 4
- Replace the individual values if the fields are of interest
    - Ex. Rows 0-100 are of interest, but row 50 has a corrupt value; this will have to be noted

In [None]:
#Value written in place of every *** entry
error_value = 0

##################################################################################


#asterisks is True wherever a cell is NOT "***"; where() keeps those cells and
#writes error_value into all the others
asterisks = df != "***"
df = df.where(asterisks, error_value)
for col in invalidcols:
    message = f"All asterisks in the {col} column were replaced with a {error_value}\n"
    change_history.append(message)
    print(message)


#Convert header data types to expected data types
#The first column is converted to datetime
first_column = df.columns[0]
df[first_column] = pd.to_datetime(df[first_column])
message = f"The {first_column} column was converted to type datetime\n"
change_history.append(message)
print(message)

#TODO MAKE THIS INSTRUCTION MORE CLEAR; USER WILL TYPE HERE
#INSERT YOUR MEASUREMENTS WITH THE EXPECTED DATA TYPES HERE
#MEASUREMENTS MUST BE TYPED IN EXACTLY AS IT IS IN THE DATAFRAME
expected_dtypes = {
    "Crosswind (m/s)": "float",
    "Headwind (m/s)": "float",
    "Mag. Dir. (Degrees)": "int",
    "True Dir. (Degrees)": "int"
}

#Cast every listed column that is present in df; listed columns that are absent are skipped
for col, target_dtype in expected_dtypes.items():
    if col not in df:
        continue
    df[col] = df[col].astype(target_dtype)
    message = f"The {col} column was converted to type {target_dtype}\n"
    change_history.append(message)
    print(message)

#Number of change_history entries recorded so far
ch_bound_1 = len(change_history)

print("Data types have been successfully changed")
print("\n")
print(df.dtypes)
print("\n")
print("Proceed to STEP 4")

***END OF CLEAN DATA STAGE***
***

# IDENTIFY THE DATA OF INTEREST

## Using time deltas
- The time deltas are intended to help easily identify a field test <br>
- "Spikes" in the time delta graph will most likely indicate 
    - The start and end of a field test
    - A clock reset 
    - When the Kestrel was turned on <br>
<br>

Time deltas are calculated as follows:
- $\Delta t_{i,\ i+1} = t_{i+1} - t_{i}$,
where $\Delta t_{i,\ i+1}$ is the change in time from the entry at row $i$ to the entry at row $i+1$
- Example: $\Delta t_{0,\ 1} = t_{1} - t_{0}$,
where $\Delta t_{0,\ 1}$ is the change in time from the entry at row $0$ to the entry at row $1$

## STEP 4: Calculating all time deltas

In [59]:
#Run this cell once

#####################################################################
#timedeltas_read is reset to False by STEP 2 and set to True here on success,
#so re-running this cell does not add the time delta columns a second time
if not timedeltas_read:
    #Date column as string variable (the first column of df)
    #NOTE: this rebinds the name "date", which STEP 0 imported from the datetime module
    date = df.columns[0]

    try:
        #Also here if the error checking steps were skipped
        if df[date].dtype != "<M8[ns]":
            df[date] = pd.to_datetime(df[date])         
            change_history.append(f"The \"{date}\" column was converted to type datetime\n")
            print(f"The \"{date}\" column was converted to type datetime\n")

        #List to store deltas
        deltas = []

        #Total number of data entry rows
        rows = len(df)

        #Calculate all time deltas and store in list
        #The delta stored for row i is (time at row i+1) - (time at row i)
        for i in range(rows-1):
            time1 = df.loc[i,date]
            time2 = df.loc[i+1,date]
            delta = time2 - time1
            deltas.append(delta)
        td_min = min(deltas)
        td_max = max(deltas)
        deltas.append("LAST TIME ENTRY") #Helper text: the last row has no following row

        #Convert list to series
        deltas = pd.Series(deltas, name="Time Delta")

        #Store the sampling interval of 2s, or whatever sampling interval was chosen (which should be the most common)        
        mode = deltas.mode()

        #Create separate series of time deltas in seconds
        td_seconds = []
        for td in deltas:
            if isinstance(td, str): #Necessary for entries with helper text
                td_seconds.append(td)
            else:
                td_seconds.append(td.total_seconds())
        td_seconds = pd.Series(td_seconds, name="Time Delta (seconds)")

        #Initialize dataframe with datetime columns
        times = pd.DataFrame(df[date]).rename(columns={date:"Datetime"})

        #Create datetime + 1 and datetime - 1 series to be added into times df
        dtplusone = pd.Series(index=range(rows), name="Datetime_i+1", dtype="object")
        dtminusone = pd.Series(index=range(rows), name="Datetime_i-1", dtype="object")

        #Append necessary values
        dtplusone[0:-1] = df.loc[1:, date]
        dtplusone[rows-1] = "LAST TIMESTAMP" #Helper text---is it necessary?

        dtminusone[1:] = df.loc[:rows-2, date]
        dtminusone[0] = "FIRST TIMESTAMP" #Helper text---is it necessary?

        times = times.join([deltas, td_seconds, dtminusone, dtplusone])

        #Reorder columns to desired order
        times = times[["Datetime", "Datetime_i+1", "Time Delta", "Time Delta (seconds)", "Datetime_i-1"]]

        ### Appending the time deltas to the main df
        #The joined frame is built in a temporary variable and df is assigned only once, after
        #the reorder, so a failure here cannot leave df half-updated
        df_with_deltas = df.join([deltas, td_seconds])

        #Reordering columns: timestamp, then both time delta columns, then every other column
        #in its existing order. The order is derived from the columns that are actually present
        #rather than a fixed list of names, which raised a KeyError whenever a column was absent
        leading_columns = [date, deltas.name, td_seconds.name]
        remaining_columns = [col for col in df_with_deltas.columns if col not in leading_columns]
        df = df_with_deltas[leading_columns + remaining_columns]
        
        print("Time Deltas have been calculated")
        print(f"Most common sampling time in datafile is {mode[0].seconds} seconds")
        display(df[df.columns[1:3]])
        print("Proceed to STEP 5")
        timedeltas_read = True
    except Exception as e:
        print(e)

    ch_bound_1 = len(change_history)
else:
    print("Time Deltas have already been calculated")
    print(f"Most common sampling time in datafile is {mode[0].seconds} seconds")
    display(df[df.columns[1:3]])
    print("Proceed to STEP 5")
    ch_bound_1 = len(change_history)

The "Time (yyyy-MM-dd hh:mm:ss)" column was converted to type datetime

Time Deltas have been calculated
Most common sampling time in datafile is 2 seconds


Unnamed: 0,Time Delta,Time Delta (seconds)
0,0 days 00:00:02,2.0
1,0 days 00:00:02,2.0
2,0 days 00:00:02,2.0
3,0 days 00:00:02,2.0
4,0 days 00:00:02,2.0
...,...,...
7016,0 days 00:00:02,2.0
7017,0 days 00:00:02,2.0
7018,0 days 00:00:02,2.0
7019,0 days 00:00:02,2.0


Proceed to STEP 5


## STEP 5: Calculating the time delta outliers
- The "time delta outliers" are the time deltas != standard sampling interval (2 seconds)

### Calculate the time delta outliers

In [60]:
#Run this cell once

####################################################################################
#Collect every time delta that equals none of the values in mode, together with its row position
outliers = []
outliers_index = []
for position, delta in enumerate(deltas):
    if (delta == mode).any():
        continue
    outliers.append(delta)
    outliers_index.append(position)
outliers = pd.Series(outliers, name="Time Deltas != sampling interval")

#Same outliers expressed in seconds; helper-text (string) entries are passed through unchanged
outliers_seconds = pd.Series(
    [value if isinstance(value, str) else value.total_seconds() for value in outliers],
    name="Time Delta (seconds)",
)

print("Time delta outliers successfully calculated")

#Initialize time delta != sampling interval comparison chart
columnnames = ["Datetime", "Datetime_i+1", "Time Delta", "Time Delta (Seconds)", "Datetime_i-1"] #REARRANGE COLUMNS HERE TO DESIRED LAYOUT

#Column positions are looked up by name, so they follow whatever order is chosen above
td = columnnames.index("Time Delta")
tds = columnnames.index("Time Delta (Seconds)")
dt = columnnames.index("Datetime")
dtmin = columnnames.index("Datetime_i-1")
dtplus = columnnames.index("Datetime_i+1")

#One chart row per outlier, labelled with the outlier's row position in the data
outliers_df = pd.DataFrame(index=outliers_index, columns=columnnames)

#Fill in the time deltas and their value in seconds
for chart_row, (delta, seconds) in enumerate(zip(outliers, outliers_seconds)):
    outliers_df.iloc[chart_row, td] = delta
    outliers_df.iloc[chart_row, tds] = seconds

#Fill in the timestamp of each outlier row and of the rows just before and after it
for chart_row, data_row in enumerate(outliers_index):
    is_first = data_row == 0
    is_last = (not is_first) and data_row == rows-1

    outliers_df.iloc[chart_row, dt] = df.loc[data_row, date] #Datetime
    outliers_df.iloc[chart_row, dtmin] = "FIRST ENTRY" if is_first else df.loc[data_row-1, date] #Datetime_i-1
    outliers_df.iloc[chart_row, dtplus] = "NO ENTRY" if is_last else df.loc[data_row+1, date] #Datetime_i+1

print("Time delta outliers chart successfully created")
print(f"\nMost common sampling time in datafile is {mode[0].seconds} seconds")
print("\nProceed to STEP 6")
outliers_df

Time delta outliers successfully calculated
Time delta outliers chart successfully created

Most common sampling time in datafile is 2 seconds

Proceed to STEP 6


Unnamed: 0,Datetime,Datetime_i+1,Time Delta,Time Delta (Seconds),Datetime_i-1
825,2022-02-25 15:14:26,2022-02-25 15:14:27,0 days 00:00:01,1.0,2022-02-25 15:14:24
826,2022-02-25 15:14:27,2022-02-25 15:14:28,0 days 00:00:01,1.0,2022-02-25 15:14:26
3038,2022-02-25 16:28:10,2022-02-25 20:01:32,0 days 03:33:22,12802.0,2022-02-25 16:28:08
3135,2022-02-25 20:04:44,2022-02-27 02:28:04,1 days 06:23:20,109400.0,2022-02-25 20:04:42
3234,2022-02-27 02:31:20,2022-02-27 05:31:22,0 days 03:00:02,10802.0,2022-02-27 02:31:18
3235,2022-02-27 05:31:22,2022-02-27 12:31:24,0 days 07:00:02,25202.0,2022-02-27 02:31:20
3236,2022-02-27 12:31:24,2022-02-27 15:31:26,0 days 03:00:02,10802.0,2022-02-27 05:31:22
3249,2022-02-27 15:31:50,2022-06-27 15:31:52,120 days 00:00:02,10368002.0,2022-02-27 15:31:48
3250,2022-06-27 15:31:52,2022-06-26 15:31:54,-1 days +00:00:02,-86398.0,2022-02-27 15:31:50
3251,2022-06-26 15:31:54,2022-06-22 15:31:56,-4 days +00:00:02,-345598.0,2022-06-27 15:31:52


## STEP 6: Use plots to help identify where to trim

1. Execute the cell below
2. The generated charts are intended to help identify the indices corresponding to your field test/data of interest
    - Example: a field test on October 11, 2022 from 4PM to 5PM corresponds to indices 529-1370
3. Use the first index as `start_index` and the last index as `end_index`
    - Example: 
    `start_index` = 529, `end_index` = 1370
4. Proceed to STEP 7 afterwards

In [61]:
#Run this cell once

###############################################################################
#Just time series for faster performance

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, BoxSelectTool, DataTable, TableColumn, CDSView, IndexFilter, DateFormatter, DatetimeTickFormatter, NumeralTickFormatter, CustomJS, Panel, Tabs, LinearAxis, Range1d, Paragraph, DatePicker, Div, BoxAnnotation
from bokeh.layouts import gridplot, column, row
from bokeh.io import output_notebook, curdoc
output_notebook()

#Data
#Column Names
#Labels of the df columns that are plotted; they must match the df column labels exactly
time = "Time (yyyy-MM-dd hh:mm:ss)"
tdseconds = "Time Delta (seconds)"
temp = "Temp (Celsius)"
alt = "Altitude (Meters)"
windspeed = "Wind Speed (m/s)"
rh = "Rel. Hum. (%)"
baro = "Baro. (mb)"
magdir = "Mag. Dir. (Degrees)"

#Main data source: one entry per df row, exposed to the plots under short field names
source = ColumnDataSource(data=dict(
    index=df.index, 
    datetime=df[time], 
    timedelta=df[tdseconds], 
    temp=df[temp],
    alt=df[alt],
    windspeed=df[windspeed],
    rh=df[rh],
    baro=df[baro],
    magdir=df[magdir],
    )
)

#Empty x/y data sources, one per measurement; they are only gathered into the sources list below
#NOTE(review): not filled anywhere in the visible part of this cell - confirm whether they are still needed
tempvstime = ColumnDataSource(data=dict(x=[], y=[]))
altvstime = ColumnDataSource(data=dict(x=[], y=[]))
windspeedvstime = ColumnDataSource(data=dict(x=[], y=[]))
rhvstime = ColumnDataSource(data=dict(x=[], y=[]))
barovstime = ColumnDataSource(data=dict(x=[], y=[]))
magdirvstime = ColumnDataSource(data=dict(x=[], y=[]))

#Starts empty; the selection callback registered later in this cell clears it and refills it
#with the rows of source that are currently selected
trimmedvalues = ColumnDataSource(data=dict(
    index = [],
    time = [],
    temp = [],
    alt = [],
    windspeed = [],
    rh = [],
    baro = [],
    magdir = []
))

sources = [tempvstime, altvstime, windspeedvstime, rhvstime, barovstime, magdirvstime]

#Formatting
datefmt = DateFormatter(format="%F %I:%M:%S %p") #Format API reference: https://docs.bokeh.org/en/latest/docs/reference/models/widgets/tables.html?highlight=datatable#bokeh.models.DataTable
width = 1000
height = 300
hovercolor = "black"
barocolor = "orange"

datetimevsindexhover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    tooltips=[
        ("Index", "$index"),
        ("Date", "@datetime{%F %I:%M:%S %p}"),
        ("Time Delta", "@{timedelta} seconds")
    ],

    formatters={
        "@datetime" : "datetime",
        #"@y1" : "numeral"
    },
    #mode = "vline"
)

options = dict(x_axis_label = "Row Index", tools=[datetimevsindexhover, "pan, wheel_zoom, xwheel_pan, ywheel_pan, box_select, box_zoom, reset"], plot_width=700, plot_height=300)
links = dict(width=width, height=height, x_axis_type="datetime")
#view = CDSView(source=source, filters=[IndexFilter(x)])
sz = 5

#INITIALIZING PLOTS************************************************************************************************

#Datetime vs. Index
f1 = figure(title = "Datetime vs. Index", y_axis_label = "Date", y_axis_type="datetime", **options)
f1.line("index", "datetime", hover_color="red", source=source)
f1.circle("index", "datetime", size=sz, hover_color="red", source=source, selection_color = "firebrick",) #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/styling/plots.html#selected-and-unselected-glyphs


#Time Delta vs. Index
f2 = figure(title = "Time Delta vs. Index", y_axis_label="Time Delta (s)", y_axis_type="log", x_range=f1.x_range, **options)
#f2.yaxis.formatter = DatetimeTickFormatter(seconds=["%S"])
f2.line("index", "timedelta", hover_color="red", source=source)
f2.circle("index", "timedelta", size=sz, hover_color="red", source=source, selection_color = "firebrick")

columns = [
    TableColumn(field="datetime", title="Datetime", formatter= datefmt), #Reference: https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter
    TableColumn(field="timedelta", title="Change in time (seconds)")
]

dt1 = DataTable(background = "red", source=source, columns=columns)

disclaimer_msg = Paragraph(text="""*Time Delta values equal to zero will not be plotted*""")

#TIME SERIES PLOTS*************************************************************************************************
timegraphs = {
    "Altitude vs. Time": "red", 
    "Wind Speed vs. Time": "magenta", 
}

hover_timeseries = HoverTool(
    tooltips=[
        ("Index", "@index"),
        ("Time", "@time{%F %I:%M:%S %p}"),        
        ("Temperature", "@temp"),  
        ("Altitude", "@alt"),
        ("Windspeed", "@windspeed"),
        ("Relative Humidity", "@rh"),
        ("Barometric Pressure", "@baro"),
        ("Magnetic Direction", "@magdir"),  
    ],

    formatters={
        "@time" : "datetime",
    },
    #mode = "vline"
)

#TIME SERIES PLOTS SEPARATE******************************************************************************8
#Builds one time series figure per entry of timegraphs (title -> color), all drawn from trimmedvalues
timeylabels = [alt, windspeed] #y-axis label for each figure (the df column labels)
timeysourceskeys = ["alt", "windspeed"] #trimmedvalues field plotted on each figure's y-axis
time_series_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)
#Placeholder figures; each one is replaced by the real figure inside the loop below
p1, p2 = figure(), figure()
timefigures = [p1, p2]

#f: placeholder being replaced, g: title (key of timegraphs), l: y-axis label, key: trimmedvalues field
#NOTE: the loop variable f reuses the name of the file handle opened in STEP 2
for f, g, l, key in zip(timefigures, timegraphs, timeylabels, timeysourceskeys):
    i = timefigures.index(f) #position of the placeholder, so the real figure can take its slot
    #Every figure is created with the x_range of timefigures[0]: the first placeholder's range on the
    #first pass, and the first real figure's range (created from that placeholder's) on later passes
    f = figure(title=g, x_range=timefigures[0].x_range, x_axis_label = "Time", y_axis_label=l, x_axis_type = "datetime", **time_series_options)
    #Title and y-axis are colored to match the plotted line
    f.title.text_color = timegraphs[g]
    f.yaxis.axis_label_text_color = timegraphs[g]
    f.yaxis.major_label_text_color = timegraphs[g]
    f.yaxis.axis_line_color = timegraphs[g]
    f.xaxis.formatter=DatetimeTickFormatter(
        hours="%I:%M:%S %p",
        minutes="%I:%M:%S %p")
    #f.background_fill_color = (204, 255, 255)
    timefigures[i] = f
    #trimmedvalues is empty at this point, so nothing is drawn until rows are selected
    f.line("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues)
    f.circle("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues)
    
#Both time series figures stacked in one column, shown as a single tab
tab1 = Panel(child=column(timefigures[0:2]), title="Time Series Plots")


#RANGE INDICATOR*********************************************************************
ranges = Paragraph(text="""SELECTED INDICES: """)

#FIELD TEST INFO FOR PLOTS****************************************************************
fieldtestinfo = Div(text=
f"""
<p>FIELD TEST: <b>{fieldTestParameters["Field Test Label"]}</b></p>
<p>LOCATION: <b>{fieldTestParameters["Field Test Location"]}</b></p>
<p>DATE: <b>{fieldTestParameters["Field Test Date"]}</b></p>
<p>DEVICE: <b>{fieldTestParameters["Device Nickname"]}</b></p>
"""
)

#TODO DATE PICKER*************************************************************************
#Filters to first entry with selected date
"""
start_date = df[date].min().date()
end_date = df[date].max().date()

date_picker = DatePicker(title="Select Date of Field Test", value=start_date, min_date=start_date, max_date=end_date)"""

#PLOT GENERATION**************************************************************************************************************************************************************
#Reference for array performance https://github.com/bokeh/bokeh/blob/main/examples/interaction/js_callbacks/js_on_change.py
#Whenever the selection on `source` changes, copy the selected rows into
#`trimmedvalues` (which feeds the time-series plots) and show the selected
#index span in the `ranges` paragraph.
source.selected.js_on_change("indices", CustomJS(args=dict(
    origin=source, 
    trimmedvalues=trimmedvalues,
    ranges=ranges
), 
code="""
    //Selections arrive unsorted after a shift click in the datatable.
    //Sort a copy so the live selection array on cb_obj is left untouched.
    const inds = Array.from(cb_obj.indices).sort(function(a, b){return a - b});
    console.log("INDS: " + inds)

    const d1 = origin.data;
    const d2 = trimmedvalues.data;

    const cols = ["temp", "alt", "windspeed", "rh", "baro", "magdir"];

    //To clear for every box select
    d2["time"] = [];
    d2["index"] = [];

    for (const label of cols)
    {
        d2[label] = [];
    }

    //Generate the plots
    for (const ind of inds)
    {
        d2["time"].push(d1["datetime"][ind]);

        d2["index"].push(ind);

        for (const label of cols)
        {
            d2[label].push(d1[label][ind]);
        }
    }

    //Display the range selection (left blank when nothing is selected,
    //instead of printing "undefined - undefined")
    if (inds.length > 0)
    {
        ranges.text = "SELECTED INDICES: " + inds[0] + " - " + inds[inds.length-1]
    }
    else
    {
        ranges.text = "SELECTED INDICES: "
    }

    //Refresh
    trimmedvalues.change.emit()

"""
    )
)

#ORGANIZING PLOTS INTO TABS********************************************
#tab1 = Panel(child=f3, title="Temp")
#Displaying the data
layout1 = row(column(children=[f1, f2, disclaimer_msg]), column(children=[dt1, ranges]))
layout2 = row(Tabs(tabs=[tab1]), fieldtestinfo)

show(column(layout1, layout2))

# TRIM THE DATA

## STEP 7: Manually input the index range of interest (also indicated by SELECTED INDICES above)

In [24]:
#First and last row labels of `df` to keep (both ends are included in the trim)
start_index = 4200
end_index = 6100

#############################################################################

df_indices = df.index

#Inclusive bounds used in the error messages. min()/max() work for any index
#type and, unlike RangeIndex.stop, the reported upper bound is itself a valid index.
first_valid_index = df_indices.min()
last_valid_index = df_indices.max()

if start_index not in df_indices:
    print(f"Specified start index, {start_index}, is not in the index range of {first_valid_index} and {last_valid_index}")
    
elif end_index not in df_indices:
    print(f"Specified end index, {end_index}, is not in the index range of {first_valid_index} and {last_valid_index}")

elif end_index < start_index:
    print(f"Specified END index, {end_index}, is less than the specified START index, {start_index}")

else:
    #Timestamps of the first and last kept rows; reused by the save step for the change history
    trim_date_start = df.loc[start_index, date].strftime("%A, %B %d, %Y, %I:%M:%S %p")
    trim_date_end = df.loc[end_index, date].strftime("%A, %B %d, %Y, %I:%M:%S %p")
    print(f"TRIMMING FROM INDEX {start_index} to INDEX {end_index}")
    print("-"*50, trim_date_start, "to", trim_date_end,"-"*50, sep="\n")

    #.loc slicing is label based and includes end_index
    df_trim = df.loc[start_index:end_index]
    print("Review the data and proceed to STEP 8")
    display(df_trim)
    
    #TODO PUT INTO A FUNCTION
    #Data source for the preview plots of the trimmed window
    trimmedvalues2 = ColumnDataSource(data=dict(
        index = df_trim.index,
        time = df_trim[date],
        alt = df_trim[alt],
        windspeed = df_trim[windspeed],
        temp=df_trim[temp],
        rh=df_trim[rh],
        baro=df_trim[baro],
        magdir=df_trim[magdir],
    ))

    timeylabels = [alt, windspeed]
    timeysourceskeys = ["alt", "windspeed"]
    time_series_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)

    #Placeholder figures: the first one only donates its x_range so the plots pan/zoom together
    p1, p2 = figure(), figure()
    timefigures = [p1, p2]

    #enumerate() supplies the slot to overwrite directly instead of searching the list with .index()
    for i, (f, g, l, key) in enumerate(zip(timefigures, timegraphs, timeylabels, timeysourceskeys)):
        f = figure(title=g, x_range=timefigures[0].x_range, x_axis_label = "Time", y_axis_label=l, x_axis_type = "datetime", **time_series_options)
        f.title.text_color = timegraphs[g]
        f.yaxis.axis_label_text_color = timegraphs[g]
        f.yaxis.major_label_text_color = timegraphs[g]
        f.yaxis.axis_line_color = timegraphs[g]
        f.xaxis.formatter=DatetimeTickFormatter(
            hours="%I:%M:%S %p",
            minutes="%I:%M:%S %p")
        timefigures[i] = f
        f.line("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues2)
        f.circle("time", key, color=timegraphs[g], hover_color=hovercolor, source=trimmedvalues2)

    show(column(timefigures[0:2]))

TRIMMING FROM INDEX 4200 to INDEX 6100
--------------------------------------------------
Tuesday, June 21, 2022, 04:42:56 PM
to
Tuesday, June 21, 2022, 05:46:16 PM
--------------------------------------------------
Review the data and proceed to STEP 8


Unnamed: 0,Time (yyyy-MM-dd hh:mm:ss),Time Delta,Time Delta (seconds),Temp (Celsius),Wet Bulb Temp. (Celsius),Rel. Hum. (%),Baro. (mb),Altitude (Meters),Wind Speed (m/s),Mag. Dir. (Degrees),True Dir. (Degrees)
4200,2022-06-21 16:42:56,0 days 00:00:02,2.0,28.6,20.0,45.3,1020.0,-58,0.4,23,23
4201,2022-06-21 16:42:58,0 days 00:00:02,2.0,28.6,20.0,45.4,1020.0,-58,0.4,23,23
4202,2022-06-21 16:43:00,0 days 00:00:02,2.0,28.6,20.0,45.4,1020.0,-60,0.0,23,24
4203,2022-06-21 16:43:02,0 days 00:00:02,2.0,28.6,20.0,45.5,1020.0,-58,0.0,23,24
4204,2022-06-21 16:43:04,0 days 00:00:02,2.0,28.7,20.0,45.5,1020.0,-58,0.3,24,24
...,...,...,...,...,...,...,...,...,...,...,...
6096,2022-06-21 17:46:08,0 days 00:00:02,2.0,28.9,19.6,42.4,1019.5,-53,0.0,349,349
6097,2022-06-21 17:46:10,0 days 00:00:02,2.0,28.9,19.6,42.3,1019.5,-53,0.0,349,350
6098,2022-06-21 17:46:12,0 days 00:00:02,2.0,28.9,19.6,42.3,1019.5,-53,0.0,349,350
6099,2022-06-21 17:46:14,0 days 00:00:02,2.0,28.9,19.6,42.3,1019.4,-53,0.0,349,350


## STEP 8: Save the trimmed data as a `.csv` and `.xlsx`
- Input the desired file name to `trimmed_file_name`

In [25]:
# Trimmed data corresponds to just the field test

#Name your trimmed file
trimmed_file_name = "JUNE 21 2022 CBEC TRIM"

###############################################################################

#Clear/refresh the change history in case this cell is rerun to prevent redundant information being appended
change_history = change_history[:ch_bound_1]
change_history.append(f"Data was trimmed from {trim_date_start} to {trim_date_end}")
 
#Get rid of the Time Delta column and renumber the rows from 0
trimmed_file = df_trim.drop(columns="Time Delta").reset_index(drop=True)

#Rename the Time Delta (seconds) column to Sampling Interval
trimmed_file = trimmed_file.rename(columns={"Time Delta (seconds)": "Sampling Interval (seconds)"})
change_history.append("Time Delta (seconds) was renamed to Sampling Interval (seconds)")

#Create elapsed time column (Reference: https://chris35wills.github.io/time_elapsed_pandas/)
#Elapsed time is measured from the first row of the trimmed data
time_position = trimmed_file.columns.get_loc(time)
elapsed_time = trimmed_file.iloc[:,time_position] - trimmed_file.iloc[0,time_position]
trimmed_file.insert(1, "Elapsed Time (seconds)", elapsed_time.dt.total_seconds(), allow_duplicates=True)
change_history.append("Elapsed Time (seconds) column was added")

#One-column Series so each entry lands on its own row of its Excel sheet
prologuepd = pd.Series(prologue)
changehistory = pd.Series(change_history)

#Build the output paths with os.path.join so the separators match the operating system,
#and make sure the folder exists so a missing folder is not mistaken for a locked file
trimmed_data_dir = os.path.join(cwd, trimmedDataFolderName)
os.makedirs(trimmed_data_dir, exist_ok=True)
df_trim_path_csv = os.path.join(trimmed_data_dir, trimmed_file_name + ".csv")
df_trim_path_excel = os.path.join(trimmed_data_dir, trimmed_file_name + ".xlsx")

try:
    print(f"Saving {trimmed_file_name}.csv")
    trimmed_file.to_csv(df_trim_path_csv, index=False)
    print(f"{trimmed_file_name}.csv was saved to {df_trim_path_csv}")
except Exception as e:
    #Best effort: report the problem and carry on so the Excel export is still attempted
    print(e)
    print(f"Is {trimmed_file_name}.csv currently open on your computer?")

try:
    print(f"\nSaving {trimmed_file_name}.xlsx")
    with pd.ExcelWriter(df_trim_path_excel) as writer:
        prologuepd.to_excel(writer, sheet_name="Kestrel Info", index = False, header = False)
        changehistory.to_excel(writer, sheet_name="Data Analysis Record", index = False, header = False)
        trimmed_file.to_excel(writer, sheet_name="Field Test Data", index = False)
    print(f"{trimmed_file_name}.xlsx was saved to {df_trim_path_excel}")
except Exception as e:
    print(e)
    print(f"Is {trimmed_file_name}.xlsx currently open on your computer?")
    
#Remember how long the history is now so the baselining step can roll back to this point when rerun
ch_bound_2 = len(change_history)    

Saving JUNE 21 2022 CBEC TRIM.csv
JUNE 21 2022 CBEC TRIM.csv was saved to D:\Users\firen\Documents\Drexel\Edgley Nov. 5 2022\Data-Analysis-Package-for-Kestrel-5500/trimmed_data/JUNE 21 2022 CBEC TRIM.csv

Saving JUNE 21 2022 CBEC TRIM.xlsx
JUNE 21 2022 CBEC TRIM.xlsx was saved to D:\Users\firen\Documents\Drexel\Edgley Nov. 5 2022\Data-Analysis-Package-for-Kestrel-5500/trimmed_data/JUNE 21 2022 CBEC TRIM.xlsx


# BASELINING THE FIELD TEST DATA

## Convert barometric altitude to Altitude Above Ground (AOG)
- Hover over the plot to help identify the indices corresponding to the desired baseline values

In [10]:
#Run this cell once

###############################################################################
#Altitude vs. time plot of the trimmed data, used to pick the index ranges
#that will serve as the altitude baseline in STEP 9.
baseline = ColumnDataSource(data=dict(
    index = trimmed_file.index, 
    time = trimmed_file[time], 
    alt = trimmed_file[alt]
    )
)

altvstimehover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    tooltips=[
        ("Index", "$index"),
        ("Date", "@time{%F %I:%M:%S %p}"),
    ],

    formatters={
        "@time" : "datetime",
    },
    mode = "vline"
)

options = dict(x_axis_label = "Time", tools=[altvstimehover, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=400)

f = figure(title = "Altitude vs. Time", y_axis_label = alt, **options)
f.xaxis.formatter = DatetimeTickFormatter(
    seconds=["%I:%M:%S %p"],
    minutes=["%I:%M:%S %p"],
    hours=["%I:%M:%S %p"]
)
f.line("time", "alt", source=baseline)
f.circle("time", "alt", source=baseline, size = 5)

ranges = Paragraph(text="""SELECTED INDICES: """)

####################################################################
#Show the span of the current selection underneath the plot
baseline.selected.js_on_change("indices", CustomJS(args=dict(ranges=ranges),
code="""
    
    const inds = cb_obj.indices; //Not guaranteed to be sorted
    console.log("INDS: " + inds)

    if (inds.length > 0)
    {
        //Scan for the smallest and largest index rather than trusting the
        //first and last entries of a possibly unsorted selection
        let lowest = inds[0]
        let highest = inds[0]
        for (const ind of inds)
        {
            if (ind < lowest) { lowest = ind }
            if (ind > highest) { highest = ind }
        }

        //Display the range selection
        ranges.text = "SELECTED INDICES: " + lowest + " - " + highest
    }
    else
    {
        //Nothing selected: leave the range blank instead of "undefined - undefined"
        ranges.text = "SELECTED INDICES: "
    }

"""
    )
)                             
####################################################################
show(column(f, ranges))


## STEP 9: Baseline the data
- Input the index ranges corresponding to the altitude values to be used to baseline

***Example 1***

`baseline_ranges = [[0,40], [489,520]]`
- The altitude values from index 0 up to (but not including) index 40, and from index 489 up to (but not including) index 520, will be used to baseline the altitude

***Example 2***

`baseline_ranges = [[100,250]]`
- The altitude values from index 100 up to (but not including) index 250 will be used to baseline the altitude

***Afterwards,*** uncomment the desired baseline procedure to be used
- If using the `CONSTANT` baseline method, enter in the desired constant value for `baseline_val_constant`

In [29]:
import numpy as np
from sklearn import datasets, linear_model

#####################################################################

#Input the index ranges here
#Each entry is a [start, end] pair of row indices of the trimmed data; the rows from
#start up to (but not including) end supply the altitude samples for the baseline.
baseline_ranges = [[0,949], [1386, 1846]]

#Uncomment the baseline method to be used
#baseline_method = "CONSTANT"
#baseline_method = "AVERAGE"
baseline_method = "LINEAR"

#Baseline altitude passed to baseline() for the CONSTANT method
baseline_val_constant = -55

######################################################################\
#Drop any baselining entries left over from an earlier run of this cell
change_history = change_history[:ch_bound_2] #Refresh the change_history

def baseline(indices, data = trimmed_file, values = trimmed_file[alt], method = "LINEAR", baseline_val = 0.):
    """Estimate a baseline for ``values`` and return it with the baselined values.

    Parameters
    ----------
    indices : list of [start, end] pairs
        Row ranges whose samples are used to estimate the baseline. Rows from
        ``start`` up to, but not including, ``end`` are selected.
    data : DataFrame
        Must contain an "Elapsed Time (seconds)" column and the timestamp
        column named by the global ``time``.
    values : Series
        The values to baseline, on the same index as ``data``.
    method : str
        "LINEAR" (least-squares line through the selected samples against
        elapsed time), "AVERAGE" (mean of the selected samples) or
        "CONSTANT" (``baseline_val``). Case-insensitive.
    baseline_val : float
        The baseline used by the "CONSTANT" method; ignored otherwise.

    Returns
    -------
    tuple of (Series, Series) or None
        ``(baseline, values - baseline)`` named "Altitude Baseline (Meters)"
        and "AOG (Meters)". ``None`` is returned, after printing the valid
        choices, when ``method`` is not recognised.

    Raises
    ------
    ValueError
        If "LINEAR" or "AVERAGE" is requested but the ranges select no rows.

    Progress is printed, and every range plus the method used is appended to
    the global ``change_history``.
    """
    validmethods = ["LINEAR", "CONSTANT", "AVERAGE"]
    method = method.upper()
    if method not in validmethods:
        print(f"'{method}' is an invalid method for baseline estimation")
        print("Valid methods are: LINEAR, AVERAGE, CONSTANT")
        return None

    #Whole seconds since the start of the trimmed data; the regression's x values
    time_series = data["Elapsed Time (seconds)"].astype("int")
    values_series = values

    #Row mask that is True for every sample inside one of the baseline ranges
    time_filter = pd.Series(False, index=time_series.index)

    for span in indices:
        start = span[0]
        end = span[1]
        baseline_starttime = data.loc[start, time].strftime("%I:%M:%S %p")
        baseline_endtime = data.loc[end, time].strftime("%I:%M:%S %p")
        print(f"Baselining from {baseline_starttime} at index {start} to {baseline_endtime} at index {end} using baselining method: {method}")
        change_history.append(f"Data baselined from {baseline_starttime} to {baseline_endtime}")
        tm.sleep(delay_time)
        #NOTE(review): positional, half-open slice, so the row at `end` itself is not
        #selected even though its timestamp is the one reported above - confirm intended
        time_filter.iloc[start:end] = True

    if method != "CONSTANT" and not time_filter.any():
        #An empty selection would give a NaN average or a failed regression
        raise ValueError(f"Baseline ranges {indices} do not select any samples")

    #Column vectors, the layout LinearRegression expects
    time_baseline = time_series[time_filter].values.reshape(-1, 1)
    values_baseline = values_series[time_filter].values.reshape(-1, 1)

    if method == "AVERAGE":
        print(f"\nPerforming {method} baseline procedure")
        tm.sleep(delay_time)
        baseline_avg = np.average(values_baseline)
        print(f"Baseline average: {baseline_avg}")
        baseline_array = np.full((len(time_series), 1), baseline_avg)
        change_history.append(f"Baseline procedure used: {method}. Baseline average: {baseline_avg}")

    elif method == "CONSTANT":
        #Use the value handed to the function rather than reaching for a notebook global
        print(f"\nPerforming {method} baseline procedure")
        print(f"Baseline constant used: {baseline_val}")
        tm.sleep(delay_time)
        baseline_array = np.full((len(time_series), 1), baseline_val)
        change_history.append(f"Baseline procedure used: {method}. Baseline constant used: {baseline_val}")

    elif method == "LINEAR":
        change_history.append(f"Baseline procedure used: {method}")
        print(f"\nPerforming {method} baseline procedure")

        tm.sleep(delay_time)
        #Fit on the selected samples only, then evaluate the line at every timestamp
        regr = linear_model.LinearRegression()
        regr.fit(time_baseline, values_baseline)
        baseline_array = regr.predict(time_series.values.reshape(-1, 1))

        #Slope
        print("Slope =", regr.coef_)

        #Intercept
        print("Intercept =", regr.intercept_)

        #R^2 of the fit over the selected samples
        r2 = regr.score(time_baseline, values_baseline)
        print("R^2 =", r2)

    print(f"\n{method} baseline procedure completed successfully")
    #Carry the data's own index so the subtraction below lines up row for row
    baseline_est = pd.Series(baseline_array[:,0], index=time_series.index, name = "Altitude Baseline (Meters)")
    print("\nEstimated baseline values (Meters)")
    print(baseline_est)

    #Obtain the baselined values
    values_above_baseline = (values_series - baseline_est).rename("AOG (Meters)")
    print("\nAltitude Above Ground values (Meters)")
    print(values_above_baseline)

    return baseline_est, values_above_baseline

#Check for valid index range (reference: https://datascienceparichay.com/article/python-flatten-a-list-of-lists-to-a-single-list/)
#Every start and end index must be an existing row label of the trimmed data
validranges = [index for sublist in baseline_ranges for index in sublist]
validrange = all(index in trimmed_file.index for index in validranges)

#baseline() prints the reason and returns None when the method name is invalid,
#so hold its result in one name and only unpack it when there is something to unpack
baseline_result = None
if validrange:
    baseline_result = baseline(baseline_ranges, data = trimmed_file, values = trimmed_file[alt], method = baseline_method, baseline_val = baseline_val_constant)
else:
    #Report inclusive bounds: RangeIndex.stop is one past the last valid index
    print(f"Specified baseline ranges,{validranges}, do not fall within {trimmed_file.index.min()} and {trimmed_file.index.max()}")

if baseline_result is not None:
    baseline_series, AOG_series = baseline_result
    
    #Both series share trimmed_file's index, so join attaches them row for row
    trimmed_file_baselined = trimmed_file.join([baseline_series, AOG_series])

    #Reorder columns so the baseline and AOG sit next to the raw altitude
    trimmed_file_baselined = trimmed_file_baselined[[
    'Time (yyyy-MM-dd hh:mm:ss)',
    'Elapsed Time (seconds)',
    'Sampling Interval (seconds)',
    'Temp (Celsius)',
    'Wet Bulb Temp. (Celsius)',
    'Rel. Hum. (%)', 
    'Baro. (mb)',
    'Altitude (Meters)',
    baseline_series.name,
    AOG_series.name,
    'Wind Speed (m/s)',
    'Mag. Dir. (Degrees)',
    'True Dir. (Degrees)',
    ]]  
    
    #Quick-look plots of the result ##########################################
    dot_size = 0.5    
    
    altvstimehover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
        tooltips=[
            ("Index", "$index"),
            ("Date", "@time{%F %I:%M:%S %p}"),
            ("Altitude", "@alt meters")
        ],

        formatters={
            "@time" : "datetime",
        },
        mode = "vline"
    )

    source = ColumnDataSource(data=dict(
        index = trimmed_file_baselined.index, 
        time = trimmed_file_baselined[time],
        alt = trimmed_file_baselined[alt],
        ab = trimmed_file_baselined[baseline_series.name],
        aog = trimmed_file_baselined[AOG_series.name]
        )
    )

    options = dict(x_axis_label = "Time", tools=[altvstimehover, "pan, wheel_zoom, box_zoom, reset"], plot_width=600, plot_height=400)
    ##############################################################################
    #Altitude Baseline Plot
    ab = figure(title="Altitude Baseline (shaded areas indicate values used as baseline)", y_axis_label = alt, **options)
    ab.xaxis.formatter = DatetimeTickFormatter(
        seconds=["%I:%M:%S %p"],
        minutes=["%I:%M:%S %p"],
        hours=["%I:%M:%S %p"]
    )
    ab.line("time", "alt", source=source, color="orange", legend_label = "Barometric Altitude")
    ab.circle("time", "alt", source=source, size = dot_size, color="orange")
    
    #The baseline trace is only drawn here for the LINEAR method
    if baseline_method == "LINEAR":
        ab.line("time", "ab", source=source, color="green", line_width=2, legend_label = "Baseline Altitude")

    #Highlight the selected ranges
    for period in baseline_ranges:
        leftbound = trimmed_file_baselined.loc[period[0], time]
        rightbound = trimmed_file_baselined.loc[period[1], time]
        baseline_box = BoxAnnotation(left=leftbound, right=rightbound, fill_alpha=0.2, fill_color="green")
        ab.add_layout(baseline_box)

    ##############################################################################
    #Altitude Above Ground Plot (shares the x-range of the baseline plot)
    aog = figure(title = "Altitude Above Ground", x_range = ab.x_range, y_axis_label = alt, **options)
    aog.xaxis.formatter = DatetimeTickFormatter(
        seconds=["%I:%M:%S %p"],
        minutes=["%I:%M:%S %p"],
        hours=["%I:%M:%S %p"]
    )
    aog.line("time", "aog", source=source)
    aog.circle("time", "aog", source=source, size = dot_size)

    show(column(ab, aog))

    display(trimmed_file_baselined)
###########################################################################################################################

Baselining from 04:42:56 PM at index 0 to 05:14:34 PM at index 949 using baselining method: LINEAR
Baselining from 05:29:08 PM at index 1386 to 05:44:28 PM at index 1846 using baselining method: LINEAR

Performing LINEAR baseline procedure
Slope = [[0.00189519]]
Intercept = [-60.03957726]
R^2 = 0.8071029402507393

LINEAR baseline procedure completed successfully

Estimated baseline values (Meters)
0      -60.039577
1      -60.035787
2      -60.031996
3      -60.028206
4      -60.024416
          ...    
1896   -52.853004
1897   -52.849214
1898   -52.845424
1899   -52.841633
1900   -52.837843
Name: Altitude Baseline (Meters), Length: 1901, dtype: float64

Altitude Above Ground values (Meters)
0       2.039577
1       2.035787
2       0.031996
3       2.028206
4       2.024416
          ...   
1896   -0.146996
1897   -0.150786
1898   -0.154576
1899   -0.158367
1900   -0.162157
Name: AOG (Meters), Length: 1901, dtype: float64


Unnamed: 0,Time (yyyy-MM-dd hh:mm:ss),Elapsed Time (seconds),Sampling Interval (seconds),Temp (Celsius),Wet Bulb Temp. (Celsius),Rel. Hum. (%),Baro. (mb),Altitude (Meters),Altitude Baseline (Meters),AOG (Meters),Wind Speed (m/s),Mag. Dir. (Degrees),True Dir. (Degrees)
0,2022-06-21 16:42:56,0.0,2.0,28.6,20.0,45.3,1020.0,-58,-60.039577,2.039577,0.4,23,23
1,2022-06-21 16:42:58,2.0,2.0,28.6,20.0,45.4,1020.0,-58,-60.035787,2.035787,0.4,23,23
2,2022-06-21 16:43:00,4.0,2.0,28.6,20.0,45.4,1020.0,-60,-60.031996,0.031996,0.0,23,24
3,2022-06-21 16:43:02,6.0,2.0,28.6,20.0,45.5,1020.0,-58,-60.028206,2.028206,0.0,23,24
4,2022-06-21 16:43:04,8.0,2.0,28.7,20.0,45.5,1020.0,-58,-60.024416,2.024416,0.3,24,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1896,2022-06-21 17:46:08,3792.0,2.0,28.9,19.6,42.4,1019.5,-53,-52.853004,-0.146996,0.0,349,349
1897,2022-06-21 17:46:10,3794.0,2.0,28.9,19.6,42.3,1019.5,-53,-52.849214,-0.150786,0.0,349,350
1898,2022-06-21 17:46:12,3796.0,2.0,28.9,19.6,42.3,1019.5,-53,-52.845424,-0.154576,0.0,349,350
1899,2022-06-21 17:46:14,3798.0,2.0,28.9,19.6,42.3,1019.4,-53,-52.841633,-0.158367,0.0,349,350


## STEP 10: Review the baselined data

In [30]:
#Review plots for the baselined data: barometric altitude with the estimated
#baseline on top, and the resulting altitude above ground underneath.
dot_size = 0.5

#The same 12-hour clock label is used at every tick resolution
clock_label = "%I:%M:%S %p"

altvstimehover = HoverTool( #API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    mode = "vline",
    formatters={"@time" : "datetime"},
    tooltips=[
        ("Index", "$index"),
        ("Date", "@time{%F %I:%M:%S %p}"),
        ("Altitude", "@alt meters"),
    ],
)

#Columns the two plots read from
review_data = dict(
    index = trimmed_file_baselined.index,
    time = trimmed_file_baselined[time],
    alt = trimmed_file_baselined[alt],
    ab = trimmed_file_baselined[baseline_series.name],
    aog = trimmed_file_baselined[AOG_series.name],
)
source = ColumnDataSource(data=review_data)

options = dict(
    x_axis_label = "Time",
    tools=[altvstimehover, "pan, wheel_zoom, box_zoom, reset"],
    plot_width=600,
    plot_height=400,
)

##############################################################################
#Altitude Baseline Plot
ab = figure(title="Altitude Baseline (shaded areas indicate values used as baseline)", y_axis_label = alt, **options)
ab.xaxis.formatter = DatetimeTickFormatter(seconds=[clock_label], minutes=[clock_label], hours=[clock_label])
ab.line("time", "alt", source=source, color="orange", legend_label = "Barometric Altitude")
ab.circle("time", "alt", source=source, size = dot_size, color="orange")
ab.line("time", "ab", source=source, color="green", line_width=2, legend_label = "Baseline Altitude")

#Shade each span of time that contributed to the baseline
for period in baseline_ranges:
    leftbound, rightbound = [trimmed_file_baselined.loc[edge, time] for edge in (period[0], period[1])]
    baseline_box = BoxAnnotation(left=leftbound, right=rightbound, fill_alpha=0.2, fill_color="green")
    ab.add_layout(baseline_box)

##############################################################################
#Altitude Above Ground Plot, locked to the x-range of the plot above
aog = figure(title = "Altitude Above Ground", x_range = ab.x_range, y_axis_label = alt, **options)
aog.xaxis.formatter = DatetimeTickFormatter(seconds=[clock_label], minutes=[clock_label], hours=[clock_label])
aog.line("time", "aog", source=source)
aog.circle("time", "aog", source=source, size = dot_size)

show(column(ab, aog))

#display(trimmed_file_baselined)


## STEP 11: Save the preprocessed data to a `.csv` and `.xlsx`
- ***Repeat STEPS 9 and 10*** until satisfied with the baselined values

Input the desired file name for the preprocessed data to `preprocessed_data_name`

In [33]:
# Preprocessed data corresponds to just the field test with baselined altitude values

#Name your preprocessed data file
preprocessed_data_name = "TEST PREPROCESSED"

############################################################################### 

#One-column Series so each history entry lands on its own row of the Excel sheet
changehistory = pd.Series(change_history)

#Build the output paths with os.path.join so the separators match the operating system,
#and make sure the folder exists so a missing folder is not mistaken for a locked file
preprocessed_data_dir = os.path.join(cwd, preprocessedDataFolderName)
os.makedirs(preprocessed_data_dir, exist_ok=True)
preprocessed_data_path_csv = os.path.join(preprocessed_data_dir, preprocessed_data_name + ".csv")
preprocessed_data_path_excel = os.path.join(preprocessed_data_dir, preprocessed_data_name + ".xlsx")

try:
    print(f"Saving {preprocessed_data_name}.csv")
    trimmed_file_baselined.to_csv(preprocessed_data_path_csv, index=False)
    print(f"{preprocessed_data_name}.csv was saved to {preprocessed_data_path_csv}")
except Exception as e:
    #Best effort: report the problem and carry on so the Excel export is still attempted
    print(e)
    print(f"Is {preprocessed_data_name}.csv currently open on your computer?")

try:
    print(f"\nSaving {preprocessed_data_name}.xlsx")
    with pd.ExcelWriter(preprocessed_data_path_excel) as writer:
        prologuepd.to_excel(writer, sheet_name="Kestrel Info", index = False, header = False)
        changehistory.to_excel(writer, sheet_name="Data Analysis Record", index = False, header = False)
        trimmed_file_baselined.to_excel(writer, sheet_name="Field Test Data with Baseline", index = False)
    print(f"{preprocessed_data_name}.xlsx was saved to {preprocessed_data_path_excel}")
except Exception as e:
    print(e)
    print(f"Is {preprocessed_data_name}.xlsx currently open on your computer?")

Saving TEST PREPROCESSED.csv
TEST PREPROCESSED.csv was saved to D:\Users\firen\Documents\Drexel\Edgley Nov. 5 2022\Data-Analysis-Package-for-Kestrel-5500/preprocessed_data/TEST PREPROCESSED.csv

Saving TEST PREPROCESSED.xlsx
TEST PREPROCESSED.xlsx was saved to D:\Users\firen\Documents\Drexel\Edgley Nov. 5 2022\Data-Analysis-Package-for-Kestrel-5500/preprocessed_data/TEST PREPROCESSED.xlsx


***END OF PREPROCESSING STAGE***
***

# Generate standardized set of plots

In [56]:
from bokeh.io import output_file
output_file("PLOTS - CBEC_JUNE 21 2022.html")

df = trimmed_file_baselined
#Data
#Column Names
datetime = standardized_units["time"]
temp = standardized_units["temp"]
wetbulbtemp = standardized_units["wetbulbtemp"]
alt = standardized_units["alt_baselined"]
windspeed = standardized_units["windspeed"]
rh = standardized_units["rh"]
baro = standardized_units["baro"]
magdir = standardized_units["magdir"]

windspeed_min = df[windspeed].min()
windspeed_max = df[windspeed].max()

alt_min = df[alt].min()
alt_max = df[alt].max()

baro_min = df[baro].min()
baro_max = df[baro].max()

magdir_min = 0
magdir_max = 360

rh_min = df[rh].min()
rh_max = df[rh].max()

temp_min = df[temp].min()
temp_max = df[temp].max()

wetbulbtemp_min = df[wetbulbtemp].min()
wetbulbtemp_max = df[wetbulbtemp].max()

source = ColumnDataSource(data=dict(
    index=df.index, 
    time=df[datetime], 
    temp=df[temp],
    wetbulbtemp=df[wetbulbtemp],
    alt=df[alt],
    windspeed=df[windspeed],
    rh=df[rh],
    baro=df[baro],
    magdir=df[magdir],
    )
)

#Formatting
datefmt = DateFormatter(format="%F %I:%M:%S %p") #Format API reference: https://docs.bokeh.org/en/latest/docs/reference/models/widgets/tables.html?highlight=datatable#bokeh.models.DataTable
width = 1000
height = 300

tempcolor = "black"
wetbulbtempcolor = "gray"
altcolor = "navy"
windspeedcolor = "magenta"
rhcolor = "red"
barocolor = "orange"
magdircolor = "firebrick"


#view = CDSView(source=source, filters=[IndexFilter(x)])
hovercolor = "black"
sz = 3

#FIELD TEST INFO FOR PLOTS****************************************************************
fieldtestinfo = Div(text=
f"""
<p>FIELD TEST: <b>{fieldTestParameters["Field Test Label"]}</b></p>
<p>LOCATION: <b>{fieldTestParameters["Field Test Location"]}</b></p>
<p>DATE: <b>{fieldTestParameters["Field Test Date"]}</b></p>
<p>DEVICE: <b>{fieldTestParameters["Device Nickname"]}</b></p>
"""
)

#TIME SERIES PLOTS*************************************************************************************************
timegraphs = {
    "Temperature vs. Time": tempcolor,
    "Wet Bulb Temp.": wetbulbtempcolor,
    "Altitude vs. Time": altcolor, 
    "Wind Speed vs. Time": windspeedcolor, 
    "Relative Humidity vs. Time": rhcolor, 
    "Barometric Pressure vs. Time": barocolor, 
    "Magnetic Direction vs. Time": magdircolor
}

hover_timeseries = HoverTool(
    tooltips=[
        ("Index", "@index"),
        ("Time", "@time{%F %I:%M:%S %p}"),        
        ("Temperature", "@temp"),
        ("Wet Bulb Temp.", "@wetbulbtemp"),
        ("Altitude", "@alt"),
        ("Windspeed", "@windspeed"),
        ("Relative Humidity", "@rh"),
        ("Barometric Pressure", "@baro{0.000}"),
        ("Magnetic Direction", "@magdir"),  
    ],

    formatters={
        "@time" : "datetime",
    },
    #mode = "vline"
)

#TIME SERIES PLOTS SEPARATE******************************************************************************8
#One time-series figure per measured quantity, in the same order as `timegraphs`.
#All figures share a single x-range so panning/zooming one moves the others.
timeylabels = [temp, wetbulbtemp, alt, windspeed, rh, baro, magdir]
timeysourceskeys = ["temp", "wetbulbtemp", "alt", "windspeed", "rh", "baro", "magdir"]
time_series_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=700, plot_height=300)

#Placeholder figures: the first one only donates its x_range to the real figures below
p1, p2, p3, p4, p5, p6, p7 = figure(), figure(), figure(), figure(), figure(), figure(), figure()
timefigures = [p1, p2, p3, p4, p5, p6, p7]

#enumerate() supplies the slot to overwrite directly instead of searching the list with .index()
for i, (f, g, l, key) in enumerate(zip(timefigures, timegraphs, timeylabels, timeysourceskeys)):
    f = figure(title=g, x_range=timefigures[0].x_range, x_axis_label = "Time", y_axis_label=l, x_axis_type = "datetime", **time_series_options)

    #Colour the title and y-axis like the trace so each plot is identifiable at a glance
    f.title.text_color = timegraphs[g]
    f.yaxis.axis_label_text_color = timegraphs[g]
    f.yaxis.major_label_text_color = timegraphs[g]
    f.yaxis.axis_line_color = timegraphs[g]
    f.xaxis.formatter=DatetimeTickFormatter(
        hours="%I:%M:%S %p",
        minutes="%I:%M:%S %p")

    #Replace the placeholder with the configured figure
    timefigures[i] = f
    f.line("time", key, color=timegraphs[g], hover_color=hovercolor, source=source)
    f.circle("time", key, color=timegraphs[g], size = sz, hover_color=hovercolor, source=source)

#Stack every figure in one column (a copy of the list, as the original slice produced)
tab1 = Panel(child=row(column(list(timefigures)), fieldtestinfo), title="Time Series Plots")


#TIME SERIES PLOTS SUPERIMPOSED***************************************************************************************************************************************
#Single large figure on which several quantities are overlaid against time.
#Its default y-range is fixed to the wind speed extremes.
superimposed1_options = dict(tools=[hover_timeseries, "pan, wheel_zoom, box_select, tap, reset"], plot_width=1000, plot_height=700)
superimposed1labels = [temp, windspeed]
superimposed1colors = [tempcolor, windspeedcolor]
superimposed1keys = ["temp", "windspeed"]
superimposed1 = figure(y_range = (windspeed_min,windspeed_max), x_axis_label = "Time", x_axis_type="datetime", **superimposed1_options)
superimposed1.xaxis.formatter=DatetimeTickFormatter(
    hours="%I:%M:%S %p",
    minutes="%I:%M:%S %p"
)

#Draw each listed quantity as a line plus markers on the figure's default y-range.
#NOTE(review): "temp" is drawn here against the wind speed range and is drawn again
#further down with y_range_name="temp" - confirm whether it should be in this list.
for key, l, c in zip(superimposed1keys, superimposed1labels, superimposed1colors):
    superimposed1.line("time", key, color=c, hover_color="red", source=source, legend_label = l)
    superimposed1.circle("time", key, color=c, hover_color="red", source=source, legend_label = l)

superimposed1.extra_y_ranges = {
    "baro": Range1d(start=baro_min, end=baro_max), 
    "alt": Range1d(start=alt_min, end=alt_max), 
    "magdir": Range1d(start=magdir_min, end=magdir_max),
    "temp": Range1d(start=temp_min, end=temp_max),
    "wetbulbtemp": Range1d(start=wetbulbtemp_min, end=wetbulbtemp_max),
    "rh": Range1d(start=rh_min, end=rh_max)

}

#Wet Bulb Temperature
superimposed1.line("time", "wetbulbtemp", y_range_name="wetbulbtemp", color=wetbulbtempcolor, hover_color=hovercolor, source=source, legend_label = wetbulbtemp)
superimposed1.circle("time", "wetbulbtemp", y_range_name="wetbulbtemp", color=wetbulbtempcolor, hover_color=hovercolor, source=source, legend_label = wetbulbtemp)
#superimposed1.add_layout(LinearAxis(y_range_name="wetbulbtemp", axis_label = wetbulbtemp, major_label_text_color = wetbulbtempcolor, axis_label_text_color = wetbulbtempcolor, axis_line_color=wetbulbtempcolor), "left")

#Baro
superimposed1.line("time", "baro", y_range_name="baro", color="orange", hover_color=hovercolor, source=source, legend_label = baro)
superimposed1.circle("time", "baro", y_range_name="baro", color="orange", hover_color=hovercolor, source=source, legend_label = baro)
superimposed1.add_layout(LinearAxis(y_range_name="baro", axis_label = baro, major_label_text_color = barocolor, axis_label_text_color = barocolor, axis_line_color=barocolor), "right")

#Mag Dir
superimposed1.line("time", "magdir", y_range_name="magdir", color=magdircolor, hover_color=hovercolor, source=source, legend_label = magdir)
superimposed1.circle("time", "magdir", y_range_name="magdir", color=magdircolor, hover_color=hovercolor, source=source, legend_label = magdir)
superimposed1.add_layout(LinearAxis(y_range_name="magdir", axis_label = magdir, major_label_text_color = magdircolor, axis_label_text_color = magdircolor, axis_line_color=magdircolor), "right")

#Temperature
superimposed1.line("time", "temp", y_range_name="temp", color=tempcolor, hover_color=hovercolor, source=source, legend_label = temp)
superimposed1.circle("time", "temp", y_range_name="temp", color=tempcolor, hover_color=hovercolor, source=source, legend_label = temp)
superimposed1.add_layout(LinearAxis(y_range_name="temp", axis_label = temp, major_label_text_color = tempcolor, axis_label_text_color = tempcolor, axis_line_color=tempcolor), "left")

#Altitude
superimposed1.line("time", "alt", y_range_name="alt", color=altcolor, hover_color=hovercolor, source=source, legend_label = alt)
superimposed1.circle("time", "alt", y_range_name="alt", color=altcolor, hover_color=hovercolor, source=source, legend_label = alt)
superimposed1.add_layout(LinearAxis(y_range_name="alt", axis_label = alt, major_label_text_color = altcolor, axis_label_text_color = altcolor, axis_line_color=altcolor), "left")

#Relative Humidity
superimposed1.line("time", "rh", y_range_name="rh", color=rhcolor, hover_color=hovercolor, source=source, legend_label = rh)
superimposed1.circle("time", "rh", y_range_name="rh", color=rhcolor, hover_color=hovercolor, source=source, legend_label = rh)
superimposed1.add_layout(LinearAxis(y_range_name="rh", axis_label = rh, major_label_text_color = rhcolor, axis_label_text_color = rhcolor, axis_line_color=rhcolor), "right")

superimposed1.legend.click_policy= "hide"

tab2 = Panel(child=column(fieldtestinfo, superimposed1), title="Time Series Superimposed")

#ALTITUDE PROFILES*********************************************************
# Hover tool shared by all altitude-profile figures; the time column is
# rendered through the datetime formatter.
altprofile_tooltips = [
    ("Time", "@time{%F %I:%M:%S %p}"),
    ("Temperature", "@temp"),
    ("Altitude", "@alt"),
    ("Wind Speed", "@windspeed"),
    ("Relative Humidity", "@rh"),
    ("Magnetic Direction", "@magdir"),
]
hover_altprofiles = HoverTool(tooltips=altprofile_tooltips, formatters={"@time": "datetime"})

# Figure title -> colour used for that figure's title, y-axis and markers.
altgraphs = {
    "Temperature vs. Altitude": "black",
    "Wind Speed vs. Altitude": "red",
    "Relative Humidity vs. Altitude": "magenta",
    "Magnetic Direction vs. Altitude": "navy",
}
alt_profiles_options = {
    "tools": [hover_altprofiles, "pan, wheel_zoom, box_select, tap, reset"],
    "plot_width": 700,
    "plot_height": 300,
}
altylabels = [temp, windspeed, rh, magdir]
altsourceskeys = ["temp", "windspeed", "rh", "magdir"]

# Placeholder figures; each slot of altfigures is overwritten in the loop below.
p7, p8, p9, p10 = (figure() for _ in range(4))
altfigures = [p7, p8, p9, p10]

for i, ((g, profilecolor), l, key) in enumerate(zip(altgraphs.items(), altylabels, altsourceskeys)):
    # Every figure takes its x-range from slot 0 so the altitude axes are shared.
    f = figure(
        title=g,
        x_range=altfigures[0].x_range,
        x_axis_label="Altitude (m)",
        y_axis_label=l,
        **alt_profiles_options
    )
    f.title.text_color = profilecolor
    y_axis = f.yaxis
    y_axis.axis_label_text_color = profilecolor
    y_axis.major_label_text_color = profilecolor
    y_axis.axis_line_color = profilecolor
    altfigures[i] = f

    # Markers only; no connecting line is drawn for the profiles.
    f.circle("alt", key, color=profilecolor, hover_color=hovercolor, source=source)

altprofile_stack = column(altfigures[:5])
tab3 = Panel(child=row(altprofile_stack, fieldtestinfo), title="Altitude Profiles")

#ALTITUDE PROFILES SUPERIMPOSED**********************************************************************
# A single figure with altitude on the x-axis and all profile series overlaid.
altsuperimposed_options = {
    "tools": [hover_altprofiles, "pan, wheel_zoom, box_select, tap, reset"],
    "plot_width": 1000,
    "plot_height": 700,
}

altsuperimposed = figure(x_axis_label="Altitude (m)", **altsuperimposed_options)

# Named extra y-ranges. Only "rh" and "magdir" are referenced by glyphs below;
# "temp" and "windspeed" are declared but their glyphs use the default range.
altsuperimposed.extra_y_ranges = {
    "temp": Range1d(start=temp_min, end=temp_max),
    "rh": Range1d(start=rh_min, end=rh_max),
    "magdir": Range1d(start=magdir_min, end=magdir_max),
    "windspeed": Range1d(start=windspeed_min, end=windspeed_max),
}

# (source column, legend/axis label, colour, extra range name, axis side).
# A range name of None draws the series on the default y-range with no
# dedicated axis. The list order sets the legend and axis order.
altsuperimposed_series = [
    ("temp", temp, tempcolor, None, None),                # Temperature vs. Alt
    ("windspeed", windspeed, windspeedcolor, None, None), # Windspeed vs. Alt
    ("rh", rh, rhcolor, "rh", "left"),                    # Relative Humidity vs. Alt
    ("magdir", magdir, magdircolor, "magdir", "right"),   # Mag Dir. vs. Alt
]

for series_key, series_label, series_color, series_range, axis_side in altsuperimposed_series:
    glyph_kwargs = dict(color=series_color, hover_color=hovercolor, source=source, legend_label=series_label)
    if series_range is not None:
        glyph_kwargs["y_range_name"] = series_range

    altsuperimposed.line("alt", series_key, **glyph_kwargs)
    altsuperimposed.circle("alt", series_key, **glyph_kwargs)

    if series_range is not None:
        series_axis = LinearAxis(
            y_range_name=series_range,
            axis_label=series_label,
            major_label_text_color=series_color,
            axis_label_text_color=series_color,
            axis_line_color=series_color,
        )
        altsuperimposed.add_layout(series_axis, axis_side)

# Clicking a legend entry toggles the visibility of that series.
altsuperimposed.legend.click_policy = "hide"

tab4 = Panel(child=column(fieldtestinfo, altsuperimposed), title="Altitude Profiles Superimposed")

#The Geoff Exclusive********************************************************************************
# Altitude on the y-axis, with temperature, wind speed and relative humidity
# all plotted along one shared x-axis.
geoff_tooltips = [
    ("Time", "@time{%F %I:%M:%S %p}"),
    ("Altitude", "@alt"),
    ("Temperature", "@temp"),
    ("Wind Speed", "@windspeed"),
    ("Relative Humidity", "@rh"),
]
hover_geoff = HoverTool(tooltips=geoff_tooltips, formatters={"@time": "datetime"})

geoff_options = {
    "tools": [hover_geoff, "pan, wheel_zoom, box_select, tap, reset"],
    "plot_width": 1000,
    "plot_height": 500,
}
geoffsourcekeys = ["temp", "windspeed", "rh"]
geofflabels = [temp, windspeed, rh]
geoffcolors = ["red", "blue", "orange"]
geoffp = figure(y_axis_label=alt, **geoff_options)

for key, l, c in zip(geoffsourcekeys, geofflabels, geoffcolors):
    # The line and its markers share identical styling and one legend entry.
    geoff_glyph_kwargs = dict(color=c, hover_color=hovercolor, source=source, legend_label=l)
    geoffp.line(key, "alt", **geoff_glyph_kwargs)
    geoffp.circle(key, "alt", **geoff_glyph_kwargs)

# Clicking a legend entry toggles the visibility of that series.
geoffp.legend.click_policy = "hide"

geoff_layout = column(fieldtestinfo, geoffp)
tab5 = Panel(child=geoff_layout, title="Altitude Profiles Style 2")

#ORGANIZING PLOTS INTO TABS********************************************
# Gather the five panels, in display order, into one tabbed widget and render it.
all_tabs = [tab1, tab2, tab3, tab4, tab5]
layout1 = Tabs(tabs=all_tabs)

show(column(layout1))