# Reference



```
turb_int_analyzer(sampling_rate, time_window, label, df=df, turbintcolor = "#ed0e96", stddevwscolor = "#60223e"):

```
*Parameters*

```sampling_rate = int``` - sampling interval of the Kestrel in seconds (the time between two consecutive readings)

```time_window = int ``` - desired time window slice in seconds **(must be a multiple of the sampling rate and greater than it)**

```label = string ``` - a label to indicate the length of time window

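```df``` - a pandas dataframe containing the columns ```Time (yyyy-MM-dd hh:mm:ss)``` and ```Wind Speed (m/s)```; defaults to the dataframe loaded in the "Open preprocessed file" cell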

```turbintcolor``` - optional; a color code (hex, rgb) to color the turbulence intensity curve

```stddevwscolor``` - optional; a color code (hex, rgb) to color the standard deviation curve



---



*Returns*

a ```dict``` with keys:

```"turbintplot"```: plot of turbulence intensity, windspeed, and standard deviations vs. time

```"wsplot"```: plot of windspeed vs. time

```"turbintvsmeanws"```: plot of turbulence intensity vs. windspeed

```"stats_df"```: pandas dataframe of the times, wind speed averages, wind speed standard deviations, and turbulence intensities

---

*Example usage*

```turb_int_analyzer(sampling_rate = 2, time_window = 10, label = "(10 second time windows)", df = preprocessed_df)```





# Open preprocessed file

In [None]:
#@title Click the play button to upload and open the preprocessed file {display-mode: "form"}
import os
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, BoxSelectTool, DataTable, TableColumn, CDSView, IndexFilter, DateFormatter, DatetimeTickFormatter, NumeralTickFormatter, CustomJS, Panel, Tabs, LinearAxis, Range1d, Paragraph, DatePicker, Div, BoxAnnotation, Arrow, NormalHead, Slope
from bokeh.layouts import gridplot, column, row
from bokeh.io import output_notebook
from google.colab import files
output_notebook()


# Column names expected in the preprocessed CSV; reused by every later cell.
time = "Time (yyyy-MM-dd hh:mm:ss)"
windspeed = "Wind Speed (m/s)"

# Colab upload widget; the result is keyed by the uploaded file names.
uploaded = files.upload()

# Only the first uploaded file is analysed.
filename = list(uploaded.keys())[0]

# The context manager closes the handle even when parsing raises.
with open(filename, "r") as f:
    df = pd.read_csv(f)

# Parse the timestamp column so it can drive the datetime axes of the plots.
df[time] = pd.to_datetime(df[time])

display(df)
display(df.dtypes)

# Data source backing the wind speed plot: one entry per dataframe row.
source = ColumnDataSource(data=dict(
    index=df.index,
    datetime=df[time],
    windspeed=df[windspeed],
    )
)

# Starts empty; the selection callback of the interactive slicing cell appends
# one entry to every column each time a range is selected.
# NOTE: there is no time_duration column; the matching table column is disabled.
statistics = ColumnDataSource(data=dict(
    start_index=[],
    end_index=[],
    start_time=[],
    end_time=[],
    num_points=[],
    avg_windspeed=[],
    ws_std=[],
    turb_int=[],
    )
)

# Banner showing the first and last timestamp of the uploaded data.
field_test_date_start = df[time].iat[0].strftime("%A, %B %d, %Y, %I:%M:%S %p")
field_test_date_end = df[time].iat[-1].strftime("%A, %B %d, %Y, %I:%M:%S %p")
# The bold tag is closed with </b> so the markup stays well-formed.
field_test_date_label = Div(text=f"""<b>FIELD TEST DATE: {field_test_date_start} to {field_test_date_end}</b>""")

# Appearance settings shared by the plots of this notebook.
plots_width, plots_height = 1000, 500
windspeed_color, selection_color = "navy", "firebrick"
line_thickness, dot_size = 1, 2

# Hover tool for the raw wind speed plot.
# API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
hover = HoverTool(
    tooltips=[
        ("Index", "$index"),
        ("Date", "@datetime{%F %I:%M:%S %p}"),
        ("Wind Speed", "@{windspeed} m/s"),
    ],
    # Render the datetime column as a date instead of a raw number.
    formatters={"@datetime": "datetime"},
    mode="vline",
)

# Keyword arguments reused by every wind-speed-vs-time figure.
options = {
    "x_axis_label": "Time",
    "tools": [hover, "pan, wheel_zoom, xwheel_pan, ywheel_pan, xbox_select, box_zoom, reset"],
    "active_drag": "xbox_select",
    "plot_width": plots_width,
    "plot_height": plots_height,
}

# Wind speed vs. time: a line with selectable dots drawn on top of it.
ws_plot = figure(
    title="Wind Speed vs. Time",
    y_axis_label=windspeed,
    x_axis_type="datetime",
    **options,
)
ws_plot.line(
    "datetime",
    "windspeed",
    source=source,
    line_color=windspeed_color,
    line_width=line_thickness,
)
ws_plot.circle(
    "datetime",
    "windspeed",
    source=source,
    line_color=windspeed_color,
    size=dot_size,
    selection_color=selection_color,
)
ws_plot.xaxis.formatter = DatetimeTickFormatter(
    hours="%I:%M:%S %p",
    minutes="%I:%M:%S %p",
)

def _window_statistics(df, increment):
    """Compute wind speed statistics over consecutive windows of *df*.

    Every window covers ``increment + 1`` rows and starts on the last row of
    the previous window, so neighbouring windows share one sample. The final
    windows are shorter when the data runs out.

    Reads the module-level ``time`` and ``windspeed`` column names.

    Returns:
        A tuple ``(stats, zero_avg_ws_count)``. ``stats`` is a dict of
        equal-length lists keyed by "Time", "Wind Speed Average",
        "Wind Speed Standard Deviation" and "Turbulence Intensity";
        ``zero_avg_ws_count`` is the number of windows whose mean was exactly 0.
    """
    time_values = df[time]
    ws_values = df[windspeed]
    n_rows = len(df)

    times = []
    avg_ws = []
    std_ws = []
    turb_ints = []
    zero_avg_ws_count = 0

    start_index = 0
    end_index = increment + 1
    # A plain bound check replaces the membership test on a materialised index
    # list, which rescanned the list on every iteration.
    while start_index < n_rows:
        # Positional access keeps the lookup consistent with the slice below
        # even when the dataframe index is not 0..n-1.
        times.append(time_values.iloc[start_index])

        window = ws_values.iloc[start_index:end_index]
        avgws = np.mean(window)
        stdws = np.std(window)  # called without ddof, i.e. numpy's default ddof=0
        std_ws.append(stdws)

        if avgws == 0:
            # Avoid dividing by zero: a calm window is given a tiny mean.
            zero_avg_ws_count += 1
            avgws = 0.001
        avg_ws.append(avgws)

        turb_ints.append(stdws / avgws)

        # The next window begins on the last sample of this one.
        start_index = end_index - 1
        end_index += increment

    stats = {
        "Time": times,
        "Wind Speed Average": avg_ws,
        "Wind Speed Standard Deviation": std_ws,
        "Turbulence Intensity": turb_ints,
    }
    return stats, zero_avg_ws_count


def turb_int_analyzer(sampling_rate, time_window, label, df=df, turbintcolor="#ed0e96", stddevwscolor="#60223e"):
    """Slice *df* into time windows and plot the turbulence intensity of each.

    Turbulence intensity is the wind speed standard deviation of a window
    divided by its mean wind speed.

    Relies on module-level names defined in the earlier cells: ``time``,
    ``windspeed``, ``source``, ``options``, ``plots_width``, ``plots_height``,
    ``windspeed_color``, ``selection_color``, ``line_thickness`` and
    ``dot_size``.

    Args:
        sampling_rate: Seconds between two consecutive samples.
        time_window: Window length in seconds; must be a multiple of
            ``sampling_rate`` and greater than it.
        label: Text appended to the plot titles and used in the printed summary.
        df: Dataframe holding the ``time`` and ``windspeed`` columns. The
            default is the dataframe that existed when this function was
            defined, so pass ``df`` explicitly after loading new data.
        turbintcolor: Color of the turbulence intensity curve and its axis.
        stddevwscolor: Color of the standard deviation curve and its axis.

    Returns:
        A dict with the keys "turbintplot", "wsplot", "turbintvsmeanws" (Bokeh
        figures) and "stats_df" (a pandas DataFrame with one row per window),
        or ``None`` after printing a message when ``time_window`` is invalid.
    """
    is_valid_window = (time_window % sampling_rate == 0) and (time_window > sampling_rate)
    if not is_valid_window:
        if time_window == sampling_rate:
            print("The time window must be greater than the sampling rate")
        else:
            print("Time window must be a multiple of the sampling rate in seconds")
        return None

    # Number of sampling intervals spanned by one window.
    increment = int(time_window / sampling_rate)
    avgwscolor = "#500eec"

    # API Reference: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool
    turbinthover = HoverTool(
        tooltips=[
            ("Index", "$index"),
            ("Time", "@Time{%F %I:%M:%S %p}"),
            ("Turb. Int.", "@{Turbulence Intensity}"),
            ("Avg. Wind Speed", "@{Wind Speed Average}"),
            ("Std. Dev.", "@{Wind Speed Standard Deviation}"),
        ],
        formatters={
            "@Time": "datetime",
        },
    )

    # Raw wind speed plot; its x range is shared with the statistics plot below.
    ws_plot = figure(title="Wind Speed vs. Time", y_axis_label=windspeed, x_axis_type="datetime", **options)
    ws_plot.line("datetime", "windspeed", line_color=windspeed_color, line_width=line_thickness, source=source)
    ws_plot.circle("datetime", "windspeed", line_color=windspeed_color, size=dot_size, selection_color=selection_color, source=source)
    ws_plot.xaxis.formatter = DatetimeTickFormatter(
        hours="%I:%M:%S %p",
        minutes="%I:%M:%S %p",
    )

    stats, zero_avg_ws_count = _window_statistics(df, increment)
    print(f"{zero_avg_ws_count} instances when the average windspeed was 0 for {label}")

    stats_df = pd.DataFrame(data=stats)
    turbintcds = ColumnDataSource(data=stats_df)

    # Turbulence intensity, average wind speed and standard deviation vs. time.
    turbint_windspeed_vs_time = figure(
        title=f"Turbulence Intensity and Average Windspeed vs. Time {label}",
        x_axis_label="Time",
        y_axis_label="Turbulence Intensity",
        x_axis_type="datetime",
        x_range=ws_plot.x_range,
        active_drag="xbox_select",
        plot_width=plots_width,
        plot_height=plots_height,
        tools=[turbinthover, "pan, wheel_zoom, xwheel_pan, ywheel_pan, xbox_select, box_zoom, reset"],
    )

    # Turbulence intensity uses the default (left) axis.
    turbint_line = turbint_windspeed_vs_time.line("Time", "Turbulence Intensity", line_width=line_thickness, line_color=turbintcolor, legend_label="Turbulence Intensity", source=turbintcds)
    turbint_dots = turbint_windspeed_vs_time.circle("Time", "Turbulence Intensity", size=dot_size, color=turbintcolor, selection_color=selection_color, legend_label="Turbulence Intensity", source=turbintcds)
    # Auto-range the left axis on the turbulence intensity glyphs only, so the
    # curves that belong to the right-hand axes do not stretch it.
    turbint_windspeed_vs_time.y_range.renderers = [turbint_line, turbint_dots]

    turbint_windspeed_vs_time.yaxis.axis_label_text_color = turbintcolor
    turbint_windspeed_vs_time.yaxis.major_label_text_color = turbintcolor
    turbint_windspeed_vs_time.yaxis.axis_line_color = turbintcolor

    # Average wind speed gets its own range and right-hand axis. The glyphs must
    # name that range, otherwise they are drawn against the left axis scale.
    turbint_windspeed_vs_time.extra_y_ranges["avgws"] = Range1d(start=stats_df["Wind Speed Average"].min(), end=stats_df["Wind Speed Average"].max())

    turbint_windspeed_vs_time.line("Time", "Wind Speed Average", line_width=line_thickness, line_color=avgwscolor, legend_label="Average Wind Speed", source=turbintcds, y_range_name="avgws")
    turbint_windspeed_vs_time.circle("Time", "Wind Speed Average", size=dot_size, color=avgwscolor, selection_color=selection_color, legend_label="Average Wind Speed", source=turbintcds, y_range_name="avgws")

    ax2 = LinearAxis(y_range_name="avgws", axis_label="Wind Speed Average",
                     major_label_text_color=avgwscolor,
                     axis_label_text_color=avgwscolor,
                     axis_line_color=avgwscolor)
    turbint_windspeed_vs_time.add_layout(ax2, "right")

    # Standard deviation is handled the same way as the average.
    turbint_windspeed_vs_time.extra_y_ranges["stddev"] = Range1d(start=stats_df["Wind Speed Standard Deviation"].min(), end=stats_df["Wind Speed Standard Deviation"].max())

    turbint_windspeed_vs_time.line("Time", "Wind Speed Standard Deviation", line_width=line_thickness, line_color=stddevwscolor, legend_label="Standard Deviation Wind Speed", source=turbintcds, y_range_name="stddev")
    turbint_windspeed_vs_time.circle("Time", "Wind Speed Standard Deviation", size=dot_size, color=stddevwscolor, selection_color=selection_color, legend_label="Standard Deviation Wind Speed", source=turbintcds, y_range_name="stddev")

    ax3 = LinearAxis(y_range_name="stddev", axis_label="Wind Speed Standard Deviation",
                     major_label_text_color=stddevwscolor,
                     axis_label_text_color=stddevwscolor,
                     axis_line_color=stddevwscolor)
    turbint_windspeed_vs_time.add_layout(ax3, "right")

    # Move the legend above the plot area so it does not cover the curves.
    legend = turbint_windspeed_vs_time.legend[0]
    turbint_windspeed_vs_time.add_layout(legend, "above")
    turbint_windspeed_vs_time.legend.click_policy = "mute"

    turbint_windspeed_vs_time.xaxis.formatter = DatetimeTickFormatter(
        hours="%I:%M:%S %p",
        minutes="%I:%M:%S %p",
    )

    # Turbulence intensity vs. mean wind speed scatter plot.
    turbint_vs_meanws = figure(title=f"Turbulence Intensity vs. Mean Wind Speed (m/s) {label}", y_axis_label="Turbulence Intensity", x_axis_label="Mean Wind Speed (m/s)", plot_height=plots_height, tools=["pan, wheel_zoom, box_select, box_zoom, reset"])
    turbint_vs_meanws.circle("Wind Speed Average", "Turbulence Intensity", source=turbintcds, legend_label="Dots")
    turbint_vs_meanws.legend.click_policy = "hide"

    return {
        "turbintplot": turbint_windspeed_vs_time,
        "wsplot": ws_plot,
        "turbintvsmeanws": turbint_vs_meanws,
        # Return the DataFrame, as the key name and the reference section state.
        "stats_df": stats_df,
    }

# Interactive Slicing

In [None]:
#@title Interactive slicing table {display-mode: "form"}

# Line of text that reports the most recent selection.
ranges = Paragraph(text="""SELECTED INDICES: """)

# Times in the table are shown on a 12-hour clock.
datefmt = DateFormatter(format="%I:%M:%S %p")

# (field, title, formatter) for each table column; None keeps the default
# formatter.
# NOTE: a "time_duration" column is deliberately left out. Formatting a
# duration with a DateFormatter shows anything shorter than 12 hours as
# "12 hours" (e.g. a 5 second duration displays as 12 hours, 0 minutes,
# 5 seconds).
_column_specs = [
    ("start_index", "Start Index", None),
    ("end_index", "End Index", None),
    ("start_time", "Start Time", datefmt),
    ("end_time", "End Time", datefmt),
    ("num_points", "Number of Data Points", None),
    ("avg_windspeed", "Average Wind Speed", None),
    ("ws_std", "Standard Deviation Wind Speed", None),
    ("turb_int", "Turbulence Intensity", None),
]

columns = [
    TableColumn(field=field_name, title=column_title)
    if formatter is None
    else TableColumn(field=field_name, title=column_title, formatter=formatter)
    for field_name, column_title, formatter in _column_specs
]

# Statistics table, as wide as the plots.
dt = DataTable(source=statistics, columns=columns, width=plots_width)

# Whenever the selection on the wind speed plot changes, append one row to the
# statistics table: the selection bounds, the number of points, the mean, the
# sample standard deviation (n - 1 denominator) and the turbulence intensity
# (standard deviation / mean). The callback runs in the browser.
source.selected.js_on_change("indices", CustomJS(args=dict(
    source=source,
    statistics=statistics,
    ranges=ranges
),
code="""
    // Work on a sorted copy so the first and last entries really are the
    // bounds of the selection, whatever order the indices arrive in.
    const inds = Array.from(cb_obj.indices).sort((a, b) => a - b);
    const inds_length = inds.length;

    // A cleared selection (e.g. after a reset) carries no data: do not add a
    // row of NaN / undefined values to the table.
    if (inds_length === 0)
    {
        ranges.text = "SELECTED INDICES: ";
        return;
    }

    const d1 = source.data;
    const d2 = statistics.data;

    const first = inds[0];
    const last = inds[inds_length - 1];

    //Calculate the average
    let sum_windspeed = 0;
    for (const i of inds)
    {
        sum_windspeed += d1["windspeed"][i];
    }
    const avg_windspeed = sum_windspeed / inds_length;

    //Calculate the sample standard deviation (NaN for a single point)
    let sum_deviations = 0;
    for (const i of inds)
    {
        sum_deviations += Math.pow((d1["windspeed"][i] - avg_windspeed), 2);
    }
    const std_dev_windspeed = Math.sqrt((1 / (inds_length - 1)) * sum_deviations);
    const turb_int = std_dev_windspeed / avg_windspeed;

    d2["avg_windspeed"].push(avg_windspeed.toFixed(2));
    d2["num_points"].push(inds_length);
    d2["start_index"].push(first);
    d2["end_index"].push(last);
    d2["start_time"].push(d1["datetime"][first]);
    d2["end_time"].push(d1["datetime"][last]);
    d2["ws_std"].push(std_dev_windspeed.toFixed(2));
    d2["turb_int"].push(turb_int.toFixed(2));

    //Display the range selection
    ranges.text = "SELECTED INDICES: " + first + " - " + last;

    statistics.change.emit();
"""
))

# Browser-side snippet that empties every column of the statistics source.
_reset_code = """
const d2 = statistics.data;

for (let x in cols_stats)
{
    d2[cols_stats[x]] = []
}

statistics.change.emit()
"""

reset_dt = CustomJS(
    args={
        "statistics": statistics,
        "cols_stats": statistics.column_names,
    },
    code=_reset_code,
)

# Resetting the wind speed plot also clears the statistics table.
ws_plot.js_on_event('reset', reset_dt)

# Page layout, top to bottom: date banner, plot, table, selection text.
show(column(field_test_date_label, ws_plot, dt, ranges))


# Slicing

In [None]:
from bokeh.layouts import layout
from bokeh.palettes import Category20b
from bokeh.io import output_file
output_file("Turbulence Intensity Plots Layout Sample 1.html")

# Each time window (in seconds) is written next to its label so the two lists
# derived below always correspond to each other.
_window_specs = [
    (10, "10 sec time windows"),
    (20, "20 sec time windows"),
    (30, "30 sec time windows"),
    (60, "1 min time windows"),
    (300, "5 min time windows"),
    (600, "10 min time windows"),
    (1800, "30 min time windows"),
    (3600, "1 hour time windows"),
]
timewindows = [seconds for seconds, _ in _window_specs]
labels = [text for _, text in _window_specs]

# One line color per time window for the comparison plot. One or two windows
# use a fixed pair; otherwise the Category20b palette of that size is used.
num_of_colors = len(timewindows)
colors = ["red", "blue"] if num_of_colors <= 2 else Category20b[num_of_colors]

# Results collected per label by the analysis loop.
avgs = {}
times_avgws = {}
plot_layout = []


# Analyse the data once per time window and collect the plots and averages.
# NOTE(review): sampling_rate=5 is hard-coded here; confirm it matches the
# sampling interval of the uploaded data.
for timewindow, label in zip(timewindows, labels):

  turb_int_data = turb_int_analyzer(sampling_rate=5, time_window=timewindow, label=label, df=df)
  if turb_int_data is None:
    # The analyzer has already printed why this window is invalid; skip it
    # instead of failing on the missing result.
    continue

  turbintplot = turb_int_data["turbintplot"]
  turbintvsws = turb_int_data["turbintvsmeanws"]
  wsvstime = turb_int_data["wsplot"]
  times_avgws[label] = turb_int_data["stats_df"]["Time"]
  avgs[label] = turb_int_data["stats_df"]["Wind Speed Average"]

  # One layout row per time window: time series on the left, scatter on the right.
  plot_layout.append(column(row(turbintplot, turbintvsws)))

# Compare the average wind speed curves obtained with the different time windows.
times = df[time]

ws_avgs = figure(
    title="Average Wind Speeds for different time windows",
    x_axis_label="Time",
    y_axis_label='Average Wind Speed',
    x_axis_type="datetime",
    plot_width=plots_width,
    plot_height=plots_height,
    tools=["pan, wheel_zoom, box_select, box_zoom, reset"],
)

# One line per analysed window, colored in the order of the palette.
for window, color in zip(avgs, colors):
  ws_avgs.line(
      times_avgws[window],
      avgs[window],
      legend_label=window,
      line_color=color,
      line_width=2,
  )

# Put the legend to the right of the plot area; clicking an entry hides its line.
legend = ws_avgs.legend[0]
ws_avgs.add_layout(legend, "right")
ws_avgs.legend.click_policy = "hide"

ws_avgs.xaxis.formatter = DatetimeTickFormatter(
    hours="%I:%M:%S %p",
    minutes="%I:%M:%S %p",
)

# Per-window plots first, the comparison plot underneath.
show(column(layout(plot_layout), ws_avgs))