In [1]:
import os
import pickle
import datetime
import numpy as np
import pandas as pd

In [2]:
# Resolve the project root (two directory levels above the notebook's working
# directory) and the folder that holds the consolidated Dynamic1 pickles.
cur_dir = os.getcwd()
main_dir = os.path.dirname(os.path.dirname(cur_dir))
data_dir = os.path.join(main_dir, "ConsolidatedData", "Dynamic1_Jan2023")

# File names of the three pickled DataFrames
all_df_dataname = "videotimed_labeled_consolidated_all.pkl"
pzt_dataname = "consolidated_pzt_renorm_all.pkl"
sg_dataname = "consolidated_repeated_sg_all.pkl"

# Full paths, in the same order as the names above
all_df_pkl, pzt_df_pkl, sg_df_pkl = (
  os.path.join(data_dir, pkl_name)
  for pkl_name in (all_df_dataname, pzt_dataname, sg_dataname)
)

# NOTE: pickle.load executes arbitrary code stored in the file; only load
# pickles that were produced by a trusted source.
# The frames are loaded one after the other so that a missing file fails
# before the later (large) files are read.
with open(all_df_pkl, 'rb') as f:
  all_df = pickle.load(f)

with open(pzt_df_pkl, 'rb') as f:
  pzt_df = pickle.load(f)

with open(sg_df_pkl, 'rb') as f:
  sg_df = pickle.load(f)


In [3]:
# Show (rows, columns) of each frame, one per line, in the order all / PZT / SG.
print (all_df.shape, pzt_df.shape, sg_df.shape, sep="\n")

(736396, 119)
(71377800, 7)
(71424800, 8)


In [None]:
# Preview the first five rows of the consolidated frame.
all_df.head(n=5)

In [None]:
# Preview the last five rows of the consolidated frame.
all_df.tail(n=5)

In [None]:
# Preview the first five rows of the PZT frame.
pzt_df.head(n=5)

In [None]:
# Preview the first five rows of the SG frame.
sg_df.head(n=5)

In [3]:
#Arrange data function for training tests
def arrange_data_training(available_airspeeds, available_aoas, available_sensors, all_df, pzt_df, sg_df, samples_per_second=10000):
  """Cut one fixed-length sensor window per (airspeed, AoA) state.

  For every airspeed/AoA pair, the rows of ``all_df`` whose ``Label`` equals
  ``"{airspeed}m/s_{aoa}deg"`` give the start and end time of that test
  (first and last ``rtdstr_DateTime Obj``). Every window has the length of
  the shortest test, converted to a row count with ``samples_per_second``.
  A window starts at the first row of ``sg_df`` whose
  ``repeated_DateTime Obj`` equals the test's start time, and the same row
  positions are read from ``pzt_df``.

  Args:
    available_airspeeds: airspeed values used to build the state labels.
    available_aoas: angle-of-attack values used to build the state labels.
    available_sensors: sensor names; names containing "PZT" determine how
      many leading rows of each example are filled from ``pzt_df``.
    all_df: frame with the columns ``Label`` and ``rtdstr_DateTime Obj``.
    pzt_df: dense frame; all of its columns are copied, so it must have
      exactly as many columns as there are PZT sensors.
    sg_df: dense frame with a ``repeated_DateTime Obj`` column; every other
      column is copied into the remaining rows of each example.
    samples_per_second: rows per second of the dense frames (default 10000,
      the value that was previously hard-coded).

  Returns:
    ``(all_examples, all_state)`` where ``all_examples`` is a float array of
    shape (states, sensors, rows per window) and ``all_state`` is an "S20"
    byte-string array holding the label of each state, in airspeed-major,
    AoA-minor order.

  Raises:
    IndexError: if a state label has no rows in ``all_df`` or its start time
      does not occur in ``sg_df``.
    ValueError: raised by numpy if a window runs past the end of a dense
      frame or the column counts do not match the sensor count.
  """
  # NOTE(review): assumes row position i of pzt_df and sg_df refer to the same
  # sample; confirm, since the two frames can differ in length.
  time_col = "repeated_DateTime Obj"
  state_labels = [f"{airspeed}m/s_{aoa}deg" for airspeed in available_airspeeds for aoa in available_aoas]

  # Get the total data parameters
  # Pass 1: start time of every state and the duration of the shortest one
  print ("Starting to record the timings and datapoints of tests.")
  sg_df_start_times = list()
  shortest_test_time = datetime.timedelta(days=1)

  for state_label in state_labels:
    # Filter once per state instead of once for the start and once for the end
    state_times = all_df.loc[all_df["Label"] == state_label, "rtdstr_DateTime Obj"]
    if state_times.empty:
      # Same exception type the bare .iloc[0] used to raise, with a usable message
      raise IndexError(f"No rows in all_df are labelled '{state_label}'.")
    start_t = state_times.iloc[0]
    end_t = state_times.iloc[-1]

    if end_t - start_t < shortest_test_time:
      shortest_test_time = end_t - start_t
    sg_df_start_times.append(start_t)

  test_lines = int (shortest_test_time.total_seconds() * samples_per_second)
  print ("Recorded: \n (i): start time of each training experiment \n (ii): Number of lines in each test.")
  print ()

  # Pass 2: map every start time to the FIRST dense row carrying that timestamp.
  # A single isin() filter returns rows in sg_df order and returns every row
  # that shares a timestamp, so its result cannot be indexed by state number.
  print ("Starting to record the start and end indices of each test.")
  sg_times = sg_df[time_col]
  candidate_rows = np.flatnonzero(sg_times.isin(sg_df_start_times).to_numpy())
  first_row_by_time = dict()
  for row in candidate_rows:
    # candidate_rows is ascending, so setdefault keeps the earliest row
    first_row_by_time.setdefault(pd.Timestamp(sg_times.iloc[row]), int(row))

  dense_data_start_ixs = list()
  for state_label, start_t in zip(state_labels, sg_df_start_times):
    start_key = pd.Timestamp(start_t)
    if start_key not in first_row_by_time:
      raise IndexError(f"Start time {start_t} of '{state_label}' was not found in sg_df['{time_col}'].")
    dense_data_start_ixs.append(first_row_by_time[start_key])
  print ("Recorded start and end indices of each test.")
  print ()

  # Allocate the outputs
  state_count = len(state_labels)
  sensor_count = len(available_sensors)
  all_examples = np.zeros((state_count, sensor_count, test_lines))
  all_state = np.empty(state_count, dtype="S20")

  # PZT rows come first in every example, the SG rows fill the rest
  pzt_channels = [sensor_id for sensor_id in available_sensors if "PZT" in sensor_id]
  pzt_count = len(pzt_channels)
  sg_value_cols = sg_df.columns != time_col

  # Pass 3: copy the window of every state (positional slicing on both frames)
  for state_id, state_label in enumerate(state_labels):
    print (f"Processing: {state_label}")

    df_start_ix = dense_data_start_ixs[state_id]
    df_end_ix = df_start_ix + test_lines
    all_examples[state_id, 0:pzt_count, 0:test_lines] = pzt_df.iloc[df_start_ix:df_end_ix].to_numpy().T
    all_examples[state_id, pzt_count:, 0:test_lines] = sg_df.iloc[df_start_ix:df_end_ix, sg_value_cols].to_numpy().T
    all_state[state_id] = state_label

  print ("Finished processing all data.")
  return (all_examples, all_state)

In [None]:
#Data function call for training tests
# Every airspeed is paired with every angle of attack; the sensor list fixes the
# row order of each example (PZT channels first, then SG channels).
available_airspeeds = [7, 8.3, 9] + list(range(10, 21))
available_aoas = list(range(17))
available_sensors = (
  [f"PZT {channel}" for channel in range(1, 8)]
  + [f"SG {channel}" for channel in (1, 2, 4, 5, 6, "LE", "TE")]
)
(allExamples, allState) = arrange_data_training (
  available_airspeeds,
  available_aoas,
  available_sensors,
  all_df,
  pzt_df,
  sg_df,
)

In [4]:
# For dynamic only
# Cut the beginning of the data to match the slicing in the eds data:
# both dense frames are trimmed so that they begin at the row whose timestamp
# is closest to the pickled eds start time.
offsets_savedir = os.path.join(cur_dir, 'offset_pickles')
starttime_pkl = os.path.join(offsets_savedir, 'starttime_dynamic1.pkl')
with open(starttime_pkl, 'rb') as f:
  eds_starttime = pickle.load(f)

# Absolute time distance of every dense row to the eds start time
eds_rtdstr_deltas = (eds_starttime - sg_df["repeated_DateTime Obj"]).abs()
# Row position (not index label) of the closest timestamp
rtdstr_offset = eds_rtdstr_deltas.argmin()

# Positional slicing; the original index labels are kept on the trimmed frames
sg_df = sg_df.iloc[rtdstr_offset:]
pzt_df = pzt_df.iloc[rtdstr_offset:]

In [5]:
#Arrange data function for dynamic tests
def arrange_data_dynamic(available_sensors, all_df, pzt_df, sg_df):
  """Split the dense sensor frames into one array per run.

  Runs are taken from ``all_df["Run Number"]`` in order of first appearance.
  A run ends where the next run begins: the ``Date/Time`` string of the next
  run's first ``all_df`` row is parsed and matched to the dense row of
  ``sg_df`` whose ``repeated_DateTime Obj`` is closest to it. The last run
  extends to the end of the dense data.

  Args:
    available_sensors: sensor names; names containing "PZT" determine how
      many leading rows of each example are filled from ``pzt_df``.
    all_df: frame with the columns ``Run Number`` and ``Date/Time`` (strings
      in the format ``%Y-%m-%d_%H-%M-%S-%f``).
    pzt_df: dense frame; all of its columns are copied, so it must have
      exactly as many columns as there are PZT sensors.
    sg_df: dense frame with a ``repeated_DateTime Obj`` column; every other
      column is copied into the remaining rows of each example.

  Returns:
    dict mapping each run number to a float array of shape
    (sensors, dense rows of that run).
  """
  # NOTE(review): assumes the rows of each run are contiguous in all_df and
  # that row position i of pzt_df and sg_df refer to the same sample; confirm.
  time_col = "repeated_DateTime Obj"
  time_format = "%Y-%m-%d_%H-%M-%S-%f"
  dynamic_examples = dict()

  run_rtdstr_datetimes = sg_df[time_col]
  eds_datetime_strs = all_df["Date/Time"]
  eds_row_total = all_df.shape[0]

  # Loop-invariant lookups: rows per run and the runs in order of appearance
  run_row_counts = all_df["Run Number"].value_counts()
  unique_runs = all_df["Run Number"].unique()

  sensor_count = len(available_sensors)
  pzt_channels = [sensor_id for sensor_id in available_sensors if "PZT" in sensor_id]
  pzt_count = len(pzt_channels)
  sg_value_cols = sg_df.columns != time_col

  # Never slice past the shorter dense frame: the two frames can differ in
  # length, and a window longer than either of them cannot be filled.
  last_dense_row = min(pzt_df.shape[0], sg_df.shape[0])

  run_start_row_eds = 0
  run_start_row_rtdstr = 0

  for run_number in unique_runs:
    print (f"Processing run: {run_number}")

    run_row_cnt_eds = run_row_counts.loc[run_number]
    next_run_row_eds = run_start_row_eds + run_row_cnt_eds
    if next_run_row_eds < eds_row_total:
      # Only the boundary row is parsed instead of every Date/Time string
      run_end_time = datetime.datetime.strptime(eds_datetime_strs.iloc[next_run_row_eds], time_format)
      run_end_row_rtdstr = int(np.abs(run_rtdstr_datetimes - run_end_time).argmin())
      run_end_row_rtdstr = min(run_end_row_rtdstr, last_dense_row)
    else:
      # Last run: take everything that is left in both dense frames
      run_end_row_rtdstr = last_dense_row

    examples = np.zeros((sensor_count, run_end_row_rtdstr - run_start_row_rtdstr))

    # PZT rows first, SG rows (everything but the timestamp column) after them
    examples[0:pzt_count, :] = pzt_df.iloc[run_start_row_rtdstr:run_end_row_rtdstr].to_numpy().T
    examples[pzt_count:, :] = sg_df.iloc[run_start_row_rtdstr:run_end_row_rtdstr, sg_value_cols].to_numpy().T

    # The next run starts where this one ended, in both time bases
    run_start_row_eds = next_run_row_eds
    run_start_row_rtdstr = run_end_row_rtdstr

    dynamic_examples[run_number] = examples

  return dynamic_examples

In [6]:
#Data function call for dynamic tests
# The sensor list fixes the row order of each example (PZT channels first, then SG channels).
available_sensors = (
  [f"PZT {channel}" for channel in range(1, 8)]
  + [f"SG {channel}" for channel in (1, 2, 4, 5, 6, "LE", "TE")]
)
dynamic_examples = arrange_data_dynamic (
  available_sensors,
  all_df,
  pzt_df,
  sg_df,
)

Processing run: 15
Processing run: 16
Processing run: 17
Processing run: 18
Processing run: 19
Processing run: 20
Processing run: 21


In [8]:
# Write every dynamic run to its own .npy file under KerasML/Dynamic1_Jan2023/data.
output_folder = os.path.join(main_dir, "KerasML", "Dynamic1_Jan2023", "data")
# np.save does not create missing directories, so make sure the target exists
# (a fresh checkout would otherwise fail with FileNotFoundError).
os.makedirs(output_folder, exist_ok=True)

for run_number, example in dynamic_examples.items():
  # np.save appends ".npy" because the file name does not already end with it
  np.save(os.path.join(output_folder, f"dynamic1_run{run_number}"), example)