In [1]:
import pickle
import time
import os
import datetime
import numpy as np
import pandas as pd

# Directory the notebook is executed from, and the directory two levels above it.
cur_dir = os.getcwd()
main_dir = os.path.split(os.path.split(cur_dir)[0])[0]

In [2]:
# One row per recorded run: (test date as MM_DD_YYYY, test folder, RTD/strain CSV base name without ".csv").
# The rows are transposed into three parallel lists that are indexed together further down.
test_dates, test_folders, rtdstr_filenames = map(list, zip(
    ("08_01_2022", "Day1_Training1",
     "compensated_normalized_WTRUN2_training_sweep1_2022-08-01_17-24-43-50_rtd-str_uniqueRTDs_infrequent"),
    ("08_02_2022", "Day2_Training1",
     "compensated_normalized_WTRUN2_day2_training1_2022-08-02_12-38-30-01_rtd-str_uniqueRTDs_infrequent"),
))

# Folder that holds the offset pickle files; it is resolved relative to the current working directory.
eds_rtdstr_offsets_dir = os.path.join(cur_dir, "offset_pickles")

In [3]:
#Load in and arrange the data.
# Columns that get interpolated onto the fine time grid: the compensated SG channels
# 1, 2, 4, 5 and 6, followed by the TE and LE channels (which carry no "(compensated)" suffix).
sg_repeated_colnames = (
    [f"SG {channel} (V) (normalized) (compensated)" for channel in (1, 2, 4, 5, 6)]
    + [f"SG {edge} (V) (normalized)" for edge in ("TE", "LE")]
)

# Load the two pickled offset tables (leading and trailing). They are used further down
# to crop extra rows from each run's RTD/strain data.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickles produced by this project.
eds_offsets_path = os.path.join(eds_rtdstr_offsets_dir, "eds_rtdstr_offsets.pkl")
with open(eds_offsets_path, "rb") as offsets_file:
  eds_rtdstr_offsets = pickle.load(offsets_file)

trailing_offsets_path = os.path.join(eds_rtdstr_offsets_dir, "rtdstr_offsets_trailing.pkl")
with open(trailing_offsets_path, "rb") as offsets_file:
  rtdstr_offsets_trailing = pickle.load(offsets_file)

In [4]:
###
#Per-run cleaning: read each RTD/strain CSV, attach full timestamps and crop it with the stored offsets
###

sg_dfs = []

for i, test_folder in enumerate(test_folders):
  test_date = test_dates[i]

  # Locate and read this run's RTD/strain CSV (the stored base names omit the ".csv" extension).
  data_dir = os.path.join(main_dir, test_date+"_Tests", "testdata", test_folder)
  rtdstr_filename = rtdstr_filenames[i]
  rtdstr_file_csv = os.path.join(data_dir, rtdstr_filename)
  rtdstr_file_df = pd.read_csv(rtdstr_file_csv+".csv", header=0)
  # Same object, not a copy: the column insert below is also visible through rtdstr_file_df.
  rtdstr_file_df_edited = rtdstr_file_df

  # Build proper datetime objects. The time of day is parsed from "Date/Time", while the
  # calendar date is forced to the MM_DD_YYYY test date (the original note says the date
  # was not recorded in the original data).
  month, day, year = int(test_date[0:2]), int(test_date[3:5]), int(test_date[6:])
  rtdstr_times = rtdstr_file_df["Date/Time"].apply(
    lambda stamp: datetime.datetime.strptime(stamp, "%Y-%m-%d_%H-%M-%S-%f").replace(year=year, month=month, day=day))
  if "rtdstr_DateTime Obj" in rtdstr_file_df_edited.columns:
    # The CSV already carries the column as text: convert it to datetime objects in place.
    rtdstr_file_df_edited["rtdstr_DateTime Obj"] = rtdstr_file_df_edited["rtdstr_DateTime Obj"].apply(
      lambda text: datetime.datetime.strptime(text, "%Y-%m-%d %H:%M:%S.%f"))
  else:
    rtdstr_file_df_edited.insert(1, "rtdstr_DateTime Obj", rtdstr_times)

  # Leading crop: unless the stored tag is "rtdstr", drop the first N rows, N being the stored offset
  # (original note: rtdstr started earlier, so some of it was cut during alignment).
  leading_offset = eds_rtdstr_offsets[i]
  if leading_offset[0] != "rtdstr":
    rtdstr_file_df_edited = rtdstr_file_df_edited.iloc[leading_offset[1]:]

  # Trailing crop: -1 means "nothing to cut"; otherwise keep rows up to and including that position.
  # NOTE(review): the position is applied AFTER the leading crop -- confirm the pickled value is
  # relative to the already-cropped frame.
  trailing_ix = rtdstr_offsets_trailing[i]
  if trailing_ix != -1:
    rtdstr_file_df_edited = rtdstr_file_df_edited.iloc[:trailing_ix+1]

  sg_dfs.append(rtdstr_file_df_edited)

In [5]:
### NOTE: construct_repeated_sgs spreads repeated coarse timestamps on a PRIVATE COPY of coarse_times,
### so re-running this cell does not alter the arrays/DataFrames passed in and gives the same result every time.

###
#Then add repeated lines to the SG/RTD data
###
def construct_repeated_sgs(fine_times, coarse_times, SG_vals, start_ix=5, tail_skip=10000):
  """Linearly interpolate coarse SG readings onto a finer time grid.

  Runs of identical coarse timestamps are spread evenly between the sample before the run
  and the first different timestamp after it, so that every interpolation interval has a
  non-zero length. This is done on a copy; the caller's coarse_times is never modified.

  Parameters
  ----------
  fine_times : 1-D array of datetime64 values; one output row is produced per entry.
  coarse_times : 1-D array of datetime64 values, one per row of SG_vals.
  SG_vals : 2-D array, shape (len(coarse_times), n_channels).
  start_ix : index of the first coarse sample used as the right end of an interval
    (default 5, chosen originally to step over repeated lines at the beginning).
    Must satisfy 1 <= start_ix < len(coarse_times).
  tail_skip : number of trailing fine samples that are not interpolated (default 10000).

  Returns
  -------
  Float array of shape (len(fine_times), n_channels). Rows that are not interpolated
  (the last tail_skip rows, and any rows after the coarse samples run out) stay at zero.

  Raises
  ------
  IndexError if start_ix does not point at a usable coarse sample.
  """
  # Private copy: the de-duplication below writes into this array, and the caller's array
  # may be a view into a DataFrame column (or a read-only array).
  coarse_times = np.array(coarse_times, copy=True)
  n_coarse = coarse_times.shape[0]
  if not 1 <= start_ix < n_coarse:
    raise IndexError(f"start_ix={start_ix} is not usable with {n_coarse} coarse samples")

  SG_vals_interp = np.zeros((fine_times.shape[0], SG_vals.shape[1]))
  fine_period = np.timedelta64(100,"us")  # move to the next interval once within one fine step of its end

  # prev_ix/next_time_ix are the coarse samples bounding the current interpolation interval.
  prev_ix = 0
  prev_time = coarse_times[prev_ix]
  next_time_ix = start_ix
  next_time = coarse_times[next_time_ix]
  coarse_delta = next_time-prev_time

  for line_ix in range(len(fine_times)-tail_skip):
    fine_time = fine_times[line_ix]
    if line_ix % 1000000 == 0:
      print (f"{float(line_ix/len(fine_times)*100)} percent is complete.")
    if next_time - fine_time < fine_period:
      if next_time_ix + 1 >= n_coarse:
        break  # no coarse sample left to interpolate towards; remaining rows stay zero
      prev_ix = next_time_ix
      prev_time = next_time
      next_time_ix += 1

      # Count how many consecutive coarse samples share the new timestamp (bounds-checked).
      time_ixs_rep_cnt = 1
      while (next_time_ix + time_ixs_rep_cnt < n_coarse
             and coarse_times[next_time_ix] == coarse_times[next_time_ix+time_ixs_rep_cnt]):
        time_ixs_rep_cnt += 1

      # Spread the repeated timestamps evenly, provided a later, different timestamp exists.
      if time_ixs_rep_cnt != 1 and next_time_ix + time_ixs_rep_cnt < n_coarse:
        prior_time = coarse_times[next_time_ix-1]
        following_time = coarse_times[next_time_ix+time_ixs_rep_cnt]
        time_increment = (following_time - prior_time)/(time_ixs_rep_cnt+1)
        for i in range(time_ixs_rep_cnt):
          coarse_times[next_time_ix+i] = prior_time + time_increment*(i+1)
      next_time = coarse_times[next_time_ix]
      coarse_delta = next_time-prev_time

    t1 = fine_time - prev_time
    time_ratio = t1/coarse_delta
    # Blend the values that belong to the SAME two samples whose timestamps bound the interval.
    SG_vals_interp[line_ix] = SG_vals[prev_ix]*(1-time_ratio) + SG_vals[next_time_ix]*time_ratio
  return SG_vals_interp

# For every cleaned run: build a fine time grid with a 1/10000 s step between the run's first and
# last timestamp, then interpolate the SG columns onto that grid.
repeated_sg_data = []
for sg_df in sg_dfs:
  run_times = sg_df['rtdstr_DateTime Obj']
  fine_step = datetime.timedelta(0,1/10000)
  pzt_times_reconstruct = np.arange(run_times.iloc[0], run_times.iloc[-1], fine_step)
  SG_vals_interp = construct_repeated_sgs(
    pzt_times_reconstruct,
    run_times.to_numpy(),
    sg_df[sg_repeated_colnames].to_numpy(),
  )
  repeated_sg_data.append(dict(pzt_times_reconstruct=pzt_times_reconstruct, SG_vals_interp=SG_vals_interp))

0.0 percent is complete.
0.6440514442531612 percent is complete.
1.2881028885063224 percent is complete.
1.9321543327594832 percent is complete.
2.5762057770126447 percent is complete.
3.220257221265806 percent is complete.
3.8643086655189665 percent is complete.
4.508360109772128 percent is complete.
5.1524115540252895 percent is complete.
5.79646299827845 percent is complete.
6.440514442531612 percent is complete.
7.084565886784773 percent is complete.
7.728617331037933 percent is complete.
8.372668775291094 percent is complete.
9.016720219544256 percent is complete.
9.660771663797417 percent is complete.
10.304823108050579 percent is complete.
10.94887455230374 percent is complete.
11.5929259965569 percent is complete.
12.236977440810062 percent is complete.
12.881028885063223 percent is complete.
13.525080329316385 percent is complete.
14.169131773569546 percent is complete.
14.813183217822706 percent is complete.
15.457234662075866 percent is complete.
16.10128610632903 percent is

In [6]:
# Save each run's interpolated SG table as its own pickle, then one concatenated table for all runs.
df_save_dir = os.path.join(main_dir, "ConsolidatedData", "Training1")
# to_pickle does not create missing folders; make sure the target exists so the long
# interpolation above is not lost to a failed save.
os.makedirs(df_save_dir, exist_ok=True)
repeated_sg_dfs = list()

for ix, repeated_sg_dat in enumerate(repeated_sg_data):
  # One column per SG channel plus the reconstructed fine timestamps.
  repeated_sg_df = pd.DataFrame(repeated_sg_dat["SG_vals_interp"], columns=sg_repeated_colnames)
  repeated_sg_df["repeated_DateTime Obj"] = repeated_sg_dat["pzt_times_reconstruct"]
  repeated_sg_df.to_pickle(os.path.join(df_save_dir,f'consolidated_repeated_sg_{ix}_uniqueRTDs_infrequent.pkl'))
  repeated_sg_dfs.append(repeated_sg_df)

# Stack the runs in order with a fresh 0..N-1 index.
concated_all_sg_df = pd.concat(repeated_sg_dfs, ignore_index=True)
concated_all_sg_df.to_pickle(os.path.join(df_save_dir,'consolidated_repeated_sg_all_uniqueRTDs_infrequent.pkl'))