In [12]:
import os
import datetime
import numpy as np
import pandas as pd

In [13]:
test_date = "08_01_2022"
test_folder = "Day1_Training1"
test_name = "WTRUN2_day1_training1_EDS"

cur_dir = os.getcwd()
main_dir = os.path.dirname(os.path.dirname(cur_dir))
data_dir = os.path.join(main_dir, test_date+"_Tests", "testdata", test_folder)
test_csv = os.path.join(data_dir, test_name+".csv")

if "EDS" not in test_csv:
  _ = input("The .csv file doesn't seem to belong to WT EDS stream. Are you sure you want to continue?")

eds_df = pd.read_csv(test_csv, header=0) #header=1 for normalized SGs/RTDs. header=0 for non-normalized

In [9]:
# Find the rows when test point is changing:
# We will use these rows to find out when Flag=1 during test
# (the data we'll use for training)
test_point_change = eds_df["Test Point"].diff()
test_pt_loc = eds_df.columns.get_loc("Test Point")
eds_df.insert(test_pt_loc+1, "Test Point Change", test_point_change)

In [10]:
# Convert the timestamp in df to a form that we can use for calculating time delta
parsed_datetime_srs = eds_df["Parsed Date & Time"].values.astype('str') # We're converting to a np.'str' array to be able to extract the millisecond component.

def truncate_milliseconds(parsed_datetime_srs):
  milliseconds = np.stack (np.char.split (np.stack(np.char.split(parsed_datetime_srs, "seconds "))[:,1], " milliseconds"))[:, 0] #Extracting "millisecond" field. Some conversions going on for compatibility.
  pad = lambda x: x.zfill(3) #Actual truncation function
  padded_arr = np.array(list(map(pad, milliseconds))) #Apply the function to all elements in the array.
  return padded_arr

milliseconds = truncate_milliseconds(parsed_datetime_srs) # truncating the 1- and 2-digit milliseconds to 3-digit for making them compatible with datetime.datetime.strptime function (bad design decision in the first place)
lhs = np.stack(np.char.split(parsed_datetime_srs, " seconds"))[:,0]
new_parsed_datetime = np.char.add(np.char.add(np.char.add(lhs," seconds "), milliseconds), " milliseconds") #Concatenating fields. Similar to str+str, but stranger in np.char arrays.
eds_df["Parsed Date & Time"] = new_parsed_datetime

eds_df.insert(1, "DateTime Obj", 0)
eds_df["DateTime Obj"] = eds_df["Parsed Date & Time"].apply(lambda x: datetime.datetime.strptime(test_date+" - "+x, '%m_%d_%Y - %H hours %M minutes %S seconds %f milliseconds'))

In [5]:
# Finally populating flag fields:
"""
In general with EDS stream:
0b100000: At start and end waiting periods.
0b110: When moving.
0b1010: When dwelling.
0b10011: When capturing data.
"""

# We're populating only "0b10011" according to the following:
"""
- Find the rows where “Test Point Change” column == 1: 
    - Assign flag “0b10011” to the 60 seconds of data prior to that (not including the that row).
- Remember to monitor the RunLog for “Unique Run Properties” and “Exceptions” while doing this.
"""

test_ch_ids = eds_df.index[eds_df["Test Point Change"] == 1]
for end_id in test_ch_ids:
    time_end = eds_df["DateTime Obj"][end_id]

    delta_from_sixty_secs = 60
    start_id = end_id - 1

    while True:
        new_delta_from_sixty_secs = abs(60 - (time_end - eds_df["DateTime Obj"][start_id]).total_seconds())
        if new_delta_from_sixty_secs > delta_from_sixty_secs:
            break
        delta_from_sixty_secs = new_delta_from_sixty_secs
        start_id -= 1
  
    eds_df["Flag"][start_id:end_id] = "0b10011"


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eds_df["Flag"][start_id:end_id] = "0b10011"


In [7]:
output_test_csv = os.path.join(data_dir, "flagged_DateTimed_"+test_name+".csv")
eds_df.rename(columns={"DateTime Obj":"DateTime Str"}, inplace=True)
eds_df.to_csv(output_test_csv, index=False)