#### Goal
In this analysis, the raw DOA estimates are extracted from the SSL log files of all arrays. During the recording experiment, only one source was present at any time. Since the SSL log ranks the DOA estimates in each block by energy, from highest to lowest, the first entry in each log block is used.

#### Procedures
Within each 5-minute interval, take the average of the first entries over each one-minute bin, e.g. 11:10-11:11, 11:11-11:12, etc.

In [1]:
import glob
import os
import pandas as pd
import re
import json
import datetime
import time as timelib
import numpy as np

In [2]:
'''
Define functions
'''
# Extract time information of each recording from the log file
def timeExtract(filename):
    """Return the last two lines of a log file.

    The file is read backwards from its end in fixed-size chunks, so only
    its tail is loaded. The final byte of the file is always treated as part
    of the last line, whether or not it is a newline.

    Args:
        filename: Path of the log file.

    Returns:
        ``[startTime_line, endTime_line]``: the second-to-last line and the
        last line as decoded strings, each keeping its trailing newline if
        it has one.

    Raises:
        ValueError: If the file holds fewer than two lines.
    """
    chunk_size = 4096
    tail = b''
    with open(filename, 'rb') as f:
        f.seek(0, os.SEEK_END)
        pos = f.tell()
        # Prepend chunks until two line breaks precede the final byte
        # (enough to delimit the last two lines) or the whole file is read.
        while pos > 0 and tail.count(b'\n', 0, len(tail) - 1) < 2:
            step = min(chunk_size, pos)
            pos -= step
            f.seek(pos)
            tail = f.read(step) + tail

    # The final byte is excluded from the search so that a trailing newline
    # is not mistaken for the separator between the two lines.
    last_break = tail.rfind(b'\n', 0, len(tail) - 1)
    if last_break == -1:
        raise ValueError(
            'expected at least two lines in log file: {}'.format(filename))
    # -1 (no earlier break) makes the slice below start at the file start,
    # which is correct when the second-to-last line is the first line.
    prev_break = tail.rfind(b'\n', 0, last_break)

    startTime_line = tail[prev_break + 1:last_break + 1].decode()
    endTime_line = tail[last_break + 1:].decode()

    return [startTime_line, endTime_line]

# Calculate duration of each recording in microseconds
def durationinMicroseconds(filename):
    """Compute the recording duration from the last two lines of a log file.

    Each of the two lines returned by ``timeExtract`` is split on
    whitespace; its third and fourth tokens are joined and parsed with the
    format ``%Y-%m-%d %H:%M:%S.%f``.

    Args:
        filename: Path of the log file.

    Returns:
        A tuple ``(duration, T1, T2)`` where ``duration`` is the whole number
        of microseconds between the start time ``T1`` and the end time
        ``T2`` (both ``datetime.datetime`` objects).
    """
    # Read the file tail once rather than once per line
    startTime_line, endTime_line = timeExtract(filename)
    startTime = startTime_line.split()[2:]
    endTime = endTime_line.split()[2:]
    startTimeStr = startTime[0] + ' ' + startTime[1]
    endTimeStr = endTime[0] + ' ' + endTime[1]
    T1 = datetime.datetime.strptime(startTimeStr, '%Y-%m-%d %H:%M:%S.%f')
    T2 = datetime.datetime.strptime(endTimeStr, '%Y-%m-%d %H:%M:%S.%f')
    # Divide the full timedelta so whole days are counted too;
    # timedelta.seconds alone only covers the sub-day remainder.
    duration = (T2 - T1) // datetime.timedelta(microseconds=1)

    return duration, T1, T2


# Converts .log files into pandas dataframes
def extractData(filename, arrayNum):
    """Parse one SSL log file into a DataFrame of the strongest source per block.

    The file text is split into JSON log blocks. For every block, the entry
    of its ``"src"`` list with the largest ``"E"`` value is kept, provided
    that value is nonzero. Block times are interpolated by spreading the
    recording duration evenly over all blocks.

    Args:
        filename: Path of the log file.
        arrayNum: Value stored in the ``Array Number`` column of every row.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Timestamp``, ``Time``,
        ``Time In Seconds``, ``Array Number``, ``X``, ``Y``, ``Z`` and ``E``,
        holding one row per retained log block (possibly none).
    """
    with open(filename, 'r') as f:
        text = f.read()

    # Use regex to store blocks of data: split at every newline that sits
    # between a closing brace and an opening brace
    data = re.split('(?<=})\n(?={)', text)
    # Delete the time info from the last data block
    data[-1] = data[-1][:(data[-1].rfind("}") + 1)]

    # Store the source list of each log block
    srcList = [json.loads(block)["src"] for block in data]

    columns = ['Timestamp', 'Time', 'Time In Seconds', 'Array Number',
               'X', 'Y', 'Z', 'E']

    # Retrieve time information
    duration, startTime, _ = durationinMicroseconds(filename)
    start_time_in_seconds = timelib.mktime(startTime.timetuple())
    # Seconds per log block, with the duration spread evenly over all blocks
    t = duration / len(data) / 1000000.0

    rows = []
    # Timestamp in log blocks starts at 1
    for timestamp, block in enumerate(srcList, start=1):
        # Consider every source in the block, however many there are
        energies = np.array([src["E"] for src in block])
        # Skip blocks that hold no nonzero entry
        if not energies.any():
            continue
        i = energies.argmax()
        # The maximum can still be zero when the other entries are negative
        if block[i]["E"] == 0:
            continue
        time_in_seconds = start_time_in_seconds + (timestamp - 1.0) * t
        # NOTE(review): %I is the 12-hour clock, so AM and PM recordings
        # produce identical labels; kept because callers match on this format.
        rows.append({"Timestamp": timestamp,
                     "Time": datetime.datetime.fromtimestamp(time_in_seconds).strftime("%B %d, %I:%M, %Y"),
                     "Time In Seconds": time_in_seconds,
                     "Array Number": arrayNum,
                     "X": block[i]["x"],
                     "Y": block[i]["y"],
                     "Z": block[i]["z"],
                     "E": block[i]["E"]})

    # Build the frame in one step; DataFrame.append no longer exists in
    # pandas 2.x
    return pd.DataFrame(rows, columns=columns)


In [3]:
'''
Main
'''
# Obtain a list of files at 11:10 and 11:15.
# glob returns matches in arbitrary order, so the lists are sorted to make
# the processing order (and the resulting row labels) reproducible.
listOfFiles1  = sorted(glob.glob('/Users/yihanhu/Desktop/CSE4223/Data/cSSL_2020-08-19_11_10_*.log'))
listOfFiles2  = sorted(glob.glob('/Users/yihanhu/Desktop/CSE4223/Data/cSSL_2020-08-19_11_15_*.log'))

In [15]:
# One-minute bins, written exactly as extractData formats its 'Time' column
timeBins = ['August 19, 11:10, 2020',
            'August 19, 11:11, 2020',
            'August 19, 11:12, 2020',
            'August 19, 11:13, 2020',
            'August 19, 11:14, 2020']
# Summary rows keyed by a running row number
df_dict = {}
ind = 0
listOfFiles = listOfFiles1
for logFile in listOfFiles:
    # The array number is read from the single character right before the
    # 4-character file extension
    arrayInd = int(logFile[-5])
    df = extractData(logFile, arrayInd)
    for timeMark in timeBins:
        # Rows of this array that fall into the current one-minute bin
        tmp = df.loc[df['Time'] == timeMark]
        df_dict[ind] = {"Time": timeMark,
                        "Array Number": arrayInd,
                        "Mean X": np.mean(tmp['X']),
                        "Mean Y": np.mean(tmp['Y']),
                        "Mean Z": np.mean(tmp['Z']),
                        "Median X": np.median(tmp['X']),
                        "Median Y": np.median(tmp['Y']),
                        "Median Z": np.median(tmp['Z'])}
        ind = ind + 1
# Build the summary table in one step (DataFrame.append no longer exists in
# pandas 2.x); the row labels are the running row numbers
df_SSL = pd.DataFrame.from_dict(
    df_dict,
    orient="index",
    columns=['Time', 'Array Number', 'Mean X', 'Mean Y', 'Mean Z',
             "Median X", "Median Y", "Median Z"])

In [16]:
# Sort the summary rows by their 'Time' label, then by 'Array Number'
df_SSL = df_SSL.sort_values(by=['Time', 'Array Number'], ascending=True)

In [17]:
# Display the sorted summary table
df_SSL

Unnamed: 0,Time,Array Number,Mean X,Mean Y,Mean Z,Median X,Median Y,Median Z
0,"August 19, 11:10, 2020",0,-0.109869,-0.169869,0.573968,-0.22,-0.234,0.598
25,"August 19, 11:10, 2020",1,-0.145667,-0.298729,0.730431,-0.239,-0.506,0.753
20,"August 19, 11:10, 2020",2,-0.226128,0.179839,0.772009,-0.264,0.197,0.843
5,"August 19, 11:10, 2020",3,-0.451725,0.119296,0.434322,-0.793,0.108,0.396
15,"August 19, 11:10, 2020",4,0.126291,-0.234421,0.753014,0.247,-0.425,0.753
10,"August 19, 11:10, 2020",5,0.045916,0.135404,0.796802,0.133,0.246,0.86
1,"August 19, 11:11, 2020",0,-0.018503,-0.17664,0.606039,0.0,-0.233,0.68
26,"August 19, 11:11, 2020",1,-0.244324,-0.148029,0.687011,-0.414,-0.212,0.685
21,"August 19, 11:11, 2020",2,-0.364442,0.094269,0.711741,-0.511,0.064,0.74
6,"August 19, 11:11, 2020",3,-0.46625,-0.083426,0.404526,-0.853,-0.263,0.28
