# extractDirectionality

## Imports

In [142]:
import pandas as pd
import re 
import json
import datetime
import os
import time
import glob
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


## Subroutines
Subroutines neccessary for extractDirectionality
timeExtract takes the log file to be parsed, and returns the start time and end time in a tuple:<br>
    [startTime, endTime]<br>
<br>
durationinMicroseconds takes the log file to be parsed, and it returns the total duration, startTime, and endTime:<br>
    duration, startTime, endTime

In [143]:
# Extract time information of each recording from the log file
def timeExtract(filename):
    with open(filename, 'rb') as f:
        # Start counting from the last byte
        counter = 1
        # Go to the 2nd byte before the end of the last line
        f.seek(-2, 2) 
        while f.read(1) != b'\n':
            f.seek(-2, 1)
            counter=counter+1
        endTime_line = f.readline().decode()
        # Go to the 2nd byte before the end of the last second line
        f.seek(-counter-2, 2)
        while f.read(1) != b'\n':
            f.seek(-2, 1)
        startTime_line = f.readline().decode()

    return [startTime_line, endTime_line]

In [144]:
# Calculate duration of each recording in microseconds
def durationinMicroseconds(filename):
    startTime = timeExtract(filename)[0].split()[2:]
    endTime = timeExtract(filename)[1].split()[2:]
    startTimeStr = startTime[0] + ' ' + startTime[1]
    endTimeStr = endTime[0] + ' ' + endTime[1]
    T1 = datetime.datetime.strptime(startTimeStr, '%Y-%m-%d %H:%M:%S.%f')
    T2 = datetime.datetime.strptime(endTimeStr, '%Y-%m-%d %H:%M:%S.%f')
    delta = T2-T1
    duration = delta.seconds*1000000 + delta.microseconds
    
    return duration, T1, T2

## Function: extractDirectionalities
Decription:<br>
extractDirectionalities accepts a log file generated from data collected from an ODAS microphone, it will then organize the file into a dataframe, where each data point is time, direction, and strength of a single source.<br><br>
Parameters:<br>
**log file path, Microphopne Number**<br><br>
Returns a dataframe with the following columns:<br> 
**Timestamp, Time, Time In Seconds, Microphone Number, ID, X, Y, Z, Activity** <br>


In [159]:
def extractDirectionalities(filename, mic_number):
    with open(filename, 'r') as f:
        text = f.read()
        # Use repex to store blocks of data into a list
    data = re.split('(?<=})\n(?={)', text) 
        # Delete the time info from the last data block
    tmp = data[-1][:(data[-1].rfind("}")+1)]
    data[-1] = tmp
        
    #list of src blocks 
    srcList = [json.loads(block)["src"] for block in data]
    
    #initialize dataframe to have colums: timestamp, time, data inside source
    #timestamp is the initial time stamp
    #time is the datetime value converted from the timestamp and intitial time
    #source is a 4 by 6 array where the rows are the source, and the columns are the source values
    df = pd.DataFrame(columns = ['Timestamp', 'Time', 'Time In Seconds', 'Microphone Number', 'Source ID', 'X', 'Y', 'Z', 'Activity'])
    
    #Used for calculating timestamps -> time
    duration, startTime, endTime = durationinMicroseconds(filename)
    start_time_in_seconds = time.mktime(startTime.timetuple())
    t = duration/len(data) / 1000000
    
    index = 1.0
    for block in srcList:
        if block[0]["id"] != 0 or block[1]["id"] != 0 or block[2]["id"] != 0 or block[3]["id"] != 0:
            time_in_seconds = start_time_in_seconds + (index - 1.0) * t
            for i in range(0, 4):
                if block[i]['id'] != 0:
                    df = df.append(pd.DataFrame({"Timestamp": [index], "Time":datetime.datetime.fromtimestamp(time_in_seconds).strftime("%A, %B %d, %Y %I:%M:%S"), "Time In Seconds": time_in_seconds, "Microphone Number":mic_number, "Source ID": block[i]["id"], "X": block[i]["x"], "Y": block[i]["y"], "Z": block[i]["z"], "Activity": block[i]["activity"]}, index=[0]))
        index = index + 1.0
            
    return(df)
            

## Function: mergeDirectionalities
Decription: <br>
mergeDirectionalities will iterate through all the files in "data" folder, and it will use extractDirectionalities create each file into a dataframe. The dataframe created will be appended into a master dataframe consiting of all the dataframes created from using extractDirectionalities on each file in "data" folder. <br>

Prerequirements:<br>
All folders in "data" folder must be filled with desired .log files in their respective recordingx folders.To do this, run the function above to automate it, or manually download the files from google drive and insert them into the correct folders.<br><br>

Parameters:<br>
None.<br><br>

Returns a dataframe with the following columns:<br> 
**Timestamp, Time, Time In Seconds, Microphone Number, ID, X, Y, Z, Activity** <br>

In [160]:
def mergeDirectionalities():   
    #create dataframe
    df = pd.DataFrame(columns = ['Timestamp', 'Time', 'Microphone Number', 'Source ID', 'X', 'Y', 'Z', 'Activity'])
    for i in range(4):
        for filename in glob.glob("/Users/ardelalegre/CSE4223-ODAS/data/recordings" + str(i) + "/*.log"):
            with open(filename, 'r') as f:
                firstline = f.readline()
            if firstline == "SST log contains no useful data\n":
                pass
            else:
                df1 = extractDirectionalities(filename, i)
                df = df.append(df1)
            
    df = df.sort_values(['Time In Seconds'])
    return df

## Testing

In [161]:
df = mergeDirectionalities()
df.head(20)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,Activity,Microphone Number,Source ID,Time,Time In Seconds,Timestamp,X,Y,Z
0,1.0,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,300.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,301.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,302.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,303.0,0.701,-0.561,0.441
0,0.998,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,304.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,305.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,306.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,307.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,308.0,0.701,-0.561,0.441
0,0.999,0,2,"Monday, September 09, 2019 02:55:05",1568066000.0,309.0,0.701,-0.561,0.441
