<a href="https://colab.research.google.com/github/xray-hep-ucd/Environment-Monitoring/blob/analysis/AnalysisTools.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analysis Tools


Tools for post-run analysis of environment data.

# Setup

## General Setup

In [None]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Google Colab Setup

In [None]:
#Mount Google Drive when running on Google Colab; otherwise use local paths
try:
    from google.colab import drive
except ImportError:
    #google.colab cannot be imported, so this is not a Colab session:
    #leave main_path empty
    main_path = ''
else:
    #Only the import is guarded. A failure while mounting is raised to the user
    #instead of being silently treated as "not running on Colab"
    drive.mount("/content/drive", force_remount=False)
    main_path = '/content/drive/MyDrive/'
sys.path.append(main_path)

In [None]:
#Append the project folder to main_path.
#The sub-path has no leading "/" so that an empty main_path gives a path
#relative to the working directory rather than one at the filesystem root,
#and a main_path ending in "/" does not get a doubled "//"
main_path += "Project Death Ray/Monitoring/"
main_path

# Class and Utility Function Definitions

## Time Manipulation Utilities

In [None]:
#Parse a ":"-separated time string (e.g. "hh:mm:ss") and
#return (hours, minutes, seconds) as ints
def splitTimes(timeStr):
    fields = timeStr.split(":")
    #Only the first three fields are read; any extra fields are ignored
    hours, minutes, seconds = (int(fields[i]) for i in range(3))
    return hours, minutes, seconds

#Convert an "hh:mm:ss" string to a total number of seconds (int)
def asSeconds(time):
    hours, minutes, seconds = splitTimes(time)
    return seconds + (60 * minutes) + (3600 * hours)

#Convert an "hh:mm:ss" string to a total number of minutes (float)
def asMinutes(time):
    hours, minutes, seconds = splitTimes(time)
    wholeMinutes = (60 * hours) + minutes
    return wholeMinutes + (seconds / 60.0)

#Return time1 - time2, where both are "hh:mm:ss" strings
#If asSec=False (default), returns the difference as a zero-padded "hh:mm:ss" str
#If asSec=True, returns the difference as total seconds as an int
#If time1 is earlier than time2 the hours field is negative while minutes and
#seconds stay in [0, 59] (e.g. "-1:30:00" means minus 30 minutes), so passing
#the returned string to asSeconds() still gives the correct signed total
def timeDiff(time1, time2, asSec=False):
    #Work in total seconds so no manual borrowing between fields is needed
    totalSec = asSeconds(time1) - asSeconds(time2)
    if asSec:
        return totalSec

    #divmod floors, which reproduces the borrow behaviour described above
    hrs, remainder = divmod(totalSec, 3600)
    mins, secs = divmod(remainder, 60)
    return "{:02d}:{:02d}:{:02d}".format(hrs, mins, secs)

    
    

## Class Definitions

In [None]:

#A class to store monitoring data from a run
#Data is stored in a Pandas Dataframe
class Run:

    name = "" #Label for the run; readFile() sets it to the filepath if left empty
    filepath = "" #File that was read; becomes a list once addFile() appends another file
    data = None #A Pandas Dataframe storing the events
    nEvents = 0 #Number of rows (events) currently in data


    def __init__(self, name = ""):
        self.name = name


    #Read the first seven columns of a CSV file into a new dataframe
    #The first line of the file is used as the header
    def _readCSV(self, filepath):
        return pd.read_csv(filepath, header=0, usecols=[0,1,2,3,4,5,6])


    #Read the file in filepath into a Pandas DF, replacing any data already stored
    #If the run has no name yet, the filepath is used as its name
    def readFile(self, filepath):
        self.filepath = filepath
        if self.name == "":
            self.name = filepath

        self.data = self._readCSV(filepath)
        self.nEvents = self.data.shape[0]


    #Append data in filepath to the current dataframe
    #If no file has been read yet this behaves like readFile()
    #Afterwards self.filepath is a list of every file that was read
    def addFile(self, filepath):
        if self.filepath == "":
            self.readFile(filepath)
            return

        #Promote a single stored path to a list before recording the new one
        if not isinstance(self.filepath, list):
            self.filepath = [self.filepath]
        self.filepath.append(filepath)

        newDF = self._readCSV(filepath)
        #concat returns a new dataframe, so it must be assigned back.
        #ignore_index keeps the row labels sequential [0, nEvents-1], which
        #duration() relies on when it looks up the label nEvents-1
        self.data = pd.concat([self.data, newDF], ignore_index=True)
        self.nEvents = self.data.shape[0]


    #Drop events which are incomplete from the dataframe
    #if how="all", drops only rows which are entirely NA
    #if how="any" (default) drops all rows which have an NA
    #if resetIndex=True (default), index is reset to be sequential [0, nEvents-1],
    #if resetIndex=False, leaves the original row number as index
    def dropIncompleteEvents(self, how="any", resetIndex=True):
        self.data.dropna(inplace=True, how=how)
        self.nEvents = self.data.shape[0]
        if resetIndex:
            self.data.reset_index(inplace=True, drop=True)


    #Drop unwanted rows or columns from the data
    #labels : a single label (e.g. row number in the index, or column name) or list of such to drop
    #axis : 0= drop row(s) (default), 1= drop column(s)
    #resetIndex : if True (default), index is reset to be sequential [0, nEvents-1]
    def drop(self, labels, axis=0, resetIndex = True):
        self.data.drop(labels, axis=axis, inplace=True)
        self.nEvents = self.data.shape[0]
        if resetIndex:
            self.data.reset_index(inplace=True, drop=True)


    #Return the column names (the dataframe's columns Index)
    def getCols(self):
        return self.data.columns


    #Retrieve a column(s) from the dataframe
    #colNames should be either a column name or list of column names
    #timeUnits is accepted but not used by this method
    def get(self, colNames, timeUnits=""):
        return self.data[colNames]


    #Add a column for time in specified units, either "s" (seconds) (default) or "m" (minutes)
    #The new column is named "Time [s]" or "Time [m]" and is computed from "Time [hh:mm:ss]"
    #For any other units a message is printed and no column is added
    def addTimeCol(self, units="s"):
        timeDefault = self.get("Time [hh:mm:ss]")
        if units == "s":
            timeNew = timeDefault.apply(asSeconds)
        elif units == "m":
            timeNew = timeDefault.apply(asMinutes)
        else:
            print("Unrecognized time units, use either 's' for seconds or 'm' for minutes")
            return
        self.data["Time [" + units + "]"] = timeNew


    #Return the time elapsed between the rows labelled startInd and endInd
    #startInd and endInd are index labels, so they match row positions only
    #while the index is sequential
    #If endInd < startInd (including the default endInd=-1), the label nEvents-1 is used as the end
    #asSec is passed to timeDiff(): False returns a str, True returns total seconds
    def duration(self, asSec=False, startInd=0, endInd=-1):
        if endInd < startInd:
            endInd = self.nEvents - 1
        startTime = self.data.at[startInd, "Time [hh:mm:ss]"]
        endTime = self.data.at[endInd, "Time [hh:mm:ss]"]
        return timeDiff(endTime, startTime, asSec)




# Examples and Testing

## Read a file into a Run object

In [None]:
#Load the sample CSV into a Run, discard every row that has a missing value,
#then display the resulting dataframe
filepath = main_path + "testFile.csv"
testRun = Run(name="Test Run")
testRun.readFile(filepath)
testRun.dropIncompleteEvents(how="any")
testRun.data


## Examples with times

In [None]:
#Elapsed time between the rows labelled 20 and 21, returned as a str (asSec=False)
dur = testRun.duration(asSec=False, startInd=20, endInd=21)
dur

In [None]:
#Add "Time [s]" and "Time [m]" columns computed from "Time [hh:mm:ss]", then display the data
testRun.addTimeCol(units="s")
testRun.addTimeCol(units="m")
testRun.data

## Try Dropping Unwanted Rows

In [None]:
#Remove a single row by its index label, then display the data to confirm it is gone
testRun.drop(9, axis=0, resetIndex=True) #Drop row 9 and relabel rows sequentially
testRun.data

## Plotting Using Built-In Pandas Features

In [None]:
#Line plots of the columns at positions 1-4 against "Time [s]", one subplot per column
testRun.data.plot(
    x="Time [s]",
    y=testRun.getCols()[1:5],
    kind="line",
    subplots=True,
    figsize=(10,10),
    title="Temperatures for Run=" + testRun.name,
    ylabel="[C]",
)