In [1]:
import functools
import json
import re
import time

In [2]:
# Robocopy log fixtures: one from a successful run, one from a run whose
# source directory was invalid (per the file names).
fpath_valid, fpath_invalid = (
    'rocopylog.txt',
    'rocopylog_invalid_source.txt',
)

In [3]:
class MetricsLog:
    """Extract summary metrics from a Windows Robocopy log file.

    The log is scanned line by line and every line is tested against a fixed
    sequence of patterns; the first pattern that matches decides how the line
    is recorded:

    * ``Source`` / ``Dest`` directory lines,
    * ``Speed`` lines,
    * ``Started`` / ``Ended`` timestamps,
    * ``Dirs`` / ``Files`` / ``Bytes`` rows of the summary table,
    * ``YYYY/MM/DD HH:MM:SS ERROR ...`` lines.

    Call :meth:`main` (or :meth:`createMetric`) to parse, then
    :meth:`resultDictionary` or :meth:`resultJSON` to read the results.
    """

    # Patterns are compiled once, at class-definition time, rather than once
    # per instance.
    __DIR_NAME_RE = re.compile(r"\b(?P<type>Source|Dest)\s+:\s+(?P<path>.+)")
    # The decimal point is escaped: an unescaped "." would accept any
    # character inside the amount (e.g. "12x34").
    __SPEED_RE = re.compile(r"\b(?:Speed)\s+:\s+(?P<amount>\d+\.?\d*)\s+(?P<unit>.*)")
    __TIMESTAMP_RE = re.compile(r"\b(?P<type>Started|Ended)\s+:\s+(?P<DayName>\w+),\s+(?P<Month>\w+)\s+(?P<Day>\d+),\s+(?P<Year>\d{4})\s+(?P<Hour>\d+):(?P<Minute>\d+):(?P<Second>\d+)\s+\b(?P<Noon>\w+)\b")
    # One summary-table cell: a plain integer, or an abbreviated size such as
    # "3.84 m" (Robocopy abbreviates large values unless run with /BYTES).
    _QUANTITY = r"\d+(?:\.\d+)?(?:\s[kmgt]\b)?"
    __TABLE_ENTRIES_RE = re.compile(
        r"\b(?P<type>Dirs|Files|Bytes)\s+:"
        rf"\s+(?P<Total>{_QUANTITY})"
        rf"\s+(?P<Copied>{_QUANTITY})"
        rf"\s+(?P<Skipped>{_QUANTITY})"
        rf"\s+(?P<Mismatch>{_QUANTITY})"
        rf"\s+(?P<FAILED>{_QUANTITY})"
        rf"\s+(?P<Extras>{_QUANTITY})"
    )
    __ERROR_RE = re.compile(r"\b(?P<Year>\d{4})/(?P<Month>\d{2})/(?P<Day>\d{2})\s+(?P<Hour>\d{2}):(?P<Minute>\d{2}):(?P<Second>\d{2})\s+ERROR\s+(?P<Error>.+)")

    def __init__(self, filePath: str) -> None:
        """Remember the log location; nothing is read until parsing is requested.

        Args:
            filePath: Path of the Robocopy log file to parse.
        """
        self.__log_filePath = filePath
        self.__reset()

    def __reset(self) -> None:
        """Clear every collected result so a parse starts from an empty state."""
        self.__directory = []
        self.__ts = []
        self.__entries = []
        self.__speed = []
        self.__error = False
        self.__error_message = []

    @staticmethod
    def __mergeDateTime(record: dict) -> dict:
        """Collapse the split date/time capture groups into 'date' and 'time'.

        The ``Year``/``Month``/``Day`` and ``Hour``/``Minute``/``Second`` keys
        (plus ``Noon`` when the pattern captured one) are removed from
        *record* and replaced by ``date`` (``Year-Month-Day``) and ``time``
        (``Hour:Minute:Second``, followed by the AM/PM marker if present).
        """
        year, month, day = record.pop('Year'), record.pop('Month'), record.pop('Day')
        hour, minute, second = record.pop('Hour'), record.pop('Minute'), record.pop('Second')
        noon = record.pop('Noon', None)
        clock = f'{hour}:{minute}:{second}'
        record['date'] = f'{year}-{month}-{day}'
        record['time'] = clock if noon is None else f'{clock} {noon}'
        return record

    def createMetric(self) -> None:
        """Read the log file and populate the collected metrics.

        Can be called directly instead of :meth:`main`. Previously collected
        results are discarded first, so calling this more than once does not
        duplicate records.

        Raises:
            OSError: If the log file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        self.__reset()

        with open(self.__log_filePath, 'r', encoding='utf-8') as file:
            for line in file:  # one line at a time; first matching pattern wins
                match = self.__DIR_NAME_RE.search(line)
                if match:  # source / destination directory
                    self.__directory.append(match.groupdict())
                    continue

                match = self.__SPEED_RE.search(line)
                if match:  # transfer speed
                    self.__speed.append(match.groupdict())
                    continue

                match = self.__TIMESTAMP_RE.search(line)
                if match:  # start or end timestamp
                    self.__ts.append(self.__mergeDateTime(match.groupdict()))
                    continue

                match = self.__TABLE_ENTRIES_RE.search(line)
                if match:  # row of the summary table
                    self.__entries.append(match.groupdict())
                    continue

                match = self.__ERROR_RE.search(line)
                if match:  # timestamped ERROR line
                    self.__error = True
                    self.__error_message.append(self.__mergeDateTime(match.groupdict()))

    def __calcTime(function):
        """Decorate a method so that it returns a report of its own run time.

        The wrapped method's return value is discarded; the wrapper returns
        the string ``'Time taken = <seconds> seconds'`` instead.
        """
        @functools.wraps(function)
        def wrapper(self, *args, **kwargs):
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            function(self, *args, **kwargs)
            elapsed = time.perf_counter() - start_time
            return f'Time taken = {elapsed:0.6f} seconds'
        return wrapper

    @__calcTime
    def main(self):
        """Parse the log file and return a 'Time taken = ... seconds' string."""
        return self.createMetric()

    def resultDictionary(self) -> dict:
        """Return every collected detail as a JSON-serialisable dictionary."""
        return {
            'File Path': self.__log_filePath,
            'Directory': self.__directory,
            'Time Stamp': self.__ts,
            'Table Entries': self.__entries,
            'Speed': self.__speed,
            'Error': self.__error,
            'Error Message': self.__error_message,
        }

    def resultJSON(self) -> None:
        """Write the results to ``<log file stem>.json`` in the working directory.

        The stem is the last component of the log path with its final
        extension removed. Both ``/`` and ``\\`` are treated as separators so
        that Windows-style paths resolve to the same name on every platform,
        and only the last extension is stripped so ``log.2021.txt`` and
        ``log.2022.txt`` do not collide.
        """
        base_name = re.split(r'[\\/]', self.__log_filePath)[-1]
        # Fall back to the whole name for dot-files such as ".log", whose
        # stem would otherwise be empty.
        stem = base_name.rsplit('.', 1)[0] or base_name
        fileName = stem + '.json'
        with open(fileName, 'w', encoding='utf-8') as writeFile:
            json.dump(self.resultDictionary(), writeFile, ensure_ascii=False, indent=4)
        print('JSON file created')

In [4]:
# Parse the valid log, print the timing report followed by a blank line,
# then export the collected metrics as JSON.
a = MetricsLog(fpath_valid)
print(a.main(), end='\n\n')
a.resultJSON()

Time taken = 0.002297 seconds

JSON file created


In [5]:
# Same run against the log with the invalid source: timing report, blank
# line, then the JSON export.
b = MetricsLog(fpath_invalid)
print(b.main(), end='\n\n')
b.resultJSON()

Time taken = 0.001997 seconds

JSON file created
