In [5]:
import time
import os
import re
from dateutil import parser
import yaml
import json
import hashlib

### LOAD YAML OR JSON FILE

In [3]:
def load_yaml(path):
    """Read the YAML file at ``path`` and return the parsed object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Args:
        path: Filesystem path of the YAML file to read.

    Returns:
        Whatever ``yaml.load`` builds from the document.

    NOTE(review): ``yaml.FullLoader`` is kept to preserve existing behavior.
    If ``path`` can ever point at untrusted input, consider switching to
    ``yaml.safe_load`` -- confirm against how callers use this helper.
    """
    with open(path, mode='r', encoding='utf-8') as file:
        return yaml.load(file, Loader=yaml.FullLoader)

In [4]:
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded object.

    The file is decoded as UTF-8 explicitly so that non-ASCII content is
    read the same way regardless of the platform's default locale encoding.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.
    """
    with open(path, mode='r', encoding='utf-8') as json_file:
        return json.load(json_file)

### HASH DATA WITH SHA256

In [6]:
def hash_data(data):
    """Return the SHA-256 hex digest of ``data`` serialized as compact JSON.

    Args:
        data: Any value accepted by ``json.dumps``.

    Returns:
        The hexadecimal digest string of the UTF-8 encoded JSON text.
    """
    # Compact separators drop the spaces json.dumps would otherwise emit
    # after ',' and ':'.
    compact_json = json.dumps(data, separators=(',', ':'))

    # Feed the UTF-8 bytes of the JSON text to the hasher.
    digest = hashlib.sha256()
    digest.update(compact_json.encode('utf-8'))

    return digest.hexdigest()

### TRACK A LOGFILE

In [8]:
def track(logfile, container):
    """Follow ``logfile`` and append each newly written line to ``container``.

    The file is opened, positioned at its current end, and then polled in an
    endless loop. Every line that appears is passed through ``parse_line``
    and the result is appended to ``container``. When no new line is
    available the loop sleeps for one second before trying again.

    Args:
        logfile: Path of the log file to follow.
        container: Object with an ``append`` method that receives each
            parsed line.

    Returns:
        None. The function only returns after a ``KeyboardInterrupt``
        (e.g. a kernel interrupt); any other exception raised while reading
        or parsing propagates to the caller instead of being reported as a
        manual kill.
    """
    # The context manager closes the handle however the loop ends.
    with open(logfile, 'r') as file:

        # FIND THE TAILEND OF THE FILE
        # Seeking relative to the end avoids using a byte size from os.stat
        # as a text-mode offset.
        file.seek(0, os.SEEK_END)

        # EVENT LOOP
        try:
            print('LISTENING TO:', logfile)

            while True:

                # REMEMBER THE POSITION, THEN TRY TO READ THE NEXT LINE
                where = file.tell()
                line = file.readline()

                # IF THERE IS NOTHING THERE, SLEEP FOR 1 SECOND AND RETRY
                # FROM THE SAME POSITION
                if not line:
                    time.sleep(1)
                    file.seek(where)
                    continue

                # PARSE THE LINE AND APPEND TO THE CONTAINER
                container.append(parse_line(line))

        # WHEN THE PROCESS IS KILLED
        # Only a real interrupt is treated as a manual kill; other errors
        # are no longer hidden behind this message.
        except KeyboardInterrupt:
            print('\nTHE PROCESS WAS MANUALLY KILLED')

### PARSE LINE INTO DICT

In [57]:
def parse_line(line):
    """Split one log line into timestamp, platform, module and message.

    After runs of spaces are collapsed and newlines removed, the line is
    read as six space-separated parts::

        <date> <date> <date> <platform> <name>[<code>]? <msg...>

    The first three parts are joined and handed to ``parser.parse``; the
    fifth part has its last character dropped (presumably the trailing ':'
    of a syslog-style tag -- verify against the actual log format) before
    the bracketed code is extracted.

    Args:
        line: One raw line of log text.

    Returns:
        dict with keys:
            'timestamp': ``int`` of ``parser.parse(date).timestamp()``.
                NOTE(review): timezone/year handling is whatever dateutil
                applies to the date text -- confirm for naive dates.
            'platform': the fourth part of the line.
            'module': ``{'code': ..., 'name': ...}`` where ``code`` is the
                digits found before a ']' in the module tag, or ``None``
                when the tag carries no such code, and ``name`` is the tag
                with the ``[code]`` portion removed.
            'msg': everything after the fifth space.

    Raises:
        IndexError: if the line has fewer than six space-separated parts.
    """
    # TRUNCATE MULTI-SPACING
    line = re.sub(r' +', ' ', line)

    # REMOVE LINEBREAK CHAR
    line = line.replace('\n', '')

    # SPLIT ON THE FIRST FIVE SPACES; THE REMAINDER IS THE MESSAGE
    fields = line.split(' ', 5)
    if len(fields) < 6:
        raise IndexError(
            'log line has fewer than 6 space-separated fields: %r' % line
        )

    # PROCESS DATE
    raw_date = ' '.join(fields[:3])
    unix = parser.parse(raw_date).timestamp()

    # PROCESS MODULE (drop the final character of the tag)
    raw_module = fields[4][:-1]

    # MODULE CODE & NAME
    # Raw string keeps the same pattern without invalid-escape warnings.
    found = re.search(r'(?!\[)\d*?(?=\])', raw_module)
    if found is None:
        # Tag has no "[code]" part; keep the tag as the name instead of
        # failing on a missing match.
        code = None
        name = raw_module
    else:
        code = found.group(0)
        name = raw_module.replace('[' + str(code) + ']', '')

    return {
        'timestamp': int(unix),
        'platform': fields[3],
        'module': {
            'code': code,
            'name': name
        },
        'msg': fields[5]
    }