In [5]:
import time
import re
from dateutil import parser
import yaml
import json
import hashlib
import base64

### LOAD YAML OR JSON FILE

In [3]:
def load_yaml(path):
    """Read the YAML file at ``path`` and return the parsed document.

    Args:
        path: Filesystem path of the YAML file to read.

    Returns:
        The Python object ``yaml.load`` builds from the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # DECODE EXPLICITLY AS UTF-8 RATHER THAN THE PLATFORM LOCALE ENCODING,
    # SO NON-ASCII CONTENT LOADS THE SAME WAY ON EVERY MACHINE
    with open(path, mode='r', encoding='utf-8') as file:
        # NOTE(review): FullLoader RESOLVES MORE TAGS THAN THE SAFE LOADER;
        # IF THIS EVER READS UNTRUSTED FILES, CONSIDER yaml.safe_load INSTEAD
        return yaml.load(file, Loader=yaml.FullLoader)

In [4]:
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded document.

    The file is opened in binary mode so that ``json.load`` performs its own
    encoding detection (UTF-8, UTF-16 or UTF-32, with or without a BOM)
    instead of depending on the platform's locale encoding.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The Python object decoded from the JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # BINARY MODE: LET json.load DETECT THE ENCODING ITSELF
    with open(path, mode='rb') as json_file:
        return json.load(json_file)

### HASH DATA WITH SHA256

In [6]:
def hash_data(data):
    """Return the SHA-256 hex digest of ``data`` serialised as compact JSON.

    The value is dumped with ``,`` and ``:`` separators (no whitespace),
    encoded as UTF-8 and hashed. Keys are not sorted, so two dicts with the
    same content but a different insertion order produce different digests.

    Args:
        data: Any JSON-serialisable value.

    Returns:
        str: The hexadecimal SHA-256 digest.
    """
    # COMPACT JSON -> UTF-8 BYTES
    payload = json.dumps(data, separators=(',', ':')).encode('utf-8')

    # DIGEST AS HEX STRING
    return hashlib.sha256(payload).hexdigest()

### PARSE LOG LINE INTO (MODULE, CODE, MESSAGE, TIMESTAMP) TUPLE

In [3]:
def parse_line(line):
    """Split one log line into ``(module, code, message, timestamp)``.

    After runs of spaces are collapsed, the first three space-separated
    fields are parsed as the date, the fifth field is the module tag and
    everything after the fifth space is the message. This looks like the
    classic syslog layout ``<month> <day> <time> <host> <module>[<code>]: <msg>``
    -- TODO confirm against the real log source.

    Args:
        line: Raw log line; newline characters are removed before parsing.

    Returns:
        tuple: ``(module, code, message, timestamp)`` where ``module`` is the
        tag without its ``[code]`` part, ``code`` is the digits found before
        a ``]`` in the tag (the string ``'None'`` when there are none),
        ``message`` is the remainder of the line and ``timestamp`` is the
        parsed date as integer Unix seconds.

    Raises:
        IndexError: If the line has fewer than five spaces once normalised.
    """
    # TRUNCATE MULTI-SPACING SO FIELD POSITIONS ARE STABLE
    line = re.sub(' +', ' ', line)

    # REMOVE LINEBREAK CHARS
    line = line.replace('\n', '')

    # FIND EACH OCCURRENCE OF SPACES IN LINE
    spaces = [i for i, ltr in enumerate(line) if ltr == ' ']

    # FAIL WITH A READABLE MESSAGE (SAME EXCEPTION TYPE AS A RAW INDEX MISS)
    if len(spaces) < 5:
        raise IndexError(
            'log line has too few space-separated fields: ' + repr(line)
        )

    # MODULE TAG: FIFTH FIELD MINUS ITS LAST CHARACTER
    # (presumably the trailing ':' -- verify against the log format)
    raw_module = line[spaces[3] + 1:spaces[4] - 1]

    # MODULE CODE: DIGITS DIRECTLY BEFORE A CLOSING BRACKET
    # (RAW STRING AVOIDS INVALID-ESCAPE WARNINGS FOR \[ \d \])
    raw_code = re.search(r'(?!\[)\d*?(?=\])', raw_module)

    # NO CODE -> THE STRING 'None', OTHERWISE THE MATCHED DIGITS
    code = 'None' if raw_code is None else raw_code.group(0)

    # MODULE NAME WITHOUT THE BRACKETED CODE
    module = raw_module.replace('[' + str(code) + ']', '')

    # PROCESS TIMESTAMP (FIRST THREE FIELDS)
    # NOTE(review): if the parsed datetime is naive, .timestamp() interprets
    # it in the machine's local timezone -- confirm this is intended
    raw_date = line[:spaces[2]]
    unix = parser.parse(raw_date).timestamp()

    # MODULE, CODE, MESSAGE, TIMESTAMP
    return module, code, line[spaces[4] + 1:], int(unix)

### ENCODE JSON WITH BASE64

In [4]:
def encode(data):
    """Serialise ``data`` to JSON and return it Base64-encoded as text.

    Args:
        data: Any JSON-serialisable value.

    Returns:
        str: Base64 text of the UTF-8 encoded JSON string.
    """
    # JSON TEXT -> BYTES
    raw_bytes = json.dumps(data).encode()

    # BASE64 BYTES -> STRING
    return base64.b64encode(raw_bytes).decode()