In [5]:
import time
import re
from dateutil import parser
import yaml
import json
import hashlib
import base64

### LOAD YAML OR JSON FILE

In [3]:
def load_yaml(path):
    """Load and parse the YAML document stored at ``path``.

    The file is opened in binary mode so that PyYAML decodes the bytes
    itself (UTF-8 by default, UTF-16 when a byte-order mark is present)
    instead of relying on the platform's locale encoding, which is not
    UTF-8 on every system.

    Args:
        path: Filesystem path of the YAML file.

    Returns:
        The Python object that ``yaml.load`` builds from the document.
    """
    # NOTE(security): FullLoader accepts more than plain scalars, lists and
    # dicts. If these files can come from an untrusted source, consider
    # yaml.safe_load instead -- TODO confirm which tags callers rely on.
    with open(path, mode='rb') as file:
        return yaml.load(file, Loader=yaml.FullLoader)

In [4]:
def load_json(path):
    """Load and parse the JSON document stored at ``path``.

    The file is read as bytes so that the ``json`` module detects the
    encoding itself (UTF-8 with or without a byte-order mark, UTF-16 or
    UTF-32) rather than depending on the platform's locale encoding.

    Args:
        path: Filesystem path of the JSON file.

    Returns:
        The Python object decoded from the document.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, mode='rb') as json_file:
        return json.load(json_file)

### HASH DATA WITH SHA256

In [6]:
def hash_data(data):
    """Return the SHA-256 hex digest of ``data`` serialised as compact JSON.

    The value is dumped with ``(',', ':')`` separators, so no whitespace is
    emitted between JSON tokens (whitespace inside string values is kept).
    Dictionary keys are serialised in their existing order, so two dicts
    with the same items in a different order give different digests.

    Args:
        data: Any JSON-serialisable value.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    # COMPACT JSON TEXT, NO WHITESPACE BETWEEN TOKENS
    compact = json.dumps(data, separators=(',', ':'))

    # FEED THE UTF8 BYTES INTO A SHA256 HASHER
    hasher = hashlib.sha256()
    hasher.update(compact.encode('utf-8'))

    return hasher.hexdigest()

### PARSE LINE INTO DICT

In [5]:
def parse_line(line):
    """Split one log line into module, code, message and timestamp.

    An optional bracketed numeric code (digits, dots and spaces ending in a
    digit, e.g. ``[1234]`` or ``[ 12.5]``) is extracted first and cut out of
    the line. The rest is sliced by the positions of its space characters:
    everything before the third space is the date, the text between the
    fourth and fifth space is the module, and everything from the fifth
    space onwards is the message.

    Args:
        line: Raw log line.

    Returns:
        A tuple ``(module, code, message, timestamp)`` where ``module`` holds
        only the ASCII letters of the module field, ``code`` is the bracketed
        code without spaces (``'none'`` when the line has no such code),
        ``message`` is the remaining text with runs of spaces collapsed, and
        ``timestamp`` is the parsed date as whole epoch seconds.

    Raises:
        IndexError: If fewer than five space characters remain in the line
            once the code has been removed.
    """

    # DEFAULT TO NO CODE
    code = 'none'

    # FISH OUT THE CODE (FIRST BRACKETED NUMBER ONLY)
    match = re.search(r"\[([0-9. ]*[0-9]+)\]", line)

    if match is not None:
        code = match.group(1).replace(' ', '')

        # REMOVE ONLY THE MATCHED OCCURRENCE, SO AN IDENTICAL BRACKETED
        # VALUE LATER IN THE MESSAGE IS NOT SILENTLY STRIPPED
        line = line[:match.start()] + line[match.end():]

    # FIND ALL SPACES IN STRING
    spaces = [i for i, ltr in enumerate(line) if ltr == ' ']

    # FISH OUT THE DATE
    # NOTE(review): .timestamp() treats a naive datetime as local time --
    # confirm whether the log dates carry a timezone.
    raw_date = line[:spaces[2]]
    timestamp = int(parser.parse(raw_date).timestamp())

    # FISH OUT THE MODULE (LETTERS ONLY)
    raw_module = line[spaces[3]:spaces[4]]
    module = re.sub(r'[^A-Za-z]', '', raw_module)

    # FISH OUT THE REMAINING MESSAGE AND COLLAPSE REPEATED SPACES
    message = line[spaces[4]:].strip()
    message = re.sub(' +', ' ', message)

    return module, code, message, timestamp

### EXTRACT NUMBERS FROM MESSAGE

In [1]:
def extract_numbers(message):
    """Pull numeric runs out of ``message`` and replace each with ``{}``.

    A run is any sequence of digits, dots and spaces that ends in a digit,
    so spaces or dots directly in front of a number belong to the captured
    value and are consumed by the placeholder as well
    (``"took 5 seconds"`` gives ``[" 5"]`` and ``"took{} seconds"``).

    Args:
        message: Text to scan.

    Returns:
        A tuple ``(values, substituted)``: the captured runs in order of
        appearance, and the message with every run replaced by ``{}``.
    """
    values = []

    def _swap(found):
        # REMEMBER THE NUMERIC VALUE, LEAVE BRACKETS IN ITS PLACE
        values.append(found.group(1))
        return '{}'

    # SINGLE PASS: COLLECT AND SUBSTITUTE
    substituted = re.sub(r"([0-9. ]*[0-9]+)", _swap, message)

    return values, substituted

### ENCODE JSON WITH BASE64

In [4]:
def encode(data):
    """Serialise ``data`` to JSON and return it Base64-encoded as text.

    Args:
        data: Any JSON-serialisable value.

    Returns:
        A ``str`` holding the Base64 form of the JSON text's encoded bytes.
    """
    # STRINGIFY, CONVERT TO BYTES, ENCODE
    payload = json.dumps(data).encode()
    wrapped = base64.b64encode(payload)

    # HAND BACK TEXT RATHER THAN BYTES
    return wrapped.decode()