In [2]:
import opengm
import numpy as np
from operator import itemgetter

import sys
import logging
# Root-logger setup: emit records of level DEBUG and above on stdout, each
# prefixed with the level name and the (padded) name of the emitting thread.
logging.basicConfig(
    stream=sys.stdout,
    format='[%(levelname)s] (%(threadName)-10s) %(message)s',
    level=logging.DEBUG,
)

class FactorGraph(object):
    """Name-based convenience wrapper around an ``opengm`` graphical model.

    Variables are addressed by name; the wrapper keeps the name -> position
    mapping needed to talk to opengm and runs belief propagation lazily the
    first time a result is requested after the model changed.
    """

    def __init__(self, variables, operator='multiplier'):
        """Create the underlying opengm model.

        Args:
            variables: dict mapping variable name -> dimensionality. The
                dimensionalities are handed to ``opengm.graphicalModel`` in
                the dict's iteration order, which also fixes each variable's
                index inside the model.
            operator: forwarded unchanged to ``opengm.graphicalModel``.
        """
        assert isinstance(variables, dict)

        # Walk the dict once so names and dimensionalities cannot get out of
        # step with each other.
        self.var_names = list(variables.keys())
        dimensionality = [variables[name] for name in self.var_names]

        self.gm = opengm.graphicalModel(dimensionality,
                                        operator=operator)

        # Cached inference object; None means "model changed, re-run needed".
        self.inference = None

    @staticmethod
    def _as_name_list(names):
        """Return ``names`` as a list; a single (non list/tuple) name is wrapped."""
        if isinstance(names, (list, tuple)):
            return list(names)
        return [names]

    def _index_of(self, name):
        """Return the model index of variable ``name`` or raise a descriptive ValueError."""
        try:
            return self.var_names.index(name)
        except ValueError:
            raise ValueError('unknown variable %r; known variables are %r'
                             % (name, self.var_names))

    def add_factor_function(self, variables, probabilities):
        """Attach a factor over ``variables`` with value table ``probabilities``.

        Args:
            variables: one variable name, or a list/tuple of names.
            probabilities: array-like value table; converted to a numpy array
                when it is not one already.

        Raises:
            ValueError: if a name was not declared in the constructor.
        """
        probabilities = np.asanyarray(probabilities)

        # Resolve names first so an unknown name fails before the model is touched.
        indices = [self._index_of(name) for name in self._as_name_list(variables)]

        self.gm.addFactor(self.gm.addFunction(probabilities),
                          indices)

        # Any previously computed result no longer matches the model.
        self.inference = None

    def infer(self):
        """Run opengm belief propagation (accumulator ``'maximizer'``) and cache it."""
        self.inference = opengm.inference.BeliefPropagation(self.gm, accumulator='maximizer')
        self.inference.infer()

    def get_argmax(self):
        """Return a dict mapping every variable name to its entry in the inference's ``arg()``."""
        if self.inference is None:
            self.infer()

        argmax = self.inference.arg()

        return dict((name, argmax[i]) for i, name in enumerate(self.var_names))

    def get_marginals(self, marginal_vars):
        """Return normalised marginals for the requested variable(s).

        Args:
            marginal_vars: one variable name, or a list/tuple of names.

        Returns:
            dict mapping each requested name to a float numpy array that sums
            to 1. A row whose entries sum to 0 cannot be normalised and is
            returned unchanged (all zeros) instead of NaN.

        Raises:
            ValueError: if a name was not declared in the constructor.
        """
        names = self._as_name_list(marginal_vars)
        indices = [self._index_of(name) for name in names]

        if self.inference is None:
            self.infer()

        # Ask for every variable so a row can be picked by model index below.
        all_marginals = self.inference.marginals(list(range(len(self.var_names))))

        marginals_ret = {}
        for name, idx in zip(names, indices):
            # Work on a float copy: the array owned by the inference object is
            # left untouched and integer-typed rows can still be normalised.
            row = np.array(all_marginals[idx], dtype=float)
            total = row.sum()
            if total != 0:
                row = row / total
            marginals_ret[name] = row

        return marginals_ret
    



ModuleNotFoundError: No module named 'opengm'

### Task 2 Solution

In [None]:

# Task 2 model: two variables, S1 and E1, each declared with dimensionality 2.
m = FactorGraph({'S1':2, 'E1':2})
m.add_factor_function('S1', [0.9, 0.1])           # f(S1): unary factor on S1
m.add_factor_function(['S1', 'E1'], [[0, 0.2],    # g(S1,E1): pairwise factor on (S1, E1)
                                     [0, 0.5]])   # NOTE(review): presumably rows index S1, columns E1 -- confirm
# Run belief propagation explicitly; get_argmax()/get_marginals() would
# otherwise trigger it lazily on first use.
m.infer()

# dict: variable name -> entry of the inference's arg() for that variable
argmax = m.get_argmax()

# dict: 'S1' -> marginal vector rescaled to sum to 1
marginal_S1 = m.get_marginals('S1')

print(marginal_S1)
print(argmax)

### Task 3

In [None]:
# Stage name -> 1-based stage number, assigned in the order listed here.
# get_prob() subtracts 1 from this number to obtain a vector position.
STAGE_MAP = {
    stage: number
    for number, stage in enumerate(
        (
            'benign',
            'discovery',
            'access',
            'lateral_movement',
            'privilege_escalation',
            'persistence',
            'defense_evasion',
            'collection',
            'exfiltration',
            'command_control',
            'execution',
        ),
        start=1,
    )
}

# Event name -> list of integers.
# NOTE(review): these look like the 1-based time steps at which each event
# occurs in the nine-event sequence used later in the notebook -- confirm.
EVENT_MAP = dict(
    scan=[1],
    login=[2],
    sensitive_uri=[3, 4, 5],
    new_kernel_module=[6],
    dns_tunneling=[7, 8, 9],
)

# Action name -> vector with one value per attack stage (position i holds the
# value for stage number i + 1).
ACTIONS = {
    'NO-OP':   [1.0, 0.61, 0.69, 0.09, 0.20, 0.0, 0.00, 0.0, 0.0, 0.0, 0.0],
    'MONITOR': [0.0, 0.39, 0.31, 0.84, 0.63, 0.7, 0.07, 0.1, 0.0, 0.0, 0.0],
    'STOP':    [0.0, 0.00, 0.00, 0.07, 0.17, 0.3, 0.93, 0.9, 1.0, 1.0, 1.0],
}


def get_prob(stages, p, q, stage_map=None):
    """Build a per-stage vector holding ``q * (1 - p)`` for each listed stage.

    Args:
        stages: sequence of stage names; each must be a key of the stage map.
        p: sequence of numbers, one per entry of ``stages``.
        q: sequence of numbers, one per entry of ``stages``.
            NOTE(review): the notebook does not state what ``p`` and ``q``
            represent -- only the formula below is known.
        stage_map: optional dict mapping stage name -> 1-based stage number.
            Defaults to the module-level ``STAGE_MAP``.

    Returns:
        1-D float numpy array with one slot per stage number (length equals
        the largest number in the stage map, i.e. 11 for ``STAGE_MAP``).
        Slots of stages that are not listed stay 0; if a stage is listed more
        than once the last occurrence wins.

    Raises:
        AssertionError: if ``stages``, ``p`` and ``q`` differ in length.
        KeyError: if a stage name is missing from the stage map.
    """
    if stage_map is None:
        stage_map = STAGE_MAP

    assert len(p) == len(q) == len(stages), \
        'stages, p and q must have the same length'

    # Size the vector from the map instead of a hard-coded 11 so the function
    # keeps working when stages are added or removed.
    prob = np.zeros(max(stage_map.values(), default=0))
    for stage, p_i, q_i in zip(stages, p, q):
        # Stage numbers are 1-based, vector positions 0-based.
        prob[stage_map[stage] - 1] = q_i * (1 - p_i)
    return prob
    


In [None]:
"""
As an example, we provide the Factor Graph at t=1
Your task is to come up with a general Factor Graph model 
that is parametrized for some general time t
"""
# our sequence of events is simply ['scan']
# A single variable E1 with 11 labels, one per entry of STAGE_MAP.
m = FactorGraph({'E1': 11})
# get_prob() yields an 11-vector with 0.5 * (1 - 0.04) = 0.48 in the
# 'discovery' slot (position 1) and 0.5 * (1 - 0.47) = 0.265 in the 'benign'
# slot (position 0); every other slot is 0.
m.add_factor_function('E1', get_prob(['discovery', 'benign'], [0.04, 0.47], [0.5, 0.5]))

m.infer()

# argmax: dict variable name -> label index; marginal_E1: dict 'E1' -> vector
# rescaled to sum to 1.
argmax = m.get_argmax()
marginal_E1 = m.get_marginals('E1')

print(argmax)
print(marginal_E1)

# argmax(marginal_E1) = 1, which represents the discovery stage (2nd position in array)

# to determine the action to be taken, we look at the probability values
# for the discovery stage for all possible actions, and pick the action
# with the maximum probability
idx = argmax['E1']
# (action name, value of that action at position idx) for every entry of ACTIONS
action_probabilities = [(k, stage_list[idx]) for k, stage_list in ACTIONS.items()]
print(action_probabilities)
# itemgetter(1) compares the pairs by their value, not by action name
max_action, max_probability = max(action_probabilities, key=itemgetter(1))
print(max_action)

In [None]:
# HINT: Since you only require the argmax of the ACTIONS dictionary at each stage,
#       convert the dictionary to a list of actions indexed by stage instead

In [None]:
"""
Build your general model below.
Run your inference for t=1 through t=9
"""
# Observed events for t=1..9, in order: one scan, one login, three
# sensitive_uri hits, one new_kernel_module, three dns_tunneling events.
full_sequence = (
    ['scan', 'login']
    + ['sensitive_uri'] * 3
    + ['new_kernel_module']
    + ['dns_tunneling'] * 3
)

