In [3]:
!conda env list
!python -V
import sys
print(sys.executable)

# conda environments:
#
base                     /Applications/anaconda3
phd                   *  /Applications/anaconda3/envs/phd

Python 3.10.0
/Applications/anaconda3/envs/phd/bin/python


In [4]:
# DATA ANALYSIS
import numpy as np
import pandas as pd
import random as rand

# PLOTS
# import matplotlib.pyplot as plt
# import seaborn as sns
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots

# OTHERS
import math
import statistics as stats
import time as t
#import pickle
import functools as fc # contains reduce
import copy

# DEBUGGER
from IPython.core.debugger import set_trace
# For executing line by line use n and 
# for step into a function use s and 
# to exit from debugging prompt use c.
# REFACTOR: uninstall jupyter packages that I installed before, not using PyCharm built in debugger


# import color blind colors
colorblind = ['#377eb8', '#ff7f00', '#4daf4a',
              '#f781bf', '#a65628', '#984ea3',
              '#999999', '#e41a1c', '#dede00']

# Library <a class="anchor" id="1"></a>

In [5]:
# Utility functions
%load utils.py
import utils

In [6]:
# Lung Function Model
class lungModel:

    # Names
    """
    i: inflammation
    bl: bacterial load
    w: wellness
    """
    # Marginal distributions
    marginal_i = {
        "absent": 0.3,
        "small": 0.5,
        "heavy": 0.2
    }
    # Conditional probability table (cpt) of varB knowing varA
    cpt_bl_i = {
        "absent": {
            "low": 0.6,
            "medium": 0.2,
            "high": 0.2
        },
        "small": {
            "low": 0.3,
            "medium": 0.3,
            "high": 0.4
        },
        "heavy": {
            "low": 0.1,
            "medium": 0.3,
            "high": 0.6
        }
    }
    cpt_w_i = {
        "absent": {
            1: 0.01,
            2: 0.09,
            3: 0.2,
            4: 0.3,
            5: 0.4
        },
        "small": {
            1: 0.2,
            2: 0.3,
            3: 0.3,
            4: 0.1,
            5: 0.1
        },
        "heavy": {
            1: 0.4,
            2: 0.3,
            3: 0.15,
            4: 0.1,
            5: 0.05
        }
    }
    cpt_FEV1_bl = {
        'low': {
            'low': 0.6,
            'medium': 0.3,
            'high': 0.2,
        },
        'medium': {
            'low': 0.3,
            'medium': 0.4,
            'high': 0.3,
        },
        'high': {
            'low': 0.1,
            'medium': 0.4,
            'high': 0.5,
        }
    }

    def sample(self):
        inflammation = utils.threeStatesSample(list(self.marginal_i.values()),
                                               list(self.marginal_i.keys()))

        wellness = utils.nStatesSample(list(self.cpt_w_i[inflammation].values()),
                                 list(self.cpt_w_i[inflammation].keys()))

        bacterial_load = utils.threeStatesSample(list(self.cpt_bl_i[inflammation].values()),
                                            list(self.cpt_bl_i[inflammation].keys()))

        FEV1 = utils.threeStatesSample(list(self.cpt_FEV1_bl[bacterial_load].values()),
                                list(self.cpt_FEV1_bl[bacterial_load].keys()))

        return {
            "Inflammation": inflammation,
            "Wellness": wellness,
            "Bacterial load": bacterial_load,
            "FEV1": FEV1
        }
model = lungModel()
model.sample()

{'Inflammation': 'small',
 'Wellness': 3,
 'Bacterial load': 'low',
 'FEV1': 'high'}

In [7]:
# inference code
def observe(obs_B, key_value_pair_B):
    node_message_B = copy.copy(key_value_pair_B)
    for key, val in node_message_B.items():
        if key != obs_B: node_message_B[key] = 0
        else: node_message_B[key] = 1

    print("Observation:", node_message_B)
    return node_message_B

# factor node message
def message_down(dist):
    """
    dist: marginal distribution
    """
    print("Message down: ", dist)
    return dist

def factor_message_up(incoming_message, cpt_B_A):
    """
    The factor mode message is the product of all received messages on the other edges, multiplied by the conditional probability distribution for the factor and summed over all variables except the one being sent to
    :param incoming_message:
    :param cpt_B_A:
    :return:
    """
    # TODO: add a function to invert a conditional probability table to merge factor_message_up and factor_message_down
    # compute sum_B( P(B|A) )
    factor_message = utils.get_first_level_keys(cpt_B_A)
    for key_A, items_B in cpt_B_A.items():
        for key_B, p_B_A in items_B.items():
            factor_message[key_A] += p_B_A * incoming_message[key_B]

    message = utils.normalise(factor_message)
    print("Message up:", message)

    return message

def factor_message_down(incoming_message, cpt_B_A):
    """
    The factor mode message is the product of all received messages on the other edges, multiplied by the conditional probability distribution for the factor and summed over all variables except the one being sent to.
    :param incoming_message:
    :param cpt_B_A:
    :return:
    """
    # compute sum_B( P(B|A) )
    factor_message = utils.get_second_level_keys(cpt_B_A)
    for key_A, items_B in cpt_B_A.items():
        for key_B, p_B_A in items_B.items():
            factor_message[key_B] += p_B_A * incoming_message[key_A]

    message = utils.normalise(factor_message)
    print("Message down:", message)

    return message

def node_message(messages):
    """
    The variable node message is the product of all received messages on the other edges
    :param messages: incoming variable node messages
    :return: variable node belief message
    """
    # All messages have the same keys
    belief = utils.get_first_level_keys(messages[0], init_value=1)

    for key in belief.keys():
        for message in messages:
            belief[key] *= message[key]

    # Normalise
    p_A = utils.normalise(belief)
    
    print('Node belief:', p_A)
    return p_A


In [21]:
a = m.sample()
[a['Inflammation']]

['small']

## Gibbs sampling <a class="anchor" id="3.3"></a>

In [31]:
m = lungModel()

# Gibbs inference code
def gibbs_inference_code(I, B, n=100000):

    # Random initialisation
    sample = m.sample()
    inflammations = [sample[I]]
    bacterial_loads = [sample[B]]

    for j in np.arange(1,n):
        # sample from P( bacterial load | (n-1)th inflammation sample )
        point_mass_i = observe(inflammations[j-1], utils.get_first_level_keys(m.cpt_bl_i))
        dist_bl = factor_message_down(point_mass_i, m.cpt_bl_i)
        sample_bl = utils.threeStatesSample(list(dist_bl.values()), list(dist_bl.keys()))
        bacterial_loads.append(sample_bl)

        # use the above bacterial load sample to sample inflammation using P( inflammation | nth bacterial load sample)
        point_mass_bl = observe(bacterial_loads[j], utils.get_second_level_keys(m.cpt_bl_i))
        dist_i = factor_message_up(point_mass_bl, m.cpt_bl_i) # multiply by prior
        # message down from prior
        # compute node belief
        sample_i = utils.threeStatesSample(list(dist_i.values()), list(dist_i.keys()))
        inflammations.append(sample_i)

    return bacterial_loads, inflammations

# Data processing
def get_marginal(name, samples):
    """

    :param name: identifier
    :param samples: a list of values
    :return: distinct values count in list, and related proportions (probability estimate)
    """

    marginal = pd.DataFrame(data={name: samples})
    marginal = pd.DataFrame(data={'count': marginal.value_counts()}).reset_index()
    marginal['P(count)'] = marginal['count']/marginal['count'].sum()
    
    return marginal

In [34]:
# Gibbs sampling implementation for two variables

# Variables
I = 'Inflammation'
B = 'Bacterial load'
    
n_high=1000; n_low=100
b, i = gibbs_inference_code(I, B, n=n_high)
b_fuzzy, i_fuzzy = gibbs_inference_code(I, B, n=n_high)

# Marginal distribution of inflammation
i_marginal = get_marginal(I, i)
i_marginal_fuzzy = get_marginal(I, i_fuzzy)
i_marginal

Observation: {'absent': 1, 'small': 0, 'heavy': 0}
Message down: {'low': 0.6, 'medium': 0.2, 'high': 0.2}
Observation: {'low': 0, 'medium': 1, 'high': 0}
Message up: {'absent': 0.25, 'small': 0.37499999999999994, 'heavy': 0.37499999999999994}
Observation: {'absent': 1, 'small': 0, 'heavy': 0}
Message down: {'low': 0.6, 'medium': 0.2, 'high': 0.2}
Observation: {'low': 0, 'medium': 0, 'high': 1}
Message up: {'absent': 0.16666666666666666, 'small': 0.3333333333333333, 'heavy': 0.4999999999999999}
Observation: {'absent': 0, 'small': 1, 'heavy': 0}
Message down: {'low': 0.3, 'medium': 0.3, 'high': 0.4}
Observation: {'low': 1, 'medium': 0, 'high': 0}
Message up: {'absent': 0.6000000000000001, 'small': 0.30000000000000004, 'heavy': 0.10000000000000002}
Observation: {'absent': 1, 'small': 0, 'heavy': 0}
Message down: {'low': 0.6, 'medium': 0.2, 'high': 0.2}
Observation: {'low': 1, 'medium': 0, 'high': 0}
Message up: {'absent': 0.6000000000000001, 'small': 0.30000000000000004, 'heavy': 0.100000

Unnamed: 0,Inflammation,count,P(count)
0,heavy,354,0.354
1,small,327,0.327
2,absent,319,0.319


In [35]:
i_marginal_fuzzy

Unnamed: 0,Inflammation,count,P(count)
0,heavy,339,0.339
1,absent,337,0.337
2,small,324,0.324


In [None]:
# Marginal distribution of bacterial load
b_marginal = get_marginal(B, b)
b_marginal

In [None]:
plotFigure(i_marginal, I, column=1, title="Marginal distribution for inflammation for "+str(n_high)+" iterations")
plotFigure(i_marginal_fuzzy, I, column=1, title="Marginal distribution for inflammation "+str(n_low)+" iterations")
plotFigure(b_marginal, B, column=1, title="Marginal distribution for inflammation for "+str(n_high)+" iterations")