### Expert Knowledge / Assumptions for Probabilities

1. Female MS patients experience more pain than men.
2. On the initial day, the probability that a participant's symptoms are "Not Recorded" is very low.
3. Patients in the younger age group (< 30) have more severe symptoms than patients in the older age group (> 60). This is because older patients have learnt to manage their symptoms, whereas younger patients are more active and experience more pain.
4. Patients in the middle age group (30 to 60) have a moderate level of pain.

### Libraries

In [1]:
import pickle
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
import networkx as nx
import pylab as plt
from pgmpy.inference import VariableElimination
import pdb
import seaborn as sns
import matplotlib.pyplot as plt   
from sklearn.metrics import confusion_matrix
from scipy.stats import truncnorm
from networkx.drawing.nx_agraph import graphviz_layout
import numpy as np
from sklearn.metrics import precision_score, recall_score, balanced_accuracy_score, accuracy_score
import os

### Functions

In [4]:
import pandas as pd
from scipy.stats import truncnorm
import numpy as np

In [5]:
def generate_ISP_samples():
    '''
    Draw one randomised set of probability tables for age, gender, pain and fatigue.

    The expert-chosen prior weights for gender and age are perturbed and
    re-normalised by sample_distribution; the pain and fatigue tables are
    built by create_pain_df and create_fatigue_df for every gender/age pair.

    Returns:
        tuple: (age_prob, gender_prob, pain_df, fatigue_df) where the first
        two are dicts mapping category label -> sampled probability and the
        last two are the DataFrames returned by the table builders.
    '''
    # Expert prior weights, keyed by category label (insertion order matters:
    # it fixes the order of the sampled values and of the table rows).
    age_priors = {
        '60 and Older': 0.1,
        'Between 30 to 60': 0.8,
        'Younger than 30': 0.1,
    }
    gender_priors = {'Female': 0.7, 'Male': 0.3}
    symptom_levels = ['High', 'Low', 'Medium', 'None', 'Not Recorded']

    age_labels = list(age_priors)
    gender_labels = list(gender_priors)

    # Gender is sampled before age so the sequence of random draws is unchanged.
    gender_sample = sample_distribution(list(gender_priors.values()))
    gender_prob = dict(zip(gender_labels, gender_sample))

    age_sample = sample_distribution(list(age_priors.values()))
    age_prob = dict(zip(age_labels, age_sample))

    # Pain and fatigue share the same level labels; each builder gets its own list.
    pain_df = create_pain_df(gender_labels, age_labels, list(symptom_levels))
    fatigue_df = create_fatigue_df(gender_labels, age_labels, list(symptom_levels))

    return age_prob, gender_prob, pain_df, fatigue_df


def sample_truncnorm(prob):
    '''
    Picks random value from truncated normal distribution

    The distribution lives on [0, prob*10], is centred on the midpoint of
    that interval and has a standard deviation of a quarter of the midpoint,
    so the truncation points sit four standard deviations from the mean.

    Args:
        prob: non-negative weight that scales the sampling interval.

    Returns:
        float: one random draw in [0, prob*10]; exactly 0.0 when prob is 0.

    Raises:
        ValueError: if prob is negative.
    '''
    if prob < 0:
        raise ValueError("prob must be non-negative, got %r" % (prob,))
    if prob == 0:
        # The interval collapses to the single point 0; without this guard
        # sd would be 0 and the bound computation below would divide 0 by 0.
        return 0.0

    low = 0.0
    upp = prob * 10
    mean = (low + upp) / 2
    sd = mean / 4
    # truncnorm expects its bounds in units of standard deviations from loc.
    lower_z = (low - mean) / sd
    upper_z = (upp - mean) / sd
    return truncnorm(lower_z, upper_z, loc=mean, scale=sd).rvs()

def sample_distribution(arr):
    '''
    Perturb a list of weights and return them as a normalised distribution.

    Each weight is replaced by one draw from sample_truncnorm, in order, and
    the draws are then divided by their total.

    Args:
        arr: sequence of weights, one per category.

    Returns:
        numpy.ndarray: float array of the same length as arr holding the
        draws divided by their sum.
    '''
    draws = np.array([sample_truncnorm(weight) for weight in arr], dtype=float)
    # Normalize:
    total = draws.sum()
    return draws / total
     
def create_df(gender,age,pain_keys,pain_val,col):
    '''
    Build the conditional-probability rows for one gender/age combination.

    Args:
        gender: gender label repeated on every row.
        age: age-group label repeated on every row.
        pain_keys: symptom level labels, one per row.
        pain_val: prior weights, one per level; they are perturbed and
            normalised by sample_distribution before being stored.
        col: the four column names of the resulting frame.

    Returns:
        pandas.DataFrame: one row per level with the columns named in col.

    Raises:
        ValueError: if pain_keys and pain_val differ in length.
    '''
    n_levels = len(pain_keys)
    # zip() would silently drop the surplus entries, leaving a table whose
    # probabilities no longer sum to 1, so reject the mismatch up front
    # (and before any random numbers are consumed).
    if len(pain_val) != n_levels:
        raise ValueError(
            "expected %d weights for %d levels, got %d"
            % (n_levels, n_levels, len(pain_val))
        )

    pain_sample = sample_distribution(pain_val)
    # Repeat the labels once per level instead of a hard-coded five times.
    glist = [gender] * n_levels
    alist = [age] * n_levels
    data = list(zip(glist, alist, pain_keys, pain_sample))
    return pd.DataFrame(data=data, columns=col)


def create_pain_df(gender_keys, age_keys, pain_keys):
    '''
    Build the pain conditional-probability table for every gender/age pair.

    Args:
        gender_keys: gender labels to iterate over (outer loop).
        age_keys: age-group labels to iterate over (inner loop).
        pain_keys: pain level labels, in the same order as the weights below.

    Returns:
        pandas.DataFrame: columns Gender, Age, Pain, Conditional_Probability,
        with one group of rows per known gender/age pair in iteration order.
        Pairs without an entry in the weight table are skipped; if nothing
        matches, an empty frame with those columns is returned.
    '''
    col = ['Gender','Age','Pain','Conditional_Probability']

    # Expert prior weights per (gender, age), ordered like pain_keys.
    # They are relative weights: create_df samples around them and the
    # sampled values are normalised, so a row need not sum to exactly 1.
    pain_weights = {
        ('Female', '60 and Older'):     [0.02,0.30,0.08,0.57,0.02],
        ('Female', 'Between 30 to 60'): [0.02,0.53,0.15,0.27,0.03],
        ('Female', 'Younger than 30'):  [0.25,0.30,0.25,0.12,0.08],
        ('Male', '60 and Older'):       [0.02,0.20,0.05,0.70,0.03],
        ('Male', 'Between 30 to 60'):   [0.10,0.40,0.30,0.20,0.10],
        ('Male', 'Younger than 30'):    [0.20,0.15,0.20,0.37,0.08],
    }

    frames = []
    for gender in gender_keys:
        for age in age_keys:
            pain_val = pain_weights.get((gender, age))
            if pain_val is None:
                continue
            frames.append(create_df(gender,age,pain_keys,pain_val,col))

    if not frames:
        return pd.DataFrame(columns = col)
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);
    # this also keeps every group regardless of the order of gender_keys.
    return pd.concat(frames, ignore_index=True)

def create_fatigue_df(gender_keys, age_keys, fatigue_keys):
    '''
    Build the fatigue conditional-probability table for every gender/age pair.

    Args:
        gender_keys: gender labels to iterate over (outer loop).
        age_keys: age-group labels to iterate over (inner loop).
        fatigue_keys: fatigue level labels, in the same order as the weights
            below.

    Returns:
        pandas.DataFrame: columns Gender, Age, Fatigue,
        Conditional_Probability, with one group of rows per known gender/age
        pair in iteration order. Pairs without an entry in the weight table
        are skipped; if nothing matches, an empty frame with those columns
        is returned.
    '''
    col = ['Gender','Age','Fatigue','Conditional_Probability']

    # Expert prior weights per (gender, age), ordered like fatigue_keys.
    # They are relative weights: create_df samples around them and the
    # sampled values are normalised afterwards.
    fatigue_weights = {
        ('Female', '60 and Older'):     [0.03,0.26,0.16,0.53,0.02],
        ('Female', 'Between 30 to 60'): [0.04,0.40,0.35,0.18,0.03],
        ('Female', 'Younger than 30'):  [0.40,0.22,0.20,0.10,0.08],
        ('Male', '60 and Older'):       [0.03,0.15,0.10,0.69,0.03],
        ('Male', 'Between 30 to 60'):   [0.10,0.30,0.25,0.25,0.10],
        ('Male', 'Younger than 30'):    [0.40,0.10,0.30,0.12,0.08],
    }

    frames = []
    for gender in gender_keys:
        for age in age_keys:
            fatigue_val = fatigue_weights.get((gender, age))
            if fatigue_val is None:
                continue
            frames.append(create_df(gender,age,fatigue_keys,fatigue_val,col))

    if not frames:
        return pd.DataFrame(columns = col)
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);
    # this also keeps every group regardless of the order of gender_keys.
    return pd.concat(frames, ignore_index=True)

### Sample from Distribution

In [10]:
# Draw one randomised set of age/gender priors and pain/fatigue tables.
# The values differ on every run because they are sampled, not fixed.
age_prob, gender_prob, pain_df, fatigue_df = generate_ISP_samples()

In [29]:
age_prob

{'60 and Older': 0.0573552775892085,
 'Between 30 to 60': 0.8784413223662683,
 'Younger than 30': 0.06420340004452331}

In [12]:
gender_prob

{'Female': 0.655047775884212, 'Male': 0.34495222411578796}

In [32]:
gender_prob.values()

dict_values([0.655047775884212, 0.34495222411578796])

In [18]:
pain_df

Unnamed: 0,Gender,Age,Pain,Conditional_Probability
0,Female,60 and Older,High,0.018258
1,Female,60 and Older,Low,0.36741
2,Female,60 and Older,Medium,0.077223
3,Female,60 and Older,None,0.513573
4,Female,60 and Older,Not Recorded,0.023536
5,Female,Between 30 to 60,High,0.019841
6,Female,Between 30 to 60,Low,0.568154
7,Female,Between 30 to 60,Medium,0.100359
8,Female,Between 30 to 60,None,0.26731
9,Female,Between 30 to 60,Not Recorded,0.044337


In [14]:
fatigue_df

Unnamed: 0,Gender,Age,Fatigue,Conditional_Probability
0,Female,60 and Older,High,0.031344
1,Female,60 and Older,Low,0.318841
2,Female,60 and Older,Medium,0.147449
3,Female,60 and Older,None,0.492376
4,Female,60 and Older,Not Recorded,0.009989
5,Female,Between 30 to 60,High,0.033906
6,Female,Between 30 to 60,Low,0.367339
7,Female,Between 30 to 60,Medium,0.500264
8,Female,Between 30 to 60,None,0.07761
9,Female,Between 30 to 60,Not Recorded,0.020881


In [25]:
# Row positions of the 'High' pain entries for male patients (one per age group).
np.where((pain_df['Pain']=='High') & (pain_df['Gender']=='Male'))

(array([15, 20, 25]),)