<a href="https://colab.research.google.com/github/vivek0464/Credit-Card-Fraud-Detection-and-Robot-Localization/blob/main/Bayesian_Network_and_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# NOTE(review): the cells below use DiscreteDistribution, State and
# BayesianNetwork.bake(), which look like the pre-1.0 pomegranate API; this
# unpinned install may fetch a newer, incompatible release -- TODO confirm and
# pin the version if so.
!pip install pomegranate

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
from pomegranate import *
import enum

# convention: 0 - Travel, 1 - OwnsDevice, 2 - Fraud, 3 - ForeignPurchase , 4 - OnlinePurchase
@enum.unique
class RV(enum.Enum):
    """Index of each random variable inside a Gibbs-sampling state vector.

    The member value is the position of that variable in the 5-element
    state list used by the sampler (e.g. ``X[RV.Fraud.value]``).
    """

    Travel = 0
    OwnsDevice = 1
    Fraud = 2
    ForeignPurchase = 3
    OnlinePurchase = 4

# Prior distributions of the two root nodes ('T' = true, 'F' = false).
Travel = DiscreteDistribution ({'T': 0.05, 'F': 0.95})

OwnsDevice = DiscreteDistribution ({'T': 0.7, 'F': 0.3})

# Conditional probability tables. Each row reads
# [parent value(s)..., value of this node, probability]; the trailing list
# names the parent distributions in the same order as the leading columns.

# P(ForeignPurchase | Travel)
ForeignPurchase = ConditionalProbabilityTable(
    [['T', 'T', 0.88],
    ['T', 'F', 0.12],
    ['F', 'T', 0.0001],
    ['F', 'F', 0.9999]], [Travel])

# P(OnlinePurchase | OwnsDevice)
OnlinePurchase = ConditionalProbabilityTable(
    [['T', 'T', 0.4],
    ['T', 'F', 0.6],
    ['F', 'T', 0.05],
    ['F', 'F', 0.95]], [OwnsDevice])

# P(Fraud | Travel, OnlinePurchase)
Fraud =  ConditionalProbabilityTable(
    [['T', 'T', 'T', 0.995],
    ['T', 'T', 'F', 0.005],
    ['T', 'F', 'T', 0.85],
    ['T', 'F', 'F', 0.15],
    ['F', 'T', 'T', 0.8],
    ['F', 'T', 'F', 0.2],
    ['F', 'F', 'T', 0.75],
    ['F', 'F', 'F', 0.25]], [Travel, OnlinePurchase])


# Wrap every distribution in a named node; the names are the keys used when
# passing evidence to the network (e.g. {'Travel': 'T'}).
s1 = State(Travel, name = "Travel")
s2 = State(OwnsDevice, name = "OwnsDevice")
s3 = State(ForeignPurchase, name = "ForeignPurchase")
s4 = State(OnlinePurchase, name = "OnlinePurchase")
s5 = State(Fraud, name = "Fraud")

graph = BayesianNetwork( "Credit card fraud detection system using Bayesian Networks" )
graph.add_states(s1, s2, s3, s4, s5)
# Edges point parent -> child and mirror the parent lists of the CPTs above:
# Travel -> ForeignPurchase, OwnsDevice -> OnlinePurchase,
# Travel -> Fraud, OnlinePurchase -> Fraud.
graph.add_edge(s1, s3)
graph.add_edge(s2, s4)
graph.add_edge(s1, s5)
graph.add_edge(s4, s5)
# Finalize the network before it is queried (presumably required by the
# library prior to predict_proba -- confirm against the pomegranate docs).
graph.bake()

In [None]:
# Exact inference: query the network with the evidence Travel = 'T' and print
# one "<node name>\t<belief>" line per node.
posterior = graph.predict_proba({'Travel' : 'T'})
beliefs = map(str, posterior)
rows = ["{}\t{}".format(node.name, text) for node, text in zip(graph.states, beliefs)]
print("\n".join(rows))

Travel	T
OwnsDevice	{
    "class" : "Distribution",
    "dtype" : "str",
    "name" : "DiscreteDistribution",
    "parameters" : [
        {
            "T" : 0.6999999999999997,
            "F" : 0.3000000000000001
        }
    ],
    "frozen" : false
}
ForeignPurchase	{
    "class" : "Distribution",
    "dtype" : "str",
    "name" : "DiscreteDistribution",
    "parameters" : [
        {
            "T" : 0.8799999999999998,
            "F" : 0.12000000000000013
        }
    ],
    "frozen" : false
}
OnlinePurchase	{
    "class" : "Distribution",
    "dtype" : "str",
    "name" : "DiscreteDistribution",
    "parameters" : [
        {
            "T" : 0.29500000000000015,
            "F" : 0.7049999999999998
        }
    ],
    "frozen" : false
}
Fraud	{
    "class" : "Distribution",
    "dtype" : "str",
    "name" : "DiscreteDistribution",
    "parameters" : [
        {
            "T" : 0.892775,
            "F" : 0.10722500000000014
        }
    ],
    "frozen" : false
}


In [None]:
# Approximate inference using Gibbs Sampling

# CPTs of the Bayesian network, written as percentages and scaled to
# probabilities. Convention: the last axis is indexed (false, true); leading
# axes are the parent values in the order given by the variable name.
pd_of_T = np.array([95., 5.]) / 100
pd_of_OD = np.array([30., 70.]) / 100
# rows: Travel = false / true
pd_of_FP_given_T = np.array([
    [99.99, 0.01],
    [12.0, 88.0],
]) / 100
# rows: OwnsDevice = false / true
pd_of_OP_given_OD = np.array([
    [95.0, 5.],
    [60.0, 40.0],
]) / 100
# axis 0: Travel, axis 1: OnlinePurchase, axis 2: Fraud
pd_of_FR_given_T_and_OP = np.array([
    [[25., 75.], [20., 80.]],
    [[15.0, 85.], [0.5, 99.5]],
]) / 100

# We need the conditional probabilities of each random variable given the values of the others, which can be computed from the CPTs and the variable's Markov blanket.
# We use the formula below to compute the probability distribution of a random variable given the values of the random variables in its Markov blanket:
# P(X | others) = alpha * P(X | Parents(X)) * Product over Y in Children(X) of P(Y | Parents(Y))

# Markov Blanket = {FP, FR, OP}, P(T | FP, FR, OP) = alpha * P(T) * P(FP | T) * P(FR | T, OP)
def pd_of_T_given_mkb(value_of_FP, value_of_FR, value_of_OP):
  """Unnormalized distribution of Travel given its Markov blanket.

  Computes P(T) * P(FP | T) * P(FR | T, OP) for T = false and T = true.

  Args:
    value_of_FP: current ForeignPurchase value (integer 0 or 1).
    value_of_FR: current Fraud value (integer 0 or 1).
    value_of_OP: current OnlinePurchase value (integer 0 or 1).

  Returns:
    Length-2 numpy array indexed (false, true); not normalized.
  """
  # Likelihood of the observed children for each candidate Travel value.
  fp_likelihood = pd_of_FP_given_T[:, value_of_FP]
  fr_likelihood = pd_of_FR_given_T_and_OP[:, value_of_OP, value_of_FR]
  return pd_of_T * fp_likelihood * fr_likelihood

# Markov Blanket = {OP}, P(OD | OP) = alpha * P(OD) * P(OP | OD)
def pd_of_OD_given_mkb(value_of_OP):
  """Unnormalized distribution of OwnsDevice given its Markov blanket.

  Computes P(OD) * P(OP | OD) for OD = false and OD = true.

  Args:
    value_of_OP: current OnlinePurchase value (integer 0 or 1).

  Returns:
    Length-2 numpy array indexed (false, true); not normalized.
  """
  op_likelihood = pd_of_OP_given_OD[:, value_of_OP]
  return pd_of_OD * op_likelihood

# Markov Blanket = {T}, P(FP | T) = pd_of_FP_given_T
def pd_of_FP_given_mkb(value_of_T):
  """Distribution of ForeignPurchase given its Markov blanket (Travel only).

  Args:
    value_of_T: current Travel value (integer 0 or 1).

  Returns:
    The row of ``pd_of_FP_given_T`` for that Travel value, indexed
    (false, true).
  """
  fp_row = pd_of_FP_given_T[value_of_T, :]
  return fp_row

# Markov Blanket = {FR, T, OD}, P(OP | FR, T, OD) = alpha * P(OP | OD) * P(FR | T, OP)
def pd_of_OP_given_mkb(value_of_FR , value_of_T , value_of_OD) :
  """Unnormalized distribution of OnlinePurchase given its Markov blanket.

  Computes P(OP | OD) * P(FR | T, OP) for OP = false and OP = true.

  Args:
    value_of_FR: current Fraud value (integer 0 or 1).
    value_of_T: current Travel value (integer 0 or 1).
    value_of_OD: current OwnsDevice value (integer 0 or 1).

  Returns:
    Length-2 numpy array indexed (false, true); not normalized.
  """
  # Likelihood of the *observed Fraud value* under OP = false and OP = true.
  # The Fraud table is indexed [Travel][OnlinePurchase][Fraud], so the last
  # index must be value_of_FR (it was previously value_of_OD, which biased
  # the sampled OnlinePurchase distribution).
  fr_likelihood = pd_of_FR_given_T_and_OP[value_of_T, :, value_of_FR]
  return pd_of_OP_given_OD[value_of_OD] * fr_likelihood

# Markov Blanket = {T, OP}, P(FR | T, OP) = pd_of_FR_given_T_and_OP
def pd_of_FR_given_mkb(value_of_T, value_of_OP):
  """Distribution of Fraud given its Markov blanket (Travel, OnlinePurchase).

  Args:
    value_of_T: current Travel value (integer 0 or 1).
    value_of_OP: current OnlinePurchase value (integer 0 or 1).

  Returns:
    The entry of ``pd_of_FR_given_T_and_OP`` for those parent values,
    indexed (false, true).
  """
  fr_row = pd_of_FR_given_T_and_OP[value_of_T, value_of_OP]
  return fr_row


In [None]:
# Sample using a given probability distribution
def sample_using_pd(prob_false , prob_true):
  """Draw a 0/1 value from a two-outcome, possibly unnormalized, distribution.

  Args:
    prob_false: weight of outcome 0.
    prob_true: weight of outcome 1.

  Returns:
    1 with probability prob_true / (prob_false + prob_true), otherwise 0.
  """
  total = prob_false + prob_true
  # Normalized probability of the "true" outcome.
  threshold = prob_true / total
  draw = np.random.uniform(0.0 , 1.0)
  return 1 if draw <= threshold else 0

# Sample a new value for the i-th random variable from its probability distribution given the current values of the others
def sample_rv (i , X) :
  """Sample a new value for the i-th random variable given the current state.

  Args:
    i: index of the variable to resample, one of the ``RV`` member values.
    X: current state, indexed as
       0 - Travel, 1 - OwnsDevice, 2 - Fraud, 3 - ForeignPurchase,
       4 - OnlinePurchase. It is read but not modified.

  Returns:
    The sampled value, 0 or 1.

  Raises:
    ValueError: if ``i`` is not a valid ``RV`` value.
  """
  value_of_T, value_of_OD, value_of_FR, value_of_FP, value_of_OP = X[0], X[1], X[2], X[3], X[4]
  if i == RV.Travel.value:
    pd_of_ith_given_others = pd_of_T_given_mkb(value_of_FP , value_of_FR , value_of_OP)
  elif i == RV.OwnsDevice.value:
    pd_of_ith_given_others = pd_of_OD_given_mkb(value_of_OP)
  elif i == RV.Fraud.value:
    pd_of_ith_given_others = pd_of_FR_given_mkb(value_of_T , value_of_OP)
  elif i == RV.ForeignPurchase.value:
    pd_of_ith_given_others = pd_of_FP_given_mkb(value_of_T)
  elif i == RV.OnlinePurchase.value:
    pd_of_ith_given_others = pd_of_OP_given_mkb(value_of_FR , value_of_T , value_of_OD)
  else:
    # Explicit error instead of assert, which is stripped under `python -O`.
    raise ValueError("unknown random variable index: {!r}".format(i))

  # Draw exactly once; an extra draw whose result is discarded only wastes
  # random numbers.
  return sample_using_pd(pd_of_ith_given_others[0] , pd_of_ith_given_others[1])

def gibbs_sampling(iterations = 10000, evidence_set = None) :
  """Generate a chain of states with random-scan Gibbs sampling.

  Args:
    iterations: number of sampling steps to perform.
    evidence_set: optional mapping {variable index: fixed value (0 or 1)}
      using the ``RV`` index convention. Evidence variables keep their value
      for the whole run. ``None`` (the default) means no evidence.

  Returns:
    A list of ``iterations + 1`` states (the initial state followed by the
    state after every step). Each state is an independent 5-element list
    ordered 0 - Travel, 1 - OwnsDevice, 2 - Fraud, 3 - ForeignPurchase,
    4 - OnlinePurchase. No burn-in samples are discarded.
  """
  if evidence_set is None:
    evidence_set = {}
  num_vars = 5

  # Variables that are not fixed by evidence are the only ones resampled.
  free_vars = [var for var in range(num_vars) if var not in evidence_set]

  # Initialize the state, then overwrite the evidence variables.
  X = [0 , 1 , 0 , 1 , 1]
  for var in range(num_vars):
    if var in evidence_set:
      X[var] = evidence_set[var]

  if not free_vars:
    # Everything is observed: the chain never moves. random.choice would raise
    # IndexError on the empty list, so return the constant chain directly.
    return [list(X) for _ in range(max(iterations, 0) + 1)]

  samples = [list(X)]
  for _ in range(iterations):
    # Pick one non-evidence variable uniformly at random and resample it.
    i = random.choice(free_vars)
    X[i] = sample_rv(i , X)
    samples.append(list(X))
  return samples


def inference(evidence_set={}):
    """Estimate and print the marginal of every variable via Gibbs sampling.

    Runs ``gibbs_sampling`` for 2,000,000 iterations with the given evidence
    and prints, for each variable, the fraction of collected samples in which
    it is 1 (True) and the complementary fraction (False).

    Args:
        evidence_set: mapping {variable index: fixed value (0 or 1)} using the
            convention 0 - Travel, 1 - OwnsDevice, 2 - Fraud,
            3 - ForeignPurchase, 4 - OnlinePurchase.
    """
    max_iterations = 2000000
    samples = gibbs_sampling(iterations=max_iterations, evidence_set=evidence_set)
    total = len(samples)

    # true_counts[k] = number of samples in which variable k equals 1.
    true_counts = [0] * 5
    for state in samples:
        for idx in range(5):
            true_counts[idx] += state[idx]

    # Show the evidence with readable names and boolean values.
    named_evidence = {RV(k).name: bool(v) for k, v in evidence_set.items()}
    print("Given evidence set: " + str(named_evidence))

    labels = ("Travel", "OwnsDevice", "Fraud", "ForeignPurchase", "OnlinePurchase")
    for idx, label in enumerate(labels):
        p_true = true_counts[idx] * 1.00 / total
        print(label + ": True = " + str(p_true) + ", False = " + str(1 - p_true))

In [None]:
# for query 1, when there is no evidence
inference()

Given evidence set: {}
Travel: True = 0.05063297468351266, False = 0.9493670253164873
OwnsDevice: True = 0.7028461485769257, False = 0.29715385142307427
Fraud: True = 0.7725806137096931, False = 0.2274193862903069
ForeignPurchase: True = 0.04471047764476118, False = 0.9552895223552388
OnlinePurchase: True = 0.3047273476363262, False = 0.6952726523636739


In [None]:
# for query 2, when OwnsDevice = true is given
inference(evidence_set={RV.OwnsDevice.value:1})

Given evidence set: {'OwnsDevice': True}
Travel: True = 0.04892997553501223, False = 0.9510700244649878
OwnsDevice: True = 1.0, False = 0.0
Fraud: True = 0.7770676114661943, False = 0.2229323885338057
ForeignPurchase: True = 0.04315897842051079, False = 0.9568410215794892
OnlinePurchase: True = 0.41714679142660427, False = 0.5828532085733957


In [None]:
# for query 3, when  Travel = true  and OwnsDevice = true is given
inference(evidence_set={RV.Travel.value:1, RV.OwnsDevice.value:1})

Given evidence set: {'Travel': True, 'OwnsDevice': True}
Travel: True = 1.0, False = 0.0
OwnsDevice: True = 1.0, False = 0.0
Fraud: True = 0.9134255432872284, False = 0.08657445671277164
ForeignPurchase: True = 0.8802490598754701, False = 0.11975094012452991
OnlinePurchase: True = 0.4374237812881094, False = 0.5625762187118906
