In [1]:
import random

import numpy as np
import pandas as pd
import inspect
import matplotlib.pyplot as plt
import seaborn as sns
# Load the toothache joint distribution table from CSV and show it
# (the bare name on the last line is rendered as the cell's output).
toothache = pd.read_csv("data/toothache.csv")
toothache

Unnamed: 0,p,cavity,toothache,catch
0,0.108,1,1,1
1,0.012,1,1,0
2,0.016,1,0,1
3,0.064,1,0,0
4,0.072,0,1,1
5,0.008,0,1,0
6,0.144,0,0,1
7,0.576,0,0,0


In [4]:
class Thing:
    """Base class for named objects; the repr is simply the name."""

    def __init__(self, name=''):
        self.name = name

    def __repr__(self):
        return self.name


class Toothache(Thing):
    """Joint probability table loaded from a CSV file, with query helpers.

    The table is expected to hold a probability column ``p`` plus one column
    per random variable; each row gives the probability of one assignment.
    """

    def __init__(self, file_path="data/toothache.csv", name=''):
        super().__init__(name)
        self.table = pd.read_csv(file_path)

    def _rows_matching(self, conditions):
        """Return the table rows for which every (column, value) pair holds."""
        rows = self.table
        for column, value in conditions:
            rows = rows[rows[column] == value]
        return rows

    def _event_probability(self, xs, conditions, require_all):
        """Shared implementation of ``cal_p_and`` and ``cal_p_or``.

        Sums ``p`` over the rows consistent with ``conditions`` in which all
        (``require_all=True``) or any (``require_all=False``) of the ``xs``
        columns are truthy. When conditions are given, the sum is divided by
        the probability mass of the conditions so that the value matches the
        printed ``P(xs|conditions)`` label.
        """
        if conditions is None:
            conditions = []
        evidence = self._rows_matching(conditions)
        if evidence.empty:
            # No row is consistent with the conditions: report 0.0 instead of
            # leaking an empty Series to the caller.
            result = 0.0
        else:
            truth = evidence[list(xs)]
            hit = truth.all(axis=1) if require_all else truth.any(axis=1)
            result = evidence[hit]['p'].sum()
            if conditions:
                # Normalise by P(conditions). Without conditions the table is
                # taken as an already-normalised joint distribution, so the
                # plain sum is returned unchanged.
                evidence_mass = evidence['p'].sum()
                result = result / evidence_mass if evidence_mass > 0 else 0.0
        print("P({}|{})={}".format(xs, conditions, result))
        return result

    # Query a distribution
    def cal_p_cond(self, xs, conditions=None):
        """Return the distribution of the ``xs`` columns given ``conditions``.

        Args:
            xs: list of column names to compute the distribution over.
            conditions: iterable of (column, value) pairs that rows must match;
                ``None`` (the default) means no conditions.

        Returns:
            DataFrame indexed by the values of ``xs`` with a normalised ``p``
            column. If no row matches the conditions, the empty filtered frame
            is returned and nothing is printed.
        """
        if conditions is None:
            conditions = []
        columns = list(xs)
        result = self._rows_matching(conditions)[['p'] + columns]
        if len(result) > 0:
            result = result.groupby(columns).sum()
            # Dividing by the column total turns the summed masses into a
            # distribution conditioned on ``conditions``.
            result = result / result.sum()
            print("P({}|{})={}".format(xs, conditions, result.to_json()))
        return result

    # Query an AND relation; note that the xs columns are treated as booleans
    def cal_p_and(self, xs, conditions=None):
        """Return the probability that ALL ``xs`` columns are truthy.

        Args:
            xs: list of column names, each interpreted as a boolean.
            conditions: iterable of (column, value) pairs to condition on;
                ``None`` (the default) means no conditions.

        Returns:
            The probability as a number. With conditions it is the conditional
            probability given those conditions; 0.0 when no row matches them.
        """
        return self._event_probability(xs, conditions, require_all=True)

    # Query an OR relation; note that the xs columns are treated as booleans
    def cal_p_or(self, xs, conditions=None):
        """Return the probability that AT LEAST ONE ``xs`` column is truthy.

        Args:
            xs: list of column names, each interpreted as a boolean.
            conditions: iterable of (column, value) pairs to condition on;
                ``None`` (the default) means no conditions.

        Returns:
            The probability as a number. With conditions it is the conditional
            probability given those conditions; 0.0 when no row matches them.
        """
        return self._event_probability(xs, conditions, require_all=False)

toothache = Toothache()

# Distributions: each entry is (query columns, list of (column, value) conditions).
distribution_queries = [
    (['cavity'], [('toothache', 1)]),
    (['cavity'], [('toothache', 0)]),
    (['catch'], [('toothache', 1)]),
    (['catch'], [('toothache', 0)]),
    (['cavity', 'catch'], [('toothache', 1)]),
    (['cavity', 'catch'], [('toothache', 0)]),
    (['catch'], [('toothache', 1), ('cavity', 1)]),
    (['catch'], [('toothache', 0), ('cavity', 1)]),
    (['catch'], [('toothache', 0), ('cavity', 0)]),
]
for query_columns, evidence in distribution_queries:
    toothache.cal_p_cond(query_columns, evidence)

# AND relation
# NOTE(review): 'cavity' is listed twice here; possibly 'catch' was intended — confirm.
toothache.cal_p_and(['cavity', 'toothache', 'cavity'])
# OR relation (as the last expression, its value is shown as the cell output)
toothache.cal_p_or(['cavity', 'toothache'])


P(['cavity']|[('toothache', 1)])={"p":{"0":0.4,"1":0.6}}
P(['cavity']|[('toothache', 0)])={"p":{"0":0.9,"1":0.1}}
P(['catch']|[('toothache', 1)])={"p":{"0":0.1,"1":0.9}}
P(['catch']|[('toothache', 0)])={"p":{"0":0.8,"1":0.2}}
P(['cavity', 'catch']|[('toothache', 1)])={"p":{"(0, 0)":0.04,"(0, 1)":0.36,"(1, 0)":0.06,"(1, 1)":0.54}}
P(['cavity', 'catch']|[('toothache', 0)])={"p":{"(0, 0)":0.72,"(0, 1)":0.18,"(1, 0)":0.08,"(1, 1)":0.02}}
P(['catch']|[('toothache', 1), ('cavity', 1)])={"p":{"0":0.1,"1":0.9}}
P(['catch']|[('toothache', 0), ('cavity', 1)])={"p":{"0":0.8,"1":0.2}}
P(['catch']|[('toothache', 0), ('cavity', 0)])={"p":{"0":0.8,"1":0.2}}
P(['cavity', 'toothache', 'cavity']|[])=0.12
P(['cavity', 'toothache']|[])=0.28


0.28

In [2]:
# Generalisation: what does the joint distribution table look like for the wumpus world?
# Assume each square independently contains a pit with probability p (0.2 by default).
# Only guesses about pits are modelled, so a 4x4 board gives 16 variable columns
# (plus the probability column). With 0/1 values the table has 2^16 = 65536 rows.
def gen_pit_table(p=0.2, n=4):
    """Build the full joint distribution over pit placements on an n x n board.

    Args:
        p: probability that a single square contains a pit.
        n: side length of the board.

    Returns:
        DataFrame with one 0/1 column per square, labelled ``(x, y)``, and a
        final ``'p'`` column holding the probability of that row's
        configuration. It has ``2 ** (n * n)`` rows; the first row is the
        all-pits configuration.
    """
    columns = [(x, y) for x in range(n) for y in range(n)] + ['p']
    data = []
    # Forward p so that the requested pit probability is actually used.
    recurse_pit_state([], 1, n * n, data, pit_prob=p)
    return pd.DataFrame(data=data, columns=columns)


def recurse_pit_state(state, p, n, pit_arr, pit_prob=0.2):
    """Enumerate every 0/1 assignment of ``n`` squares with its probability.

    Args:
        state: list of 0/1 values chosen so far (not mutated).
        p: probability of the partial assignment in ``state``.
        n: total number of squares to assign.
        pit_arr: output list; each finished assignment is appended to it as
            ``state + [probability]``.
        pit_prob: probability that a square contains a pit (default 0.2).
    """
    if len(state) == n:
        pit_arr.append(state + [p])
        return
    # Branch on the next square: pit first, then no pit.
    recurse_pit_state(state + [1], p * pit_prob, n, pit_arr, pit_prob)
    recurse_pit_state(state + [0], p * (1 - pit_prob), n, pit_arr, pit_prob)

# Build the pit table with the default arguments and show it as the cell output.
pit = gen_pit_table()
pit

Unnamed: 0,"(0, 0)","(0, 1)","(0, 2)","(0, 3)","(1, 0)","(1, 1)","(1, 2)","(1, 3)","(2, 0)","(2, 1)","(2, 2)","(2, 3)","(3, 0)","(3, 1)","(3, 2)","(3, 3)",p
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,6.553600e-12
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2.621440e-11
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,2.621440e-11
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1.048576e-10
4,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,2.621440e-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65531,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,7.036874e-03
65532,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1.759219e-03
65533,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,7.036874e-03
65534,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,7.036874e-03
