## Reinforcement Learning Environment

In [144]:
import csv
import dataclasses
from typing import Dict, Tuple, List

import gym
from gym import spaces
import numpy as np
import pygame

In [145]:
@dataclasses.dataclass
class Transition:
    """Record of a single environment step.

    Env.step builds one of these from the state before the action, the
    action itself, the state after the answer was recorded, and the reward.
    """
    state: np.ndarray  # state vector before the action was applied
    action: str  # symptom that was asked about
    next_state: np.ndarray  # state vector after the patient's answer was recorded
    reward: float  # reward computed by the environment after the step

class Env:
    """Simulated patient that answers symptom questions for one fixed condition.

    The state vector is the concatenation of
      * one prior probability per condition (currently a random placeholder
        standing in for a CNN output), and
      * one entry per askable symptom: 0 = not asked yet, 1 = patient reports
        the symptom, -1 = patient denies the symptom.

    Conditions and their symptom probabilities are loaded from a CSV file whose
    first column is the condition name and whose second column is a
    comma-separated list such as ``pain (0.318), fever (0.119)``.
    """
    _condition_symptom_probabilities: Dict[str, Dict[str, float]]  # condition -> {symptom: probability}
    _actions: set[str]  # every symptom that appears for any condition
    _action_index: Dict[str, int]  # symptom -> position inside the symptom part of the state
    _condition_index: Dict[str, int]  # condition -> position inside the prior part of the state
    _init_state: np.ndarray
    _current_state: np.ndarray
    _img: np.ndarray
    _condition: str
    _symptoms_of_condition: Dict[str, float]  # symptoms (and probabilities) of the simulated condition

    # Likelihood factor applied when an answered symptom is not listed for a condition.
    # NOTE(review): this factor is applied for both "yes" and "no" answers, so a correct
    # "no" for an unlisted symptom is penalised as well - confirm this is intended.
    _UNLISTED_SYMPTOM_LIKELIHOOD = 0.0001

    def __init__(self,
                 img: np.ndarray,
                 condition: str,
                 knowledge_graph_path: str = 'HealthKnowledgeGraph.csv',
                ) -> None:
        """Load the knowledge graph and set up the initial state.

        Args:
            img: image associated with the patient; stored but not used yet.
            condition: condition the simulated patient actually has.
            knowledge_graph_path: CSV file with the condition/symptom table.

        Raises:
            ValueError: if ``condition`` does not occur in the knowledge graph.
        """
        self._img = img
        self._condition = condition

        # init condition_symptom_probabilities from health knowledge graph
        self._condition_symptom_probabilities = dict()
        with open(knowledge_graph_path, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            next(reader, None)  # skip header (tolerates an empty file)
            for row in reader:
                if not row:
                    continue  # csv yields [] for blank lines
                symptoms = dict()
                for symptom_prob in row[1].split(','):
                    # examples for symptom_prob: pain (0.318), fever (0.119) or swelling (0.112)
                    # rpartition keeps symptom names that themselves contain '(' intact
                    name, _, remainder = symptom_prob.rpartition('(')
                    symptoms[name.strip()] = float(remainder.split(')')[0])
                self._condition_symptom_probabilities[row[0]] = symptoms

        # check if condition is valid (uses the stored value: the loop above must not shadow it)
        if self._condition not in self._condition_symptom_probabilities:
            raise ValueError('Unknow Condition: ' + str(self._condition) + '. Please choose one of the following: ' + str(self._condition_symptom_probabilities.keys()))

        # init symptoms_of_condition for easier access
        self._symptoms_of_condition = dict(self._condition_symptom_probabilities[self._condition])

        # init actions: the union of all symptoms over all conditions
        self._actions = set()
        for symptoms in self._condition_symptom_probabilities.values():
            self._actions.update(symptoms)

        # Freeze the positions once so every method agrees on the layout and lookups are O(1).
        # The order equals list(self._actions), which is stable while the set is unchanged.
        self._action_index = {symptom: idx for idx, symptom in enumerate(self._actions)}
        self._condition_index = {name: idx for idx, name in enumerate(self._condition_symptom_probabilities)}

        # init init_state = vector with cnn output (probabilities per condition) and history of asked symptoms (0=not asked, 1=symptom is present, -1=symptom is not present)
        visual_prior = np.random.uniform(size=len(self._condition_symptom_probabilities))  # TODO: replace with cnn output
        self._init_state = np.concatenate((visual_prior, np.zeros(len(self._actions))), axis=0)
        # copy: step() writes into _current_state in place and must never touch _init_state
        self._current_state = self._init_state.copy()

    def posterior_of_condition(self, condition: str) -> float:
        """Return the unnormalised posterior (likelihood * prior) of ``condition``.

        The likelihood is the product over all answered symptoms of the
        probability of the given answer under ``condition``.

        Raises:
            KeyError: if ``condition`` is not in the knowledge graph.
        """
        #TODO: make it faster with matrix multiplication
        symptom_probs = self._condition_symptom_probabilities[condition]
        offset = len(self._condition_symptom_probabilities)
        likelihood = 1
        for symptom, idx in self._action_index.items():
            patient_answer = self._current_state[offset + idx]
            if patient_answer == 0:
                continue  # symptom was never asked
            if symptom not in symptom_probs:
                likelihood *= self._UNLISTED_SYMPTOM_LIKELIHOOD  # TODO: laplace smoothing correct?
            elif patient_answer == 1:
                likelihood *= symptom_probs[symptom]
            elif patient_answer == -1:
                likelihood *= (1 - symptom_probs[symptom])

        prior = self._current_state[self._condition_index[condition]]
        return likelihood * prior

    def reward(self) -> float:
        """Return the unnormalised posterior of the patient's true condition."""
        #TODO: Is it a problem when the reward gets smaller and smaller?
        return self.posterior_of_condition(self._condition)

    def has_symptom(self, symptom: str) -> bool:
        """Sample the patient's answer for ``symptom``.

        Symptoms not listed for the true condition are always denied; listed
        symptoms are reported with their listed probability.
        """
        probability = self._symptoms_of_condition.get(symptom)
        if probability is None:
            return False
        phi = np.random.uniform()
        return phi <= probability

    def step(self, action: str) -> "Transition":
        """Ask about one symptom, record the answer and return the transition.

        Raises:
            ValueError: if ``action`` is not a known symptom.
        """
        #check if action is valid
        if action not in self._actions:
            raise ValueError('Unknow Action: ' + action + '. Please choose one of the following: ' + str(self._actions))

        old_state = self._current_state.copy()
        slot = len(self._condition_symptom_probabilities) + self._action_index[action]
        self._current_state[slot] = 1 if self.has_symptom(action) else -1

        # hand out a copy so later steps cannot rewrite this transition's next_state
        return Transition(old_state, action, self._current_state.copy(), self.reward())

    def reset(self) -> None:
        """Restore the state the environment had right after construction."""
        self._current_state = self._init_state.copy()

### Test cases

In [146]:
# Sanity check of the simulated patient: the observed frequency of 'lump'
# over many draws should be close to the probability listed for abscess.
myEnv = Env(np.array([]), 'abscess')
print("Symptoms for abscess:")
print(myEnv._condition_symptom_probabilities['abscess'])
n = 10000
prob = sum(1 for _ in range(n) if myEnv.has_symptom('lump'))
print("\nProbability of lump: " + str(prob/n))

Symptoms for abscess:
{'pain': 0.318, 'fever': 0.119, 'swelling': 0.112, 'redness': 0.094, 'chills': 0.092, 'infection': 0.083, 'cyst': 0.047, 'tenderness': 0.037, 'rectal pain': 0.026, 'lesion': 0.025, 'lump': 0.023, 'sore throat': 0.021, 'facial swelling': 0.016, 'pimple': 0.016, 'discomfort': 0.014, 'difficulty swallowing': 0.013, 'cavity': 0.013, 'night sweats': 0.007, 'severe pain': 0.007, 'abdominal pain': 0.007, 'painful swallowing': 0.007, 'back pain': 0.006}

Probability of lump: 0.0244


In [152]:
# Ask about three symptoms in turn, print each recorded answer next to the
# symptom's listed probability, then print the resulting reward.
myEnv = Env(np.array([]), 'abscess')
abscess_probs = myEnv._condition_symptom_probabilities['abscess']
n_conditions = len(myEnv._condition_symptom_probabilities.keys())
action_order = list(myEnv._actions)  # step() never changes the action set, so this order stays valid

print("prior of condition:")
print(myEnv._current_state[list(myEnv._condition_symptom_probabilities.keys()).index("abscess")])

myEnv.step('pain')
result = myEnv._current_state[n_conditions + action_order.index('pain')]
print("Probability of pain: " + str(abscess_probs['pain']))
print("Result patient asking if he has pain: " + str(result))

myEnv.step('fever')
result = myEnv._current_state[n_conditions + action_order.index('fever')]
print("Probability of fever: " + str(abscess_probs['fever']))
print("Result patient asking if he has fever: " + str(result))

myEnv.step('swelling')
result = myEnv._current_state[n_conditions + action_order.index('swelling')]
print("Probability of swelling: " + str(abscess_probs['swelling']))
print("Result patient asking if he has swelling: " + str(result))

print("Reward: " + str(myEnv.reward()))

prior of condition:
0.7992065065582021
Probability of pain: 0.318
Result patient asking if he has pain: -1.0
Probability of fever: 0.119
Result patient asking if he has fever: -1.0
Probability of swelling: 0.112
Result patient asking if he has swelling: -1.0
Reward: 0.42641479020233763
