#List of medical symptoms

###Scope
- Download standard symptime list
- Determine synonyms or layman's terms

###Data Sources:

- Wikipedia
 - [List of medical symptoms](http://en.wikipedia.org/wiki/List_of_medical_symptoms)
 - [List of ICD-9 codes 780–799: symptoms, signs, and ill-defined conditions](http://en.wikipedia.org/wiki/List_of_ICD-9_codes_780–799:_symptoms,_signs,_and_ill-defined_conditions)
 - [ICD-10 Chapter XVIII: Symptoms, signs and abnormal clinical and laboratory findings](http://en.wikipedia.org/wiki/ICD-10_Chapter_XVIII:_Symptoms,_signs_and_abnormal_clinical_and_laboratory_findings#.28R00.E2.80.93R09.29_Circulatory_and_respiratory_systems)
- FamilyDoctor
 - [Search by Symptom](http://familydoctor.org/familydoctor/en/health-tools/search-by-symptom.html)

#Imports

In [1]:
from pprint import pprint
from pattern.web import Wikipedia
from pattern.en import wordnet
import pickle

#Constants

#Code

In [2]:
def getTermsFromWikipedia(article_name, selected_sections=['illness', 'the list']):
    article = Wikipedia().search('List of medical symptoms')
    all_terms = []
    for section in article.sections:
        if section.title.strip().lower() not in selected_sections:
            continue

        print repr(' ' * section.level + section.title), 
        print 'Links:', len(section.links)
        all_terms += [str(x.split('(')[0]).lower().strip() for x in section.links]

    print 'total terms:', len(all_terms)
    all_terms = sorted(list(set(all_terms)))  # dedupe
    print 'terms after dedupe', len(all_terms)
    return all_terms

In [3]:
def show_synonyms(wordlist, threshold=1):
    for term in wordlist:
        for s in wordnet.synsets(term):
            if len(s.synonyms) > threshold:
                print "'{}'".format(term), [str(x) for x in s.synonyms if x != term]

In [4]:
def show_self_synonyms(wordlist):
    for term in wordlist:
        for s in wordnet.synsets(term):
            if len(s.synonyms) == 1:
                print term, [str(x) for x in s.synonyms]

In [5]:
def no_synonym_terms(wordlist):
    return [term for term in wordlist if not wordnet.synsets(term)]

#Analysis

In [6]:
all_terms = getTermsFromWikipedia('List of medical symptoms')

u'  Illness' Links: 40
u' The List' Links: 149
total terms: 189
terms after dedupe 167


In [7]:
# terms with no synonym
no_synonym_terms(all_terms)

['-phobia',
 'abdominal pain',
 'acalculia',
 'akathisia',
 'altered state of consciousness',
 'amaurosis fugax',
 'amusia',
 'anosognosia',
 'antepartum haemorrhage',
 'back pain',
 'bloating',
 'blood in stool',
 'bloody show',
 'blurred vision',
 'bradykinesia',
 'bradypnea',
 'cataplexy',
 'chills',
 'chronic pelvic pain',
 "dalrymple's sign",
 'drug overdose',
 'dysdiadochokinesia',
 'dysgeusia',
 'dyspareunia',
 'dystonia',
 'fainting',
 'fecal incontinence',
 'hemiballismus',
 'homicidal ideation',
 'hypoventilation',
 'icd-10',
 'loss of appetite',
 'mucopurulent discharge',
 'muscle weakness',
 'pathologic nystagmus',
 'pleuritic chest pain',
 'proctalgia fugax',
 'prosopagnosia',
 'rectal pain',
 'retrograde ejaculation',
 'self-injury',
 'strangury',
 'suicidal ideation',
 'tachypnea',
 'urinary frequency',
 'weight gain',
 'weight loss']

In [8]:
# Find synonyms
show_synonyms(all_terms, threshold=0)

'abrasion' ['scratch', 'scrape', 'excoriation']
'abrasion' ['attrition', 'corrasion', 'detrition']
'abrasion' ['grinding', 'attrition', 'detrition']
'acrophobia' []
'agnosia' []
'agoraphobia' []
'akinesia' ['akinesis']
'alexia' ['visual aphasia', 'word blindness']
'alopecia' []
'amaurosis' []
'amnesia' ['memory loss', 'blackout']
'anasarca' []
'anhedonia' []
'anomic aphasia' ['nominal aphasia', 'anomia', 'amnesic aphasia', 'amnestic aphasia']
'anorexia' []
'anosmia' []
'anxiety' ['anxiousness']
'anxiety' []
'aphasia' []
'apnea' []
'apraxia' []
'arachnophobia' []
'arrhythmia' ['cardiac arrhythmia']
'asthenia' ['astheny']
'ataxia' ['ataxy', 'dyssynergia', 'motor ataxia']
'belching' []
'belching' ['belch', 'burp', 'burping', 'eructation']
'bleeding' ['hemorrhage', 'haemorrhage']
'blindness' ['sightlessness', 'cecity']
'blister' []
'blister' []
'blister' ['bulla', 'bleb']
'bradycardia' []
'bruise' ['contusion']
'cachexia' ['cachexy', 'wasting']
'chest pain' []
'childbirth' ['childbearing',

#Manually scrub terms from Wikipedia

In [9]:
selected_terms = [
 'pain',
 'bloating',
 'stool',
 'bloody',
 'blurred',
 'chills',
 'fainting',
 'fecal',
 'incontinence',
 'hypoventilation',
 'appetite',
 'muscle',
 'weakness',
 'rectal',
 'injury',
 'urine',
 'weight gain',
 'weight loss']

In [10]:
selected_terms2 = [
'amnesia','memory loss', 'blackout',
'anorexia',
'anxiety', 'anxiousness',
'apnea',
'arrhythmia',
'belching','belch', 'burp', 'burping',
'bleeding','hemorrhage', 'haemorrhage',
'blindness',
'blister',
'bruise','contusion',
'wasting',
'chorea',
'lameness', 'limping', 'gimp',
'claustrophobia',
'constipation', 'irregularity',
'convulsion', 'fit',
'cough', 'coughing', 
'cramp', 'spasm',
'deformity',
'depression', 'low', 'blue'
'perspiration', 'sweating',
'diarrhea','diarrhoea', 'bowels',
'double vision',
'dizziness', 'lightheadedness', 'vertigo',
'dry mouth',
'indigestion', 'stomach',
'edema', 'dropsy',
'nosebleed',
'fatigue', 'weariness', 'tiredness',
'fever',
'flatulence','flatulency', 'gas', 'turgid',
'hallucination','delusion',
'headache' 'ache',
'hearing', 
'hyperthermia',
'hyperventilation',
'hypothermia',
'impotence', 'impotency',
'infertility',
'insomnia',
'itch', 'scabies', 'itchiness', 'itching',
'jaundice',
'malaise',
'mania'
'miscarriage', 'stillbirth',
'nausea', 'sickness',
'earache',
'pain', 'hurting', 'painful',
'palpitation', 'shaking', 'shakiness', 'trembling', 'quiver', 'quivering', 'vibration',
'paralysis', 'palsy',
'paranoia', 
'perspiration', 'sweat',
'phobia', 'phobic',
'fever', 'feverishness',
'heartburn', 
'rash',
'stiffness', 
'sciatica', 
'shivering', 'chill', 
'somnolence', 'sleepiness', 'drowsiness', 
'sputum', 'phlegm', 
'swelling', 'puffiness', 'lump',
'thirst', 'thirstiness', 'hunger', 'hungriness',
'tic',
'tinnitus', 
'toothache', 
'tremor', 'shudder',
'tuberculosis', 'tb',
'urinary', 'incontinence', 
'hives', 
'vagina',
'vertigo', 'dizziness', 'lightheadedness',
'vomiting', 'vomit', 'puking',
'wound', 'lesion',
'dry mouth'
    ]

In [11]:
other_terms = ['bleed', 'ache', 'sore', 'dizzy', 'nausious', 'sick',
               'tingle', 'tingling', 'infection', 'pneumonia', 'throat', 'wheezing', 'gout']

In [12]:
combined_symptoms = sorted(list(set(selected_terms+selected_terms2+other_terms)))
print len(combined_symptoms)
combined_symptoms

162


['ache',
 'amnesia',
 'anorexia',
 'anxiety',
 'anxiousness',
 'apnea',
 'appetite',
 'arrhythmia',
 'belch',
 'belching',
 'blackout',
 'bleed',
 'bleeding',
 'blindness',
 'blister',
 'bloating',
 'bloody',
 'blueperspiration',
 'blurred',
 'bowels',
 'bruise',
 'burp',
 'burping',
 'chill',
 'chills',
 'chorea',
 'claustrophobia',
 'constipation',
 'contusion',
 'convulsion',
 'cough',
 'coughing',
 'cramp',
 'deformity',
 'delusion',
 'depression',
 'diarrhea',
 'diarrhoea',
 'dizziness',
 'dizzy',
 'double vision',
 'dropsy',
 'drowsiness',
 'dry mouth',
 'earache',
 'edema',
 'fainting',
 'fatigue',
 'fecal',
 'fever',
 'feverishness',
 'fit',
 'flatulence',
 'flatulency',
 'gas',
 'gimp',
 'gout',
 'haemorrhage',
 'hallucination',
 'headacheache',
 'hearing',
 'heartburn',
 'hemorrhage',
 'hives',
 'hunger',
 'hungriness',
 'hurting',
 'hyperthermia',
 'hyperventilation',
 'hypothermia',
 'hypoventilation',
 'impotence',
 'impotency',
 'incontinence',
 'indigestion',
 'infection

In [14]:
pickle.dump( combined_symptoms, open( "symptoms.p", "wb" ))