# Data Visualization of Section Identification

## Set up environment and import notebook modules

In [None]:
import itertools as it
from nbharness import AppNotebookHarness, create_subplots

# Harness shared by the cells below; each of them asks it for an analyzer.
harness = AppNotebookHarness()
# Analyzer built with the harness's default settings.
analyzer = harness.get_analyzer()
# Switch for the text sample cells: they only print when this is True.
text_cells = False

## Sample of a discharge summary note

In [None]:
from sidmodel import AnnotationNote, NoteFeatureDocument
# Take the first document held in the analyzer's stash as the running example.
stash_docs = iter(analyzer.doc_stash.values())
doc: NoteFeatureDocument = next(stash_docs)
# The annotated note attached to that document.
anon: AnnotationNote = doc.anon_note
if text_cells:
    # Identifying fields of the note, followed by the first 300 characters
    # of the document text.
    note_fields = (
        'hadm:', anon.hadm_id,
        ', row_id:', anon.row_id,
        ', category:', anon.category,
    )
    print(*note_fields)
    preview = doc.text[:300]
    print(preview)

## Sentence sample

Show a few sentences and the first three tokens of each.

In [None]:
if text_cells:
    # Separator printed after each sentence's token listing.
    divider = '-' * 80
    # Only the first two sentences of the document are shown.
    for sentence in it.islice(doc.sents, 2):
        # Delimit the sentence text so leading/trailing whitespace is visible.
        print(''.join(('<<', sentence.text, '>>')))
        # token_iter(3) is expected to yield the first three tokens (per the
        # cell description); each is printed with its cui_ and pref_name_.
        for token in sentence.token_iter(3):
            print(token, token.cui_, token.pref_name_)
        print(divider)

## Sample of header and section

In [None]:
if text_cells:
    # Show the first two annotated sections of the sample note.
    for sec in it.islice(anon.sections, 2):
        # Materialize the tokens once: they are traversed twice below, and a
        # one-shot iterator would leave the second traversal empty.
        # NOTE(review): confirm whether get_section_tokens returns a list or
        # an iterator; the tuple() is harmless either way.
        toks = tuple(doc.get_section_tokens(sec))
        # Normalized forms of only the tokens flagged as header tokens.
        head_toks = ' '.join(t.norm for t in toks if t.is_header)
        # Normalized forms of every token in the section (header included).
        body_toks = ' '.join(t.norm for t in toks)
        print(head_toks)
        print('-' * 40)
        print(body_toks)
        print('=' * 80)

## Plot CUIs by Section 2D

Extract the CUIs from each section across all notes and plot across each section.

In [None]:
# Keyword settings handed to the harness for the 2D plot's analyzer.
plot_2d_settings = dict(tfidf_threshold=0.01, outlier_proportion=0.1)
analyzer = harness.get_analyzer(**plot_2d_settings)
# Set the analyzer's TF-IDF sections to the two history sections before
# plotting.
analyzer.tfidf_sections = {'past-surgical-history', 'past-medical-history'}
analyzer.section_plot_2d()

## Plot Section 3D

24-hour events, addendum, and hospital history: these describe the patient's time in the hospital (their story).
Assessment and plan, and hospital course, are more of an overview of the treatment plan.

In [None]:
# Key of the section combination to plot; must be one of the keys of
# `section_combos` below (note that 3-5 are not defined).
sec_comb = 0
# Candidate combinations; each value is a space-separated list of section names.
section_combos = {
    0: 'past-medical-history past-surgical-history',
    1: 'past-surgical-history',
    2: 'past-medical-history',
    6: 'code-status assessment-and-plan',
    7: 'wet-read indication',
    8: 'medication-history past-medical-history',
    9: 'disposition labs-imaging',
    10: 'impression image-type',
    11: 'findings image-type',
    12: 'chief-complaint discharge-condition',
    13: 'patient-test-information procedure',
    14: 'flowsheet-data-vitals indication',
    15: '24-hour-events patient-test-information',
    16: 'clinical-implications code-status',
}
analyzer = harness.get_analyzer(
    plot_components=True,
    outlier_proportion=0.1,
    calc_per_section=False,
    tfidf_threshold=0.00001,
    # Split the selected combination into a set of individual section names.
    tfidf_sections=set(section_combos[sec_comb].split()),
)
# Disabled experiment knobs, kept for quick toggling:
#analyzer.set_tfidf_section_by_index(-1)
#analyzer.dim_reduction_meth = 'svd'
#analyzer.height = 800
# Manual toggle: the first branch is the active configuration; change the
# condition to run the alternative ('rand' plot type) configuration instead.
if True:
#    analyzer.normalize = 'unit'
#    analyzer.normalize = 'standardize'
    analyzer.axis_range = 2.7
else:
    analyzer.plot_type = 'rand'
    analyzer.axis_range = 200
    analyzer.tfidf_sections = {'past-medical-history'}
analyzer.section_plot_3d()
# Call write() on every dimension reducer the analyzer exposes
# (presumably this persists their output -- confirm against the analyzer).
for reducer in analyzer.dim_reducers:
    reducer.write()

## Plot section, Adam's idea

In [None]:
# current meds, discharge meds, med history, past{med,sur}
#
# great graph: orthogonal across past surgical history and past medical history
# adam: past medical history might covary with past surgical history, because someone with vascular disease might later need surgery (presumably; the note was left unfinished) (adam to send writeup)

# kunal on non-covariate: past medical history items are all diseases that don't need surgical intervention for management;
# past medical terms: the data shows which terms are more likely to involve surgical intervention and which are not.

# Example: someone who has lung cancer will require surgery (presumably; the original note is incomplete), whereas some blood cancers did not.

# separates out which cancers are amenable to surgical correction
# separates those types of issues that require medication and NOT surgery (past-medical-history) vs. surgical intervention (past-surgical-history)

# Analyzer settings for this plot; no tfidf_sections are given here, so the
# analyzer's own default section selection applies.
# NOTE(review): other cells pass `outlier_proportion` rather than
# `outlier_significance` -- confirm both keywords are supported.
adam_plot_settings = {
    'outlier_significance': 0.1,
    'tfidf_threshold': 0.01,
    # 'normalize': 'unit',
}
analyzer = harness.get_analyzer(**adam_plot_settings)
analyzer.section_plot_3d()

## Plot by Age Type

In [None]:
# Build an analyzer whose plot type is 'age' and render the 3D section plot.
# Disabled alternative keyword: normalize = True
analyzer = harness.get_analyzer(plot_type='age')
analyzer.section_plot_3d()

## TSNE on Sections

In [None]:
import itertools as it
from nbharness import AppNotebookHarness, create_subplots

# This cell re-creates the harness and resets the text switch, so it does not
# depend on the set-up cell having been run first.
harness = AppNotebookHarness()
text_cells = False
# Settings for the t-SNE based analyzer.
tsne_settings = dict(
    dim_reduction_meth='tsne',
    tfidf_threshold=0.1,
    outlier_significance=0.3,
    # normalize='unit',
)
analyzer = harness.get_analyzer(**tsne_settings)
# Disabled alternatives for restricting the plotted sections:
#analyzer.tfidf_sections = {'labs-imaging', 'past-surgical-history', 'physical-examination'}
#analyzer.tfidf_sections = {'past-medical-history', 'past-surgical-history'}
analyzer.section_plot_3d()

## Plot discharge summary against radiology

In [None]:
# Settings for the 'shared' plot type (discharge summary vs. radiology, per
# this cell's heading).
shared_plot_settings = {
    'plot_type': 'shared',
    'tfidf_threshold': 0.1,
    'outlier_significance': 0.1,
}
analyzer = harness.get_analyzer(**shared_plot_settings)
analyzer.section_plot_3d()