Script to generate and push MeasureReport resources

In [225]:
import json
import requests
from fhirpathpy import compile
from jsonpath_ng import parse
from copy import copy, deepcopy
from datetime import datetime
from dateutil.relativedelta import relativedelta
from dateutil import parser
import random
import uuid
from typing import List
from re import findall
import os

In [226]:
class MeasureReport:
    """A MeasureReport JSON template with ``"$name"`` placeholders.

    ``template`` holds the template as a JSON string. ``fields`` holds the
    placeholder names found in the template at construction time; it is not
    refreshed when ``subject`` rewrites the template.
    """

    def __init__(self, template:str):
        """Store the template text and collect its placeholder names.

        :param template: JSON text in which values to be filled in appear as
            quoted tokens such as ``"$numBeds"``.
        """
        self.template = template
        # Get all text from pattern like "$string123".
        # Raw string: \$ and \d are regex escapes, not Python string escapes.
        self.fields = findall(r'"\$([A-Za-z\d]+?)"', self.template)

    def subject(self, subject, contained=None):
        """Set the template's ``subject`` and optionally add a contained resource.

        The template is parsed, modified and serialized again, so
        ``self.template`` is replaced by the ``json.dumps`` form of the result.

        :param subject: value stored under the ``subject`` key.
        :param contained: optional resource appended to the ``contained``
            list (the list is created when missing).
        """
        template = json.loads(self.template)
        if contained is not None:
            template.setdefault('contained', []).append(contained)
        template['subject'] = subject
        self.template = json.dumps(template)

    def report(self, values:dict):
        """Return the template text with every placeholder replaced.

        Each ``"$name"`` token, quotes included, is replaced by
        ``str(values[name])``; names missing from ``values`` become ``0``.
        Because the surrounding quotes are consumed, string values must carry
        their own quotes to stay valid JSON.

        :param values: mapping from placeholder name to replacement value.
        :return: the filled-in template as a string.
        """
        temp = str(self.template)
        for field in self.fields:
            value = values.get(field, 0)
            temp = temp.replace(f'"${field}"', str(value))
        return temp

class MeasureReportSubject:
    """A simulated facility that produces randomized MeasureReport values.

    Holds the facility's capacity (total beds, ICU beds, ventilators) and the
    report objects bound to it. ``report`` draws a fresh random set of values
    and renders every bound report with them.
    """

    def __init__(self, subject=None, beds=None, icu_beds=None, ventilators=None, reports:List["MeasureReport"]=None):
        """Create a subject and write it into every report in ``reports``.

        :param subject: one of

            * ``None`` - a random UUID string is used;
            * a string containing "/" - used as an external reference; the
              display name is the last path segment before any "/_history";
            * any other string or an int - a contained Location named
              "Hospital <subject>" with id "hospital<subject>" is generated;
            * anything else - treated as a resource dict with ``"id"`` and
              ``"name"`` keys and used as the contained resource itself.
        :param beds: total beds; random in [50, 500] when omitted.
        :param icu_beds: ICU beds; random, at most 75% of ``beds``, when omitted.
        :param ventilators: ventilators; random, at most 50% of ``beds``,
            when omitted.
        :param reports: objects exposing ``subject(reference, contained)`` and
            ``report(values)``. Each is modified in place, so pass copies when
            the same templates are shared by several subjects. Defaults to a
            new empty list.
        :raises ValueError: if ``beds`` and ``icu_beds`` are both given and
            ``icu_beds`` exceeds ``beds``.
        """
        if icu_beds is not None and beds is not None and icu_beds > beds:
            raise ValueError("Total beds must be greater than or equal to beds of a specific type")

        # A new list per instance: a shared default list would be the same
        # object on every subject created without ``reports``.
        if reports is None:
            reports = []

        beds_min = 50
        beds_max = 500
        if beds is None:
            beds = random.randint(beds_min, beds_max)

        # For very small facilities the upper bound can fall below the usual
        # minimum of 2; min() keeps the range non-empty instead of raising.
        icu_beds_min = 2
        icu_beds_max = round(beds * 0.75)
        if icu_beds is None:
            icu_beds = random.randint(min(icu_beds_min, icu_beds_max), icu_beds_max)

        ventilators_min = 2
        ventilators_max = round(beds * 0.5)
        if ventilators is None:
            ventilators = random.randint(min(ventilators_min, ventilators_max), ventilators_max)

        subject_name = ""
        subject_contained = None
        subject_reference = None

        if subject is None:
            subject = str(uuid.uuid4())
        if isinstance(subject, (str, int)):
            if isinstance(subject, str) and "/" in subject:
                # External reference such as "Location/abc/_history/2" -> "abc"
                subject_name = subject.split("/_history")[0].split("/")[-1]
                subject_reference = {
                    "reference": subject,
                    "display": f"Hospital {subject_name}"
                }
            else:
                subject_id = f"hospital{subject}"
                subject_name = f"Hospital {subject}"
                subject_contained = {
                    "resourceType": "Location",
                    "id": subject_id,
                    "name": subject_name,
                    "managingOrganization": {
                        "display": "WA State Department of Health"
                    }
                }

                subject_reference = {
                    "reference": f"#{subject_id}",
                    "display": subject_name
                }
        else:
            subject_contained = subject
            subject_reference = {
                # The resource is embedded as a contained resource, so it is
                # referenced by fragment ("#id"), as in the generated case above.
                "reference": f"#{subject['id']}",
                "display": subject["name"]
            }
            subject_name = subject["name"]

        for report in reports:
            report.subject(subject_reference, subject_contained)

        self.subject_name = subject_name
        self.beds = beds # All beds
        self.icu_beds = icu_beds # ICU subset
        self.ventilators = ventilators
        self.reports = reports
        self.values = {}

    def update(self, date, start, end):
        """Draw a new random set of values and store it in ``self.values``.

        The draws are bounded by each other (for example ICU occupancy never
        exceeds total occupancy). Where a bound combination leaves an empty
        range, the fallback passed to ``_try_or_value`` is used instead.

        :param date: stored unchanged under ``'date'``.
        :param start: stored unchanged under ``'start'``.
        :param end: stored unchanged under ``'end'``.
        """
        values = {}

        values['date'] = date
        values['start'] = start
        values['end'] = end

        # Ventilators: numVent is the count not in use, numVentUse the rest
        values['numVent'] = self._try_or_value(lambda: random.randint(0, self.ventilators), 0)
        values['numVentUse'] = self.ventilators - values['numVent']

        # Beds: occupancy is at least the number of ventilators in use
        values['numBedsOcc'] = self._try_or_value(lambda: random.randint(values['numVentUse'], self.beds), values['numVentUse'])
        values['numICUBedsOcc'] = self._try_or_value(lambda: random.randint(0, min(self.icu_beds, values['numBedsOcc'])), 0)
        values['numICUBedsAvail'] = self.icu_beds - values['numICUBedsOcc']
        values['numNonICUBedsOcc'] = values['numBedsOcc'] - values['numICUBedsOcc']

        # COVID-19 patient counts, each bounded by the figures drawn above
        values['numC19MechVentPats'] = self._try_or_value(lambda: random.randint(0, values['numVentUse']), 0)
        values['numC19HospPats'] = self._try_or_value(lambda: random.randint(values['numC19MechVentPats'], values['numBedsOcc']), values['numC19MechVentPats'])
        values['numC19HOPats'] = self._try_or_value(lambda: random.randint(0, values['numC19HospPats']), 0)
        values['numC19OFMechVentPats'] = self._try_or_value(lambda: random.randint(0, min(values['numNonICUBedsOcc'], values['numVentUse'] - values['numC19MechVentPats'])), 0)
        values['numC19OverflowPats'] = self._try_or_value(lambda: random.randint(values['numC19OFMechVentPats'], values['numNonICUBedsOcc']), values['numC19OFMechVentPats'])
        values['numC19Died'] = random.randint(0, 20)
        values['numC19Pats'] = self._try_or_value(lambda: random.randint(values['numC19HospPats'], self.beds), self.beds)
        values['numC19VentPats'] = values['numC19MechVentPats'] + values['numC19OFMechVentPats']

        # Testing: with fewer than two beds the range (5, beds * 3) is empty,
        # so fall back to the lower bound like the other draws do.
        values['totalOrdersIncrease'] = self._try_or_value(lambda: random.randint(5, self.beds * 3), 5)
        values['totalTestResultsIncrease'] = max(1, round(values['totalOrdersIncrease'] * round(random.randint(10, 70)/100, 2)))
        values['positiveIncrease'] = self._try_or_value(lambda: random.randint(0, values['totalTestResultsIncrease']), 0)
        values['positiveIncreasePercent'] = round(values['positiveIncrease'] / values['totalTestResultsIncrease'], 2)

        # TODO accumulate over time
        values['totalOrders'] = values['totalOrdersIncrease']
        values['rejected'] = values['totalOrders'] - values['totalTestResultsIncrease']
        values['totalTestResults'] = values['totalTestResultsIncrease']
        values['positive'] = values['positiveIncrease']
        values['positivePercent'] = round(values['positive'] / values['totalTestResults'], 2)

        self.values = values

    def report(self, date:str, start:str, end:str):
        """Draw new values and render every bound report with them.

        :return: list with one rendered report per entry of ``self.reports``.
        """
        self.update(date, start, end)
        return [report.report(self.values) for report in self.reports]

    def _try_or_value(self, lam, val):
        """Return ``lam()``, or ``val`` if it raises ``ValueError``.

        ``random.randint`` raises ``ValueError`` for an empty range, which is
        the case this guards against.
        """
        try:
            return lam()
        except ValueError:
            return val


In [227]:
def write_entries(entries, filename):
    """Append ``entries`` to ``filename``, one entry per line.

    The file is opened in append mode, so it is created when missing and
    extended otherwise. An empty ``entries`` adds nothing, which keeps blank
    lines out of the newline-delimited output.

    :param entries: iterable of already-serialized entries.
    :param filename: path of the file to append to.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        # Every entry gets its own terminating newline
        f.writelines(f"{entry}\n" for entry in entries)

In [228]:
def generateMeasureReports(subjects:List[MeasureReportSubject], start:datetime, end:datetime, period:relativedelta, folder):
    """Write one NDJSON file of generated reports per subject.

    The window from ``start`` (inclusive) to ``end`` (exclusive) is walked in
    steps of ``period``. Each step asks the subject for its reports, passing
    the generation time plus the step's start and its end (one second before
    the next step starts). The results go to
    ``<folder>/<subject name with spaces replaced by underscores>.ndjson``.

    Timestamps are rendered with a fixed ``+00:00`` offset. Timezone-aware
    datetimes are converted to UTC first; naive ones are taken to be UTC.

    :param subjects: subjects to generate reports for.
    :param start: start of the first reporting period.
    :param end: exclusive upper bound for period starts.
    :param period: length of one reporting period; must move time forward.
    :param folder: existing directory that receives the output files.
    :raises ValueError: if ``period`` does not advance ``start``.
    """
    # Local import: the file's top-level import only brings in ``datetime``.
    from datetime import timezone

    if start + period <= start:
        # A zero or negative step would never reach ``end``
        raise ValueError("period must be a positive duration")

    dt_format = "%Y-%m-%dT%H:%M:%S+00:00"

    def stamp(moment):
        # The format hard-codes +00:00, so the wall-clock value must be UTC
        if moment.utcoffset() is not None:
            moment = moment.astimezone(timezone.utc)
        return f'"{moment.strftime(dt_format)}"'

    generation_time = stamp(datetime.now(timezone.utc))
    end_period = period - relativedelta(seconds=1)
    for subject in subjects:
        entries = []
        outfile = f"{folder}/{subject.subject_name.replace(' ', '_')}.ndjson"
        iter_start = start
        while iter_start < end:
            iter_end = iter_start + end_period
            entries.extend(subject.report(generation_time, stamp(iter_start), stamp(iter_end)))
            iter_start = iter_start + period
        write_entries(entries, outfile)

In [229]:
# Template files to load, opened by name exactly as listed here
template_files = [
    "template_CDCPatientImpactAndHospitalCapacity.json",
    "template_FEMADailyHospitalCOVID19Reporting.json",
    "template_ICUBedsReporting.json",
    "template_ICUBedCurrentOccupancy.json",
    "template_ICUBedStaffedCapacity.json",
]


def _read_template(path):
    """Return the full text of the template file at ``path``."""
    with open(path, 'r') as handle:
        return handle.read()


# Raw text of every template, in the same order as template_files
template_json = [_read_template(path) for path in template_files]

In [230]:
# One MeasureReport per loaded template
reports = [MeasureReport(text) for text in template_json]

# Reporting window and the length of a single reporting period
start = parser.parse("2021-01-01T00:00:00+00:00")
end = parser.parse("2022-01-01T00:00:00+00:00")
period = relativedelta(days=1)

# Every subject gets its own deep copy of the reports, because creating a
# subject writes that subject into each report it is given
num_subjects = 100
subjects = [
    MeasureReportSubject(index, reports=deepcopy(reports))
    for index in range(num_subjects)
]

In [231]:
# Output folder named after the current local time (second resolution)
folder = f"test_data/entries_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
# exist_ok avoids the check-then-create gap of a separate isdir() test
os.makedirs(folder, exist_ok=True)

# Capacity figures of every subject, saved next to the generated reports
subject_info_list = [
    {
        "name": subject.subject_name,
        "beds": subject.beds,
        "icu_beds": subject.icu_beds,
        "ventilators": subject.ventilators,
    }
    for subject in subjects
]
with open(f'{folder}/Subjects.json', 'w') as file:
    json.dump(subject_info_list, file, indent=4)

generateMeasureReports(subjects, start, end, period, folder)

In [232]:
# Halt execution here; presumably a guard so a full run stops before the cells below (confirm)
raise SystemExit("Execution stopped.")

SystemExit: Execution stopped.

In [None]:
# True: one JSON document per line; False: a single compact JSON array
ndjson = True

with open(f'entries_{datetime.now().strftime("%Y%m%d_%H%M%S")}.{"nd" if ndjson else ""}json', 'w') as f:
    if not ndjson:
        # Whole list as one array, without any whitespace
        entries_string = json.dumps(entries, indent=None, separators=(',', ':'))
    else:
        # Serialize each entry on its own, then join with newlines
        json_entries = list(map(json.dumps, entries))
        entries_string = "\n".join(json_entries)

    f.write(entries_string)

TypeError: 'NoneType' object is not iterable

### Access MeasureReports on agg-data server

In [None]:
# Server location: the FHIR service root is <base_url>/fhir
base_url = "https://fhir.agg-data.connectathon.dev.cirg.uw.edu"
fhir_endpoint = f'{base_url}/fhir'
resource_type = "MeasureReport"
# Search: reports dated on or after 2020-01-01, up to 1000 per page
search_params = {
    "date": "ge2020-01-01",
    "_count": "1000",
}
# "application/fhir+json" is the FHIR MIME type for JSON ("application/json+fhir"
# is the pre-STU3 spelling). "Accept" is what selects the response format of a
# GET; "Content-Type" is kept for requests that send a body.
headers = {
    "Accept": "application/fhir+json",
    "Content-Type": "application/fhir+json",
}
full_url = f'{fhir_endpoint}/{resource_type}'

In [None]:
# Credential manager placeholder; in the visible cells only the commented-out phdi code refers to it
cred_manager = None

In [None]:
# phdi attempt: SLOW performance, moved to requests library
# entries = []
# tmp_search_url = full_search_url
# while tmp_search_url is not None:
#     (page_entries, search_url) = extract_data_from_fhir_search_incremental(search_url=tmp_search_url, cred_manager=cred_manager)
#     entries.append(page_entries)


In [None]:
# Collect the entries of every page of the MeasureReport search
entries = []
url = full_url

get_entries = compile("Bundle.entry")

next_url_exists = compile("Bundle.link.where(relation='next').exists()")


def check_next_url(bundle):
    """Return True when ``bundle`` has a link with relation 'next'."""
    return next_url_exists(bundle)[0]


next_url_path = compile("Bundle.link.where(relation='next').url")


def get_next_url(bundle):
    """Return the URL of the 'next' link of ``bundle``."""
    return next_url_path(bundle)[0]


# Search parameters go on the first request only; "next" links returned by
# the server are requested exactly as given.
params = search_params
while True:
    response = requests.get(url=url, params=params, headers=headers)
    # An error response has no entries and no 'next' link, so without this
    # check a server failure would look like a normal last page and leave
    # ``entries`` silently incomplete.
    response.raise_for_status()
    content = response.json()
    entries.extend(get_entries(content))
    #update the url
    if check_next_url(content):
        url = get_next_url(content)
        params = None
        print(url)
    else:
        break

In [None]:
# Show the status of the latest response, plus its body when the status is 400 or above
status = response.status_code
print(status)
if status >= 400:
    print(response.text)

500
{
  "resourceType": "OperationOutcome",
  "text": {
    "status": "generated",
    "div": "<div xmlns=\"http://www.w3.org/1999/xhtml\"><h1>Operation Outcome</h1><table border=\"0\"><tr><td style=\"font-weight: bold;\">ERROR</td><td>[]</td><td><pre>Failed to call access method: org.springframework.transaction.CannotCreateTransactionException: Could not open JPA EntityManager for transaction; nested exception is org.hibernate.exception.JDBCConnectionException: Unable to acquire JDBC Connection</pre></td>\n\t\t\t</tr>\n\t\t</table>\n\t</div>"
  },
  "issue": [ {
    "severity": "error",
    "code": "processing",
    "diagnostics": "Failed to call access method: org.springframework.transaction.CannotCreateTransactionException: Could not open JPA EntityManager for transaction; nested exception is org.hibernate.exception.JDBCConnectionException: Unable to acquire JDBC Connection"
  } ]
}


In [None]:
# Number of items currently held in `entries`
len(entries)

2715

In [None]:
# Distinct values of resource.measure across all collected entries
measureSet = {entry['resource']['measure'] for entry in entries}
list(measureSet)

['http://hl7.org/fhir/uv/saner/Measure/CDCPatientImpactAndHospitalCapacity',
 'https://cqf-ruler.ecqm.icfcloud.com/fhir/Measure/ICUBedsReporting',
 'https://cqf-ruler.ecqm.icfcloud.com/fhir/Measure/ICUBedStaffedCapacity',
 'http://hl7.org/fhir/uv/saner/Measure/FEMADailyHospitalCOVID19Reporting',
 'https://cqf-ruler.ecqm.icfcloud.com/fhir/Measure/ICUBedCurrentOccupancy']

In [None]:
# Measures whose reports should be rewritten; empty for now.
# Commented-out candidates:
#     'ICUBedStaffedCapacity',
#     'ICUBedCurrentOccupancy',
#     'ICUBedsReporting'
measures_to_replace = list()

# Transaction Bundle that collects the rewritten entries
updateBundle = dict(
    resourceType="Bundle",
    id="mr-update",
    type="transaction",
    entry=[],
)

measure_report_measure = compile("MeasureReport.measure")


def get_measure(x):
    """Return the first result of ``MeasureReport.measure`` evaluated on ``x``."""
    return measure_report_measure(x)[0]

In [None]:
# Print one example resource for every distinct measure
for measure in measureSet:
    first_match = next(
        (candidate for candidate in entries
         if get_measure(candidate['resource']) == measure),
        None,
    )
    if first_match is not None:
        print(first_match['resource'])


NameError: name 'measureSet' is not defined

In [None]:
# Per-measure settings: the template file to use plus its reporting parameters.
# Only "ICUBedCurrentOccupancy" is configured.
# NOTE(review): nothing in the visible cells reads this dict - confirm where it is consumed.
fields = {
    "ICUBedCurrentOccupancy": {
        "template": "template_ICUBedCurrentOccupancy.json",
        "parameters": {
            # Datetime of first MeasureReport
            "reporting_start": "2023-01-01T00:00:00+00:00",
            # Maximum datetime of MeasureReports
            "reporting_end": "2023-12-31T23:00:00+00:00",
            # Reporting period time delta
            # NOTE(review): the all-zero value looks like a placeholder - confirm the intended format
            "period": "0000-00-00T00:00:00+00:00",
            # Subject reference to put on the reports
            "subject": {
                "reference": "Location/helios1",
                "identifier": {
                    "value": "Helios1",
                },
                "display": "Helios Test Facility 1",
            },
        }
    }
}

In [None]:
# Number of items currently held in `entries`
len(entries)

18250

In [None]:
# Inspect the first item of `entries`
entries[0]

{'resourceType': 'MeasureReport',
 'status': 'complete',
 'type': 'summary',
 'measure': 'http://hl7.org/fhir/uv/saner/Measure/CDCPatientImpactAndHospitalCapacity',
 'subject': {'reference': 'Location/hospital0',
  'identifier': {'value': 'hospital0'},
  'display': 'hospital0'},
 'date': '2023-05-25T15:04:30+00:00',
 'reporter': {'reference': 'Organization/WA-DOH',
  'display': 'Washington State Department of Health'},
 'period': {'start': '2021-01-01T00:00:00+00:00',
  'end': '2021-01-01T23:59:59+00:00'},
 'group': [{'code': {'coding': [{'system': 'http://hl7.org/fhir/uv/saner/CodeSystem/MeasureGroupSystem',
      'code': 'Beds'}],
    'text': 'Hospital Bed Reporting'},
   'population': [{'code': {'coding': [{'system': 'http://hl7.org/fhir/uv/saner/CodeSystem/MeasuredValues',
        'code': 'numTotBeds'},
       {'system': 'http://terminology.hl7.org/CodeSystem/measure-population',
        'code': 'initial-population'}],
      'text': 'Total number of all Inpatient and outpatient bed

In [None]:
# Queue a PUT for every entry whose measure is listed in measures_to_replace,
# re-pointing its measure URL at the cqf-ruler server.
for entry in entries:
    # Evaluate the FHIRPath once per entry
    measure_url = get_measure(entry['resource'])
    # Short measure name: last path segment of the measure URL
    measure = measure_url.split("/")[-1]
    # measures_to_replace may hold short names or full URLs; accept both so a
    # list of short names is not silently ignored.
    if measure in measures_to_replace or measure_url in measures_to_replace:
        updateEntry = deepcopy(entry)
        updateEntry['resource']['measure'] = 'https://cqf-ruler.ecqm.icfcloud.com/fhir/Measure/' + measure
        # Drop the 'search' element copied over from the search result entry
        updateEntry.pop('search', None)
        updateEntry['request'] = {
            "method": "PUT",
            "url": f"{updateEntry['resource']['resourceType']}/{updateEntry['resource']['id']}"
        }
        updateBundle['entry'].append(updateEntry)

KeyError: 'resource'

In [None]:
# Send the queued updates as one transaction; nothing is sent when the Bundle is empty
if updateBundle['entry']:
    # A transaction is POSTed to the FHIR service root (<base_url>/fhir),
    # not to the bare host.
    url = fhir_endpoint
    # Standard FHIR JSON MIME type ("application/json+fhir" is the pre-STU3 spelling)
    hed = {'Content-Type': 'application/fhir+json'}
    data = json.dumps(updateBundle)

    response = requests.post(url=url, data=data, headers=hed)
    print(response.text)
    print(response.status_code)

{
  "resourceType": "Bundle",
  "id": "29ee2539-8a15-4098-a443-e2364d2276d6",
  "type": "transaction-response",
  "link": [ {
    "relation": "self",
    "url": "https://fhir.agg-data.connectathon.dev.cirg.uw.edu/fhir"
  } ],
  "entry": [ {
    "response": {
      "status": "200 OK",
      "location": "MeasureReport/1761/_history/4",
      "etag": "4",
      "lastModified": "2023-05-10T19:43:16.233+00:00"
    }
  }, {
    "response": {
      "status": "200 OK",
      "location": "MeasureReport/1763/_history/4",
      "etag": "4",
      "lastModified": "2023-05-10T19:43:16.233+00:00"
    }
  }, {
    "response": {
      "status": "200 OK",
      "location": "MeasureReport/1764/_history/4",
      "etag": "4",
      "lastModified": "2023-05-10T19:43:16.233+00:00"
    }
  }, {
    "response": {
      "status": "200 OK",
      "location": "MeasureReport/1765/_history/4",
      "etag": "4",
      "lastModified": "2023-05-10T19:43:16.233+00:00"
    }
  }, {
    "response": {
      "status": "