In [None]:
%env JAVA_TOOL_OPTIONS='--add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED'
%env FLINK_CONF_DIR=/Users/soufiane/Workspace/agile/medical-dashboard/back-end-synthea-flink/config

In [None]:
# Load the streaming_jupyter_integrations IPython extension. The cells below rely
# on the %flink_connect and %%flink_execute_sql magics, which are presumably
# registered by this extension, so this cell must run first.
%load_ext streaming_jupyter_integrations.magics

In [None]:
# Connect using the remote execution target, pointing at localhost:8081.
%flink_connect --execution-target remote --remote-hostname localhost --remote-port 8081
# Alternative kept for reference: connect with the extension's default settings
# (presumably a local execution environment; confirm before switching).
#%flink_connect 

In [None]:
%%flink_execute_sql
CREATE TABLE patient (
  -- Source table over the JSON records of the Kafka topic 'Patient'.
  -- Only the fields declared here are exposed; nested objects are ROW<...> and
  -- repeated elements are ARRAY<...>. birthDate is kept as a plain STRING.
  `id`            STRING,
  `name`          ARRAY<ROW<`family` STRING, `given` ARRAY<STRING>>>,
  `gender`        STRING,
  `birthDate`     STRING,
  `telecom`       ARRAY<ROW<`value` STRING>>,
  `address`       ARRAY<ROW<
    `line`       ARRAY<STRING>,
    `city`       STRING,
    `state`      STRING,
    `postalCode` STRING,
    `country`    STRING
  >>,
  `maritalStatus` ROW<`text` STRING>
) WITH (
  -- Read the topic from the earliest available offset.
  'connector' = 'kafka',
  'topic' = 'Patient',
  'properties.bootstrap.servers' = 'kafka-edge1:9092',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
)

In [None]:
%%flink_execute_sql
CREATE TABLE encounter (
  -- Source table over the JSON records of the Kafka topic 'Encounter'.
  -- Nested objects are ROW<...>, repeated elements are ARRAY<...>.
  -- period.start / period.end are kept as plain STRINGs.
  `id`              STRING,
  `status`          STRING,
  `type`            ARRAY<ROW<
    `coding` ARRAY<ROW<`code` STRING, `display` STRING>>
  >>,
  `subject`         ROW<`reference` STRING, `display` STRING>,
  `participant`     ARRAY<ROW<
    `individual` ROW<`reference` STRING, `display` STRING>
  >>,
  `period`          ROW<`start` STRING, `end` STRING>,
  `serviceProvider` ROW<`reference` STRING>
) WITH (
  -- Read the topic from the earliest available offset.
  'connector' = 'kafka',
  'topic' = 'Encounter',
  'properties.bootstrap.servers' = 'kafka-edge1:9092',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
)

In [None]:
%%flink_execute_sql
CREATE TABLE hospital_stats (
  -- JDBC-backed table mapped to public.hospital_stats in PostgreSQL, keyed by
  -- hospital_id (the primary key is declared but NOT ENFORCED by Flink).
  -- An INSERT without an explicit column list maps its SELECT columns to these
  -- columns by position, so writers must emit them in exactly this order.
  hospital_id STRING,
  visit_count BIGINT,
  patient_count BIGINT,
  most_common_disease STRING,
  disease_count BIGINT,
  PRIMARY KEY (hospital_id) NOT ENFORCED
) WITH (
  'connector' = 'jdbc',
  'url' = 'jdbc:postgresql://ep-dark-sea-a20skpa3-pooler.eu-central-1.aws.neon.tech:5432/verceldb',
  'table-name' = 'public.hospital_stats',
  -- NOTE(review): database credentials are hard-coded in the notebook. Treat
  -- this password as leaked: rotate it and load the replacement from a secret
  -- or environment variable instead of committing it.
  'username' = 'default',
  'password' = 'tUyXvsaMq07d'
);

In [None]:
%%flink_execute_sql
CREATE VIEW encounter_view AS
-- Flattened projection of encounter: keeps id, status and type as they are and
-- lifts the nested references into top-level columns. hospital_id is the raw
-- serviceProvider.reference value; no prefix is stripped here.
SELECT
  e.`id`,
  e.`status`,
  e.`type`,
  e.`subject`.`reference`         AS patient_id,
  e.`serviceProvider`.`reference` AS hospital_id
FROM encounter AS e;


In [None]:
%%flink_execute_sql
INSERT INTO hospital_stats
-- Maintains one summary row per hospital in hospital_stats.
--
-- Output columns are emitted in the sink's declared order
-- (hospital_id, visit_count, patient_count, most_common_disease, disease_count)
-- because this INSERT maps columns by position. The earlier version emitted
-- patient_count before visit_count, which stored the two counts swapped.
--
-- most_common_disease is the disease with the highest encounter count for the
-- hospital (ties broken alphabetically), and disease_count is that count. The
-- earlier version used MAX(display), i.e. the alphabetically last disease name,
-- which is unrelated to frequency.
WITH visits AS (
  -- One row per encounter, reduced to the fields the statistics need.
  -- The disease is taken from the first coding of the first type entry.
  -- Encounters without a service provider cannot be attributed to a hospital.
  SELECT
    v.serviceProvider.reference AS provider_ref,
    v.subject.reference         AS patient_ref,
    v.type[1].coding[1].display AS disease
  FROM
    encounter v
  WHERE
    v.serviceProvider.reference IS NOT NULL
),
hospital_totals AS (
  -- Total encounters and distinct patients per hospital.
  SELECT
    provider_ref,
    COUNT(*)                    AS visit_count,
    COUNT(DISTINCT patient_ref) AS patient_count
  FROM
    visits
  GROUP BY
    provider_ref
),
disease_counts AS (
  -- Encounter count per (hospital, disease); encounters with no disease label
  -- are ignored so that NULL can never win the ranking.
  SELECT
    provider_ref,
    disease,
    COUNT(*) AS disease_count
  FROM
    visits
  WHERE
    disease IS NOT NULL
  GROUP BY
    provider_ref,
    disease
),
top_disease AS (
  -- Top-1 disease per hospital. The filter is written as row_num <= 1 because
  -- that is the shape Flink documents for Top-N queries.
  SELECT
    provider_ref,
    disease,
    disease_count
  FROM (
    SELECT
      provider_ref,
      disease,
      disease_count,
      ROW_NUMBER() OVER (
        PARTITION BY provider_ref
        ORDER BY disease_count DESC, disease ASC
      ) AS row_num
    FROM
      disease_counts
  )
  WHERE
    row_num <= 1
)
SELECT
  REPLACE(
    t.provider_ref,
    'Organization?identifier=https://github.com/synthetichealth/synthea|',
    ''
  ) AS hospital_id,
  t.visit_count,
  t.patient_count,
  d.disease AS most_common_disease,
  -- Hospitals whose encounters carry no disease label still get a row, with a
  -- NULL disease and a count of 0.
  COALESCE(d.disease_count, CAST(0 AS BIGINT)) AS disease_count
FROM
  hospital_totals t
LEFT JOIN
  top_disease d
ON
  t.provider_ref = d.provider_ref;
