In [None]:
# Environment for the JVM started from this kernel: the flags open the java.util and
# java.lang packages of java.base to unnamed modules.
# NOTE(review): %env appears to store everything after '=' verbatim, including the
# surrounding single quotes — confirm the JVM receives the two flags as intended.
%env JAVA_TOOL_OPTIONS='--add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED'
# Directory holding the Flink configuration used by this notebook.
# NOTE(review): absolute, machine-specific path — the notebook will not run unchanged on
# another machine; consider a path relative to the repository or an externally set variable.
%env FLINK_CONF_DIR=/Users/soufiane/Workspace/agile/medical-dashboard/back-end-synthea-flink/config

In [None]:
# Load the IPython extension; presumably it registers the %flink_connect and
# %%flink_execute_sql magics used in the cells below.
%load_ext streaming_jupyter_integrations.magics

In [None]:
# Connect to a remote Flink execution target at localhost:8081.
# NOTE(review): host and port are hardcoded; every SQL cell below depends on this connection.
%flink_connect --execution-target remote --remote-hostname localhost --remote-port 8081


In [None]:
%%flink_execute_sql
-- Source table `patient`: reads JSON records from the Kafka topic 'Patient',
-- starting from the earliest available offset.
CREATE TABLE patient (
  `id`            STRING,
  `name`          ARRAY<ROW<
                    `family` STRING,
                    `given`  ARRAY<STRING>
                  >>,
  `gender`        STRING,
  `birthDate`     STRING,
  `telecom`       ARRAY<ROW<`value` STRING>>,
  `address`       ARRAY<ROW<
                    `line`       ARRAY<STRING>,
                    `city`       STRING,
                    `state`      STRING,
                    `postalCode` STRING,
                    `country`    STRING
                  >>,
  `maritalStatus` ROW<`text` STRING>
) WITH (
  'connector' = 'kafka',
  'topic' = 'Patient',
  'properties.bootstrap.servers' = 'kafka-edge1:9092',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
)

In [None]:
%%flink_execute_sql
-- Source table `encounter`: reads JSON records from the Kafka topic 'Encounter',
-- starting from the earliest available offset.
CREATE TABLE encounter (
  `id`              STRING,
  `status`          STRING,
  `type`            ARRAY<ROW<
                      `coding` ARRAY<ROW<
                        `code`    STRING,
                        `display` STRING
                      >>
                    >>,
  `subject`         ROW<
                      `reference` STRING,
                      `display`   STRING
                    >,
  `participant`     ARRAY<ROW<
                      `individual` ROW<
                        `reference` STRING,
                        `display`   STRING
                      >
                    >>,
  `period`          ROW<
                      `start` STRING,
                      `end`   STRING
                    >,
  `serviceProvider` ROW<`reference` STRING>
) WITH (
  'connector' = 'kafka',
  'topic' = 'Encounter',
  'properties.bootstrap.servers' = 'kafka-edge1:9092',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
)

In [None]:
%%flink_execute_sql
-- Source table `practitioner`: reads JSON records from the Kafka topic 'Practitioner',
-- starting from the earliest available offset.
CREATE TABLE practitioner (
  `identifier` ARRAY<ROW<`value` STRING>>,
  `active`     BOOLEAN,
  `name`       ARRAY<ROW<
                 `family` STRING,
                 `given`  ARRAY<STRING>
               >>,
  `telecom`    ARRAY<ROW<`value` STRING>>,
  `address`    ARRAY<ROW<
                 `line`       ARRAY<STRING>,
                 `city`       STRING,
                 `state`      STRING,
                 `postalCode` STRING,
                 `country`    STRING
               >>,
  `gender`     STRING
) WITH (
  'connector' = 'kafka',
  'topic' = 'Practitioner',
  'properties.bootstrap.servers' = 'kafka-edge1:9092',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
)

In [None]:
%%flink_execute_sql
-- Sink table `practitioner_stats_psql`: JDBC connector writing to the PostgreSQL table
-- public.practitioner_stats, keyed (not enforced) by `id_practitioner`.
-- NOTE(review): the database host, username and password are hardcoded in plain text
-- below. Anyone with access to this notebook can read them; rotate the password and
-- load it from a secret / environment variable instead of committing it.
CREATE TABLE practitioner_stats_psql (
   `id_practitioner` STRING,
    `average_age` BIGINT,
    `min_age` BIGINT,
    `max_age` BIGINT,
    `total_patients` BIGINT,
  PRIMARY KEY (`id_practitioner`) NOT ENFORCED
) WITH (
  'connector' = 'jdbc',
  'url' = 'jdbc:postgresql://ep-dark-sea-a20skpa3-pooler.eu-central-1.aws.neon.tech:5432/verceldb',
  'table-name' = 'public.practitioner_stats',
  'username' = 'default',
  'password' = 'tUyXvsaMq07d'
);

In [None]:
%%flink_execute_sql
-- Per practitioner, aggregate the ages of the patients referenced by encounters and
-- write average / min / max age and the distinct patient count to practitioner_stats_psql.
--
-- Stages:
--   patient_ages      age in completed years (as of the current date) for each patient row
--   encounter_data    patient id and practitioner id extracted from each encounter's references
--   combined_data     encounters joined to the age of their patient
--   practitioner_data identifier value of each practitioner row
--
-- NOTE(review): average_age is computed over joined encounter rows, so a patient with
-- several encounters is weighted several times, while total_patients counts each
-- patient once — confirm this is the intended definition.
-- NOTE(review): these are regular joins over Kafka sources; presumably the job keeps
-- state for every row unless a state TTL is configured — verify for long-running jobs.
INSERT INTO practitioner_stats_psql
WITH patient_ages AS (
    SELECT
        b.patient_id,
        -- Year difference, minus one when this year's birthday has not happened yet.
        EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM b.birth_date)
        - CASE
              WHEN EXTRACT(MONTH FROM CURRENT_DATE) < EXTRACT(MONTH FROM b.birth_date)
                OR (EXTRACT(MONTH FROM CURRENT_DATE) = EXTRACT(MONTH FROM b.birth_date)
                    AND EXTRACT(DAY FROM CURRENT_DATE) < EXTRACT(DAY FROM b.birth_date))
              THEN 1
              ELSE 0
          END AS age
    FROM (
        -- Cast the birth date string once instead of repeating the cast in every expression.
        SELECT
            p.id AS patient_id,
            CAST(p.birthDate AS DATE) AS birth_date
        FROM
            patient p
    ) b
),
encounter_data AS (
    SELECT
        -- Strip the reference prefixes so the ids can be compared with the other tables.
        REPLACE(e.subject.reference, 'urn:uuid:', '') AS patient_id,
        -- Only the first participant of the encounter is considered.
        REPLACE(e.participant[1].individual.reference, 'Practitioner?identifier=http://hl7.org/fhir/sid/us-npi|', '') AS practitioner_id
    FROM
        encounter e
),
combined_data AS (
    SELECT
        ed.practitioner_id,
        pa.age,
        pa.patient_id
    FROM
        encounter_data ed
    JOIN
        patient_ages pa ON ed.patient_id = pa.patient_id
),
practitioner_data AS (
    SELECT
        -- Access the ROW field by name (as done for the encounter references) rather
        -- than by position; `value` is backtick-quoted as in the table definition.
        REPLACE(p.identifier[1].`value`, 'http://hl7.org/fhir/sid/us-npi|', '') AS id_practitioner
    FROM
        practitioner p
)
SELECT
    cd.practitioner_id AS id_practitioner,
    -- age is BIGINT, so AVG(age) would already be an integer and ROUND would have no
    -- effect; average as DOUBLE, round to the nearest whole year, then cast back to
    -- the BIGINT sink column.
    CAST(ROUND(AVG(CAST(cd.age AS DOUBLE)), 0) AS BIGINT) AS average_age,
    MIN(cd.age) AS min_age,
    MAX(cd.age) AS max_age,
    COUNT(DISTINCT cd.patient_id) AS total_patients
FROM
    combined_data cd
JOIN
    -- Keeps only practitioners that also appear in the practitioner table.
    practitioner_data pd ON cd.practitioner_id = pd.id_practitioner
GROUP BY
    cd.practitioner_id;
