## This notebook gets the JIVI new writer HCPs flagged in the notebook 03_feature_engineering/01_hcp_target_monthly_spine_with_incl_crit and explores their corresponding data.

In [0]:
# Importing packages
from pyspark.sql import functions as F  # Importing functions from pyspark.sql
import pandas as pd

In [0]:
%run "../00_config/set-up"

In [0]:
# Load the monthly HCP target spine table and report its dimensions
hcp_target_spine_sdf = spark.sql("SELECT * FROM jivi_new_writer_model.hcp_target_spine")

# count() triggers a Spark job; the column count only inspects the schema
spine_row_count = hcp_target_spine_sdf.count()
spine_column_count = len(hcp_target_spine_sdf.columns)
print("Row count: ", spine_row_count, "Column Count: ", spine_column_count)

In [0]:
# Load the raw overlap Rx table and report its dimensions
overlap_raw_data = spark.sql("SELECT * FROM heme_data.overlap_rx")

# count() triggers a Spark job; the column count only inspects the schema
overlap_row_count = overlap_raw_data.count()
overlap_column_count = len(overlap_raw_data.columns)
print('Row count: ', overlap_row_count, 'Column Count: ', overlap_column_count)

In [0]:
# Keep only the spine rows flagged as JIVI new writers (JIVI_NEW_WRITER_FLG == 1)
jivi_new_writer_hcp_target_spine_sdf = hcp_target_spine_sdf.where(
    F.col("JIVI_NEW_WRITER_FLG") == 1
)

# Pull the distinct BH_ID values of those rows into a Python list on the driver
distinct_bh_id_rows = (
    jivi_new_writer_hcp_target_spine_sdf.select("BH_ID").distinct().collect()
)
bh_id_list = [bh_id_row["BH_ID"] for bh_id_row in distinct_bh_id_rows]

# Number of flagged spine rows (this is a row count, not the number of distinct BH_IDs)
print(jivi_new_writer_hcp_target_spine_sdf.count())

In [0]:
# Keep only the overlap rows whose BH_ID belongs to a flagged JIVI new-writer HCP.
#
# A left-semi join against the distinct flagged BH_IDs replaces
# `overlap_raw_data.BH_ID.isin(<Python list>)`: the IDs stay in Spark instead of
# being embedded as literals in the query plan, so the filter no longer depends
# on how many IDs there are. A left-semi join returns only the left side's
# columns, in their original order, and never duplicates left rows, so the
# result has the same rows and schema as the isin() filter (NULL BH_IDs match
# in neither form).
jivi_new_writer_bh_ids_sdf = (
    jivi_new_writer_hcp_target_spine_sdf
    # Renamed so the join condition cannot be ambiguous between the two sides
    .select(F.col("BH_ID").alias("NEW_WRITER_BH_ID"))
    .distinct()
)
jivi_new_writer_overlap_raw_data = overlap_raw_data.join(
    jivi_new_writer_bh_ids_sdf,
    overlap_raw_data["BH_ID"] == jivi_new_writer_bh_ids_sdf["NEW_WRITER_BH_ID"],
    "left_semi",
)

# Order the result by HCP, patient and ship date
jivi_new_writer_overlap_raw_data = jivi_new_writer_overlap_raw_data.orderBy("BH_ID", "PATIENT_ID", "SHP_DT")

# Print the row count and show the data
print(jivi_new_writer_overlap_raw_data.count())
display(jivi_new_writer_overlap_raw_data)


In [0]:
# Columns that should lead the DataFrame, in exactly this order
columns_to_move = [
    "BH_ID", "PATIENT_ID", "SHP_DT", "PRD_NM", "IU",
    "SRC_SP", "SOURCE_TYPE", "DRUG_NM", "BV1_1", "BV2_1",
]

# All other columns follow, keeping their current relative order
remaining_columns = list(
    filter(
        lambda column_name: column_name not in columns_to_move,
        jivi_new_writer_overlap_raw_data.columns,
    )
)

# Leading columns first, then the rest
new_column_order = [*columns_to_move, *remaining_columns]

# Apply the new column order (select fails if any leading column is missing)
jivi_new_writer_overlap_raw_data = jivi_new_writer_overlap_raw_data.select(*new_column_order)

In [0]:
# Display the reordered DataFrame (the columns listed in columns_to_move now come first)
display(jivi_new_writer_overlap_raw_data)

In [0]:
# TODO (not implemented yet): filter PRD_NM == JIVI AND SHP_DT between study period,
# then get the PATIENT_ID list.
# As written, this cell applies no filter; it only evaluates the DataFrame as the cell output.
jivi_new_writer_overlap_raw_data


### Save the resultant data

In [0]:
# Persist the reordered overlap data through the save_sdf helper.
# NOTE(review): save_sdf is not defined in this notebook; presumably it comes from the
# set-up notebook loaded via %run and takes (DataFrame, schema name, table name) — confirm.
# A later cell reads jivi_new_writer_model.jivi_new_writers_overlap_raw_data back.
save_sdf(jivi_new_writer_overlap_raw_data, 'jivi_new_writer_model', 'jivi_new_writers_overlap_raw_data')

In [0]:
# # Save the DataFrame to a CSV file
# output_path = "/Workspace/Repos/yuan.niu@bayer.com/heme_new_writer_models_dev_repo/02_data_processing/jivi_new_writer_overlap_raw_data.csv"  # Replace with your desired output path
# jivi_new_writer_overlap_raw_data.write.option("header", "true").csv(output_path)

### Analyze Jivi patients from Jivi new writers

In [0]:
# Read back a subset of columns from the saved new-writer overlap table.
# The query is split across adjacent string literals for readability; Python
# concatenates them into the same single-line SQL statement.
jivi_new_writer_patient_query = (
    'select BH_ID, PATIENT_ID, BRTH_YR, SHP_DT, PRD_NM, SRC_SP, SOURCE_TYPE, '
    'DRUG_NM, SOURCE, RX_TYP, PTNT_WGT, SEVRTY, PRPHY '
    'from jivi_new_writer_model.jivi_new_writers_overlap_raw_data'
)
df = spark.sql(jivi_new_writer_patient_query)

# Render the selected columns
df.display()