# Objectives

- Merge the Janssen pharmacy claim file with the imputed pharmacy costs and create the pharmacy cost table.

In [17]:
## Import required libraries 

import snowflake.connector
import getpass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 200)
pd.set_option('display.width', 1000)
import plotly.graph_objects as go


In [2]:
import sys
sys.path.append('/home/jovyan/credentials')
import jupytertoolz_credentials as jt

In [21]:
## Snowflake session settings
ROLE = 'TEM_ROLE'
WAREHOUSE = 'XLARGE_WH'
DATABASE = 'SANDBOX_KOMODO'  
SCHEMA = 'PROJECT_CURRENCY'

# Role and warehouse are handed straight to connect() so the session starts
# out configured, instead of opening two throwaway cursors (never closed)
# just to run `USE ROLE` / `USE WAREHOUSE` after the fact.
connection = snowflake.connector.connect(
    user='YWEI',
    password=getpass.getpass(),  # prompted interactively; never stored in the notebook
    account='komodohealth',
    role=ROLE,
    warehouse=WAREHOUSE,
)

def read_sql(sql, connection = connection):
    """Run ``sql`` on ``connection`` via ``pd.read_sql`` and return the DataFrame.

    Parameters
    ----------
    sql : str
        Query text to execute.
    connection :
        Connection object passed to ``pd.read_sql``; defaults to the
        notebook-level ``connection`` bound when this function was defined.
    """
    return pd.read_sql(sql, connection)

def execute_sql(sql, connection = connection):
    """Execute a statement on ``connection`` without fetching any result.

    Parameters
    ----------
    sql : str
        Statement text to execute (e.g. ``USE ...`` or ``CREATE TABLE ...``).
    connection :
        Connection whose ``cursor()`` is used; defaults to the notebook-level
        ``connection`` bound when this function was defined.

    Returns
    -------
    None
    """
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
    finally:
        # Always release the cursor, even when the statement fails, so
        # repeated calls do not pile up open cursors on the connection.
        cursor.close()
    
# Point the session at the working role, database and schema, in that order.
for use_statement in (
    f"USE ROLE {ROLE}",
    f"USE database {DATABASE}",
    f"USE SCHEMA {SCHEMA}",
):
    execute_sql(use_statement)

········


### Imputation Logic


In [32]:
# Table holding the claim ids to be costed. Earlier candidate sources are kept
# below (commented out) for reference.
# input_pharm_claim = 'DSVC_ANALYSISGROUP_AA_PROD.ANALYSISGROUP_JANS.PHARMACY'
# input_pharm_claim = 'DSVC_JANSSENSA_PROD.COHORTS_20220405.PHARMACY where cohort_id= 161155'
input_pharm_claim = 'sandbox_komodo.pzakas.janssen_pc_cost_claims_20220405'

# Which imputed-cost table family to read from: 'claim' or 'encounter'.
input_source = 'claim'
# input_source = 'encounter'

# Everything that depends on input_source is resolved in ONE place, so the
# imputed table name and the column names can never come from different
# branches, and an unsupported value fails loudly instead of silently leaving
# names undefined (or stale from a previous kernel run).
if input_source == 'encounter':
    # rx_version is not defined anywhere in this notebook; it has to be set
    # before the encounter source can be used.
    if 'rx_version' not in globals():
        raise NameError(
            "rx_version must be defined before using input_source='encounter'"
        )
    prefix = f"RX_ENCOUNTERS_{rx_version}"
    rx_impute = f"{prefix}_CLAIM_IMPUTED"
    patient_key = 'upk_key2'
    claim_key = 'encounter_key'
    claim_date = 'claim_date'
    ndc_var = 'ndc'
    source_var = 'sources'
elif input_source == 'claim':
    prefix = 'RX_CERTIFIED_CLAIMS'
    rx_impute = f"{prefix}_IMPUTED"
    patient_key = 'upk_key2'
    claim_key = 'claim_id'
    claim_date = 'date_of_service'
    ndc_var = 'ndc11'
    source_var = 'source'
else:
    raise ValueError(
        f"Unsupported input_source {input_source!r}; expected 'claim' or 'encounter'"
    )

# Output tables: the wide intermediate table and the final deliverable.
output_pharm_claim_all = 'PHARMACY_COST_JANS_ALL'
output_pharm_claim = 'JANSSEN_PHARMACY_COST'

In [30]:
%%time
# Build {output_pharm_claim_all}: for every row of the imputed table
# ({rx_impute}) whose claim_id appears as UNHASHED_CLAIM_ID in the input claim
# list, keep the claim id, the NDC column, plan_pay_clean, and the imputed /
# final cost rounded to 2 decimals. This intentionally keeps more columns than
# the final deliverable needs; the next CREATE TABLE narrows it down.
# Expensive step: the recorded run took ~11 min 40 s wall time.
sql = f"""
    CREATE OR REPLACE TABLE {output_pharm_claim_all} AS
    SELECT c.claim_id, {ndc_var}, 
        plan_pay_clean,
        round(cost_impute_adj, 2) as cost_impute_adj,
        round(cost_final, 2) as cost_final
    from {rx_impute} c
    where c.claim_id in (select UNHASHED_CLAIM_ID from {input_pharm_claim})
    ;
"""
execute_sql(sql)

CPU times: user 80.7 ms, sys: 2.86 ms, total: 83.6 ms
Wall time: 11min 40s


In [33]:
%%time
# Build the final deliverable {output_pharm_claim} from the intermediate table:
# keep only rows where plan_pay_clean is NULL and the adjusted imputed cost is
# positive, and expose that imputed cost as PLAN_PAY keyed by UNHASHED_CLAIM_ID.
sql = f"""
    CREATE OR REPLACE TABLE {output_pharm_claim} AS
    SELECT claim_id AS UNHASHED_CLAIM_ID, round(cost_impute_adj, 2) as PLAN_PAY
    from {output_pharm_claim_all}
    where plan_pay_clean is null and cost_impute_adj > 0
    ;
"""
execute_sql(sql)

CPU times: user 2.82 ms, sys: 4.88 ms, total: 7.71 ms
Wall time: 43.4 s


In [15]:
%%time
# Sanity check: preview up to 100 rows of the imputed table ({rx_impute}) with
# the same columns selected for the intermediate cost table. No filtering or
# joining happens here.
sql = f"""
    SELECT c.claim_id, c.{ndc_var}, 
        plan_pay_clean,
        round(cost_impute_adj, 2) as cost_impute_adj,
        round(cost_final, 2) as cost_final
    from {rx_impute} c
    limit 100
    ;
"""
read_sql(sql)

CPU times: user 314 ms, sys: 15.9 ms, total: 330 ms
Wall time: 3.17 s


Unnamed: 0,CLAIM_ID,NDC11,PLAN_PAY_CLEAN,COST_IMPUTE_ADJ,COST_FINAL
0,5b06bec47cf24b26a16ee1304d5dec4fe07cd2a9,60505014200,0.0,6.0,0.0
1,38eee650628dde8ed1eb62c88571f1e43e4efce8,69315011601,,2.8,2.8
2,4850867e7f7fae088771930989bf6281870be091,55111012305,,16.0,16.0
3,ef15a75eff5ebce193e056551c1fdc607bb950a1,00536106505,6.0,3.5,6.0
4,59dc08a4b765ea7f95e07375e6076da09240a829,00574220545,,97.0,97.0
5,700cc608d557410d7917b03b83eea52c2313bfcf,49884012401,,132.0,132.0
6,9e0e199c473216292a76bcd6cfaa5a38e62fa903,21922000909,,19.0,19.0
7,067fa0c2a9c79292e518f7dd18a9ca7badf58fcc,67877022205,6.0,14.0,6.0
8,163354663ef17e354e08a2c46a84cfe9ffbd9c9c,16729000617,12.0,8.0,12.0
9,7520c2755405864e5dd23698aa04fb5360465b9e,68180038909,0.0,36.0,0.0


In [29]:
%%time
# Sanity check: preview up to 100 rows of the input claim list
# ({input_pharm_claim}) to confirm its columns.
sql = f"""
    SELECT *
    from {input_pharm_claim}
    limit 100
    ;
"""
read_sql(sql)

CPU times: user 7.33 ms, sys: 59 µs, total: 7.39 ms
Wall time: 932 ms


Unnamed: 0,UNHASHED_CLAIM_ID
0,edf6fdff1f662f92e9594bd3893a8e4024f0a887
1,91336650c07920d744c1dbacd9d4ccae979247a2
2,ed177c77a2f9b9db04d722780d2df2adaffbc386
3,2c428d119ebb24d3500d34b8f825c6945678a18d
4,e1bd0b6a756dbe93b3dfa152f9536c8803c544ff
5,3f9de133aa506941ef307c74b3f5e0ddd308e160
6,68c09ba5c9d9ec42c4c20892d0961b985b0681e0
7,28f0494eb81e9d4c405648738c2fcb71efcc04c4
8,4d975d0a6e1195d19897ad95d7b9b40b0ebf9da0
9,81692bdaf88047a728abadae5e7bf6125925bb59


In [19]:
%%time
# Sanity check: preview up to 100 claim ids (with their string length) and NDC
# values from the certified pharmacy claims view, for comparison with the ids
# in the input claim list.
# NOTE(review): the recorded output below this cell shows no LENGTH column, so
# it may come from an earlier version of this query -- confirm by re-running.
sql = f"""
    SELECT len(claim_id) as length, claim_id, {ndc_var}
    from MAP_CERTIFIED_CLAIMS.PUBLIC.VIEW_CERTIFIED_CLAIMS_PHARMACY_WITH_JENNER
    limit 100
    ;
"""
read_sql(sql)

CPU times: user 223 ms, sys: 1.34 ms, total: 224 ms
Wall time: 3.46 s


Unnamed: 0,CLAIM_ID,NDC11
0,410fe88be35846a4d80b0141700d8d62cafff676,168014630
1,7c854d2390cf417ce9e5827f064f91b5af428ca3,60505257808
2,7619fee23c56e1463b508c807efe4e95d4b3ad79,591024101
3,47c8cb9b996dd85743b4aa2e9823fd72f49c1bbd,53885024510
4,99bfe21d9698d2a53a6be3ed06a92267f806cd2f,45802049535
5,16e6ccdf2e6ee148dd2491739a7c4c78ae85f7d5,64380073706
6,f245bc8efa83a3497991bab2a6e6f2b33122d52f,57237000830
7,bddee91dfff8637623139d04555e4a22064d87e6,50111078810
8,4184dc38a165ef45953557a3b06e9eef4e974bd7,65862001501
9,b5e55b0dd84c8ecf95d2303a8140423250d42900,43547035211
