# Objectives

- To verify imputed cost for Rx encounters


In [1]:
## Import required libraries 

import snowflake.connector
import getpass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 200)
import plotly.graph_objects as go
pd.set_option("display.max_columns", 100)

In [2]:
import sys
sys.path.append('/home/jovyan/credentials')
import jupytertoolz_credentials as jt

In [14]:
## Snowflake tables 
ROLE = 'ANALYST'
WAREHOUSE = 'XLARGE_WH'
DATABASE = 'SANDBOX_KOMODO'  
SCHEMA = 'AYWEI'

## Table reference
rx_version = '20220511'
rx_enc = f"MAP_ENCOUNTERS.RX_ENCOUNTERS_{rx_version}.RX_ENCOUNTER_LS_GA"
rx_cost = f"SANDBOX_KOMODO.PROJECT_CURRENCY.RX_ENCOUNTERS_{rx_version}_IMPUTED"
mx_version = '20220509'
mx_bene = f"MAP_ENCOUNTERS.MX_ENCOUNTERS_{mx_version}.BENEFICIARY_LS_GA"
ce_table = f"ELIG_KH_{mx_version}"
ce_table_by_year = f"ELIG_KH_BY_YEAR_{mx_version}"

In [26]:
connection = snowflake.connector.connect(user='YWEI', password=getpass.getpass(), account='komodohealth')
connection.cursor().execute('USE ROLE ' + ROLE)
connection.cursor().execute('USE WAREHOUSE ' + WAREHOUSE)

def read_sql(sql, connection = connection):
    df = pd.read_sql(sql, connection)
    return df

def execute_sql(sql, connection = connection):
    #pd.io.sql.execute(sql, connection)
    connection.cursor().execute(sql)
    
execute_sql(f"USE ROLE ANALYST")
execute_sql(f"USE database {DATABASE}")
execute_sql(f"USE SCHEMA {SCHEMA}")

········


### Summarize Rx Cost to compare with MEPS

1. summarize total annual Rx Cost by year, patient age, for payer complete patients from year xxxx to xxxx.
2. summarize MEPS cost by year/age.

In [10]:
%%time
# create continuous enrollment table
def get_sql_split_fun(grace_period = 45):
    sql_split = f"""
        create or replace function SPLIT_RANGES(dates variant)
           returns variant
           language javascript
        as '
        return DATES
                    .sort(function (a, b) {{ return a.start - b.start || a.end - b.end; }})
                    .reduce(function (r, a) {{
                          var last = r[r.length - 1] || [];
                          if (last.start <= a.start && a.start <= last.end + {grace_period}*24*60*60) {{
                                if (last.end < a.end) {{
                                      last.end = a.end;
                                }}
                                return r;
                          }}
                          return r.concat(a);
                    }}, []);
        '
        ;
        """
    return sql_split

def get_sql_ce_by_type_kh(type, grace_period = 45):
    if type == 'mx':
        cov_ind = 'MEDICAL_COVERAGE_INDICATOR'
    elif type == 'rx':
        cov_ind = 'PHARMACY_COVERAGE_INDICATOR'

    sql_ce = f"""    
        with b as (
            SELECT distinct upk_key2, 
            closed_start_date AS start_date_date, 
            date_part('EPOCH_SECOND', to_timestamp(start_date_date)) as start_date, 
            least(closed_end_date, current_date) as end_date_date,
            date_part('EPOCH_SECOND', to_timestamp(end_date_date)) as end_date, 
            max(end_date) over (partition by upk_key2, start_date) as max_end_date, 
            min(start_date) over (partition by upk_key2, end_date) as min_start_date
        FROM {mx_bene}
        WHERE {cov_ind} 
            and closed_start_date IS NOT NULL 
            AND closed_end_date IS NOT NULL 
            AND start_date <= end_date 
            AND start_date <= date_part('EPOCH_SECOND', to_timestamp(current_date)) 
            and closed_indicator 
            qualify max_end_date=end_date and min_start_date=start_date
        ),
        g as (
            select upk_key2, 
                arrayagg(object_construct('start', start_date, 'end', end_date)) as ranges 
                from b
                group by upk_key2
        ), 
        non_overlap as (
            select upk_key2, ranges, SPLIT_RANGES(ranges) as clean_ranges from g
        )
        select upk_key2, 
            dateadd(day, 1, to_date(to_timestamp(clean_range.value:start))) as start_date,
            dateadd(day, 1, to_date(to_timestamp(clean_range.value:end))) as end_date
        from non_overlap, lateral flatten(input => clean_ranges) as clean_range
    """
    return sql_ce

grace_period = 45
sql_split = get_sql_split_fun(grace_period = grace_period)
sql_ce_mx = get_sql_ce_by_type_kh(type = "mx", grace_period = grace_period)
sql_ce_rx = get_sql_ce_by_type_kh(type = "rx", grace_period = grace_period)

sql_ce = f"""  
    create or replace table {ce_table} as
    with mx as ({sql_ce_mx}), rx as ({sql_ce_rx})
    select mx.upk_key2, 
        greatest(mx.start_date, rx.start_date) as start_date,
        least(mx.end_date, rx.end_date) as end_date
        from mx inner join rx
        on mx.upk_key2 = rx.upk_key2
        where mx.start_date <= rx.end_date and mx.end_date >= rx.start_date
    ;
"""
execute_sql(sql_split)
execute_sql(sql_ce)

CPU times: user 122 ms, sys: 7.45 ms, total: 130 ms
Wall time: 18min 43s


In [28]:
%%time
sql_ce = f"""  
    create or replace table {ce_table_by_year} as
    select 2017 as year, upk_key2
    from {ce_table}
    where start_date <= '2017-01-01' and end_date >= '2017-12-31'
    union all
    select 2018 as year, upk_key2
    from {ce_table}
    where start_date <= '2018-01-01' and end_date >= '2018-12-31'
    union all
    select 2019 as year, upk_key2
    from {ce_table}
    where start_date <= '2019-01-01' and end_date >= '2019-12-31'
    ;
"""
execute_sql(sql_ce)

CPU times: user 227 ms, sys: 0 ns, total: 227 ms
Wall time: 44.2 s


In [66]:
%%time
sql = f"""  
    create or replace table {rx_cost}_by_bene_year as
    select ce.upk_key2, year, 
        sum(cost_final) as cost_final, 
        sum(cost_impute_adj) as cost_impute_adj,
        count(*) as n_claim
    from {ce_table_by_year} ce
    left join {rx_cost} rx
    on year(rx.claim_date) = ce.year and rx.upk_key2 = ce.upk_key2
    group by ce.upk_key2, year
    ;
"""
execute_sql(sql)

CPU times: user 250 ms, sys: 0 ns, total: 250 ms
Wall time: 4min 26s


In [67]:
%%time
sql = f"""  
    create or replace table {rx_cost}_by_bene_year_age as
    select rx.upk_key2, year, 
        coalesce(cost_final, 0) as cost_final, 
        coalesce(cost_impute_adj, 0) as cost_impute_adj,
        coalesce(n_claim, 0) as n_claim,
        patient_dob,
        year - year(patient_dob) as age,
        case
            when age <= 10 then '1. 0-10'
            when age <= 20 then '2. 10-20'
            when age <= 30 then '3. 20-30'
            when age <= 40 then '4. 30-40'
            when age <= 50 then '5. 40-50'
            when age <= 60 then '6. 50-60'
            when age <= 70 then '7. 60-70'
            when age <= 80 then '8. 70-80'
            when age > 80 then '9. 80+'
            else 'missing'
        end as age_cat
    from {rx_cost}_by_bene_year rx
    left join {mx_bene} bene
    on rx.upk_key2 = bene.upk_key2
    ;
"""
execute_sql(sql)

CPU times: user 12 ms, sys: 0 ns, total: 12 ms
Wall time: 1min 12s


In [68]:
def get_dist_cts(var, table, filter = 'TRUE', filter_print = None, group_by_list = None):
    if filter_print is None:
        filter_print = filter
    group_by_str = ''
    order_by_str = ''
    group_by_select_str = ''
    if group_by_list is not None:
        group_by = ",".join(group_by_list)
        group_by_select_str = group_by + ','
        group_by_str = 'group by ' + group_by
        order_by_str = 'order by ' + group_by
    return f"""
        select 
            {group_by_select_str}
            '{var}' as var, 
            '{filter_print}' as filter,
            count(*) as n,
            count_if({var} is null) as n_null,
            count_if({var} < 0) as n_neg,
            count_if({var} = 0) as n_zero,
            count_if({var} > 0) as n_pos,
            avg({var}) as mean,
            variance({var}) as variance,
            min({var}) as min,
            PERCENTILE_CONT(0.01) WITHIN GROUP (ORDER BY {var}) as p_01,
            PERCENTILE_CONT(0.05) WITHIN GROUP (ORDER BY {var}) as p_05,
            PERCENTILE_CONT(0.10) WITHIN GROUP (ORDER BY {var}) as p_10,
            PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {var}) as p_25,
            PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {var}) as median,
            PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {var}) as p_75,
            PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {var}) as p_90,
            PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {var}) as p_95,
            PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {var}) as p_99,
            max({var}) as max
        from {table}
        where {filter}
        {group_by_str}
        {order_by_str}
    """

In [69]:
sql = f"""  
    {get_dist_cts('cost_final', f"{rx_cost}_by_bene_year_age", group_by_list = ['year'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,YEAR,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,2017,cost_final,True,373636230,0,0,122504685,251131545,1064.70738,63185030.0,0.0,0.0,0.0,0.0,0.0,37.333333,339.722222,1733.977812,4166.416667,17581.109074,11001560.0
1,2018,cost_final,True,347387678,0,0,115262957,232124721,1086.385891,62915780.0,0.0,0.0,0.0,0.0,0.0,35.0,299.666667,1591.333333,4112.0,18784.888889,6004716.0
2,2019,cost_final,True,329494387,0,0,107116600,222377787,1163.768938,71412320.0,0.0,0.0,0.0,0.0,0.0,36.0,295.156714,1645.0,4418.0,20391.080222,7113644.0


In [70]:
sql = f"""  
    {get_dist_cts('cost_impute_adj', f"{rx_cost}_by_bene_year_age", group_by_list = ['year'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,YEAR,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,2017,cost_impute_adj,True,373636230,0,0,100230516,273405714,1183.759938,68537480.0,0.0,0.0,0.0,0.0,0.0,60.75,413.25,1989.701515,4641.083333,19441.128788,11001560.0
1,2018,cost_impute_adj,True,347387678,0,0,92691562,254696116,1219.284356,70403990.0,0.0,0.0,0.0,0.0,0.0,59.2,372.760317,1869.891941,4709.382341,21023.0,6004716.0
2,2019,cost_impute_adj,True,329494387,0,0,83048836,246445551,1334.708511,82713260.0,0.0,0.0,0.0,0.0,0.0,63.0,381.047619,2011.0,5160.266667,23514.045509,7113644.0


In [71]:
sql = f"""  
    {get_dist_cts('n_claim', f"{rx_cost}_by_bene_year_age", group_by_list = ['year'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,YEAR,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,2017,n_claim,True,373636230,0,0,0,373636230,13.915747,604.99801,1,1.0,1.0,1.0,1.0,4.0,16.0,38.0,60.0,119.0,1755
1,2018,n_claim,True,347387678,0,0,0,347387678,13.696317,599.80333,1,1.0,1.0,1.0,1.0,4.0,15.0,37.0,58.0,118.0,1506
2,2019,n_claim,True,329494387,0,0,0,329494387,14.063843,618.008235,1,1.0,1.0,1.0,1.0,4.0,16.0,38.0,59.0,119.0,2930


In [72]:
sql = f"""  
    {get_dist_cts('cost_final', f"{rx_cost}_by_bene_year_age", group_by_list = ['age_cat'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,AGE_CAT,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,1. 0-10,cost_final,True,165968552,0,0,71096194,94872358,246.886674,18011120.0,0.0,0.0,0.0,0.0,0.0,8.0,81.0,355.933333,794.714646,2938.383333,6004716.0
1,2. 10-20,cost_final,True,178356715,0,0,77300039,101056676,491.468272,46383450.0,0.0,0.0,0.0,0.0,0.0,7.6,138.711111,714.333333,1604.983333,5920.872904,6381068.0
2,3. 20-30,cost_final,True,142312016,0,0,52898883,89413133,684.066084,59255560.0,0.0,0.0,0.0,0.0,0.0,18.2,177.666667,867.0,2017.695833,11230.408883,9033226.0
3,4. 30-40,cost_final,True,134725798,0,0,43387739,91338059,994.249819,62719300.0,0.0,0.0,0.0,0.0,0.0,32.0,242.0,1288.51,3276.589762,19109.031996,5043713.0
4,5. 40-50,cost_final,True,131984704,0,0,37374102,94610602,1400.622002,74581300.0,0.0,0.0,0.0,0.0,0.0,57.5,393.409091,2236.222222,5500.690476,27474.55129,8396428.0
5,6. 50-60,cost_final,True,154516793,0,0,35237680,119279113,1942.874352,96217100.0,0.0,0.0,0.0,0.0,4.25,119.0,727.0,3871.0,8243.0,34836.9,6965014.0
6,7. 60-70,cost_final,True,97197067,0,0,18654415,78542652,2242.437058,119008300.0,0.0,0.0,0.0,0.0,17.0,210.666667,1117.0,4971.0,9374.704667,34385.0,11001560.0
7,8. 70-80,cost_final,True,30647836,0,0,5794394,24853442,2219.233977,103044200.0,0.0,0.0,0.0,0.0,31.4,319.035714,1579.0,5348.0,8994.0,25119.92863,8035088.0
8,9. 80+,cost_final,True,14808814,0,0,3140796,11668018,1869.915055,61224620.0,0.0,0.0,0.0,0.0,17.7,314.285714,1532.138667,4951.0,7625.609524,17958.91577,5706694.0


In [73]:
sql = f"""  
    {get_dist_cts('cost_impute_adj', f"{rx_cost}_by_bene_year_age", group_by_list = ['age_cat'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,AGE_CAT,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,1. 0-10,cost_impute_adj,True,165968552,0,0,55721509,110247043,299.86688,21383990.0,0.0,0.0,0.0,0.0,0.0,18.0,122.133333,455.235589,990.5,3324.991398,6004716.0
1,2. 10-20,cost_impute_adj,True,178356715,0,0,64211275,114145440,571.950955,53792800.0,0.0,0.0,0.0,0.0,0.0,16.366667,186.666667,882.0,1877.0,6938.337004,6381068.0
2,3. 20-30,cost_impute_adj,True,142312016,0,0,44320073,97991943,777.734218,65999170.0,0.0,0.0,0.0,0.0,0.0,32.0,215.0,1004.633333,2307.188755,13067.518573,9033226.0
3,4. 30-40,cost_impute_adj,True,134725798,0,0,35170168,99555630,1125.921338,70716010.0,0.0,0.0,0.0,0.0,0.0,50.666667,299.8,1522.0,3833.596143,21895.0,5043193.0
4,5. 40-50,cost_impute_adj,True,131984704,0,0,29508835,102475869,1573.539939,82806490.0,0.0,0.0,0.0,0.0,4.285714,86.357143,481.166667,2633.5,6268.933333,30794.142857,8396425.0
5,6. 50-60,cost_impute_adj,True,154516793,0,0,26088976,128427817,2183.555445,107265300.0,0.0,0.0,0.0,0.0,20.0,172.190476,897.92381,4503.5,9275.08381,37960.184267,6965014.0
6,7. 60-70,cost_impute_adj,True,97197067,0,0,13649348,83547719,2495.467748,131510400.0,0.0,0.0,0.0,0.0,46.666667,283.5,1344.690476,5522.827778,10274.357143,37708.309524,11001560.0
7,8. 70-80,cost_impute_adj,True,30647836,0,0,4687518,25960318,2410.021623,110660300.0,0.0,0.0,0.0,0.0,67.0,392.166667,1794.519345,5754.9,9579.757143,26773.212323,8038303.0
8,9. 80+,cost_impute_adj,True,14808814,0,0,2613212,12195602,2025.435842,65358530.0,0.0,0.0,0.0,0.0,53.952381,388.8,1734.0,5293.0,8061.25,18690.333333,5706694.0


In [74]:
sql = f"""  
    {get_dist_cts('n_claim', f"{rx_cost}_by_bene_year_age", group_by_list = ['age_cat'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,AGE_CAT,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,1. 0-10,n_claim,True,165968552,0,0,0,165968552,4.704642,72.104386,1,1.0,1.0,1.0,1.0,2.0,5.0,12.0,18.0,38.0,2855
1,2. 10-20,n_claim,True,178356715,0,0,0,178356715,6.0984,121.7284,1,1.0,1.0,1.0,1.0,2.0,7.0,16.0,25.0,52.0,2812
2,3. 20-30,n_claim,True,142312016,0,0,0,142312016,8.467317,196.55983,1,1.0,1.0,1.0,1.0,3.0,11.0,22.0,33.0,66.0,2930
3,4. 30-40,n_claim,True,134725798,0,0,0,134725798,11.942481,400.67624,1,1.0,1.0,1.0,1.0,4.0,14.0,32.0,48.0,96.0,2809
4,5. 40-50,n_claim,True,131984704,0,0,0,131984704,16.936615,720.178061,1,1.0,1.0,1.0,1.0,7.0,21.0,45.0,67.0,129.0,2802
5,6. 50-60,n_claim,True,154516793,0,0,0,154516793,23.447899,1082.644179,1,1.0,1.0,1.0,2.0,12.0,31.0,61.0,87.0,155.0,2858
6,7. 60-70,n_claim,True,97197067,0,0,0,97197067,27.687294,1256.740684,1,1.0,1.0,1.0,4.0,16.0,38.0,69.0,96.0,165.0,1955
7,8. 70-80,n_claim,True,30647836,0,0,0,30647836,29.196876,1307.277016,1,1.0,1.0,1.0,4.0,18.0,40.0,70.0,96.0,167.0,2093
8,9. 80+,n_claim,True,14808814,0,0,0,14808814,31.361646,1540.107459,1,1.0,1.0,1.0,4.0,20.0,43.0,76.0,104.0,178.0,1008


### Summarize MEPS Cost

In [None]:
def get_meps_cost(year, type):
    yy = str(year)[-2:]
    db_schema = 'SANDBOX_KOMODO.MEPS'
    tbl = f"MEPS_{year}_{type}"
    master = f"MEPS_{year}_MASTER"
    var = f"{type}XP{yy}X"
#     MEPS_2017_RX
#     RXXP17X

    sql = f"""
        select m{yy}.dupersid, m{yy}.agelast as age, year,
            case
                when age <= 10 then '1. 0-10'
                when age <= 20 then '2. 10-20'
                when age <= 30 then '3. 20-30'
                when age <= 40 then '4. 30-40'
                when age <= 50 then '5. 40-50'
                when age <= 60 then '6. 50-60'
                when age <= 70 then '7. 60-70'
                when age <= 80 then '8. 70-80'
                when age > 80 then '9. 80+'
                else 'missing'
            end as age_cat,
            coalesce({type}_cost, 0) as {type}_cost,
            coalesce(n_claim, 0) as n_claim
        from 
        (
            select {year} as year, dupersid, sum({var}) as {type}_cost, count(*) as n_claim
            from {db_schema}.{tbl}
            group by year, dupersid
        ) c{yy}
        left join {db_schema}.{master} m{yy}
        on c{yy}.dupersid = m{yy}.dupersid
    """
    return sql

sql = f"""
    create or replace table RX_MEPS as
    {get_meps_cost(2017, 'RX')}
    union all
    {get_meps_cost(2018, 'RX')}
    union all
    {get_meps_cost(2019, 'RX')}
    ;
"""
execute_sql(sql)

In [49]:
sql = f"""  
    {get_dist_cts('rx_cost', f"RX_MEPS", group_by_list = ['year'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,YEAR,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,2017,rx_cost,True,18107,0,0,17,18090,2122.67002,55599050.0,0.0,2.35,6.006,12.42,52.47,283.6,1377.45,4589.394,8872.385,32135.146,213189.74
1,2018,rx_cost,True,18145,0,0,22,18123,2363.70698,68294310.0,0.0,2.7744,8.0,15.984,65.15,329.23,1564.31,5324.11,10105.09,32766.2284,337880.71
2,2019,rx_cost,True,17166,0,0,10,17156,2400.506174,71611200.0,0.0,2.5665,7.85,15.145,65.0,309.84,1467.1525,5504.91,10118.7275,34082.1355,234970.14


In [62]:
sql = f"""  
    {get_dist_cts('n_claim', f"RX_MEPS", group_by_list = ['year'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,YEAR,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,2017,n_claim,True,18107,0,0,0,18107,17.147346,587.806632,1,1.0,1.0,1.0,3.0,8.0,22.0,44.0,64.0,117.0,431
1,2018,n_claim,True,18145,0,0,0,18145,17.617305,576.228206,1,1.0,1.0,1.0,3.0,9.0,22.0,45.0,64.0,119.0,275
2,2019,n_claim,True,17166,0,0,0,17166,17.075906,534.957186,1,1.0,1.0,1.0,3.0,9.0,22.0,42.5,61.0,109.0,385


In [50]:
sql = f"""  
    {get_dist_cts('rx_cost', f"RX_MEPS", group_by_list = ['age_cat'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,AGE_CAT,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,1. 0-10,rx_cost,True,5358,0,0,12,5346,497.189,5194030.0,0.0,1.5457,4.0,5.9,14.295,48.985,234.045,913.311,2229.7225,7530.9047,63674.63
1,2. 10-20,rx_cost,True,4838,0,0,4,4834,1021.896618,24992580.0,0.0,1.8622,4.2255,7.747,24.3675,110.58,595.7225,2323.316,3965.2215,11963.7082,234970.14
2,3. 20-30,rx_cost,True,4434,0,0,7,4427,1103.989402,51163660.0,0.0,1.7199,4.443,8.06,24.99,107.505,439.3025,1679.121,3636.927,17100.1095,337880.71
3,4. 30-40,rx_cost,True,5827,0,0,5,5822,1630.473659,52936030.0,0.0,2.0408,5.56,10.0,32.69,138.89,649.055,2799.464,6950.052,27761.0546,234342.12
4,5. 40-50,rx_cost,True,6409,0,0,9,6400,2237.040165,76830880.0,0.0,2.6108,7.86,15.378,56.51,230.85,1113.6,4414.824,9356.922,37039.6196,184524.92
5,6. 50-60,rx_cost,True,8499,0,0,5,8494,3183.235707,102982300.0,0.0,3.8384,12.449,26.272,100.755,437.46,2078.84,7267.022,13830.799,45332.9772,219097.24
6,7. 60-70,rx_cost,True,8987,0,0,3,8984,3446.579718,95091600.0,0.0,5.0,24.0,53.59,186.49,693.78,2672.645,8084.298,14379.554,47763.1962,213189.74
7,8. 70-80,rx_cost,True,5995,0,0,2,5993,3206.622309,59482470.0,0.0,7.9904,42.46,95.142,299.135,940.62,3321.36,7562.584,11942.579,31873.7084,179969.55
8,9. 80+,rx_cost,True,3071,0,0,2,3069,2910.576789,50352440.0,0.0,6.425,50.385,109.79,314.105,961.54,3165.325,7085.69,10636.25,23084.917,192244.46


In [63]:
sql = f"""  
    {get_dist_cts('n_claim', f"RX_MEPS", group_by_list = ['age_cat'])}
    ;
"""
df = read_sql(sql)
display(df)

Unnamed: 0,AGE_CAT,VAR,FILTER,N,N_NULL,N_NEG,N_ZERO,N_POS,MEAN,VARIANCE,MIN,P_01,P_05,P_10,P_25,MEDIAN,P_75,P_90,P_95,P_99,MAX
0,1. 0-10,n_claim,True,5358,0,0,0,5358,4.681411,56.215264,1,1.0,1.0,1.0,1.0,2.0,5.0,11.0,18.0,40.0,98
1,2. 10-20,n_claim,True,4838,0,0,0,4838,7.241835,110.726701,1,1.0,1.0,1.0,1.0,3.0,9.0,18.0,27.0,51.0,141
2,3. 20-30,n_claim,True,4434,0,0,0,4434,7.652007,140.005425,1,1.0,1.0,1.0,2.0,4.0,9.0,18.0,26.0,53.67,226
3,4. 30-40,n_claim,True,5827,0,0,0,5827,11.510039,315.722307,1,1.0,1.0,1.0,2.0,5.0,13.0,28.0,44.0,93.739,229
4,5. 40-50,n_claim,True,6409,0,0,0,6409,16.737088,585.221909,1,1.0,1.0,1.0,3.0,8.0,20.0,43.0,62.0,121.0,242
5,6. 50-60,n_claim,True,8499,0,0,0,8499,22.657607,856.392049,1,1.0,1.0,2.0,5.0,12.0,29.0,56.0,80.0,139.0,385
6,7. 60-70,n_claim,True,8987,0,0,0,8987,25.011127,784.253604,1,1.0,2.0,3.0,7.0,16.0,32.0,57.0,80.0,135.0,431
7,8. 70-80,n_claim,True,5995,0,0,0,5995,25.51593,597.24178,1,1.0,2.0,4.0,9.0,18.0,34.0,55.0,74.0,119.0,226
8,9. 80+,n_claim,True,3071,0,0,0,3071,27.542494,610.429383,1,1.0,3.0,5.0,10.0,21.0,37.0,59.0,78.0,122.0,175
