In [30]:
from pathlib import Path
from pandas import DataFrame

from utils.gem import GEMTransform
import pandas as pd

dtypes = dict(icd10cm='string', icd9cm='string', flags='string', approximate=bool, no_map=bool, combination=bool,
              scenario='int8', choice_list='int8')
data_dir = Path('../../mimic4-pg/data/')

# Data Loading

In [31]:
icd10cmtoicd9gem = pd.read_csv(data_dir / 'gem/icd10cmtoicd9gem.csv', dtype=dtypes, index_col='icd10cm')
icd10pcstoicd9gem = pd.read_csv(data_dir / 'gem/icd10pcstoicd9gem.csv', dtype=dtypes, index_col='icd10cm')
icd10cmtoicd9gem.index.nunique(), icd10pcstoicd9gem.index.nunique()

(69832, 71920)

In [32]:
icd9toicd10cmgem = pd.read_csv(data_dir / 'gem/icd9toicd10cmgem.csv', dtype=dtypes, index_col='icd9cm')
icd9toicd10pcsgem = pd.read_csv(data_dir / 'gem/icd9toicd10pcsgem.csv', dtype=dtypes, index_col='icd9cm')
icd9toicd10cmgem.index.nunique(), icd9toicd10pcsgem.index.nunique()

(14567, 3878)

In [33]:
dia_codes = pd.read_csv(data_dir / 'mimic-iv-2.2/hosp/diagnoses_icd.csv.gz',
                        dtype={'icd_version': int, 'subject_id': int, 'icd_code': str},
                        usecols=('icd_version', 'subject_id', 'icd_code', 'hadm_id'))
pcs_codes = pd.read_csv(data_dir / 'mimic-iv-2.2/hosp/procedures_icd.csv.gz',
                        dtype={'icd_version': int, 'subject_id': int, 'icd_code': str},
                        usecols=('icd_version', 'subject_id', 'icd_code', 'hadm_id'))

In [34]:
# icu_stays = pd.read_csv(data_dir / 'mimic-iv-2.2/icu/icustays.csv.gz', usecols=['hadm_id', 'subject_id'])
# dia_codes = pd.merge(dia_codes, icu_stays, on=['hadm_id', 'subject_id'])
# pcs_codes = pd.merge(pcs_codes, icu_stays, on=['hadm_id', 'subject_id'])

In [35]:
# dia_desc = pd.read_csv(f'../../mimic4-pg/data/mimic-iv-2.2/hosp/d_icd_diagnoses.csv.gz',
#                        dtype={'icd_version': int, 'subject_id': int, 'icd_code': str}, index_col='icd_code')
# pro_desc = pd.read_csv(f'../../mimic4-pg/data/mimic-iv-2.2/hosp/d_icd_procedures.csv.gz',
#                        dtype={'icd_version': int, 'subject_id': int, 'icd_code': str}, index_col='icd_code')

# Mapping Types for each Code

In [36]:
def split_single_scenario_to_ident_approx_comb(df: pd.DataFrame, icd_col: str, suffix: str):
    comb = df['combination'].groupby(icd_col, dropna=False).max().rename('comb' + suffix)
    approx = (df['approximate'].groupby(icd_col, dropna=False).max() & ~comb).rename('approx' + suffix)
    identical = (~approx & ~comb).rename('ident' + suffix)
    assert not any(approx & comb & identical)
    assert all(approx | comb | identical)
    if identical.sum() != 0:
        return pd.concat([approx, comb, identical], axis=1)
    else:
        return pd.concat([approx, comb], axis=1)


BREAKDOWN_TYPES = ['ident', 'approx', 'comb', 'approx_alts', 'comb_alts', 'multi_scenario', 'no_map']
SOLVABLE_TYPES = ['ident', 'approx', 'comb']
UNSOLVABLE_TYPES = ['approx_alts', 'comb_alts', 'multi_scenario', 'no_map']


def break_down_per_code(df: pd.DataFrame):
    icd_col = df.index.name

    mappings = df[~df['no_map']]

    mask_ms = (mappings.groupby(icd_col, dropna=False).scenario.nunique() > 1).rename('multi_scenario')
    mappings_ss = mappings.loc[~mask_ms]

    options_per_choice_list = mappings_ss.groupby([icd_col, 'choice_list'], dropna=False).size()
    mask_ss_wo_alt = options_per_choice_list.groupby(icd_col, dropna=False).max() == 1

    mappings_ss_wo_alt = mappings_ss.loc[mask_ss_wo_alt]
    mappings_ss_w_alt = mappings_ss.loc[~mask_ss_wo_alt]

    codes_no_map = df['no_map'][df['no_map']]
    codes_ms = mask_ms[mask_ms]

    result = pd.concat([
        codes_no_map,
        codes_ms,
        split_single_scenario_to_ident_approx_comb(mappings_ss_wo_alt, icd_col, ''),
        split_single_scenario_to_ident_approx_comb(mappings_ss_w_alt, icd_col, '_alts')
    ], axis=1).fillna(False)

    assert all(result.any(axis=1))
    assert not any(
        result.no_map & result.multi_scenario & result.approx & result.comb & result.ident & result.approx_alts & result.comb_alts)

    return result[BREAKDOWN_TYPES]


def shares(df: pd.DataFrame):
    return pd.concat([
        df.sum().rename('total'),
        round(df.mean() * 100, 1).rename('freq')
    ], axis=1)


def break_down_per_code_with_simplified(df: pd.DataFrame):
    source_col = df.index.name
    target_col = 'icd10cm' if 'icd9cm' == source_col else 'icd9cm'
    df_4 = GEMTransform.simplify_mappings(df, 4, source_col, target_col)
    df_3 = GEMTransform.simplify_mappings(df, 3, source_col, target_col)

    bd = break_down_per_code(df)
    bd_4 = break_down_per_code(df_4)
    bd_3 = break_down_per_code(df_3)
    return {5: bd, 4: bd_4, 3: bd_3}


def shares_of_all_breakdowns(dfs: dict[int, pd.DataFrame]):
    return pd.concat((shares(df).add_suffix(f'_{num}') for num, df in dfs.items()), axis=1)

In [37]:
icd10cmtoicd9gem_bds = break_down_per_code_with_simplified(icd10cmtoicd9gem)
shares_of_all_breakdowns(icd10cmtoicd9gem_bds).loc[UNSOLVABLE_TYPES].sum()

total_5    4483.0
freq_5        6.4
total_4    2452.0
freq_4        3.5
total_3    1269.0
freq_3        1.9
dtype: float64

In [38]:
icd10pcstoicd9gem_bds = break_down_per_code_with_simplified(icd10pcstoicd9gem)
shares_of_all_breakdowns(icd10pcstoicd9gem_bds).loc[UNSOLVABLE_TYPES].sum()

total_5    5639.0
freq_5        7.8
total_4    5639.0
freq_4        7.8
total_3    3631.0
freq_3        5.1
dtype: float64

In [39]:
icd9toicd10cmgem_bds = break_down_per_code_with_simplified(icd9toicd10cmgem)
shares_of_all_breakdowns(icd9toicd10cmgem_bds).loc[UNSOLVABLE_TYPES].sum()

total_5    3298.0
freq_5       22.7
total_4    2104.0
freq_4       14.5
total_3     936.0
freq_3        6.4
dtype: float64

In [57]:
icd9toicd10pcsgem_bds = break_down_per_code_with_simplified(icd9toicd10pcsgem)
shares_of_all_breakdowns(icd9toicd10pcsgem_bds).loc[UNSOLVABLE_TYPES].sum()

total_5    3460.0
freq_5       89.2
total_4    2679.0
freq_4       69.1
total_3    1301.0
freq_3       33.5
dtype: float64

# Applied to Admissions

In [120]:
def compute_per_adm_probs_with_simpl(admissions: DataFrame, bd_dfs: dict[int, DataFrame]):
    dfs = {}
    all_shares = []
    for num, bd_df in bd_dfs.items():
        dfs[num] = pd.merge(admissions[['hadm_id', 'icd_code']], bd_df, right_index=True, left_on='icd_code')
        per_adm_shares, df = compute_per_adm_probs(dfs[num])
        all_shares.append(per_adm_shares)

    return pd.concat((per_adm_shares.add_suffix(f'_{num}') for per_adm_shares, num in zip(all_shares, bd_dfs.keys())),
                     axis=1), dfs


def compute_per_adm_probs(bd_df_with_admissions: DataFrame):
    group_by_admission = bd_df_with_admissions.groupby('hadm_id')[BREAKDOWN_TYPES]
    num_per_type = group_by_admission.sum()
    num_per_type['unsolvable'] = num_per_type[UNSOLVABLE_TYPES].sum(axis=1)
    ex1, ex2 = (num_per_type == 1).mean().rename('exact1'), (num_per_type == 2).mean().rename('exact2')
    least3 = (num_per_type >= 3).mean().rename('least3')
    least1 = (ex1 + ex2 + least3).rename('least1')
    return pd.concat([
        # round(num_per_type.sum(), 0).rename('total'),
        # bd_df_with_admissions[BREAKDOWN_TYPES].sum().rename('total').astype(int),
        least1, (ex2 + least3).rename('least2'),
        ex1, ex2, least3,
        (1 - least1).rename('none'),
        round(num_per_type.mean(), 1).rename('avg')
    ], axis=1), bd_df_with_admissions


def least_columns(df: DataFrame):
    return round(df[[col for col in df.columns if 'least' in col or 'avg' in col]] * 100, 1)


def exact_columns(df: DataFrame):
    return df[[col for col in df.columns if 'exact' in col or 'least3' in col or 'none' in col]]

In [122]:
dia10_codes = dia_codes[dia_codes.icd_version == 10]
icd10cmtoicd9adm_probs, icd10cmtoicd9adm_bd_dfs = compute_per_adm_probs_with_simpl(dia10_codes, icd10cmtoicd9gem_bds)
least_columns(icd10cmtoicd9adm_probs)

Unnamed: 0,least1_5,least2_5,least3_5,avg_5,least1_4,least2_4,least3_4,avg_4,least1_3,least2_3,least3_3,avg_3
ident,93.2,80.8,68.0,480.0,93.2,80.8,68.0,480.0,93.2,80.8,68.0,480.0
approx,96.5,88.4,77.7,530.0,97.8,90.9,80.8,560.0,98.6,93.5,86.0,670.0
comb,24.9,7.8,2.5,40.0,25.0,7.8,2.5,40.0,25.0,7.9,2.5,40.0
approx_alts,84.5,55.8,30.5,190.0,77.8,46.6,22.8,160.0,47.9,14.4,3.3,70.0
comb_alts,0.3,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.1,0.0,0.0,0.0
multi_scenario,17.3,1.3,0.0,20.0,17.3,1.3,0.0,20.0,3.9,0.0,0.0,0.0
no_map,6.7,0.6,0.4,10.0,6.7,0.6,0.4,10.0,6.7,0.6,0.4,10.0
unsolvable,85.5,60.1,37.1,220.0,81.2,51.8,29.3,190.0,53.5,18.1,5.1,80.0


In [123]:
pcs10_codes = pcs_codes[pcs_codes.icd_version == 10]
icd10pcstoicd9adm_probs, icd10pcstoicd9adm_bd_dfs = compute_per_adm_probs_with_simpl(pcs10_codes,
                                                                                     icd10pcstoicd9gem_bds)
least_columns(icd10pcstoicd9adm_probs)

Unnamed: 0,least1_5,least2_5,least3_5,avg_5,least1_4,least2_4,least3_4,avg_4,least1_3,least2_3,least3_3,avg_3
ident,2.5,0.1,0.0,0.0,2.5,0.1,0.0,0.0,2.5,0.1,0.0,0.0
approx,81.9,44.1,24.5,190.0,81.9,44.1,24.5,190.0,89.7,53.1,32.2,230.0
comb,5.1,1.0,0.2,10.0,5.1,1.0,0.2,10.0,6.7,1.4,0.4,10.0
approx_alts,53.0,22.3,8.5,90.0,53.0,22.3,8.5,90.0,35.8,8.4,2.6,50.0
comb_alts,2.2,0.3,0.1,0.0,2.2,0.3,0.1,0.0,0.5,0.0,0.0,0.0
multi_scenario,1.5,0.4,0.2,0.0,1.5,0.4,0.2,0.0,1.5,0.4,0.2,0.0
no_map,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
unsolvable,55.3,23.6,9.1,90.0,55.3,23.6,9.1,90.0,37.2,9.1,2.9,50.0


In [124]:
dia9_codes = dia_codes[dia_codes.icd_version == 9]
icd9toicd10cmadm_probs, icd9toicd10cmadm_bd_dfs = compute_per_adm_probs_with_simpl(dia9_codes, icd9toicd10cmgem_bds)
least_columns(icd9toicd10cmadm_probs)

Unnamed: 0,least1_5,least2_5,least3_5,avg_5,least1_4,least2_4,least3_4,avg_4,least1_3,least2_3,least3_3,avg_3
ident,88.1,70.6,55.6,370.0,88.1,70.6,55.6,370.0,88.1,70.6,55.6,370.0
approx,94.2,81.1,66.7,440.0,96.1,85.2,72.5,490.0,97.2,88.0,77.6,570.0
comb,2.1,0.7,0.2,0.0,2.3,0.9,0.4,0.0,2.7,0.9,0.4,0.0
approx_alts,75.9,47.5,26.1,170.0,64.5,33.1,14.6,120.0,30.2,6.2,1.1,40.0
comb_alts,1.1,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.1,0.0,0.0,0.0
multi_scenario,0.2,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
no_map,10.9,1.7,0.3,10.0,10.9,1.7,0.3,10.0,10.9,1.7,0.3,10.0
unsolvable,76.9,49.9,29.0,190.0,66.6,36.4,17.7,140.0,36.2,10.5,2.9,50.0


In [125]:
pcs9_codes = pcs_codes[pcs_codes.icd_version == 9]
icd9toicd10pcsadm_probs, icd9toicd10pcsadm_bd_dfs = compute_per_adm_probs_with_simpl(pcs9_codes,
                                                                                     icd9toicd10pcsgem_bds)
least_columns(icd9toicd10pcsadm_probs)

Unnamed: 0,least1_5,least2_5,least3_5,avg_5,least1_4,least2_4,least3_4,avg_4,least1_3,least2_3,least3_3,avg_3
ident,2.3,0.1,0.0,0.0,2.3,0.1,0.0,0.0,2.4,0.1,0.0,0.0
approx,28.4,7.8,1.4,40.0,52.9,20.9,10.9,90.0,76.7,36.6,18.8,150.0
comb,1.1,0.5,0.0,0.0,1.5,0.5,0.0,0.0,4.5,0.7,0.0,10.0
approx_alts,89.8,51.1,28.6,210.0,76.6,38.0,18.5,160.0,58.3,21.8,8.9,100.0
comb_alts,8.8,0.7,0.1,10.0,8.4,0.7,0.1,10.0,6.3,0.5,0.1,10.0
multi_scenario,2.9,0.2,0.0,0.0,2.9,0.2,0.0,0.0,1.8,0.1,0.0,0.0
no_map,8.4,5.2,4.5,20.0,8.4,5.2,4.5,20.0,8.4,5.2,4.5,20.0
unsolvable,92.6,55.5,32.7,240.0,80.1,42.9,23.6,190.0,63.5,28.0,14.6,130.0


In [46]:
def top_10_unsolvable(df: DataFrame, gem_df: DataFrame):
    any_unsolvable = df[UNSOLVABLE_TYPES].max(axis=1)
    vcs = df[any_unsolvable].icd_code.value_counts()
    return pd.merge(
        pd.concat([vcs, round(vcs / vcs.sum() * 100, 2).rename('rel')], axis=1),
        gem_df, left_index=True, right_index=True
    )

In [47]:
top_10_unsolvable(icd10cmtoicd9adm_bd_dfs[3], icd10cmtoicd9gem_bds[3])

Unnamed: 0,count,rel,ident,approx,comb,approx_alts,comb_alts,multi_scenario,no_map
F329,28787,23.92,False,False,False,True,False,False,False
I2510,25398,21.10,False,False,False,True,False,False,False
J449,11250,9.35,False,False,False,True,False,False,False
G92,4988,4.14,False,False,False,True,False,False,False
Z8249,4383,3.64,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...
Y742,1,0.00,False,False,False,False,False,False,True
R99,1,0.00,False,False,False,True,False,False,False
R899,1,0.00,False,False,False,True,False,False,False
L02619,1,0.00,False,False,False,True,False,False,False


In [48]:
top_10_unsolvable(icd10pcstoicd9adm_bd_dfs[3], icd10pcstoicd9gem_bds[3])

Unnamed: 0,count,rel,ident,approx,comb,approx_alts,comb_alts,multi_scenario,no_map
3E0G76Z,4871,13.11,False,False,False,True,False,False,False
0DJ08ZZ,3219,8.67,False,False,False,True,False,False,False
3E04305,2306,6.21,False,False,False,True,False,False,False
0KQM0ZZ,1649,4.44,False,False,False,True,False,False,False
0DJD8ZZ,1577,4.25,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...
0DJ0XZZ,1,0.00,False,False,False,True,False,False,False
06BR4ZZ,1,0.00,False,False,False,True,False,False,False
0SBL0ZZ,1,0.00,False,False,False,True,False,False,False
01Q30ZZ,1,0.00,False,False,False,True,False,False,False


In [49]:
top_10_unsolvable(icd9toicd10cmadm_bd_dfs[3], icd9toicd10cmgem_bds[3])

Unnamed: 0,count,rel,ident,approx,comb,approx_alts,comb_alts,multi_scenario,no_map
3572,8925,6.35,False,False,False,True,False,False,False
42789,8864,6.30,False,False,False,True,False,False,False
78791,7856,5.59,False,False,False,True,False,False,False
71590,7335,5.21,False,False,False,True,False,False,False
7840,5000,3.55,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...
E9480,1,0.00,False,False,False,False,False,False,True
8973,1,0.00,False,False,False,True,False,False,False
E9409,1,0.00,False,False,False,False,False,False,True
E9427,1,0.00,False,False,False,False,False,False,True


In [50]:
top_10_unsolvable(icd9toicd10pcsadm_bd_dfs[3], icd9toicd10pcsgem_bds[3])

Unnamed: 0,count,rel,ident,approx,comb,approx_alts,comb_alts,multi_scenario,no_map
3893,13928,7.12,False,False,False,True,False,False,False
3897,9843,5.03,False,False,False,False,True,False,False
8856,9043,4.62,False,False,False,True,False,False,False
0040,7158,3.66,False,False,False,False,False,False,True
5491,6245,3.19,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...
6499,1,0.00,False,False,False,True,False,False,False
8033,1,0.00,False,False,False,True,False,False,False
8699,1,0.00,False,False,False,True,False,False,False
2061,1,0.00,False,False,False,False,False,False,True


In [51]:
icd9toicd10pcsgem.loc['3897']

Unnamed: 0_level_0,icd10cm,flags,approximate,no_map,combination,scenario,choice_list
icd9cm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3897,05H533Z,10111,True,False,True,1,1
3897,05H633Z,10111,True,False,True,1,1
3897,05HM33Z,10111,True,False,True,1,1
3897,05HN33Z,10111,True,False,True,1,1
3897,05HP33Z,10111,True,False,True,1,1
3897,05HQ33Z,10111,True,False,True,1,1
3897,06HM33Z,10111,True,False,True,1,1
3897,06HN33Z,10111,True,False,True,1,1
3897,4A02X4A,10112,True,False,True,1,2
3897,B5130ZA,10112,True,False,True,1,2


In [52]:
icd9toicd10cmgem.loc['2724']

Unnamed: 0_level_0,icd10cm,flags,approximate,no_map,combination,scenario,choice_list
icd9cm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2724,E784,10000,True,False,False,0,0
2724,E785,10000,True,False,False,0,0


In [129]:
icd_shares = {
    "dia": {
        10: 41.8,
        9: 58.2,
    },
    'pro': {
        10: 33.3,
        9: 66.7
    }
}


def calculate_relative_shares(df: DataFrame, code_type: str, code_version: int):
    return round(df * icd_shares[code_type][code_version] / 100, 1)

In [137]:
calculate_relative_shares(exact_columns(icd10cmtoicd9adm_probs), 'dia', 10)

Unnamed: 0,exact1_5,exact2_5,least3_5,none_5,exact1_4,exact2_4,least3_4,none_4,exact1_3,exact2_3,least3_3,none_3
ident,5.2,5.3,28.4,2.9,5.2,5.3,28.4,2.9,5.2,5.3,28.4,2.9
approx,3.3,4.5,32.5,1.5,2.9,4.2,33.8,0.9,2.1,3.2,35.9,0.6
comb,7.1,2.2,1.0,31.4,7.2,2.2,1.0,31.4,7.2,2.2,1.0,31.3
approx_alts,12.0,10.6,12.7,6.5,13.0,10.0,9.5,9.3,14.0,4.6,1.4,21.8
comb_alts,0.1,0.0,0.0,41.7,0.1,0.0,0.0,41.7,0.1,0.0,0.0,41.7
multi_scenario,6.7,0.5,0.0,34.6,6.7,0.5,0.0,34.6,1.6,0.0,0.0,40.2
no_map,2.6,0.1,0.2,39.0,2.6,0.1,0.2,39.0,2.6,0.1,0.2,39.0
unsolvable,10.6,9.6,15.5,6.1,12.3,9.4,12.2,7.9,14.8,5.4,2.1,19.5


In [138]:
calculate_relative_shares(exact_columns(icd10pcstoicd9adm_probs), 'pro', 10)

Unnamed: 0,exact1_5,exact2_5,least3_5,none_5,exact1_4,exact2_4,least3_4,none_4,exact1_3,exact2_3,least3_3,none_3
ident,0.8,0.0,0.0,32.5,0.8,0.0,0.0,32.5,0.8,0.0,0.0,32.5
approx,12.6,6.5,8.2,6.0,12.6,6.5,8.2,6.0,12.2,7.0,10.7,3.4
comb,1.4,0.2,0.1,31.6,1.4,0.2,0.1,31.6,1.8,0.3,0.1,31.1
approx_alts,10.2,4.6,2.8,15.7,10.2,4.6,2.8,15.7,9.1,1.9,0.9,21.4
comb_alts,0.6,0.1,0.0,32.6,0.6,0.1,0.0,32.6,0.1,0.0,0.0,33.1
multi_scenario,0.3,0.1,0.1,32.8,0.3,0.1,0.1,32.8,0.3,0.1,0.1,32.8
no_map,0.0,0.0,0.0,33.3,0.0,0.0,0.0,33.3,0.0,0.0,0.0,33.3
unsolvable,10.5,4.8,3.0,14.9,10.5,4.8,3.0,14.9,9.4,2.1,1.0,20.9


In [139]:
calculate_relative_shares(exact_columns(icd9toicd10cmadm_probs), 'dia', 9)

Unnamed: 0,exact1_5,exact2_5,least3_5,none_5,exact1_4,exact2_4,least3_4,none_4,exact1_3,exact2_3,least3_3,none_3
ident,10.2,8.8,32.3,6.9,10.2,8.8,32.3,6.9,10.2,8.8,32.3,6.9
approx,7.7,8.3,38.8,3.4,6.4,7.4,42.2,2.3,5.3,6.1,45.2,1.6
comb,0.8,0.3,0.1,57.0,0.9,0.3,0.2,56.8,1.0,0.3,0.2,56.6
approx_alts,16.5,12.4,15.2,14.0,18.3,10.8,8.5,20.7,14.0,2.9,0.6,40.6
comb_alts,0.6,0.0,0.0,57.6,0.3,0.0,0.0,57.9,0.1,0.0,0.0,58.1
multi_scenario,0.1,0.0,0.0,58.1,0.0,0.0,0.0,58.1,0.0,0.0,0.0,58.2
no_map,5.4,0.8,0.2,51.9,5.4,0.8,0.2,51.9,5.4,0.8,0.2,51.9
unsolvable,15.8,12.1,16.9,13.4,17.6,10.9,10.3,19.5,15.0,4.4,1.7,37.1


In [140]:
calculate_relative_shares(exact_columns(icd9toicd10pcsadm_probs), 'pro', 9)

Unnamed: 0,exact1_5,exact2_5,least3_5,none_5,exact1_4,exact2_4,least3_4,none_4,exact1_3,exact2_3,least3_3,none_3
ident,1.5,0.0,0.0,65.2,1.5,0.0,0.0,65.2,1.6,0.0,0.0,65.1
approx,13.8,4.3,0.9,47.7,21.3,6.6,7.3,31.4,26.8,11.8,12.6,15.5
comb,0.4,0.3,0.0,66.0,0.6,0.3,0.0,65.7,2.6,0.4,0.0,63.7
approx_alts,25.8,15.0,19.1,6.8,25.7,13.0,12.3,15.6,24.4,8.6,5.9,27.8
comb_alts,5.4,0.4,0.1,60.8,5.2,0.4,0.1,61.1,3.9,0.3,0.0,62.5
multi_scenario,1.8,0.1,0.0,64.7,1.8,0.1,0.0,64.7,1.1,0.1,0.0,65.5
no_map,2.1,0.4,3.0,61.1,2.1,0.4,3.0,61.1,2.1,0.4,3.0,61.1
unsolvable,24.7,15.3,21.8,4.9,24.8,12.8,15.7,13.3,23.7,9.0,9.7,24.3
