In [25]:
## importing the libraries
import pandas as pd
import numpy as np
import os
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

## Display options: show up to 500 columns and 500 rows when a frame is rendered
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

## Warning filters: silence UserWarning and FutureWarning for the whole session
import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
# Setting this option to None switches off pandas' chained-assignment check
# (the SettingWithCopyWarning), so assignments into sliced frames pass silently.
pd.options.mode.chained_assignment = None

In [26]:
## Load the data
master_data = pd.read_csv(r"../data/processed_data/master_data_for_modelling.csv")
# The workbook path previously started with "C:", which made it a Windows
# drive-relative path; it now uses the same relative "../data" root as every
# other path in this notebook so it resolves on any platform.
df_features = pd.read_excel(r"../data/processed_data/all_features_for_modelling.xlsx")
# Model input: everything except the two leading columns of master_data
# (presumably the BeneID / HCC identifier columns -- confirm against the CSV).
# .copy() makes `data` independent of master_data, because later cells assign
# new columns into `data` while still reading the untouched master_data.
data = master_data.iloc[:, 2:].copy()

In [27]:
## preview the first rows of the loaded master data
master_data.head()

Unnamed: 0,BeneID,HCC,InscClaimAmtReimbursed,DeductibleAmtPaid,flag_ip,Gender,Race,RenalDiseaseIndicator,NoOfMonths_PartACov,NoOfMonths_PartBCov,ChronicCond_Alzheimer,ChronicCond_Heartfailure,ChronicCond_KidneyDisease,ChronicCond_Cancer,ChronicCond_ObstrPulmonary,ChronicCond_Depression,ChronicCond_Diabetes,ChronicCond_IschemicHeart,ChronicCond_Osteoporasis,ChronicCond_rheumatoidarthritis,ChronicCond_stroke,IPAnnualReimbursementAmt,IPAnnualDeductibleAmt,OPAnnualReimbursementAmt,OPAnnualDeductibleAmt,Age,Aged,claim_duration_num,community_institutional,Disability_condn,Benefits,medicaid_flag,Disease_intraction_DIABETES_HF,Disease_intraction_HF_CHR_LUNG,Disease_intraction_HF_KIDNEY,Disease_intraction_CHR_LUNG_CARD_RESP_FAIL,Disease_intraction_HF_HCC238,Disease_intraction_gSubUseDisorder_gPsych_,DISABLED_HF,DISABLED_ULCER_,DISABLED_CANCER,DISABLED_NEURO,DISABLED_CHR_LUNG,ACS_TOT_POP_WT_ZC,ACS_TOT_WORKER_HH_ZC,ACS_TOT_HH_ZC,WORKER_PER_HH,ACS_PCT_FEMALE_ZC,ACS_PCT_MALE_ZC,MALE_TO_FEMALE_RATIO,ACS_PCT_HH_LIMIT_ENGLISH_ZC,ACS_PCT_AGE_ABOVE65_ZC,ACS_PCT_AGE_ABOVE80_ZC,ACS_PCT_HH_NO_COMP_DEV_ZC,ACS_PCT_HH_SMARTPHONE_ZC,ACS_PCT_HH_PC_ZC,ACS_PCT_HH_NO_INTERNET_ZC,PCT_HAS_INTERNET,ACS_MEDIAN_HH_INC_ZC,ACS_PCT_INC50_ABOVE65_ZC,ACS_PCT_HEALTH_INC_BELOW137_ZC,ACS_PCT_HEALTH_INC_138_199_ZC,ACS_PCT_HEALTH_INC_200_399_ZC,ACS_PCT_HEALTH_INC_ABOVE400_ZC,ACS_PER_CAPITA_INC_ZC,ACS_PCT_COLLEGE_ASSOCIATE_DGR_ZC,ACS_PCT_BACHELOR_DGR_ZC,ACS_PCT_HS_GRADUATE_ZC,ACS_PCT_LT_HS_ZC,ACS_PCT_POSTHS_ED_ZC,ACS_PCT_HH_ABOVE65_ZC,ACS_PCT_HH_ALONE_ABOVE65_ZC,CEN_POPDENSITY_ZC,ACS_PCT_HU_NO_VEH_ZC,ACS_PCT_PUBL_TRANSIT_ZC,ACS_PCT_TAXICAB_2WORK_ZC,AVG_DIST_TO_MEDI_CARE,ACS_PCT_MEDICARE_ONLY_ZC,PCT_ANY_OTHER_INSUR,ACS_PCT_UNINSURED_ZC,disability,other_disease_mean_hcc_factor
0,BENE100001,238,10,0.0,out,0,0,0,12,12,0,1,1,1,1,1,1,0,0,1,0,0,0,2530,540,71.0,1,0,Community,Non_disabled,NonDual,0,0,0,0,0,0,0,0,0,0,0,0,25026.0,15495.0,13311.0,1.0,50.99,49.01,0.961169,7.12,12.64,4.02,5.89,88.64,87.75,5.66,94.34,96787.0,1.52,15.5,4.33,16.55,63.62,91461.0,10.28,43.84,9.95,7.56,82.49,19.61,13.25,40274.24,84.82,53.51,7.63,0.82,3.74,74.39,2.57,1,0.299
1,BENE100001,238,10,0.0,out,0,0,0,12,12,0,1,1,1,1,1,1,0,0,1,0,0,0,2530,540,70.0,1,0,Community,Non_disabled,NonDual,0,0,0,0,0,0,0,0,0,0,0,0,15608.0,7097.0,6171.0,1.0,46.71,53.29,1.140869,2.19,20.71,4.7,6.42,83.28,85.58,6.68,93.32,82842.0,3.39,8.23,7.21,22.53,62.03,42002.0,27.73,18.68,27.89,9.32,62.79,36.91,16.04,962.43,4.1,4.67,2.16,6.015,5.45,76.29,2.51,1,0.299
2,BENE100170,238,20,0.0,out,1,0,0,12,12,1,0,1,1,1,0,0,0,0,1,0,0,0,640,350,62.0,0,0,Community,Non_originally_disabled,FBDual,1,0,0,0,0,1,0,1,0,0,0,0,42140.0,21131.0,16265.0,1.0,52.24,47.76,0.914242,2.45,18.24,5.96,6.19,81.92,84.25,6.69,93.31,86155.0,1.93,10.8,5.32,25.48,58.41,43076.0,28.96,19.78,22.33,6.76,70.91,34.65,11.97,1303.57,4.45,5.58,0.92,7.1075,4.61,74.23,3.12,1,0.537
3,BENE100170,238,20,0.0,out,1,0,0,12,12,1,0,1,1,1,0,0,0,0,1,0,0,0,640,350,62.0,0,0,Community,Non_originally_disabled,NonDual,0,0,0,0,0,1,0,1,0,0,0,0,15870.0,5376.0,5962.0,1.0,47.07,52.93,1.124495,0.82,18.06,5.27,11.41,73.06,73.53,13.08,86.92,53915.0,1.81,28.87,8.23,33.02,29.88,25745.0,31.03,10.89,33.01,16.9,50.1,31.77,13.54,155.17,13.35,0.06,1.86,26.0025,4.01,52.4,6.99,1,0.442
4,BENE100170,226,20,0.0,out,1,0,0,12,12,1,0,1,1,1,0,0,0,0,1,0,0,0,640,350,62.0,0,0,Community,Non_originally_disabled,NonDual,0,0,0,0,0,1,0,1,0,0,0,0,29416.0,14084.0,11006.0,1.0,50.99,49.01,0.961169,0.64,16.31,3.86,8.82,79.91,78.9,9.82,90.18,68949.0,1.17,15.1,8.77,29.91,46.22,35337.0,32.32,17.34,30.12,8.11,61.76,32.5,13.16,1910.62,11.08,4.19,1.13,5.21875,3.18,63.61,3.8,1,0.296


In [28]:
## columns that must be treated as categories (and therefore one-hot encoded) rather than as numbers
categorical_cols = [
    'flag_ip',
    'Gender',
    'Race',
    'RenalDiseaseIndicator',
    'ChronicCond_Alzheimer',
    'ChronicCond_Heartfailure',
    'ChronicCond_KidneyDisease',
    'ChronicCond_Cancer',
    'ChronicCond_ObstrPulmonary',
    'ChronicCond_Depression',
    'ChronicCond_Diabetes',
    'ChronicCond_IschemicHeart',
    'ChronicCond_Osteoporasis',
    'ChronicCond_rheumatoidarthritis',
    'ChronicCond_stroke',
    'Aged',
    'community_institutional',
    'Disability_condn',
    'Benefits',
    'medicaid_flag',
    'Disease_intraction_DIABETES_HF',
    'Disease_intraction_HF_CHR_LUNG',
    'Disease_intraction_HF_KIDNEY',
    'Disease_intraction_CHR_LUNG_CARD_RESP_FAIL',
    'Disease_intraction_HF_HCC238',
    'DISABLED_HF',
    'DISABLED_ULCER_',
    'DISABLED_CANCER',
    'DISABLED_CHR_LUNG',
    'disability',
]

# Cast to str so that select_dtypes(include=['object']) picks these columns up later.
# NOTE: astype(str) turns missing values into the literal string 'nan', which then
# becomes its own category (e.g. the Benefits_nan column in the encoded output).
categorical_as_text = data[categorical_cols].astype(str)
data[categorical_cols] = categorical_as_text

In [29]:
## keep only the features flagged (== 1) for cosine similarity in the all_features_for_modelling workbook
flagged_for_similarity = df_features["selection_for_cosinesimilarity"] == 1
selected_features = list(df_features.loc[flagged_for_similarity, "Features"])
data = data[selected_features]

In [30]:
## One-hot encoding and standard scaling

# Resolve the two column groups once, so the transformer and the output column
# names are guaranteed to be built from exactly the same lists.
numeric_columns = data.select_dtypes(include=['int', 'float']).columns.tolist()
categorical_columns = data.select_dtypes(include=['object']).columns.tolist()

# Define the column transformer: scale the numeric columns, one-hot encode the
# text columns (dropping the first level of each to avoid a redundant column)
ct = make_column_transformer(
    (StandardScaler(), numeric_columns),
    (OneHotEncoder(drop="first"), categorical_columns))

# Define the pipeline
pipeline = make_pipeline(ct)

# Transform the data
transformed_data = pipeline.fit_transform(data)

# ColumnTransformer returns a scipy sparse matrix when the stacked result is
# sparse enough; the DataFrame built below needs a dense array.
if hasattr(transformed_data, "toarray"):
    transformed_data = transformed_data.toarray()

# Get the names of the encoded columns.
# get_feature_names() was removed from recent scikit-learn releases in favour of
# get_feature_names_out(); prefer the new name and fall back for old installs.
fitted_encoder = pipeline.named_steps['columntransformer'].transformers_[1][1]
if hasattr(fitted_encoder, "get_feature_names_out"):
    encoded_columns = fitted_encoder.get_feature_names_out(categorical_columns)
else:
    encoded_columns = fitted_encoder.get_feature_names(categorical_columns)

# Combine the column names in the order the transformer stacks its outputs:
# scaled numeric columns first, one-hot columns second
column_names = numeric_columns + list(encoded_columns)

# Convert the transformed data to a DataFrame with column names
transformed_data_df = pd.DataFrame(transformed_data, columns=column_names)

In [31]:
## replace every missing value with the median of its own column
column_medians = transformed_data_df.median()
transformed_data_df = transformed_data_df.fillna(column_medians)

## put the Beneficiary Id in front of the transformed features (concat along columns)
bene_id_column = master_data[['BeneID']]
transformed_data_df = pd.concat([bene_id_column, transformed_data_df], axis=1)

In [32]:
## preview the first rows of the transformed data: BeneID followed by the scaled
## numeric features and the one-hot encoded categorical features
transformed_data_df.head()

Unnamed: 0,BeneID,InscClaimAmtReimbursed,Age,claim_duration_num,ACS_TOT_POP_WT_ZC,ACS_TOT_WORKER_HH_ZC,ACS_PCT_AGE_ABOVE65_ZC,ACS_PCT_HH_NO_INTERNET_ZC,ACS_MEDIAN_HH_INC_ZC,ACS_PCT_LT_HS_ZC,CEN_POPDENSITY_ZC,ACS_PCT_HU_NO_VEH_ZC,ACS_PCT_PUBL_TRANSIT_ZC,ACS_PCT_MEDICARE_ONLY_ZC,PCT_ANY_OTHER_INSUR,ACS_PCT_UNINSURED_ZC,other_disease_mean_hcc_factor,flag_ip_out,Gender_1,Race_1,Race_3,Race_5,RenalDiseaseIndicator_Y,ChronicCond_Alzheimer_1,ChronicCond_Heartfailure_1,ChronicCond_KidneyDisease_1,ChronicCond_Cancer_1,ChronicCond_ObstrPulmonary_1,ChronicCond_Depression_1,ChronicCond_Diabetes_1,ChronicCond_IschemicHeart_1,ChronicCond_Osteoporasis_1,ChronicCond_rheumatoidarthritis_1,ChronicCond_stroke_1,community_institutional_Institutional,Disability_condn_Non_originally_disabled,Disability_condn_Originally_disabled,Benefits_NonDual,Benefits_PBDual,Benefits_nan,medicaid_flag_1,Disease_intraction_DIABETES_HF_1,Disease_intraction_HF_CHR_LUNG_1,Disease_intraction_HF_KIDNEY_1,Disease_intraction_CHR_LUNG_CARD_RESP_FAIL_1,Disease_intraction_HF_HCC238_1,DISABLED_HF_1,DISABLED_CANCER_1,DISABLED_CHR_LUNG_1
0,BENE100001,-0.43577,-0.250163,-0.524643,0.008125,0.402713,-0.662303,-0.876816,1.144899,-0.470977,3.397172,6.628004,4.599712,-0.718548,0.937951,-1.040763,-0.297207,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,BENE100001,-0.43577,-0.326179,-0.524643,-0.482862,-0.491354,0.517964,-0.764611,0.63753,-0.237866,-0.249347,-0.406342,0.038686,-0.134706,1.066527,-1.050773,-0.297207,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,BENE100170,-0.434488,-0.93431,-0.524643,0.900328,1.002733,0.156717,-0.763511,0.758069,-0.576937,-0.217703,-0.375841,0.123668,-0.421506,0.927124,-0.94901,0.108717,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
3,BENE100170,-0.434488,-0.93431,-0.524643,-0.469203,-0.674576,0.130392,-0.060582,-0.414937,0.766103,-0.324227,0.399749,-0.391829,-0.626363,-0.550142,-0.303397,-0.053312,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,BENE100170,-0.434488,-0.93431,-0.524643,0.236989,0.252495,-0.125552,-0.419197,0.132053,-0.39813,-0.161394,0.20193,-0.00614,-0.909748,0.208454,-0.835569,-0.302324,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0


## Cosine similarity

#### User-user similarity

In [33]:
## building the train and test samples

## collapse the claim-level rows to one row per beneficiary: the median of every feature per BeneID
user_vars = transformed_data_df.groupby('BeneID').median()

# Both frames are full slices of user_vars, so train and test each hold every
# beneficiary and every feature.
every_row = slice(None)
every_column = slice(None)
df_user_vars_train = user_vars.iloc[every_row, every_column]
df_user_vars_test = user_vars.iloc[every_row, every_column]

In [34]:
## defining the class of recommendation model
class CosineRecommendation:
    """Nearest-neighbour recommender over a truncated-SVD approximation of a feature matrix.

    ``fit`` factorises the training frame with ``scipy.sparse.linalg.svds`` and
    keeps the factors plus the frame's index as labels. ``predict`` rebuilds the
    low-rank matrix from those factors and ranks the training rows by cosine
    similarity against each row of the data to score.

    Parameters
    ----------
    rank : int, default 50
        Number of singular triplets requested from ``svds``. ``svds`` rejects a
        rank that is not smaller than both dimensions of the input matrix.

    Attributes
    ----------
    U, S, VT : numpy.ndarray or None
        SVD factors of the fitted matrix; ``None`` until ``fit`` is called.
    mape : float or None
        Result of the last ``SVD_conversion_MAPE`` call.
    labels : pandas.Index or None
        Index of the fitted frame, used to name the similar rows.
    """

    # Column layout of every frame returned by predict().
    _RESULT_COLUMNS = ['Target_Beneficiary', 'Similar_Beneficiary', 'Similarity_score']

    def __init__(self, rank=50):
        self.rank = rank
        self.U = None
        self.S = None
        self.VT = None
        self.mape = None
        self.labels = None

    def fit(self, df):
        """Factorise ``df`` (rows = entities, columns = numeric features) with rank ``self.rank``.

        The index of ``df`` is stored as the labels reported by ``predict``.
        """
        # storing index in labels
        self.labels = df.index

        # Perform matrix factorization using SVD with rank k.
        # svds needs a floating-point matrix, so cast explicitly.
        self.U, self.S, self.VT = svds(csr_matrix(np.asarray(df, dtype=float)), k=self.rank)

    def SVD_conversion_MAPE(self, df):
        """Measure how well a rank-``self.rank`` SVD reconstructs ``df``.

        Returns ``100 * sum(|actual - approx|) / sum(|actual|)`` and also stores
        it in ``self.mape``; the value is ``nan`` when ``df`` is all zeros.
        The error is normalised by the absolute magnitude of the data because
        the signed sum of standardised features can be close to zero or
        negative, which would make the percentage meaningless.

        The factors computed here are local: a model that was already fitted
        keeps its ``U``/``S``/``VT`` (and therefore stays consistent with its
        ``labels``).
        """
        actual = np.asarray(df, dtype=float)

        # Factorise with the same rank that fit() would use
        left, singular_values, right = svds(csr_matrix(actual), k=self.rank)

        # (left * singular_values) scales each column of `left`, i.e. left @ diag(s)
        approximation = (left * singular_values) @ right

        # calculate mean absolute percentage error
        absolute_error = np.abs(actual - approximation).sum()
        magnitude = np.abs(actual).sum()
        if magnitude != 0:
            self.mape = float(100.0 * absolute_error / magnitude)
        else:
            self.mape = float(np.nan)
        return self.mape

    def predict(self, new_data, top_recommendations = 5):
        '''
        Rank the fitted rows by cosine similarity to every row of ``new_data``.

            new_data: pandas DataFrame to score; it must have the same feature
                      columns as the frame given to fit(). Its index supplies
                      the Target_Beneficiary values.
            top_recommendations: if strictly between 0 and 1 it is a similarity
                                 threshold: every fitted row whose similarity is
                                 greater than it is returned.
                                 otherwise it is a count: the
                                 ``top_recommendations`` most similar fitted
                                 rows are returned.

                                 default = 5

        Returns a DataFrame with columns Target_Beneficiary,
        Similar_Beneficiary and Similarity_score, ordered by target (in
        ``new_data`` order) and by descending similarity within each target.
        A fitted row carrying the same label as the target is never reported
        as its own neighbour.
        '''
        if self.U is None or self.labels is None:
            raise RuntimeError("CosineRecommendation.predict() was called before fit()")

        # Reconstruct the low-rank approximation of the fitted feature matrix
        low_rank = (self.U * self.S) @ self.VT

        # similarity[r, c] = cosine similarity of fitted row r and new_data row c,
        # clipped to [-1, 1] to remove floating-point overshoot
        similarity = np.clip(cosine_similarity(low_rank, new_data), -1, 1)

        fitted_labels = np.asarray(self.labels)
        threshold_mode = 0 < top_recommendations < 1

        targets, similar, scores = [], [], []
        for position, target in enumerate(new_data.index):
            column = similarity[:, position]

            # never recommend a row to itself
            keep = fitted_labels != target
            if threshold_mode:
                keep &= column > top_recommendations

            candidates = np.flatnonzero(keep)
            # stable sort so equal scores keep the fitted-row order
            ranked = candidates[np.argsort(-column[candidates], kind="stable")]
            if not threshold_mode:
                ranked = ranked[:int(top_recommendations)]

            targets.extend([target] * ranked.size)
            similar.extend(fitted_labels[ranked].tolist())
            scores.extend(column[ranked].tolist())

        # Build the result once instead of appending frame by frame
        # (DataFrame.append is quadratic and no longer exists in pandas >= 2.0).
        return pd.DataFrame(
            {
                'Target_Beneficiary': targets,
                'Similar_Beneficiary': similar,
                'Similarity_score': scores,
            },
            columns=self._RESULT_COLUMNS,
        )


In [35]:
%%time
# Create an instance of the CosineRecommendation class with rank k
recommendation_model = CosineRecommendation(rank=45)

## checking the MAPE of SVD factorization
recommendation_model.SVD_conversion_MAPE(df_user_vars_train.head(10000))

# Fit the model on the input dataframe
recommendation_model.fit(df_user_vars_train)

CPU times: total: 2.14 s
Wall time: 1.1 s


In [36]:
%%time
# defining the top recommendations
top_recommendation = 0.8  ## select all simialr beneficiaries whoes similairty score is greater than 0.8
number_of_test_split = 500  ## split the test data into 500 sub datasets. it will reduce the memroy usage

# splitting the test set into 
splitted_datasets = np.array_split(df_user_vars_test, number_of_test_split, axis=0)

# creating empty pataframe if all recommendation
all_recommendation = pd.DataFrame(columns = ['Target_Beneficiary', 'Similar_Beneficiary', 'Similarity_score'])

# finding predictions of all spillted test sets
for i in range(len(splitted_datasets)):
    recommendations = (recommendation_model.predict(splitted_datasets[i], top_recommendations = top_recommendation))
    all_recommendation = all_recommendation.append(recommendations, ignore_index = True)

CPU times: total: 2h 45min 17s
Wall time: 2h 42min 11s


In [37]:
## creating the Target_HCC and Simialr_HCC columns

# One row per beneficiary holding the set of distinct HCC values found in master_data
df_BeneID_HCC_list = (
    master_data[["BeneID", "HCC"]]
    .groupby("BeneID")
    .agg(lambda x: set(x.to_list()))
    .reset_index()
)

# Same left join twice: first keyed on the target id, then on the similar id.
# After each join the helper BeneID column is dropped and HCC is renamed.
for id_column, hcc_column in (("Target_Beneficiary", "Target_HCC"),
                              ("Similar_Beneficiary", "Simialr_HCC")):
    all_recommendation = (
        all_recommendation
        .merge(df_BeneID_HCC_list, how="left", right_on="BeneID", left_on=id_column)
        .drop(columns=["BeneID"])
        .rename(columns={"HCC": hcc_column})
    )

In [38]:
## HCC values shared by the target and the similar beneficiary
hcc_pairs = zip(all_recommendation['Target_HCC'], all_recommendation['Simialr_HCC'])
all_recommendation['common_HCC'] = [
    target_hccs.intersection(similar_hccs) for target_hccs, similar_hccs in hcc_pairs
]
## number of shared HCC values per row
all_recommendation['common_HCC_count'] = all_recommendation['common_HCC'].map(len)

In [39]:
## keep the best matches of every target beneficiary: most shared HCCs first, then highest similarity score
top_matches_per_target = 5

# groupby(...).head(n) keeps the first n rows of each group in the sorted order
# and leaves the columns untouched. This replaces groupby.apply(lambda x: x.head()),
# which ran a Python callback per beneficiary and relied on apply re-inserting
# the grouping column (behaviour that differs between pandas versions).
all_recommendation = (
    all_recommendation
    .sort_values(
        ["Target_Beneficiary", "common_HCC_count", "Similarity_score"],
        ascending=[True, False, False],
    )
    .groupby("Target_Beneficiary", sort=False)
    .head(top_matches_per_target)
    .drop(columns=["common_HCC_count"])  # only needed for the ordering above
    .reset_index(drop=True)
)

In [40]:
## per row, the HCCs to recommend: those of the similar beneficiary that the target does not have (a set difference)
similar_hcc_sets = all_recommendation["Simialr_HCC"]
target_hcc_sets = all_recommendation["Target_HCC"]
all_recommendation["individual_recommended_HCC"] = similar_hcc_sets.sub(target_hcc_sets)

In [41]:
## pooling the individual recommended HCCs of every target beneficiary into one list per beneficiary
## (duplicates are kept here so a later step can rank the HCCs by how often they occur)
from collections import OrderedDict

# One row per (target, recommended HCC); an empty set explodes to NaN and is dropped
exploded_hcc = (
    all_recommendation[["Target_Beneficiary", "individual_recommended_HCC"]]
    .explode(column="individual_recommended_HCC")
    .dropna()
)

# Back to one row per target, with all of its recommended HCCs gathered in a list
df_combined_recommedned = (
    exploded_hcc
    .groupby("Target_Beneficiary")
    .agg(lambda x: x.to_list())
    .rename(columns={"individual_recommended_HCC": "combined_recommedned_HCC"})
    .reset_index()
)
# Function to sort and remove duplicates from the list
def sort_and_remove_duplicates(lst):
    """Return the distinct items of ``lst``, most frequent first.

    Items that occur equally often keep the order in which they first appear
    in ``lst``. The items must be hashable. An empty list gives an empty list.
    """
    # Local import keeps this cell self-contained.
    from collections import Counter

    # Counting once is linear; the previous sorted(lst, key=lst.count) rescanned
    # the whole list for every element. most_common() orders by descending count
    # and keeps first-seen order among ties, which is the same ordering.
    return [item for item, _ in Counter(lst).most_common()]

# Rank and de-duplicate the pooled HCC list of every target beneficiary
ranked_hcc_lists = df_combined_recommedned['combined_recommedned_HCC'].apply(sort_and_remove_duplicates)
df_combined_recommedned['combined_recommedned_HCC'] = ranked_hcc_lists

# Attach the pooled list to every recommendation row, then drop the rows whose
# target has no pooled list at all
all_recommendation = (
    all_recommendation
    .merge(df_combined_recommedned, how='left', on='Target_Beneficiary')
    .dropna(subset='combined_recommedned_HCC')
    .reset_index(drop=True)
)

In [42]:
# Maximum number of recommended HCCs kept per target beneficiary
trimmed_recommendations = 5


def trim_list(x):
    """Return the leading ``trimmed_recommendations`` items of ``x``."""
    return x[:trimmed_recommendations]


# Store the shortened copy of every combined list in its own column
combined_hcc_lists = all_recommendation['combined_recommedned_HCC']
all_recommendation['combined_recommedned_HCC_trimmed'] = combined_hcc_lists.apply(trim_list)

In [43]:
## attaching beneficiary-level attributes to every Target_Beneficiary so the RAF score can be calculated
raf_columns = [
    'BeneID', 'Gender', 'Age', 'Aged', 'community_institutional',
    'Disability_condn', 'disability',
    'Benefits', 'medicaid_flag', 'Disease_intraction_DIABETES_HF',
    'Disease_intraction_HF_CHR_LUNG', 'Disease_intraction_HF_KIDNEY',
    'Disease_intraction_CHR_LUNG_CARD_RESP_FAIL',
    'Disease_intraction_HF_HCC238', 'DISABLED_HF', 'DISABLED_ULCER_',
    'DISABLED_CANCER', 'DISABLED_CHR_LUNG',
]

# master_data can hold several rows per beneficiary; keep the first row of each BeneID
RAF_data_beneficiary_level = (
    master_data[raf_columns]
    .drop_duplicates(subset=["BeneID"])
    .reset_index(drop=True)
)

# Left join on the target id; the BeneID join key is redundant afterwards
all_recommendation = pd.merge(
    all_recommendation,
    RAF_data_beneficiary_level,
    how='left',
    left_on='Target_Beneficiary',
    right_on='BeneID',
).drop(columns=["BeneID"])

In [44]:
## writing the final recommendations to CSV, without the row index
recommendations_csv_path = "../data/processed_data/all_recommendations_0.8_top5.csv"
all_recommendation.to_csv(recommendations_csv_path, index=False)

In [45]:
# serialising the fitted recommendation model to disk with pickle
import pickle

model_pickle_path = '../models/Recommendations_model.pkl'
with open(model_pickle_path, 'wb') as model_file:
    pickle.dump(recommendation_model, model_file)

In [46]:
## preview the first 25 rows of the final recommendations
all_recommendation.head(25)

Unnamed: 0,Target_Beneficiary,Similar_Beneficiary,Similarity_score,Target_HCC,Simialr_HCC,common_HCC,individual_recommended_HCC,combined_recommedned_HCC,combined_recommedned_HCC_trimmed,Gender,Age,Aged,community_institutional,Disability_condn,disability,Benefits,medicaid_flag,Disease_intraction_DIABETES_HF,Disease_intraction_HF_CHR_LUNG,Disease_intraction_HF_KIDNEY,Disease_intraction_CHR_LUNG_CARD_RESP_FAIL,Disease_intraction_HF_HCC238,DISABLED_HF,DISABLED_ULCER_,DISABLED_CANCER,DISABLED_CHR_LUNG
0,BENE100001,BENE131042,0.877307,"{152, 238}","{226, 228, 37, 326, 201, 238, 213, 22, 152}","{152, 238}","{226, 228, 37, 326, 201, 213, 22}","[326, 226, 228, 37, 201, 213, 22, 229, 38, 18,...","[326, 226, 228, 37, 201]",0,71.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
1,BENE100001,BENE78294,0.856599,"{152, 238}","{229, 38, 238, 18, 152, 23, 280, 409, 93}","{152, 238}","{229, 38, 18, 23, 280, 409, 93}","[326, 226, 228, 37, 201, 213, 22, 229, 38, 18,...","[326, 226, 228, 37, 201]",0,71.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
2,BENE100001,BENE102051,0.819628,"{152, 238}","{2, 36, 326, 238, 152, 127}","{152, 238}","{2, 36, 326, 127}","[326, 226, 228, 37, 201, 213, 22, 229, 38, 18,...","[326, 226, 228, 37, 201]",0,71.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
3,BENE100001,BENE99314,0.816088,"{152, 238}","{152, 153, 238, 63}","{152, 238}","{153, 63}","[326, 226, 228, 37, 201, 213, 22, 229, 38, 18,...","[326, 226, 228, 37, 201]",0,71.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
4,BENE100001,BENE79348,0.971816,"{152, 238}",{238},{238},{},"[326, 226, 228, 37, 201, 213, 22, 229, 38, 18,...","[326, 226, 228, 37, 201]",0,71.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
5,BENE100002,BENE59415,0.833895,"{38, 264, 137, 201, 238, 19, 151, 280, 127}","{226, 2, 37, 38, 109, 238, 463, 78, 401, 19, 2...","{19, 38, 238}","{226, 2, 37, 109, 78, 463, 401, 213, 282, 253}","[37, 109, 463, 226, 2, 78, 401, 213, 282, 253,...","[37, 109, 463, 226, 2]",1,72.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
6,BENE100002,BENE22625,0.813009,"{38, 264, 137, 201, 238, 19, 151, 280, 127}","{37, 38, 229, 298, 402, 21, 151, 280}","{280, 38, 151}","{229, 37, 298, 402, 21}","[37, 109, 463, 226, 2, 78, 401, 213, 282, 253,...","[37, 109, 463, 226, 2]",1,72.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
7,BENE100002,BENE120886,0.826233,"{38, 264, 137, 201, 238, 19, 151, 280, 127}","{280, 109, 238, 463}","{280, 238}","{109, 463}","[37, 109, 463, 226, 2, 78, 401, 213, 282, 253,...","[37, 109, 463, 226, 2]",1,72.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
8,BENE100002,BENE101706,0.81941,"{38, 264, 137, 201, 238, 19, 151, 280, 127}","{37, 38, 127}","{38, 127}",{37},"[37, 109, 463, 226, 2, 78, 401, 213, 282, 253,...","[37, 109, 463, 226, 2]",1,72.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0
9,BENE100002,BENE127425,0.807181,"{38, 264, 137, 201, 238, 19, 151, 280, 127}","{23, 38, 238}","{38, 238}",{23},"[37, 109, 463, 226, 2, 78, 401, 213, 282, 253,...","[37, 109, 463, 226, 2]",1,72.0,1,Community,Non_disabled,1,NonDual,0,0,0,0,0,0,0,0,0,0


In [47]:
## number of rows in the final recommendations
len(all_recommendation)

528641