In [1]:
import pandas as pd
import glob
import os
import numpy as np

## Health data for Days in Hospital, women

* Prepare the data in chunks of seven files. Each chunk of seven files covers the same combination of categories (age, ethnicity, education); the files within a chunk differ only in which municipalities they contain.
* Remove duplicate rows, keeping the first occurrence. Every file contains a row for municipality_code = 0, which is the total Denmark average; since this row is only needed once per chunk, the duplicates are dropped.
* Create new columns (gender, age, ethnicity, education) whose values depend on the file chunk.

In [2]:
# Collect every women's CSV export and read each one into its own DataFrame.
path = "Health_DaysInHospital_Women/"
# Order the files oldest-modified first. The following cell consumes this list
# in consecutive groups of seven, so this ordering decides which demographic
# labels each file receives.
all_files_health_days_w = sorted(glob.glob(path + "/*.csv"), key=os.path.getmtime)

# Lazily parse each file (first row is the header, no column used as index) ...
li_mapper_health_days_w = (
    pd.read_csv(csv_path, index_col=None, header=0)
    for csv_path in all_files_health_days_w
)
# ... then materialise the frames, in the same order as the sorted paths.
file_list_health_days_w = [*li_mapper_health_days_w]

In [3]:
# Build one labelled DataFrame per demographic group of women.
#
# The loaded file list is consumed in consecutive groups of seven files. Group k
# (0-based) uses files [7k, 7k + 7) and is labelled with the k-th combination of
# age x ethnicity x education below (age varies slowest, education fastest).
files_per_group_w = 7
women_groups = [
    (age_band, ethnic_group, education_level)
    for age_band in ('18-29', '30-39', '40-49', '50-59', '60-65')
    for ethnic_group in ('Danish', 'Immigrants and descendants', 'Non-western immigrants and descendants')
    for education_level in ('No higher education', 'Higher education')
]

# Fail early with a clear message instead of silently building truncated groups
# (a short slice) or hitting an opaque "No objects to concatenate" error.
files_needed_w = files_per_group_w * len(women_groups)
if len(file_list_health_days_w) < files_needed_w:
    raise ValueError(
        "Expected at least {} women's CSV files ({} per group), found {}".format(
            files_needed_w, files_per_group_w, len(file_list_health_days_w)
        )
    )

health_days_w_chunks = []
for group_no, (age_band, ethnic_group, education_level) in enumerate(women_groups):
    first_file = group_no * files_per_group_w
    group_frame = (
        pd.concat(
            file_list_health_days_w[first_file:first_file + files_per_group_w],
            axis=0,
            ignore_index=True,
        )
        # Rows repeated across the seven files of a group are kept once.
        .drop_duplicates(keep='first')
        # Label every row with the demographic group this chunk represents.
        .assign(
            gender='Women',
            age=age_band,
            ethnicity=ethnic_group,
            education=education_level,
        )
    )
    health_days_w_chunks.append(group_frame)

# Keep the individual per-group names that the next cell concatenates.
(
    healthDaysW_1, healthDaysW_2, healthDaysW_3, healthDaysW_4, healthDaysW_5,
    healthDaysW_6, healthDaysW_7, healthDaysW_8, healthDaysW_9, healthDaysW_10,
    healthDaysW_11, healthDaysW_12, healthDaysW_13, healthDaysW_14, healthDaysW_15,
    healthDaysW_16, healthDaysW_17, healthDaysW_18, healthDaysW_19, healthDaysW_20,
    healthDaysW_21, healthDaysW_22, healthDaysW_23, healthDaysW_24, healthDaysW_25,
    healthDaysW_26, healthDaysW_27, healthDaysW_28, healthDaysW_29, healthDaysW_30,
) = health_days_w_chunks

In [4]:
# Stack the thirty per-group frames for women into one table, renumbering rows from zero.
women_group_frames = [
    healthDaysW_1, healthDaysW_2, healthDaysW_3, healthDaysW_4, healthDaysW_5,
    healthDaysW_6, healthDaysW_7, healthDaysW_8, healthDaysW_9, healthDaysW_10,
    healthDaysW_11, healthDaysW_12, healthDaysW_13, healthDaysW_14, healthDaysW_15,
    healthDaysW_16, healthDaysW_17, healthDaysW_18, healthDaysW_19, healthDaysW_20,
    healthDaysW_21, healthDaysW_22, healthDaysW_23, healthDaysW_24, healthDaysW_25,
    healthDaysW_26, healthDaysW_27, healthDaysW_28, healthDaysW_29, healthDaysW_30,
]
health_days_women = pd.concat(women_group_frames, axis=0, ignore_index=True)

## Health data for Days in Hospital, men

* Prepare the data in chunks of seven files. Each chunk of seven files covers the same combination of categories (age, ethnicity, education); the files within a chunk differ only in which municipalities they contain.
* Remove duplicate rows, keeping the first occurrence. Every file contains a row for municipality_code = 0, which is the total Denmark average; since this row is only needed once per chunk, the duplicates are dropped.
* Create new columns (gender, age, ethnicity, education) whose values depend on the file chunk.

In [5]:
# Collect every men's CSV export and read each one into its own DataFrame.
path = "Health_DaysInHospital_Men/"
# Order the files oldest-modified first. The following cell consumes this list
# in consecutive groups of seven, so this ordering decides which demographic
# labels each file receives.
all_files_health_days_m = sorted(glob.glob(path + "/*.csv"), key=os.path.getmtime)

# Lazily parse each file (first row is the header, no column used as index) ...
li_mapper_health_days_m = (
    pd.read_csv(csv_path, index_col=None, header=0)
    for csv_path in all_files_health_days_m
)
# ... then materialise the frames, in the same order as the sorted paths.
file_list_health_days_m = [*li_mapper_health_days_m]

In [6]:
# Build one labelled DataFrame per demographic group of men.
#
# The loaded file list is consumed in consecutive groups of seven files. Group k
# (0-based) uses files [7k, 7k + 7) and is labelled with the k-th combination of
# age x ethnicity x education below (age varies slowest, education fastest).
files_per_group_m = 7
men_groups = [
    (age_band, ethnic_group, education_level)
    for age_band in ('18-29', '30-39', '40-49', '50-59', '60-65')
    for ethnic_group in ('Danish', 'Immigrants and descendants', 'Non-western immigrants and descendants')
    for education_level in ('No higher education', 'Higher education')
]

# Fail early with a clear message instead of silently building truncated groups
# (a short slice) or hitting an opaque "No objects to concatenate" error.
files_needed_m = files_per_group_m * len(men_groups)
if len(file_list_health_days_m) < files_needed_m:
    raise ValueError(
        "Expected at least {} men's CSV files ({} per group), found {}".format(
            files_needed_m, files_per_group_m, len(file_list_health_days_m)
        )
    )

health_days_m_chunks = []
for group_no, (age_band, ethnic_group, education_level) in enumerate(men_groups):
    first_file = group_no * files_per_group_m
    group_frame = (
        pd.concat(
            file_list_health_days_m[first_file:first_file + files_per_group_m],
            axis=0,
            ignore_index=True,
        )
        # Rows repeated across the seven files of a group are kept once.
        .drop_duplicates(keep='first')
        # Label every row with the demographic group this chunk represents.
        .assign(
            gender='Men',
            age=age_band,
            ethnicity=ethnic_group,
            education=education_level,
        )
    )
    health_days_m_chunks.append(group_frame)

# Keep the individual per-group names that the next cell concatenates.
(
    healthDaysM_1, healthDaysM_2, healthDaysM_3, healthDaysM_4, healthDaysM_5,
    healthDaysM_6, healthDaysM_7, healthDaysM_8, healthDaysM_9, healthDaysM_10,
    healthDaysM_11, healthDaysM_12, healthDaysM_13, healthDaysM_14, healthDaysM_15,
    healthDaysM_16, healthDaysM_17, healthDaysM_18, healthDaysM_19, healthDaysM_20,
    healthDaysM_21, healthDaysM_22, healthDaysM_23, healthDaysM_24, healthDaysM_25,
    healthDaysM_26, healthDaysM_27, healthDaysM_28, healthDaysM_29, healthDaysM_30,
) = health_days_m_chunks

In [7]:
# Stack the thirty per-group frames for men into one table, renumbering rows from zero.
men_group_frames = [
    healthDaysM_1, healthDaysM_2, healthDaysM_3, healthDaysM_4, healthDaysM_5,
    healthDaysM_6, healthDaysM_7, healthDaysM_8, healthDaysM_9, healthDaysM_10,
    healthDaysM_11, healthDaysM_12, healthDaysM_13, healthDaysM_14, healthDaysM_15,
    healthDaysM_16, healthDaysM_17, healthDaysM_18, healthDaysM_19, healthDaysM_20,
    healthDaysM_21, healthDaysM_22, healthDaysM_23, healthDaysM_24, healthDaysM_25,
    healthDaysM_26, healthDaysM_27, healthDaysM_28, healthDaysM_29, healthDaysM_30,
]
health_days_men = pd.concat(men_group_frames, axis=0, ignore_index=True)

## Prep days in hospital data frame

* Concatenate
* Change column names
* Sort 
* Rearrange columns
* Rearrange index
* Save to csv

In [8]:
# Combine the women's and men's tables into one, renumbering rows from zero.
health_days = pd.concat([health_days_women, health_days_men], axis=0, ignore_index=True)

# Rename the source column headers to the English names used from here on.
english_column_names = {
    'vaerdi': 'days in hospital',
    'kommunekode': 'municipality code',
    'antal observationer': 'days in hospital observations',
    'aar': 'year',
}
health_days = health_days.rename(columns=english_column_names)

In [9]:
# Add Copenhagen municipality code
#
# For every year/gender/age/ethnicity/education group, compute the
# observation-weighted average of days in hospital across all municipalities
# in the data (excluding the Denmark-average rows) and append it to
# health_days as an extra row with municipality code '1'.
cop_group_keys = ['year', 'gender', 'age', 'ethnicity', 'education']

# remove municipality code 0 for denmark average
cop = health_days[health_days['municipality code'] != 0]

# total number of observations over all municipalities, per group
cop_obs_sum = (
    cop.groupby(cop_group_keys)['days in hospital observations']
    .sum()
    .reset_index()
    .rename(columns={'days in hospital observations': 'hosp_obs_sum'})
)
cop = pd.merge(cop, cop_obs_sum, on=cop_group_keys, how='outer')

# each municipality's share of its group's observations, and its days in
# hospital scaled by that share
cop['weighted_obs'] = cop['days in hospital observations']/cop['hosp_obs_sum']
cop['weighted_hosp'] = cop['days in hospital']*cop['weighted_obs']

# summing the scaled values gives the weighted average days in hospital per group
cop_hosp_sum = (
    cop.groupby(cop_group_keys)['weighted_hosp']
    .sum()
    .reset_index()
    .rename(columns={'weighted_hosp': 'cop_hosp_avg'})
)
cop = pd.merge(cop, cop_hosp_sum, on=cop_group_keys, how='outer')

# Collapse to a single row per group. Both columns are constant within a group,
# so the mean just picks that value. Only the two needed columns are averaged:
# calling .mean() on the whole frame also averages unrelated columns and raises
# TypeError on non-numeric ones in pandas >= 2.0.
cop = cop.groupby(cop_group_keys)[['cop_hosp_avg', 'hosp_obs_sum']].mean()
# rename to the column names used in health_days
cop = cop.rename(columns={'cop_hosp_avg': 'days in hospital', 'hosp_obs_sum': 'days in hospital observations'})
# NOTE(review): the code is the string '1', while the filter above compares the
# existing codes with the number 0 -- confirm the mixed types are intended.
cop['municipality code'] = '1'
cop = cop.reset_index()

# add the copenhagen rows to the health_days dataframe
health_days = pd.concat([health_days, cop], ignore_index=True)

In [10]:
# Observation-weighted days in hospital within each municipality and year.
mun_year_keys = ['municipality code', 'year']

# Total number of observations per municipality and year.
mun_hosp_obs_sum = (
    health_days
    .groupby(mun_year_keys)['days in hospital observations']
    .sum()
    .reset_index()
    .rename(columns={'days in hospital observations': 'hosp_obs_mun_sum'})
)
health_days = health_days.merge(mun_hosp_obs_sum, on=mun_year_keys, how='outer')

# Each row's share of its municipality/year total, and the row's days in
# hospital scaled by that share.
health_days['weighted_obs_mun'] = health_days['days in hospital observations'].div(health_days['hosp_obs_mun_sum'])
health_days['weighted_hosp_days_mun'] = health_days['days in hospital'].mul(health_days['weighted_obs_mun'])

In [11]:
def _add_group_weights(frame, dimension, tag):
    """Add observation-weighted days-in-hospital columns for one dimension.

    Rows are grouped by municipality code, year and ``dimension``. Three
    columns are added to the returned frame:

    * ``days_obs_<tag>_mun_sum``  - total observations in the row's group
    * ``weighted_obs_<tag>_mun``  - the row's share of that total
    * ``weighted_days_<tag>_mun`` - the row's days in hospital times that share

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain 'municipality code', 'year', ``dimension``,
        'days in hospital' and 'days in hospital observations'.
    dimension : str
        Name of the grouping column (e.g. 'gender').
    tag : str
        Short label used inside the generated column names (e.g. 'gen').

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        The frame with the three new columns, and the per-group sums table.
    """
    keys = ['municipality code', 'year', dimension]
    sum_col = 'days_obs_{}_mun_sum'.format(tag)
    share_col = 'weighted_obs_{}_mun'.format(tag)
    weighted_col = 'weighted_days_{}_mun'.format(tag)

    obs_sum = (
        frame.groupby(keys)['days in hospital observations']
        .sum()
        .reset_index()
        .rename(columns={'days in hospital observations': sum_col})
    )
    merged = pd.merge(frame, obs_sum, on=keys, how='outer')
    merged[share_col] = merged['days in hospital observations']/merged[sum_col]
    merged[weighted_col] = merged['days in hospital']*merged[share_col]
    return merged, obs_sum


# weighted observations and weighted days in hospital for all municipalities,
# by year and (in turn) gender, age, ethnicity and education
health_days, gen_mun_days_obs_sum = _add_group_weights(health_days, 'gender', 'gen')
health_days, age_mun_days_obs_sum = _add_group_weights(health_days, 'age', 'age')
health_days, eth_mun_days_obs_sum = _add_group_weights(health_days, 'ethnicity', 'eth')
health_days, edu_mun_days_obs_sum = _add_group_weights(health_days, 'education', 'edu')

In [12]:
# Order rows by municipality, then year, then each demographic dimension.
sort_keys = ['municipality code', 'year', 'gender', 'age', 'ethnicity', 'education']
health_days = health_days.sort_values(by=sort_keys)

# Keep the identifying columns first, followed by the raw and weighted measures.
cols = sort_keys + [
    'days in hospital',
    'days in hospital observations',
    'weighted_hosp_days_mun',
    'weighted_days_gen_mun',
    'weighted_days_age_mun',
    'weighted_days_eth_mun',
    'weighted_days_edu_mun',
]
health_days = health_days[cols]

# Renumber the rows 1..n (the index itself is not written to the file below).
row_count = len(health_days)
health_days.index = np.arange(1, row_count + 1)

# Write the final table to disk without the index column.
health_days.to_csv('health_days.csv', index=False)