In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import Normalizer
import urllib.request
import warnings 
warnings.simplefilter('ignore')
import plotly.express as px
%matplotlib inline
import os

In [2]:
# Display format: show every column when a DataFrame is rendered (no column truncation)
pd.set_option('display.max_columns', None)
# Optional fixed 4-decimal float rendering, currently disabled:
#pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [3]:
# Root folder of the project data; the final Excel export is written below it.
# The default is the original machine-specific location. Set the SEIR_DATA_DIR
# environment variable to run the notebook on another machine without editing code.
DATA_DIR = os.environ.get('SEIR_DATA_DIR', r'/Users/yotamdery/Old_Desktop/git/SEIR_model_COVID-main/Data')

## Accessing the API - ״מתחסנים לפי ישוב״ ("Vaccinated by locality")

In [4]:
# Download the table from the data.gov.il datastore API (up to 1,000,000 records).
url = 'https://data.gov.il/api/3/action/datastore_search?resource_id=12c9045c-1bf4-478a-a9e1-1e876cc2e182&limit=1000000'
# The context manager closes the HTTP response as soon as the payload is parsed.
with urllib.request.urlopen(url) as fileobj:
    df = pd.read_json(fileobj)
# The list of records sits in the 'result' column under the 'records' row label.
data = df.loc['records', 'result']

In [5]:
# Build a DataFrame from the API records and keep only rows dated up to 2021-10-25.
# NOTE(review): the cutoff is compared against a string literal, so this assumes
# 'Date' holds ISO 'YYYY-MM-DD' text - confirm against the API payload.
vaccinations_per_town = pd.DataFrame(data)
is_within_window = vaccinations_per_town['Date'] <= '2021-10-25'
vaccinations_per_town = vaccinations_per_town.loc[is_within_window]

* Functions

In [6]:
# Reading the sick file
def reading_sick_file(df, seed=None):
    """Select the dose columns of the raw table and impute the censored '<15' cells.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. Columns are taken by position: 2:4 (which must include
        'CityCode'; the second one is used downstream as 'Date') followed by
        13:31 (18 dose-count columns holding numeric text or '<15').
    seed : int, optional
        Seed for the random imputation. ``None`` (the default) keeps the
        historical behaviour of drawing different values on every run; pass
        an integer to make the output reproducible.

    Returns
    -------
    pandas.DataFrame
        The 20 selected columns, with an integer 'CityCode', float dose
        columns and no missing dose values. Rows are ordered by city group
        and carry a fresh 0..n-1 index.
    """
    rng = np.random.default_rng(seed)

    def filling_missing_values(temp_df):
        """Fill the missing dose counts of a single city."""
        temp_df = temp_df.copy()
        for col in temp_df.columns[2:]:
            values = temp_df[col].astype(float).to_numpy(copy=True)     # Converting types
            # A missing value on the city's first row becomes 0
            if values.size and np.isnan(values[0]):
                values[0] = 0.0
            # Remaining gaps get random integers in [0, 15), sorted ascending so
            # that the filled values never decrease from one row to the next
            missing = np.isnan(values)
            values[missing] = np.sort(rng.integers(low=0, high=15, size=int(missing.sum())))
            temp_df[col] = values
        return temp_df

    df = df.iloc[:, np.r_[2:4, 13:31]]          # Choosing relevant columns
    # Dropping rows whose columns from position 3 onward are all '0.0'.
    # NOTE(review): the first dose column sits at position 2 and is not part of
    # this test, although the imputation above starts at position 2 - confirm.
    sick_filtered = df[~(df.iloc[:, 3:] == '0.0').all(axis=1)]
    sick_filtered = sick_filtered.replace(to_replace='<15', value=np.nan)
    # Iterating over the groups explicitly (rather than groupby.apply) keeps the
    # 'CityCode' column available in every group on all pandas versions.
    per_city = [filling_missing_values(city_rows)
                for _, city_rows in sick_filtered.groupby(by='CityCode')]
    sick_filtered = pd.concat(per_city) if per_city else sick_filtered.copy()
    sick_filtered = sick_filtered.reset_index(drop=True)
    sick_filtered['CityCode'] = sick_filtered['CityCode'].astype(int)

    return sick_filtered

In [7]:
# Importing and cleaning of Taz_Yeshuv_250_mapping
def reading_town_code_to_taz_mapper(path=r'/Users/yotamdery/Old_Desktop/git/health_data_mapping/Taz_Yeshuv_250_mapping.xlsx'):
    """Load the TAZ -> town-code -> population table from the mapping workbook.

    Parameters
    ----------
    path : str, optional
        Location of the workbook. Defaults to the original hard-coded path so
        existing calls without arguments keep working.

    Returns
    -------
    pandas.DataFrame
        Columns 'taz_id', 'town_code' (int) and 'population', read from sheet
        columns A, E and P, limited to row positions 4..2632 and to rows that
        have a town code.
    """
    town_code_to_taz_mapper = pd.read_excel(path, engine='openpyxl', usecols='A, E, P')
    town_code_to_taz_mapper.columns = ['taz_id', 'town_code', 'population']
    # NOTE(review): the 4:2633 window is hard-coded for the current workbook layout
    town_code_to_taz_mapper = town_code_to_taz_mapper.iloc[4:2633]
    # Work on an explicit copy so the dtype assignment below never writes into a slice
    town_code_to_taz_mapper = town_code_to_taz_mapper.dropna(subset=['town_code']).copy()
    town_code_to_taz_mapper['town_code'] = town_code_to_taz_mapper['town_code'].astype(int)

    return town_code_to_taz_mapper

In [8]:
def normalized_pop_per_taz(df):
    """Turn each TAZ population into its share of the town's total population.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'town_code' and 'population'; any other column is carried
        along unchanged. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Same columns, with 'population' replaced by population / town total
        and 'town_code' cast to int. Rows whose share is exactly 0 are dropped.
        Rows are ordered by town code (input order within a town) and carry a
        fresh 0..n-1 index.
    """
    # Rows without a town code belong to no town; a stable sort gives towns in
    # ascending order while preserving the input order inside each town.
    normalized = df.dropna(subset=['town_code']).sort_values(by='town_code', kind='mergesort').copy()
    population = normalized['population'].astype(float)
    # Per-row town total, computed without groupby.apply so the result does not
    # depend on how a given pandas version treats the grouping column.
    town_totals = population.groupby(normalized['town_code']).transform('sum')
    normalized['population'] = population / town_totals   # performing the normalization
    normalized = normalized.loc[normalized['population'] != 0.0]
    normalized['town_code'] = normalized['town_code'].astype(int)
    return normalized.reset_index(drop=True)

In [9]:
## filtering the df
# def filtering_rows(df):
#     df_exclude_90plus = df.copy().iloc[: , :-1]    # a DF without the first_dose_90+ column
#     sick_filtered = df[((df_exclude_90plus != '<15') & (df_exclude_90plus != '0.0')).all(axis= 1)]
#     sick_filtered['first_dose_90+'] = sick_filtered['first_dose_90+'].replace(to_replace= '<15', value= '0')  # in cases that a lot of values are '<15'. we sum this column anyway
#     sick_filtered.sort_values(by= ['CityCode','Date'], inplace= True)
#     return sick_filtered

In [10]:
# Type convertion of fields
# def types_convertion(df):
#     df['Date'] = pd.to_datetime(df['Date'])
#     for c in df.columns[1:] :
#         if c not in ['Date']:    # Have to convert to float first because of the original format...
#             df[c] = df[c].astype(float)
#             # Converting all columns but the date column
#             df[c] = df[c].astype(int)
#     return df

In [11]:
# Merging the 70-79, 80-89 and 90+ age groups into a single 70+ group:
def merge_columns_to_70plus(df):
    """Collapse the three oldest age groups of each dose into one '70+' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'second_dose_*' and 'third_dose_*' columns for the age
        groups 70-79, 80-89 and 90+. The final reordering is positional and
        expects 16 columns after the merge: two leading columns, six
        second-dose columns, six third-dose columns and the two new 70+ ones.
        The input frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        The two leading columns, then second-dose columns ending with
        'second_dose_70+', then third-dose columns ending with 'third_dose_70+'.
    """
    second_dose_parts = ['second_dose_70-79', 'second_dose_80-89', 'second_dose_90+']
    third_dose_parts = ['third_dose_70-79', 'third_dose_80-89', 'third_dose_90+']

    # Work on a copy: the caller's frame keeps its columns, so the function can
    # be called again on the same input.
    merged = df.copy()
    merged['second_dose_70+'] = merged['second_dose_70-79'] + merged['second_dose_80-89'] + merged['second_dose_90+']
    merged['third_dose_70+'] = merged['third_dose_70-79'] + merged['third_dose_80-89'] + merged['third_dose_90+']
    merged = merged.drop(columns=second_dose_parts + third_dose_parts)
    # After the drop the new columns sit at positions 14 and 15; move each one
    # to the end of its own dose block.
    reordered_df = merged.iloc[:, np.r_[0:8, 14, 8:14, 15]]
    return reordered_df

<b>Spreading the vaccinated individuals over the TAZs:</b>

In [12]:
### multiplying by the population ratio:
def multiply_by_pop_ratio(df):
    """Scale the dose columns of every row by that row's population share.

    Parameters
    ----------
    df : pandas.DataFrame
        Expected layout (by position): two leading columns, 14 dose columns at
        2:16, a column at position 16 that is carried through (the TAZ id in
        this notebook) and a 'population' column holding the share. The input
        frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The two leading columns, the 14 scaled columns named
        '<original>_multiplied', and the column from position 16.
    """
    dose_columns = df.columns[2:16]
    # Row-wise scaling of all dose columns at once
    multiplied = df[dose_columns].mul(df['population'], axis=0)
    multiplied.columns = [column + "_multiplied" for column in dose_columns]
    return pd.concat([df.iloc[:, 0:2], multiplied, df.iloc[:, [16]]], axis=1)

In [13]:
# A mapper - taz for 250 regions
def reading_taz_250_mapping(path=r'/Users/yotamdery/Old_Desktop/git/health_data_mapping/Taz_Yeshuv_250_mapping.xlsx'):
    """Load the TAZ id, population and 250-regions cell from the mapping workbook.

    Parameters
    ----------
    path : str, optional
        Location of the workbook. Defaults to the original hard-coded path so
        existing calls without arguments keep working.

    Returns
    -------
    pandas.DataFrame
        Sheet columns A, P and AH renamed to 'taz_id', 'population' and
        '250_regions', limited to index labels 3..2633.
    """
    taz_250_mapping = pd.read_excel(path, engine='openpyxl', usecols="A,P,AH")
    taz_250_mapping = taz_250_mapping.rename(columns={'Unnamed: 0': 'taz_id',
                                                      'Unnamed: 15': 'population',
                                                      'Unnamed: 33': '250_regions'})
    # .loc slicing is label-based and includes both end points.
    # NOTE(review): the row window is hard-coded for the current workbook layout.
    taz_250_mapping = taz_250_mapping.loc[3:2633]
    return taz_250_mapping

In [14]:
# Mapper - TAZ id to its 250-regions cell
def create_taz_250_mapper():
    """Return the 'taz_id' and '250_regions' columns of the TAZ mapping table.

    The population-per-region mapper that used to be built here as well is
    disabled; only the TAZ -> region lookup is returned.
    """
    full_mapping = reading_taz_250_mapping()
    lookup_columns = ['taz_id', '250_regions']
    return full_mapping[lookup_columns]


In [15]:
def reading_250_30_mapping(path=r'/Users/yotamdery/Old_Desktop/git/SEIR_model_COVID-main/Data/division_choice/30/cell250_to_cell30.xlsx'):
    """Load the workbook that maps each 250-regions cell to one of the 30 counties.

    Parameters
    ----------
    path : str, optional
        Location of the workbook. Defaults to the original hard-coded path so
        existing calls without arguments keep working.

    Returns
    -------
    pandas.DataFrame
        The sheet exactly as read; downstream code uses its 'cell_id' and
        '30_county' columns.
    """
    mapping_30_250 = pd.read_excel(path, engine='openpyxl')
    return mapping_30_250

In [16]:
# Receives the 250-regions DF and a mapper from 250 regions to 30 counties
def aggregate_to_30_level(vaccinated_250, mapping_30_250):
    """Sum the 250-region figures up to the 30-county level.

    Parameters
    ----------
    vaccinated_250 : pandas.DataFrame
        Keyed by '250_regions' and 'Date'; the columns at positions 2:16 of
        the merged frame are the ones that get summed.
    mapping_30_250 : pandas.DataFrame
        Lookup with 'cell_id' (matched against '250_regions') and '30_county'.

    Returns
    -------
    pandas.DataFrame
        One row per ('30_county', 'Date') with the summed columns;
        '30_county' is returned as text.
    """
    merged = vaccinated_250.merge(mapping_30_250, left_on='250_regions', right_on='cell_id')
    value_columns = list(merged.columns[2:16])
    county_totals = merged.groupby(by=['30_county', 'Date'], as_index=False)[value_columns].sum()
    # Defensive check: keep only the first row of every (county, date) pair
    is_repeat = county_totals.duplicated(subset=['30_county', 'Date'])
    county_rows = county_totals[~is_repeat]
    county_rows['30_county'] = county_rows['30_county'].astype(str)
    return county_rows

In [17]:
# Swapping the trailing token of every '..._multiplied' column for 'acc':
def replacing_multiplied_ending(df):
    """Rename columns in place: names containing a 'multiplied' token end in 'acc'.

    A column name is split on '_'. When one of the tokens is 'multiplied', the
    last token is replaced by 'acc'; any other name is kept as is. The frame
    is modified in place and also returned.
    """
    def _renamed(column):
        tokens = column.split('_')
        if 'multiplied' in tokens:
            tokens = tokens[:-1] + ['acc']
        return '_'.join(tokens)

    df.columns = [_renamed(column) for column in df.columns]
    return df

In [18]:
# Calculating the per-row (non-accumulated) amounts - new columns with an 'absolute_' prefix
def calculate_difference_vaccinations(df):
    """Add 'absolute_*' columns holding the row-to-row increase of columns 9:16.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single county in chronological order; the columns at
        positions 9:16 hold the accumulated third-dose counts in this notebook.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with one extra column per accumulated column, named
        'absolute_' + original name. The first row keeps its accumulated value
        (there is no previous row to subtract); every other row holds the
        difference from the row before it. Column dtypes, row order and index
        labels are preserved, and an empty input yields an empty result.
    """
    df_copy = df.copy()
    accumulated_columns = df_copy.columns[9:16]
    accumulated = df_copy[accumulated_columns]

    daily = accumulated.diff(periods=1)
    if len(daily):
        # If it is the first row, take it as it is, don't apply the difference
        daily.iloc[0] = accumulated.iloc[0].to_numpy()
    daily.columns = ['absolute_' + column for column in accumulated_columns]

    final_df = pd.concat([df_copy, daily], axis=1)
    return final_df

In [19]:
def removing_acc_ending(df):
    """Strip the trailing '_acc' from the 'absolute' columns (renames in place).

    A column name is split on '_'. When it contains an 'absolute' token and
    its last token is 'acc', that last token is removed; every other name is
    left unchanged. Checking the last token makes the function safe to call
    more than once: a name that was already shortened is not shortened again.

    Returns the same frame object that was passed in.
    """
    new_columns_names = []
    for column in df.columns:
        tokens = column.split('_')
        if 'absolute' in tokens and tokens[-1] == 'acc':
            tokens = tokens[:-1]
        new_columns_names.append('_'.join(tokens))
    df.columns = new_columns_names
    return df

In [20]:
# Main function - Receiving only the health df
def main(df):
    """Run the whole pipeline: raw town-level table -> 30-county frame.

    Steps: clean the raw table, merge the oldest age groups, spread every
    town's counts over its TAZs by population share, aggregate TAZ -> 250
    regions -> 30 counties, and add the per-row third-dose columns. Three
    sanity totals are printed along the way.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw vaccination table, in the layout expected by ``reading_sick_file``.

    Returns
    -------
    pandas.DataFrame
        One row per ('30_county', 'Date') with the accumulated '*_acc' columns
        and the 'absolute_*' third-dose columns.
    """
    df = reading_sick_file(df)
    df = merge_columns_to_70plus(df)       # Merging the 70-79, 80-89 and 90+ age groups

    # Sanity check: sum of the columns from position 9 onward on the latest date
    print("Total number of vaccinations per age:")
    print (sum((df[df['Date'] == df['Date'].max()][df.columns[9:]].sum(axis= 1))))

    ### Merging the health file with taz to yeshuv mapping - To get the TAZ for each town code:
    town_code_to_taz_mapper = reading_town_code_to_taz_mapper()                 # Reading the Taz_to_yeshuv file
    town_code_to_taz_mapper = normalized_pop_per_taz(town_code_to_taz_mapper)   # Normalizing the population column
    # Inner merge: towns without an entry in the mapper are dropped here
    vaccination_taz_merged = df.merge(town_code_to_taz_mapper, how= 'inner', left_on= 'CityCode', right_on= 'town_code')

    ## Spreading over the Tazs:
    normed_vaccinated = multiply_by_pop_ratio(vaccination_taz_merged)     # Multiplying each column by the population ratio

    grouped_vaccinated_and_mapping = normed_vaccinated.groupby(by= ['taz_id','Date']).sum().reset_index()  # grouping by taz and date
    grouped_vaccinated_and_mapping = grouped_vaccinated_and_mapping.drop(columns= "CityCode")    # Not relevant anymore

    # Sanity check:
    print("Total number of vaccinations per age:")
    print (sum((grouped_vaccinated_and_mapping[grouped_vaccinated_and_mapping['Date'] ==
                                               grouped_vaccinated_and_mapping['Date'].max()]
                                              [grouped_vaccinated_and_mapping.columns[9:]].sum(axis= 1))))
    taz_250_mapping = create_taz_250_mapper()      # Mapper - Taz for 250_regions

    vaccinated_250_merged = grouped_vaccinated_and_mapping.merge(taz_250_mapping)
    # The TAZ id has done its job as the join key. Drop it explicitly so it can
    # never be summed into the result: every positional column selection below
    # (columns[9:16], columns[2:16]) assumes the dose columns start at position 2.
    vaccinated_250_merged = vaccinated_250_merged.drop(columns= 'taz_id')
    vaccinated_250 = vaccinated_250_merged.groupby(by= ['250_regions', 'Date'],
                                                   as_index= False).sum()    # Aggregating to 250 regions level

    # Sanity check:
    print("Total number of vaccinations per age:")
    print (sum((vaccinated_250[vaccinated_250['Date'] == vaccinated_250['Date'].max()][vaccinated_250.columns[9:16]].sum(axis= 1))))

    mapping_250_30 = reading_250_30_mapping()                               # reading the 250 to 30 counties file
    vaccinated_30 = aggregate_to_30_level(vaccinated_250, mapping_250_30)        # Aggregating the data to 30 counties level
    vaccinated_30 = replacing_multiplied_ending(vaccinated_30)      # Replacing the 'multiplied' ending with 'acc' for each dose column
    # Calculating the actual amounts of vaccinations, one county at a time so a
    # difference is never taken across two counties. The groups are iterated
    # explicitly so each one still contains its '30_county' column, which the
    # positional indexing inside calculate_difference_vaccinations relies on.
    per_county = [calculate_difference_vaccinations(county_rows)
                  for _, county_rows in vaccinated_30.groupby(by= '30_county')]
    absolute_vaccination_amount = pd.concat(per_county)
    # Removing the _acc ending for the new columns (the absolute columns)
    final_df = removing_acc_ending(absolute_vaccination_amount)
    return final_df

In [21]:
# Running the whole pipeline on a copy of the downloaded table, so the raw
# frame stays untouched
raw_vaccinations = vaccinations_per_town.copy()
df_third_dose = main(raw_vaccinations)
print("\nFinished creating third dose\n")

Total number of vaccinations per age:
4029987.0
Total number of vaccinations per age:
3893696.9999999995
Total number of vaccinations per age:
3893697.0000000005

Finished creating third dose



In [22]:
# Display the county-level frame returned by main()
df_third_dose

Unnamed: 0,30_county,Date,second_dose_0-19_acc,second_dose_20-29_acc,second_dose_30-39_acc,second_dose_40-49_acc,second_dose_50-59_acc,second_dose_60-69_acc,second_dose_70+_acc,third_dose_0-19_acc,third_dose_20-29_acc,third_dose_30-39_acc,third_dose_40-49_acc,third_dose_50-59_acc,third_dose_60-69_acc,third_dose_70+_acc,absolute_third_dose_0-19,absolute_third_dose_20-29,absolute_third_dose_30-39,absolute_third_dose_40-49,absolute_third_dose_50-59,absolute_third_dose_60-69,absolute_third_dose_70+
0,11,2021-01-10,0.0,20.168174,46.098683,46.818975,73.742186,43.937808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,11,2021-01-11,4.556757,84.51182,201.992205,246.987748,279.166557,565.866881,436.83615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,11,2021-01-12,17.831148,190.424161,410.064066,465.908691,507.698916,1409.97622,1490.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,11,2021-01-13,31.473322,357.12472,684.573618,745.790013,794.05025,2481.920199,2872.946468,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,11,2021-01-14,42.395204,539.705118,912.105977,1026.86031,1118.287391,3786.369116,4465.685438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16135,71_haredi,2022-06-27,11810.540887,9970.367315,8103.054028,6244.860286,3027.874343,1464.428554,1194.987532,3127.165628,5681.279061,4883.235057,4124.613006,2402.277472,1252.157682,1074.014301,0.0,0.0,0.0,0.162205,0.0,0.0,0.0
16136,71_haredi,2022-06-28,11810.540887,9970.367315,8103.054028,6244.860286,3027.874343,1465.428554,1194.987532,3129.327833,5681.279061,4883.235057,4124.775211,2402.277472,1252.157682,1074.014301,2.162205,0.0,0.0,0.162205,0.0,0.0,0.0
16137,71_haredi,2022-06-29,11810.703093,9970.367315,8103.054028,6244.860286,3027.874343,1465.428554,1194.987532,3129.327833,5682.279061,4883.235057,4124.775211,2402.277472,1252.157682,1074.014301,0.0,1.0,0.0,0.0,0.0,0.0,0.0
16138,71_haredi,2022-06-30,11810.703093,9970.367315,8105.054028,6245.860286,3028.874343,1465.428554,1194.987532,3129.327833,5683.279061,4884.235057,4124.775211,2402.277472,1252.157682,1074.014301,0.0,1.0,1.0,0.0,0.0,0.0,0.0


In [23]:
# Work on an independent copy: the following cells add and drop columns, and
# an alias would silently change df_third_dose (already displayed above) too.
result = df_third_dose.copy()

<b>We'd like to be aligned with the age groups of the current model:</b>

In [24]:
# Splitting every '0-19' column into the 0-4, 5-9 and 10-19 age groups.
# Each group lists its three target columns followed by the source column.
groups_of_columns = [ ['second_dose_0-4_acc','second_dose_5-9_acc','second_dose_10-19_acc','second_dose_0-19_acc'],
                      ['third_dose_0-4_acc', 'third_dose_5-9_acc', 'third_dose_10-19_acc', 'third_dose_0-19_acc'],
                      ['absolute_third_dose_0-4', 'absolute_third_dose_5-9', 'absolute_third_dose_10-19', 'absolute_third_dose_0-19'] ]
# Fixed shares of the 0-19 total given to 0-4, 5-9 and 10-19 respectively
age_0_19_shares = (0.1, 0.15, 0.75)

# Start from a fresh copy of the pipeline output so this cell can be re-run
# safely and df_third_dose itself is never modified.
result = df_third_dose.copy()
for *target_columns, source_column in groups_of_columns:
    for target_column, share in zip(target_columns, age_0_19_shares):
        result[target_column] = result[source_column] * share

# The combined 0-19 columns are no longer needed
result = result.drop(columns= [group[-1] for group in groups_of_columns])

In [25]:
# Re-ordering the DF: inside each block (second-dose accumulated, third-dose
# accumulated, third-dose absolute) the three youngest groups, which were
# appended at the end of the frame, are moved in front of the older groups
all_columns = list(result.columns)
second_dose_acc_ordered_columns = [*all_columns[20:23], *all_columns[2:8]]
third_dose_acc_ordered_columns = [*all_columns[23:26], *all_columns[8:14]]
third_dose_absolute_ordered_columns = [*all_columns[26:29], *all_columns[14:20]]
new_ordered_columns = [
    *all_columns[0:2],
    *second_dose_acc_ordered_columns,
    *third_dose_acc_ordered_columns,
    *third_dose_absolute_ordered_columns,
]
result = result.loc[:, new_ordered_columns]

# # Summing the 2 columns to get 'absolute_third_dose_70+' column
# result['absolute_third_dose_70+'] = result['absolute_third_dose_70-79'] + result['absolute_third_dose_80+']
# result.drop(columns= ['absolute_third_dose_70-79', 'absolute_third_dose_80+'], axis= 1, inplace= True)

<b>Adding the 'county_id' to the counties so that it can be used in the SEIR model:</b>

In [26]:
# Reading the county lookup (sheet columns A and C); the string id is cast to
# text so it can be matched against '30_county'
county_lookup_path = (r'/Users/yotamdery/Old_Desktop/git/SEIR_model_covid_yotams copy/Data/'
                      r'division_choice/30/county_int_2name_county_string.xlsx')
county_string_to_id = pd.read_excel(county_lookup_path, engine='openpyxl', usecols= "A,C")
county_string_to_id = county_string_to_id.astype({'county_string_id': str})

In [27]:
# Merging: attach the lookup columns to every row whose '30_county' has a
# match, then discard the duplicated string key
merged_with_ids = result.merge(county_string_to_id, how= 'inner',
                               left_on= '30_county', right_on= 'county_string_id')
result_with_county_id = merged_with_ids.drop(columns= 'county_string_id')

In [28]:
# Reordering: move the last column (added by the merge) to position 1, right
# after the first column; then sorting by date and county id
all_columns = result_with_county_id.columns.tolist()
new_ordered_columns = [all_columns[0], all_columns[-1], all_columns[1], *all_columns[2:-1]]
result_with_county_id = (
    result_with_county_id[new_ordered_columns]
    .sort_values(by= ['Date', 'county_id'])
    .reset_index(drop= True)
)

In [29]:
# Types conversion: every column from position 3 onward becomes float
value_columns = result_with_county_id.columns[3:]
result_with_county_id = result_with_county_id.astype({column: float for column in value_columns})

<b>Creating a proportion DF - each value will be the proportion of vaccinated individuals
    per county and age-group. The denominator is the number of people eligible for a third dose in that county and age-group</b>

Create a dictionary of {(county_id, county_string_id) : population}

In [30]:
# df = result_with_county_id[['county_id', 'county_string_id', 'population']]
# unique_df = df[~df.duplicated()].sort_values(by= 'county_id')
# dict_region_population = unique_df.set_index(['county_id', 'county_string_id']).T.to_dict('list')

Filling zeroes for counties that don't have vaccinated individuals in a specific date:

In [31]:
# def filling_zeros(df, **kwargs):
#     set_from_dict = set(dict_region_population.keys())       # Static set of the counties
#     current_set_from_df = set(zip(df['county_id'], df['county_string_id']))  # Set of the current counties (differs from each date)
#     set_difference = sorted(list(set_from_dict - current_set_from_df))    # getting the counties that don't exist in the current date
#
#     if set_difference == []:    # If all counties are there - exit the function, do nothing
#         return df
#
#     # Building the new df to concat - the non-exist counties df:
#     data = []      # empty list to end up as list of lists to create the df
#     current_date = df['Date'].iloc[0]
#     for element in set_difference:     # Iterating over the difference list - counties that dont exists in the specific date:
#         data.append([current_date, element[0], element[1], dict_region_population[element][0], 0, 0, 0, 0, 0, 0, 0, 0, 0])
#     current_df = pd.DataFrame(data, columns= df.columns)
#
#     final_df = pd.concat([df, current_df], axis= 0)     # Stacking the df one ontop of the other
#     return final_df

In [32]:
# # Applying the function above
# df_filled_zeros = result_with_county_id.groupby(by= ['Date'], as_index= False).apply(filling_zeros, args= dict_region_population).reset_index()
# df_filled_zeros = df_filled_zeros.drop(labels= ['level_0', 'level_1'], axis= 1).sort_values(by= ['Date', 'county_id'])

Reading the 'population_per_county_age-group' file to get the population for each intersection of county and age-group:

In [33]:
# population_per_county_age_group = pd.read_csv(r'/Users/yotamdery/Old_Desktop/git/SEIR_model_COVID-main/Data/division_choice/30/population_per_county_age-group.csv')

In [34]:
# df_filled_zeros.head()

In [35]:
# population_per_county_age_group.head()

* Merging it with the most updated data frame; merging by the county_id field:

In [36]:
# full_pop_df = df_filled_zeros.merge(population_per_county_age_group, how= 'inner', left_on= 'county_id',
#                                    right_on= 'county_id').drop(columns= ['population'], axis= 1)

* Calculate the eligible people for getting the 3rd vaccination:
* It is the total number of people who got the 2nd dose minus the total number of people who have already received the 3rd dose (both taken from the previous row),
* for each age group j in county k

In [37]:
age_groups_list = ['0-4', '5-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
def calc_eligible_for_vaccine(df):
    """Add one 'eligible_vaccine_<age>' column per age group.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single county in chronological order. Must contain the
        columns 'second_dose_<age>_acc' and 'third_dose_<age>_acc' for every
        entry of ``age_groups_list``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the eligible columns appended in the order of
        ``age_groups_list``. First row: the accumulated second-dose count of
        that row. Every later row: accumulated second doses minus accumulated
        third doses, both taken from the previous row.
    """
    temp = df.copy()
    # Columns are looked up by name, so the result does not depend on the
    # exact position of each block inside the frame.
    second_acc = temp[['second_dose_' + age_group + '_acc' for age_group in age_groups_list]].to_numpy(dtype= float)
    third_acc = temp[['third_dose_' + age_group + '_acc' for age_group in age_groups_list]].to_numpy(dtype= float)

    eligible = np.full(second_acc.shape, np.nan)
    if len(eligible):
        eligible[0] = second_acc[0]                      # first row: everyone vaccinated with the 2nd dose so far
        eligible[1:] = second_acc[:-1] - third_acc[:-1]  # later rows: previous row's 2nd-dose total minus 3rd-dose total

    for position, age_group in enumerate(age_groups_list):
        temp['eligible_vaccine_' + age_group] = eligible[:, position]
    return temp

In [38]:
# Computing the eligible columns separately for every county
rows_by_county = result_with_county_id.groupby(by= ['county_id'], as_index= False)
eligible_df = rows_by_county.apply(calc_eligible_for_vaccine)

Getting the proportion of vaccinated individuals: <br>
* The proportion is defined by: (absolute amount of 3rd dose / eligible for 3rd dose)

In [39]:
def get_proportion_of_vaccinated_people(df):
    """Add one 'prop_*' column per age group: third-dose amount / eligible people.

    Parameters
    ----------
    df : pandas.DataFrame
        The columns at positions 21:30 are the numerators (the
        'absolute_third_dose_<age>' columns in this notebook); for each of
        them a matching 'eligible_vaccine_<age>' column must exist. The input
        frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the proportion columns appended. A new column is
        named 'prop_' + the numerator's name without its first '_'-separated
        token. Every missing value in the returned frame (for instance the
        result of 0/0) is replaced by 0.
    """
    temp = df.copy()
    # Creating proportion column for each age group
    for column in temp.columns[21:30]:
        column_name_without_beginning = '_'.join(column.split('_')[1:])
        age_group_ending = column.split('_')[-1]
        # Performing the calc logic
        temp['prop_' + column_name_without_beginning] = temp[column] / temp['eligible_vaccine_' + age_group_ending]
    # Handling null values received from calculating 0/0 - done once, after all
    # proportions exist, instead of re-filling the whole frame on every pass.
    # NOTE(review): a positive amount divided by 0 eligible gives inf, which
    # fillna does not touch - confirm whether such rows can occur.
    return temp.fillna(value= 0)

In [40]:
# Computing the proportion columns separately for every county
eligible_by_county = eligible_df.groupby(by= ['county_id'], as_index= False)
proportion_df = eligible_by_county.apply(get_proportion_of_vaccinated_people)

In [41]:
# County ids with a negative value in any column from position 3 onward (expected: none).
# `axis` is passed by keyword: pandas 2.0 and later reject it as a positional argument.
has_negative_value = (proportion_df.iloc[: , 3:] < 0).any(axis= 1)
proportion_df.loc[has_negative_value, 'county_id'].unique()

array([], dtype=int64)

In [42]:
# Rows with a negative value in any column from position 3 onward (expected: none).
# `axis` is passed by keyword: pandas 2.0 and later reject it as a positional argument.
proportion_df[(proportion_df.iloc[: , 3:] < 0).any(axis= 1)]

Unnamed: 0,30_county,county_id,Date,second_dose_0-4_acc,second_dose_5-9_acc,second_dose_10-19_acc,second_dose_20-29_acc,second_dose_30-39_acc,second_dose_40-49_acc,second_dose_50-59_acc,second_dose_60-69_acc,second_dose_70+_acc,third_dose_0-4_acc,third_dose_5-9_acc,third_dose_10-19_acc,third_dose_20-29_acc,third_dose_30-39_acc,third_dose_40-49_acc,third_dose_50-59_acc,third_dose_60-69_acc,third_dose_70+_acc,absolute_third_dose_0-4,absolute_third_dose_5-9,absolute_third_dose_10-19,absolute_third_dose_20-29,absolute_third_dose_30-39,absolute_third_dose_40-49,absolute_third_dose_50-59,absolute_third_dose_60-69,absolute_third_dose_70+,eligible_vaccine_0-4,eligible_vaccine_5-9,eligible_vaccine_10-19,eligible_vaccine_20-29,eligible_vaccine_30-39,eligible_vaccine_40-49,eligible_vaccine_50-59,eligible_vaccine_60-69,eligible_vaccine_70+,prop_third_dose_0-4,prop_third_dose_5-9,prop_third_dose_10-19,prop_third_dose_20-29,prop_third_dose_30-39,prop_third_dose_40-49,prop_third_dose_50-59,prop_third_dose_60-69,prop_third_dose_70+


In [43]:
# Reordering the columns - the nine columns at positions 39:48 (the proportion
# columns) are placed right after the three leading columns
all_columns = proportion_df.columns.tolist()
new_ordered_columns = [*all_columns[0:3], *all_columns[39:48], *all_columns[3:39]]
proportion_df = proportion_df.loc[:, new_ordered_columns]


In [44]:
# Display the final frame, proportion columns first
proportion_df

Unnamed: 0,30_county,county_id,Date,prop_third_dose_0-4,prop_third_dose_5-9,prop_third_dose_10-19,prop_third_dose_20-29,prop_third_dose_30-39,prop_third_dose_40-49,prop_third_dose_50-59,prop_third_dose_60-69,prop_third_dose_70+,second_dose_0-4_acc,second_dose_5-9_acc,second_dose_10-19_acc,second_dose_20-29_acc,second_dose_30-39_acc,second_dose_40-49_acc,second_dose_50-59_acc,second_dose_60-69_acc,second_dose_70+_acc,third_dose_0-4_acc,third_dose_5-9_acc,third_dose_10-19_acc,third_dose_20-29_acc,third_dose_30-39_acc,third_dose_40-49_acc,third_dose_50-59_acc,third_dose_60-69_acc,third_dose_70+_acc,absolute_third_dose_0-4,absolute_third_dose_5-9,absolute_third_dose_10-19,absolute_third_dose_20-29,absolute_third_dose_30-39,absolute_third_dose_40-49,absolute_third_dose_50-59,absolute_third_dose_60-69,absolute_third_dose_70+,eligible_vaccine_0-4,eligible_vaccine_5-9,eligible_vaccine_10-19,eligible_vaccine_20-29,eligible_vaccine_30-39,eligible_vaccine_40-49,eligible_vaccine_50-59,eligible_vaccine_60-69,eligible_vaccine_70+
0,11,1100,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,20.168174,46.098683,46.818975,73.742186,43.937808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,20.168174,46.098683,46.818975,73.742186,43.937808,0.000000
1,11_haredi,1101,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,7.831826,17.901317,18.181025,21.257814,17.062192,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,7.831826,17.901317,18.181025,21.257814,17.062192,0.000000
2,11_betshemesh,1103,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,21,2100,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,22_jewish,2200,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16135,61,6100,2022-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4146.300000,6219.450000,31097.250000,48060.000000,48068.000000,44160.000000,41261.000000,41706.000000,45140.000000,1377.000000,2065.500000,10327.500000,31973.000000,33376.000000,34199.000000,34125.000000,37123.000000,42150.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2769.300000,4153.950000,20769.750000,16087.000000,14691.000000,9961.000000,7136.000000,4583.000000,2990.000000
16136,62_jewish,6200,2022-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4502.500000,6753.750000,33768.750000,55414.000000,50387.000000,43598.000000,39622.000000,36988.000000,36872.000000,1609.500000,2414.250000,12071.250000,38426.000000,37044.000000,34916.000000,33929.000000,33838.000000,34818.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2893.000000,4339.500000,21697.500000,16987.000000,13342.000000,8682.000000,5693.000000,3150.000000,2054.000000
16137,62_arab,6202,2022-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1127.500000,1691.250000,8456.250000,18100.000000,10727.000000,8398.000000,5386.000000,2947.000000,2030.000000,273.200000,409.800000,2049.000000,8541.000000,5360.000000,4556.000000,3382.000000,1969.000000,1417.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,854.300000,1281.450000,6407.250000,9559.000000,5367.000000,3842.000000,2004.000000,978.000000,613.000000
16138,71,7100,2022-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2329.200000,3493.800000,17469.000000,18902.000000,14728.000000,14274.000000,12140.000000,10655.000000,8058.000000,887.900000,1331.850000,6659.250000,14536.000000,11534.000000,12086.000000,10786.000000,9933.000000,7941.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1441.300000,2161.950000,10809.750000,4366.000000,3194.000000,2188.000000,1354.000000,722.000000,117.000000


In [45]:
# Writing the final frame to the Vaccinations folder under DATA_DIR, without the index
output_path = DATA_DIR + '/Vaccinations/Third_dose_vaccination_by_30area_age.xlsx'
proportion_df.to_excel(output_path, index= False)