### ERA5 Data Extraction for Census Blocks

In [None]:
#%pip install google
#%conda install google
#%pip install --upgrade google-api-python-client
#%pip install --upgrade ee
#%pip install earthengine-api --upgrade

In [29]:
import numpy as np
import re
import pandas as pd
import geopandas as gpd
import gdown
import ee
import google
import os
import julian
import datetime

In [None]:
# Authenticate with Earth Engine, then initialise the client library.
# Order matters: the initialise call is made only after authentication.
ee.Authenticate()
ee.Initialize()

### Get ERA5 hourly data by Census places

In [None]:
# ERA5-Land hourly image collection, referenced by its Earth Engine asset id.
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")

# Restrict to the requested date window and keep only the `temperature_2m` band.
# NOTE(review): the window runs from 2022-01-01 to 2022-01-02, i.e. a single
# day despite the variable name -- confirm whether the full year was intended.
hourly2022 = (
    era5
    .filterDate('2022-01-01', '2022-01-02')
    .select('temperature_2m')
)

In [None]:
# AK Census places, loaded as a FeatureCollection from a project asset
# (the asset name suggests the 2010 vintage -- confirm if it matters).
ak_places = ee.FeatureCollection('projects/geospatial-data-kpleung/assets/akplaces_2010')

In [None]:
# Per-image reduction over the Census places
def reduceByPlaces(image):
  """Reduce one image over every feature in ``ak_places``.

  Args:
    image: the image to reduce (one element of the mapped collection).

  Returns:
    Whatever ``image.reduceRegions`` yields for ``ak_places`` with a mean
    reducer whose output is named ``avg_air_temp``.
  """
  place_mean = ee.Reducer.mean().setOutputs(['avg_air_temp'])
  return image.reduceRegions(collection=ak_places, reducer=place_mean)


# Apply the reduction to each image of the filtered collection, then flatten
# the resulting collection of collections into a single one.
placesHourly2022 = (
    hourly2022
    .map(reduceByPlaces)
    .flatten()
)

In [None]:
## Run with caution: expected runtime: 45m

# task = ee.batch.Export.table.toDrive(**{
#   'collection': placesHourly2022,
#   'description': 'Places Average Temp by Hourly 2022',
#   'folder': 'EarthEngine',
#   'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
#   'fileFormat': 'CSV'
# })
# task.start()

In [None]:
# Fetch the result file from Google Drive
# (original author's TODO: later change to a GitHub submodule).
url = "https://drive.google.com/file/d/1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO/view?usp=drive_link"
output_path = "../Data/"
# fuzzy=True lets gdown extract the file id from a ".../view" share link.
gdown.download(
    url=url,
    output=output_path,
    quiet=False,
    fuzzy=True,
)

In [2]:
# Read the combined temperature table (the file is downloaded from Drive and
# uploaded to the Jupyter workspace beforehand).
temp_data = pd.read_csv("../Data/all_temp.csv")

In [3]:
# Preview the loaded table (Jupyter renders the last expression of a cell).
temp_data

Unnamed: 0,FIPS,date,min_temp,mean_temp,max_temp
0,20700001001245,1.0,266.462904,269.285745,271.749924
1,20700002001001,1.0,266.558705,269.145364,271.637401
2,20700002001018,1.0,266.973592,269.518289,272.001239
3,20700001001251,1.0,266.462904,269.285745,271.749924
4,20700002001015,1.0,266.973592,269.518289,272.001239
...,...,...,...,...,...
4294220,22400001002001,365.0,252.581530,255.190441,257.350809
4294221,22400004001067,365.0,258.794727,262.188802,265.296953
4294222,22400004002040,365.0,259.166285,262.630677,265.949409
4294223,22400001001259,365.0,252.405769,255.446224,258.508935


In [4]:
# Replicate every daily row 24 times so each day can be used as hourly data.
# Rows are repeated by position, so each column keeps its own dtype instead of
# being coerced through one homogeneous NumPy array (which is what np.repeat
# on the whole DataFrame does).
HOURS_PER_DAY = 24
df = (
    temp_data
    .iloc[np.repeat(np.arange(len(temp_data)), HOURS_PER_DAY)]
    # Fresh 0..N-1 index: the hour of day is later derived from row position.
    .reset_index(drop=True)
    # Explicit cast kept so FIPS is int64 whatever read_csv inferred.
    .astype({'FIPS': np.int64})
)

In [5]:
# creating hours: every source row occupies 24 consecutive positions of the
# default 0..N-1 index, so the position modulo 24 gives the hour of day (0-23).
df['hours'] = df.index%24

In [6]:
# Inspect the last rows of the expanded frame (hours should end at 23).
df.tail()

Unnamed: 0,FIPS,date,min_temp,mean_temp,max_temp,hours
103061395,22400001001097,365.0,249.371691,252.087242,254.672444,19
103061396,22400001001097,365.0,249.371691,252.087242,254.672444,20
103061397,22400001001097,365.0,249.371691,252.087242,254.672444,21
103061398,22400001001097,365.0,249.371691,252.087242,254.672444,22
103061399,22400001001097,365.0,249.371691,252.087242,254.672444,23


In [7]:
# Convert the day number in `date` (1 = first day) plus the hour of day into a
# timestamp.
# NOTE(review): no `origin` is passed, so day 1 maps to 1970-01-01 (the Unix
# epoch) rather than the calendar year the data was extracted for -- confirm
# whether downstream code needs the real year before relying on these values.
# The unit is lowercase 'h' because the uppercase 'H' alias is deprecated.
df['timestamp'] = pd.to_datetime(df['date'] - 1, unit = 'D') + pd.to_timedelta(df['hours'], unit = 'h')
# Series.round() is the numeric "decimals" rounder; rounding datetimes to
# whole seconds goes through the .dt accessor.
df['timestamp'] = df['timestamp'].dt.round('s')

In [15]:
# Month number taken from each row's timestamp
df['Month'] = df['timestamp'].dt.month

In [22]:
# `date` and `hours` were only needed to build the timestamp; remove them
df = df.drop(columns=['date', 'hours'])

In [23]:
# Preview the frame after dropping the helper columns.
df

Unnamed: 0,FIPS,min_temp,mean_temp,max_temp,timestamp,Month
0,20700001001245,266.462904,269.285745,271.749924,1970-01-01 00:00:00,1
1,20700001001245,266.462904,269.285745,271.749924,1970-01-01 01:00:00,1
2,20700001001245,266.462904,269.285745,271.749924,1970-01-01 02:00:00,1
3,20700001001245,266.462904,269.285745,271.749924,1970-01-01 03:00:00,1
4,20700001001245,266.462904,269.285745,271.749924,1970-01-01 04:00:00,1
...,...,...,...,...,...,...
103061395,22400001001097,249.371691,252.087242,254.672444,1970-12-31 19:00:00,12
103061396,22400001001097,249.371691,252.087242,254.672444,1970-12-31 20:00:00,12
103061397,22400001001097,249.371691,252.087242,254.672444,1970-12-31 21:00:00,12
103061398,22400001001097,249.371691,252.087242,254.672444,1970-12-31 22:00:00,12


In [24]:
# Promote the timestamp column to the row index
df = df.set_index('timestamp')

In [25]:
# Preview the frame now that it is indexed by timestamp.
df

Unnamed: 0_level_0,FIPS,min_temp,mean_temp,max_temp,Month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1970-01-01 00:00:00,20700001001245,266.462904,269.285745,271.749924,1
1970-01-01 01:00:00,20700001001245,266.462904,269.285745,271.749924,1
1970-01-01 02:00:00,20700001001245,266.462904,269.285745,271.749924,1
1970-01-01 03:00:00,20700001001245,266.462904,269.285745,271.749924,1
1970-01-01 04:00:00,20700001001245,266.462904,269.285745,271.749924,1
...,...,...,...,...,...
1970-12-31 19:00:00,22400001001097,249.371691,252.087242,254.672444,12
1970-12-31 20:00:00,22400001001097,249.371691,252.087242,254.672444,12
1970-12-31 21:00:00,22400001001097,249.371691,252.087242,254.672444,12
1970-12-31 22:00:00,22400001001097,249.371691,252.087242,254.672444,12


In [26]:
# Split the table into one DataFrame per FIPS code, keyed by the code as a string
df_dict = {
    str(fips): fips_frame
    for fips, fips_frame in df.groupby('FIPS')
}

In [28]:
# Save every FIPS block both as CSV and as a bz2-compressed pickle.
block_dir = '../Data/Block_ERA5/'
# to_csv / to_pickle do not create missing folders, so make sure it exists.
os.makedirs(block_dir, exist_ok=True)
for fips, fips_frame in df_dict.items():
    # index=True keeps the timestamp index in the CSV; the previous index=None
    # dropped it, so only the pickle carried the timestamps.
    fips_frame.to_csv(os.path.join(block_dir, fips + '.csv'), index=True, header=True)
    fips_frame.to_pickle(os.path.join(block_dir, fips + '.pkl'), compression='bz2')