### ERA5 Data Extraction for Census Places

In [None]:
#%pip install google
#%conda install google
#%pip install --upgrade google-api-python-client
#%pip install --upgrade ee
#%pip install earthengine-api --upgrade

In [None]:
import numpy as np
import re
import pandas as pd
import geopandas as gpd
import gdown
import ee
import google
import os

In [None]:
# Authenticate this session with Earth Engine, then initialise the client library.
# Authentication has to come first, so keep the two calls in this order.
# NOTE(review): recent earthengine-api releases may require a Cloud project to be
# passed to ee.Initialize() — confirm for the environment this notebook runs in.
ee.Authenticate()
ee.Initialize()

### Get ERA5 hourly data by Census places

In [None]:
# ERA5-Land hourly image collection.
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")

# Restrict to the date window and keep only the temperature_2m band.
# NOTE(review): despite the "2022" in the name, the window below spans only
# 2022-01-01 to 2022-01-02 — confirm whether the full year was intended.
hourly2022 = (
    era5
    .filterDate('2022-01-01', '2022-01-02')
    .select('temperature_2m')
)

In [None]:
# Alaska (AK) Census places, loaded as an Earth Engine feature collection from a
# project asset (the asset name suggests 2010 boundaries — TODO confirm).
ak_places = ee.FeatureCollection('projects/geospatial-data-kpleung/assets/akplaces_2010')

In [None]:
# Per-image reducer over the Census places.
def reduceByPlaces(image):
    """Return the mean of `image` within each feature of `ak_places`.

    The mean is written to an output property named ``avg_air_temp``.
    """
    mean_reducer = ee.Reducer.mean().setOutputs(['avg_air_temp'])
    return image.reduceRegions(collection=ak_places, reducer=mean_reducer)


# Apply the reducer to every hourly image, then flatten the resulting
# collection of feature collections into a single feature collection.
placesHourly2022 = hourly2022.map(reduceByPlaces).flatten()

In [None]:
## Run with caution: expected runtime is about 45 minutes. Uncomment the lines below to start the export to Google Drive.

# task = ee.batch.Export.table.toDrive(**{
#   'collection': placesHourly2022,
#   'description': 'Places Average Temp by Hourly 2022',
#   'folder': 'EarthEngine',
#   'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
#   'fileFormat': 'CSV'
# })
# task.start()

In [None]:
# Fetch the exported result from Google Drive (to be replaced by a GitHub submodule later).
url = "https://drive.google.com/file/d/1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO/view?usp=drive_link"
output_path = "../Data/"
# fuzzy=True lets gdown work out the file ID from a share-style link like the one above.
gdown.download(url=url, output=output_path, quiet=False, fuzzy=True)

In [None]:
# Read the downloaded per-place temperature table into a DataFrame.
# NOTE(review): FIPS is parsed with pandas' default dtype inference; if the codes
# carry leading zeros they would be dropped here — confirm against the CSV.
temp_data = pd.read_csv("../Data/Places Average Temp by Hourly 2022.csv")
temp_data.head()  # preview the first rows

In [None]:
# Derive the analysis columns from the raw export:
#   Month     - two-character month sliced out of system:index (kept as a string)
#   db_temp   - air temperature converted from kelvin to degrees Fahrenheit
#   timestamp - hourly datetime parsed from the leading YYYYMMDDTHH of system:index
# then subset to the columns that are written out per FIPS code.
image_index = temp_data['system:index']
temp_data['Month'] = image_index.str[4:6]
# 0 degC is 273.15 K (not 273), so subtract the exact offset before scaling.
temp_data['db_temp'] = (temp_data['avg_air_temp'] - 273.15) * 1.8 + 32
# The literal "T" in the format matches the separator between date and hour.
temp_data['timestamp'] = pd.to_datetime(image_index.str[:11], format='%Y%m%dT%H')
data = temp_data[['FIPS', 'NAME', 'timestamp', 'db_temp', 'Month']]
data.tail(10)

In [None]:
# Split the table into one DataFrame per FIPS code, keyed by the code as a string.
df = data
df_dict = {
    str(fips_code): place_rows
    for fips_code, place_rows in df.groupby('FIPS')
}

In [None]:
# Write one CSV per FIPS code into ../Data/ERA5/ (header row included, no index column).
# Create the target folder first so the export does not fail when it is missing.
os.makedirs('../Data/ERA5/', exist_ok=True)
for fips, place_df in df_dict.items():
    place_df.to_csv('../Data/ERA5/' + fips + '.csv', index=False, header=True)

In [None]:
# Write one bz2-compressed pickle per FIPS code into ../Data/ERA5/.
# Create the target folder first so the export does not fail when it is missing.
os.makedirs('../Data/ERA5/', exist_ok=True)
# The .pkl suffix does not reveal the compression, so readers need to pass
# compression='bz2' when loading these files.
for fips, place_df in df_dict.items():
    place_df.to_pickle('../Data/ERA5/' + fips + '.pkl', compression='bz2')