### ERA5 Data Extraction for Census Places

In [1]:
import pandas as pd
import gdown
import ee
import google

In [None]:
ee.Authenticate()
ee.Initialize()

### Get ERA5 hourly data by Census places

In [None]:
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
hourly2022 = era5.filterDate('2022-01-01', '2022-01-02').select('temperature_2m')

In [None]:
# AK Census places as feature collection
ak_places = ee.FeatureCollection('projects/geospatial-data-kpleung/assets/akplaces_2010')

In [None]:
# reducer 
def reduceByPlaces(image):
  return image.reduceRegions(collection = ak_places,
                             reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']))

placesHourly2022 = hourly2022.map(reduceByPlaces).flatten()

In [None]:
# getting temperature data
## Run with caution: expected runtime: 45m

 task = ee.batch.Export.table.toDrive(**{
   'collection': placesHourly2022,
   'description': 'Places Average Temp by Hourly 2022',
   'folder': 'EarthEngine',
   'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
   'fileFormat': 'CSV'
 })
 task.start()

In [None]:
# download the result from Google drive (later change to Github submodule)
url = "https://drive.google.com/file/d/1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO/view?usp=drive_link"
output_path = "../Data/"
gdown.download(url, output_path, quiet=False,fuzzy=True)

In [None]:
# reading in data
temp_data = pd.read_csv("../Data/Places Average Temp by Hourly 2022.csv")
temp_data.head()

In [6]:
# extracting month from system:index, converting temp from kelvin to faranheit, extracting timestamp from system:index
# susetting specific columns
temp_data['Month'] = temp_data['system:index'].str[4:6]
temp_data['db_temp'] = (1.8 * (temp_data['avg_air_temp']-273)) + 32
temp_data['timestamp'] = temp_data['system:index'].str[:11].str.replace("T","")
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format ='%Y%m%d%H')
data = temp_data[['FIPS','NAME','timestamp', 'db_temp', 'Month']]
data.tail(10)

Unnamed: 0,FIPS,NAME,timestamp,db_temp,Month
3101270,217410,Cordova city,2022-12-30 23:00:00,34.112994,12
3101271,270540,Sitka city and borough,2022-12-30 23:00:00,35.98865,12
3101272,224230,Fairbanks city,2022-12-30 23:00:00,-2.567381,12
3101273,242832,Lakes CDP,2022-12-30 23:00:00,27.502363,12
3101274,260310,Petersburg city,2022-12-30 23:00:00,33.778169,12
3101275,201090,Akutan city,2022-12-30 23:00:00,36.943349,12
3101276,283080,Wasilla city,2022-12-30 23:00:00,27.77325,12
3101277,240645,Knik-Fairview CDP,2022-12-30 23:00:00,28.403257,12
3101278,240950,Kodiak city,2022-12-30 23:00:00,,12
3101279,236400,Juneau city and borough,2022-12-30 23:00:00,24.122793,12


In [8]:
# making the timestamp the index
df = data.set_index('timestamp')

Unnamed: 0_level_0,FIPS,NAME,db_temp,Month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-30 23:00:00,201090,Akutan city,36.943349,12
2022-12-30 23:00:00,283080,Wasilla city,27.77325,12
2022-12-30 23:00:00,240645,Knik-Fairview CDP,28.403257,12
2022-12-30 23:00:00,240950,Kodiak city,,12
2022-12-30 23:00:00,236400,Juneau city and borough,24.122793,12


In [9]:
# partitioning data into different FIPS and changind it to a dataframes
df_dict = {str(g): d for g, d in df.groupby('FIPS')}

In [11]:
# saving FIPS data as csv
for i in df_dict:
    df_dict[i].to_csv ('../Data/ERA5/'+ i + '.csv', index = None, header=True) 

In [12]:
# saving FIPS data as pickle
for i in df_dict:
    df_dict[i].to_pickle ('../Data/ERA5/'+ i + '.pkl', compression = 'bz2') 