### ERA5 Data Extraction for Heat Pump Calculator Simulation

In [20]:
#%pip install gdown

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
Note: you may need to restart the kernel to use updated packages.


In [21]:
import numpy as np
import re
import pandas as pd
import geopandas as gpd
import gdown

In [4]:
COLAB_AUTH_FLOW_CLOUD_PROJECT_FOR_API_CALLS = None

import ee
import google
import os

if COLAB_AUTH_FLOW_CLOUD_PROJECT_FOR_API_CALLS is None:
  print("Authenticating using Notebook auth...")
  if os.path.exists(ee.oauth.get_credentials_path()) is False:
    ee.Authenticate()
  else:
    print('\N{check mark} '
          'Previously created authentication credentials were found.')
  ee.Initialize()
else:
  print('Authenticating using Colab auth...')
  # Authenticate to populate Application Default Credentials in the Colab VM.
  google.colab.auth.authenticate_user()
  # Create credentials needed for accessing Earth Engine.
  credentials, auth_project_id = google.auth.default()
  # Initialize Earth Engine.
  ee.Initialize(credentials, project=COLAB_AUTH_FLOW_CLOUD_PROJECT_FOR_API_CALLS)
print('\N{check mark} Successfully initialized!')

Authenticating using Notebook auth...
✓ Previously created authentication credentials were found.
✓ Successfully initialized!


### Get ERA5 hourly data by Census places

In [16]:
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
hourly2022 = era5.filterDate('2022-01-01', '2022-01-02').select('temperature_2m')

In [17]:
# AK Census places as feature collection
ak_places = ee.FeatureCollection('projects/geospatial-data-kpleung/assets/akplaces_2010')

In [18]:
# reducer 
def reduceByPlaces(image):
  return image.reduceRegions(collection = ak_places,
                             reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']))

placesHourly2022 = hourly2022.map(reduceByPlaces).flatten()

In [19]:
# expected runtime: 45m
task = ee.batch.Export.table.toDrive(**{
  'collection': placesHourly2022,
  'description': 'Places Average Temp by Hourly 2022',
  'folder': 'EarthEngine',
  'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
  'fileFormat': 'CSV'
})
task.start()

In [23]:
# download the result from Google drive (later change to Github submodule)
url = "https://drive.google.com/file/d/1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO/view?usp=drive_link"
output_path = "../Data/"
gdown.download(url, output_path, quiet=False,fuzzy=True)

Downloading...
From (uriginal): https://drive.google.com/uc?id=1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO
From (redirected): https://drive.google.com/uc?id=1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO&confirm=t&uuid=4e35ce7f-a566-433f-acbe-e9abbb0e6d96
To: /Users/brianleung/Documents/Python-Projects/DSSG2023-Heating-Loads/Data/Places Average Temp by Hourly 2022.csv
100%|██████████| 226M/226M [00:09<00:00, 24.8MB/s] 


'../Data/Places Average Temp by Hourly 2022.csv'

In [24]:
# replicating above with the full hourly data
temp_data = pd.read_csv("../Data/Places Average Temp by Hourly 2022.csv")
temp_data.head()

Unnamed: 0,system:index,NAME,FIPS,avg_air_temp
0,20220101T00_000000000000000000e7,Chisana CDP,213890,256.219526
1,20220101T00_00000000000000000124,Flat CDP,225880,248.954065
2,20220101T00_00000000000000000162,Mertarvik CDP,248590,255.76948
3,20220101T00_000000000000000000b6,Ivanof Bay CDP,235890,274.298566
4,20220101T00_000000000000000000b0,Pope-Vannoy Landing CDP,262125,271.321084


In [26]:
temp_data['Month'] = temp_data['system:index'].str[4:6]
temp_data['db_temp'] = (1.8 * (temp_data['avg_air_temp']-273)) + 32
temp_data['timestamp'] = temp_data['system:index'].str[:11].str.replace("T","")
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format ='%Y%m%d%H')
data = temp_data[['FIPS','NAME','timestamp', 'db_temp', 'Month']]
data.tail(10)

Unnamed: 0,FIPS,NAME,timestamp,db_temp,Month
3101270,217410,Cordova city,2022-12-30 23:00:00,34.112994,12
3101271,270540,Sitka city and borough,2022-12-30 23:00:00,35.98865,12
3101272,224230,Fairbanks city,2022-12-30 23:00:00,-2.567381,12
3101273,242832,Lakes CDP,2022-12-30 23:00:00,27.502363,12
3101274,260310,Petersburg city,2022-12-30 23:00:00,33.778169,12
3101275,201090,Akutan city,2022-12-30 23:00:00,36.943349,12
3101276,283080,Wasilla city,2022-12-30 23:00:00,27.77325,12
3101277,240645,Knik-Fairview CDP,2022-12-30 23:00:00,28.403257,12
3101278,240950,Kodiak city,2022-12-30 23:00:00,,12
3101279,236400,Juneau city and borough,2022-12-30 23:00:00,24.122793,12


In [27]:
df = data
df_dict = {str(g): d for g, d in df.groupby('FIPS')}

In [28]:
for i in df_dict:
    df_dict[i].to_csv ('../Data/ERA5/'+ i + '.csv', index = None, header=True) 

In [29]:
for i in df_dict:
    df_dict[i].to_pickle ('../Data/ERA5/'+ i + '.pkl') 