### ERA5 Data Extraction for Heat Pump Calculator Simulation

In [38]:
# download important libraries
import numpy as np
import re
import pandas as pd

In [39]:
# reading in data
temp_data = pd.read_csv("Placeshourly.csv")
temp_data.head()

Unnamed: 0,system:index,NAME,FIPS,avg air temp
0,20220101T00_000000000000000000e7,Chisana CDP,213890,256.219526
1,20220101T00_00000000000000000124,Flat CDP,225880,248.954065
2,20220101T00_00000000000000000162,Mertarvik CDP,248590,255.76948
3,20220101T00_000000000000000000b6,Ivanof Bay CDP,235890,274.298566
4,20220101T00_000000000000000000b0,Pope-Vannoy Landing CDP,262125,271.321084


In [40]:
# extracting months from system:index column
temp_data['Month'] = temp_data['system:index'].str[4:6]

In [41]:
# converting temperature from kelvin to faranheit
temp_data['db_temp'] = (1.8 * (temp_data['avg air temp']-273)) + 32


In [42]:
# extracting timestamp from system:index column
temp_data['timestamp'] = temp_data['system:index'].str[:11].str.replace("T","")
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format ='%Y%m%d%H')

In [43]:
# subsetting needed columns
data = temp_data[['FIPS','NAME','timestamp', 'db_temp', 'Month']]

In [44]:
data.tail(10)

Unnamed: 0,FIPS,NAME,timestamp,db_temp,Month
8510,217410,Cordova city,2022-01-01 23:00:00,13.92081,1
8511,270540,Sitka city and borough,2022-01-01 23:00:00,30.493218,1
8512,224230,Fairbanks city,2022-01-01 23:00:00,-17.901079,1
8513,242832,Lakes CDP,2022-01-01 23:00:00,3.353057,1
8514,260310,Petersburg city,2022-01-01 23:00:00,29.430837,1
8515,201090,Akutan city,2022-01-01 23:00:00,29.826286,1
8516,283080,Wasilla city,2022-01-01 23:00:00,1.609551,1
8517,240645,Knik-Fairview CDP,2022-01-01 23:00:00,0.116085,1
8518,240950,Kodiak city,2022-01-01 23:00:00,,1
8519,236400,Juneau city and borough,2022-01-01 23:00:00,21.454601,1


In [45]:
# grouping data by unique FIPS
df = data

# using a dict-comprehension, the unique 'method' value will be the key
df_dict = {str(g): d for g, d in df.groupby('FIPS')}

#print(df_dict.keys())

In [37]:
# saving as csv
for i in df_dict:
    df_dict[i].to_csv ('ERA5/'+ i + '.csv', index = None, header=True) 

In [36]:
# saving as pickle
for i in df_dict:
    df_dict[i].to_pickle ('ERA5/'+ i + '.pkl') 

In [46]:
# replicating above with the full hourly data
temp_data = pd.read_csv("PlacesHourly(Full).csv")
temp_data.head()

Unnamed: 0,system:index,NAME,FIPS,avg_air_temp
0,20220101T00_000000000000000000e7,Chisana CDP,213890,256.219526
1,20220101T00_00000000000000000124,Flat CDP,225880,248.954065
2,20220101T00_00000000000000000162,Mertarvik CDP,248590,255.76948
3,20220101T00_000000000000000000b6,Ivanof Bay CDP,235890,274.298566
4,20220101T00_000000000000000000b0,Pope-Vannoy Landing CDP,262125,271.321084


In [48]:
temp_data['Month'] = temp_data['system:index'].str[4:6]
temp_data['db_temp'] = (1.8 * (temp_data['avg_air_temp']-273)) + 32
temp_data['timestamp'] = temp_data['system:index'].str[:11].str.replace("T","")
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format ='%Y%m%d%H')
data = temp_data[['FIPS','NAME','timestamp', 'db_temp', 'Month']]
data.tail(10)

Unnamed: 0,FIPS,NAME,timestamp,db_temp,Month
3101270,217410,Cordova city,2022-12-30 23:00:00,34.112994,12
3101271,270540,Sitka city and borough,2022-12-30 23:00:00,35.98865,12
3101272,224230,Fairbanks city,2022-12-30 23:00:00,-2.567381,12
3101273,242832,Lakes CDP,2022-12-30 23:00:00,27.502363,12
3101274,260310,Petersburg city,2022-12-30 23:00:00,33.778169,12
3101275,201090,Akutan city,2022-12-30 23:00:00,36.943349,12
3101276,283080,Wasilla city,2022-12-30 23:00:00,27.77325,12
3101277,240645,Knik-Fairview CDP,2022-12-30 23:00:00,28.403257,12
3101278,240950,Kodiak city,2022-12-30 23:00:00,,12
3101279,236400,Juneau city and borough,2022-12-30 23:00:00,24.122793,12


In [49]:
df = data
df_dict = {str(g): d for g, d in df.groupby('FIPS')}

In [50]:
for i in df_dict:
    df_dict[i].to_csv ('ERA5/'+ i + '.csv', index = None, header=True) 

In [51]:
for i in df_dict:
    df_dict[i].to_pickle ('ERA5/'+ i + '.pkl') 