### ERA5 Data Extraction for Census Places

In [300]:
import numpy as np
import re
import pandas as pd
import geopandas as gpd
import gdown
import ee
import google
import os
import geemap

from tobler.util import h3fy
from ee_jupyter.ipyleaflet import Map

In [17]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
ee.Authenticate()
ee.Initialize()

KeyboardInterrupt: Interrupted by user

# Get temp data by block

In [204]:
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
hourly2022 = era5.filterDate('2022-01-01', '2022-12-31').select('temperature_2m')

In [273]:
era5daily = ee.ImageCollection("ECMWF/ERA5/DAILY")
daily2022 = era5daily.filterDate('2014-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature')

In [274]:
era5daily.size().getInfo()

15165

In [156]:
blocks = gpd.read_file('../Data/Blocks2020/Blocks2020.shp')

In [157]:
blocks.shape

(28568, 21)

In [179]:
blocks.columns

Index(['NAME', 'STATE', 'BOROUGH', 'TRACT', 'BLOCK', 'FIPS', 'TOTALPOP',
       'WHITE', 'BLACK', 'NATIVE', 'ASIAN', 'PACISLAND', 'OTHER', 'TWO_PLUS',
       'HISPANIC', 'NATALNCOMB', 'GRPQTRS', 'HOUSEUNITS', 'VACANT', 'OCCUPIED',
       'geometry'],
      dtype='object')

In [180]:
blocks = blocks[['NAME', 'STATE', 'BOROUGH', 'TRACT', 'BLOCK', 'FIPS', 'TOTALPOP', 'HOUSEUNITS', 'geometry']]

In [181]:
blocks.head()

Unnamed: 0,NAME,STATE,BOROUGH,TRACT,BLOCK,FIPS,TOTALPOP,HOUSEUNITS,geometry
0,Block 1224,2,282,100,1224,22820001001224,0.0,0.0,"POLYGON ((798058.221 1196684.982, 798238.898 1..."
1,Block 1080,2,185,200,1080,21850002001080,24.0,14.0,"POLYGON ((-526365.293 2093756.131, -526343.435..."
2,Block 2180,2,185,200,2180,21850002002180,50.0,15.0,"POLYGON ((-227333.603 2305244.729, -227247.314..."
3,Block 1018,2,185,300,1018,21850003001018,0.0,0.0,"POLYGON ((141825.171 2294747.938, 142101.298 2..."
4,Block 1074,2,185,100,1074,21850001001074,53.0,22.0,"POLYGON ((-101290.982 2368756.518, -101177.439..."


In [182]:
blocks_nonzero = blocks[blocks['TOTALPOP'] > 0]
blocks_nonzero.shape

(11765, 9)

In [254]:
fairbanks_blocks = blocks_nonzero[blocks_nonzero['BOROUGH'] == '090']
fairbanks_blocks.shape

(1828, 9)

In [255]:
fairbanks_block_features = geemap.geopandas_to_ee(fairbanks_blocks)

In [258]:
type(fairbanks_block_features)

ee.featurecollection.FeatureCollection

In [259]:
fairbanks_block_features.size().getInfo()

1828

In [289]:
gpd = fairbanks_blocks.iloc[:2,:]
print(gpd.shape)
few_blocks = geemap.geopandas_to_ee(gpd)

(2, 9)


In [305]:
def reduceByFeature(image):
    combined_reducers = ee.Reducer.min().combine(
        ee.Reducer.median(), sharedInputs = True).combine(
        ee.Reducer.max(), sharedInputs = True)

    return image.reduceRegions(collection = fairbanks_block_features,
                               #reducer = combined_reducers.setOutputs(['min_air_temp', 'median_air_temp', 'max_air_temp']),
                               reducer = ee.Reducer.mean(),
                               scale = 11132)

fairbanks_blocks_daily2022 = daily2022.map(reduceByFeature).flatten()


AttributeError: 'Image' object has no attribute 'map'

In [295]:
task = ee.batch.Export.table.toDrive(**{
  'collection': fairbanks_blocks_daily2022,
  'description': 'Fairbanks Blocks Borough 090 Temp Stats Daily 2022 Try 3',
  'folder': 'EarthEngine',
  'selectors': ['system:index','NAME', 'FIPS', 'maximum_2m_air_temperature', 
                'mean_2m_air_temperature', 'minimum_2m_air_temperature'], 
  'fileFormat': 'CSV'
})
task.start()

In [None]:
meanDaily5YearAvg = era5daily.filterDate('2015-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature').mean()

In [343]:

from calendar import monthrange

# Load the ERA5Daily dataset
era5daily = ee.ImageCollection("ECMWF/ERA5/DAILY")

# Define the date range you're interested in
start_date = '2015-01-01'
end_date = '2019-12-31'

# Filter the dataset by date range and select the temperature variables
filtered_data = era5daily \
    .filterDate(start_date, end_date) \
    .select(['minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature'])

# Define a function to compute the five-year daily averages
def compute_daily_averages_for_month_day(collection, month, day):
    # Filter data for the current month-day combination
    daily_data = collection.filter(ee.Filter.calendarRange(month, month, 'month')) \
                           .filter(ee.Filter.calendarRange(day, day, 'day_of_month'))
    
    # Calculate daily averages for the month-day combination
    daily_averages = daily_data.mean()
    
    # Set the 'system:time_start' property to the start date of the month-day combination
    start_date_str = '{:02d}-{:02d}-{:02d}'.format(2014, month, day)
    start_date = ee.Date(start_date_str)
    daily_averages = daily_averages.set('system:time_start', start_date.millis())
    
    return daily_averages

# Compute the daily averages for each month-day combination
daily_averages_list = []
for month in range(1, 13):  # Loop over each month
    _, num_days_in_month = monthrange(2014, month)  # Get the number of days in the current month
    for day in range(1, num_days_in_month + 1):
        daily_averages_list.append(compute_daily_averages_for_month_day(filtered_data, month, day))

# Create an ImageCollection from the list of daily averages
daily_averages_collection = ee.ImageCollection(daily_averages_list)

# Print the result (this will show the ImageCollection with daily averages for each month-day combination)
daily_averages_collection.getInfo()

# Check the number of outputs (should be 336)
#print(daily_averages_collection.size().getInfo())




# Print the result (this will show the ImageCollection with daily averages for each month-day combination)
#daily_averages_collection.size().getInfo()

# Visualize the first image (daily averages for the first month-day combination)
#first_image = daily_averages_collection.first()
#vis_params = {'bands': ['mean_2m_air_temperature'], 'min': -30, 'max': 30}
#ee.Map().addLayer(first_image, vis_params, 'First Month-Day Daily Mean Temp').add_to_map()


{'type': 'ImageCollection',
 'bands': [],
 'features': [{'type': 'Image',
   'bands': [{'id': 'minimum_2m_air_temperature',
     'data_type': {'type': 'PixelType', 'precision': 'float'},
     'crs': 'EPSG:4326',
     'crs_transform': [1, 0, 0, 0, 1, 0]},
    {'id': 'mean_2m_air_temperature',
     'data_type': {'type': 'PixelType', 'precision': 'float'},
     'crs': 'EPSG:4326',
     'crs_transform': [1, 0, 0, 0, 1, 0]},
    {'id': 'maximum_2m_air_temperature',
     'data_type': {'type': 'PixelType', 'precision': 'float'},
     'crs': 'EPSG:4326',
     'crs_transform': [1, 0, 0, 0, 1, 0]}],
   'properties': {'system:time_start': 1388534400000, 'system:index': '0'}},
  {'type': 'Image',
   'bands': [{'id': 'minimum_2m_air_temperature',
     'data_type': {'type': 'PixelType', 'precision': 'float'},
     'crs': 'EPSG:4326',
     'crs_transform': [1, 0, 0, 0, 1, 0]},
    {'id': 'mean_2m_air_temperature',
     'data_type': {'type': 'PixelType', 'precision': 'float'},
     'crs': 'EPSG:4326',

In [None]:
# Load the ERA5Daily dataset
era5daily = ee.ImageCollection("ECMWF/ERA5/DAILY")

# Define the date range you're interested in
start_date = '2015-01-01'
end_date = '2019-12-31'

# Filter the dataset by date range and select the temperature variables
filtered_data = era5daily \
    .filterDate(start_date, end_date) \
    .select(['minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature'])

# Load the Feature Collection of regions (fairbanks_block_features)
regions_fc = fairbanks_block_features

In [492]:
# Define a function to compute daily 5-year averages for each day
def compute_daily_averages_for_month_day(collection, julianDay):
    # Filter data for the current month-day combination
    daily_data = collection.filter(ee.Filter.dayOfYear(julianDay))
#     daily_data = collection.filter(ee.Filter.calendarRange(month, month, 'month')) \
#                            .filter(ee.Filter.calendarRange(day, day, 'day_of_month'))
    
    # Calculate daily averages for the month-day combination
    daily_averages = daily_data.mean()
    
    # Set the 'system:time_start' property to the start date of the month-day combination
    #start_date_str = '{:02d}-{:02d}-{:02d}'.format(2000, month, day)
    #start_date = ee.Date(start_date_str)
    #daily_averages = daily_averages.set('system:time_start', start_date.millis())
    
    return daily_averages


# https://developers.google.com/earth-engine/apidocs/ee-filter-dayofyear do this instead

# Compute the daily averages for each month-day combination
daily_averages_list = []

for julianDay in range(1, 366):
    julianDayData = filtered_data.filter(ee.Filter.dayOfYear(julianDay, julianDay)).mean()
    julianDayDataWDate = julianDayData.set('date', julianDay)
    daily_averages_list.append(julianDayDataWDate)
    # SET A PROPERTY HERE? MAYBE NOT

# Create an ImageCollection from the list of daily averages
daily_averages_collection = ee.ImageCollection(daily_averages_list)


In [495]:
daily_averages_collection.first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'minimum_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]},
  {'id': 'mean_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]},
  {'id': 'maximum_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]}],
 'properties': {'date': 1, 'system:index': '0'}}

In [498]:
reduced_by_region.first().getInfo()

{'type': 'FeatureCollection',
 'columns': {'BLOCK': 'String',
  'BOROUGH': 'String',
  'FIPS': 'String',
  'HOUSEUNITS': 'Integer',
  'NAME': 'String',
  'STATE': 'String',
  'TOTALPOP': 'Integer',
  'TRACT': 'String',
  'maximum_2m_air_temperature': 'Float',
  'mean_2m_air_temperature': 'Float',
  'minimum_2m_air_temperature': 'Float',
  'system:index': 'String'},
 'properties': {'system:index': '0', 'date': 1},
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-147.769284, 64.857436],
      [-147.769288, 64.857164],
      [-147.765503, 64.857165],
      [-147.765503, 64.857434],
      [-147.765503, 64.857614],
      [-147.766259, 64.857614],
      [-147.768527, 64.857615],
      [-147.769283, 64.857617],
      [-147.769284, 64.857436]]]},
   'id': '0',
   'properties': {'BLOCK': '2048',
    'BOROUGH': '090',
    'FIPS': '020900006002048',
    'HOUSEUNITS': 9,
    'NAME': 'Block 2048',
    'STATE': '02',
    'TOTALPOP': 17,
    'TRACT': '000600

In [None]:
reduced_by_region.map(lambda x:)

In [497]:
# reducer reducer !
# Reduce the ImageCollection using reduceRegions to get the mean temperature for each region for each day
def reduce_to_region(image):
    reduced_image = image.reduceRegions(collection=regions_fc, reducer=ee.Reducer.mean(), scale=11132)
    reduced_image = reduced_image.set('date', image.get('date'))
    return reduced_image

# Map over the ImageCollection to reduce to regions
reduced_by_region = daily_averages_collection.map(reduce_to_region)

# dates = reduced_by_region.map(lambda x: ee.Feature(None, {'date': x.get('date')}))
# datesList365 = dates.aggregate_array('date')
# datesArrayFull = np.array([])
# for i in range(365):
#     md = datesList365.get(i).getInfo()
#     mdArray = np.repeat(md, 1828)
#     datesArrayFull = np.append(datesArrayFull, mdArray)
    
#datesListFull = datesArrayFull.tolist()

# Flatten the ImageCollection to get a feature collection
flattened = reduced_by_region.flatten()
#flattened_with_date = flattened.set('date', datesListFull)

# Print the result (this will show the FeatureCollection with daily averages for each region and each day)
#flattened.getInfo()

# Check the number of outputs (should be 1828 * 365)
print(flattened.size().getInfo())

flattened.first().getInfo()

667220


{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-147.769284, 64.857436],
    [-147.769288, 64.857164],
    [-147.765503, 64.857165],
    [-147.765503, 64.857434],
    [-147.765503, 64.857614],
    [-147.766259, 64.857614],
    [-147.768527, 64.857615],
    [-147.769283, 64.857617],
    [-147.769284, 64.857436]]]},
 'id': '0_0',
 'properties': {'BLOCK': '2048',
  'BOROUGH': '090',
  'FIPS': '020900006002048',
  'HOUSEUNITS': 9,
  'NAME': 'Block 2048',
  'STATE': '02',
  'TOTALPOP': 17,
  'TRACT': '000600',
  'maximum_2m_air_temperature': None,
  'mean_2m_air_temperature': None,
  'minimum_2m_air_temperature': None}}

In [464]:
# precalculations at the raster level for both ERA5 and ERA5LAND
# reduce for ERA5LAND first, filter out blanks to get data from ERA5

array([], dtype=float64)

In [469]:
datesList365 = dates.aggregate_array('date')
datesList365

In [476]:
type(datesList365.get(2).getInfo())

int

In [353]:
flattened.filter(ee.Filter.eq('BLOCK', '2006')).first().getInfo()

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-147.675661, 64.887057],
    [-147.674931, 64.887305],
    [-147.674442, 64.887479],
    [-147.673625, 64.887787],
    [-147.673596, 64.887799],
    [-147.672742, 64.888144],
    [-147.672138, 64.888402],
    [-147.670942, 64.88896],
    [-147.669533, 64.889722],
    [-147.669265, 64.889876],
    [-147.668975, 64.890066],
    [-147.668462, 64.89045],
    [-147.667969, 64.890891],
    [-147.666502, 64.892209],
    [-147.666372, 64.892349],
    [-147.665603, 64.893001],
    [-147.664301, 64.894109],
    [-147.663356, 64.895027],
    [-147.662634, 64.895731],
    [-147.662536, 64.895872],
    [-147.662485, 64.895965],
    [-147.662403, 64.896118],
    [-147.662322, 64.896351],
    [-147.662225, 64.896752],
    [-147.66216, 64.897022],
    [-147.662126, 64.897337],
    [-147.662081, 64.897756],
    [-147.662089, 64.897991],
    [-147.662116, 64.898698],
    [-147.662125, 64.898935],
    [-147.66213, 64.899063],
    [-

In [391]:
#flattened.filter(ee.Filter.eq('BLOCK', '2006')).getInfo()

In [436]:
dates = reduced_by_region.map(lambda x: ee.Feature(None, {'date': x.get('date')}))

In [437]:
dates.first().get('date')

In [438]:
dates.size().getInfo()

365

In [444]:
type(dates)

ee.imagecollection.ImageCollection

In [422]:
type(regions_fc)

ee.featurecollection.FeatureCollection

In [390]:

ee.Image(daily_averages_list[:1]).get('system:time_start')

In [411]:
reduced_by_region.get('date')

In [415]:
reduced_by_region.first().get('date')

In [416]:
ee.FeatureCollection(reduced_by_region.first()).first().get('date')

In [394]:
reduced_by_region.size().getInfo()

365

In [410]:
ee.FeatureCollection(reduced_by_region.first()).size().getInfo()

1828

In [417]:
reduced_by_region.first().getInfo()

{'type': 'FeatureCollection',
 'columns': {'BLOCK': 'String',
  'BOROUGH': 'String',
  'FIPS': 'String',
  'HOUSEUNITS': 'Integer',
  'NAME': 'String',
  'STATE': 'String',
  'TOTALPOP': 'Integer',
  'TRACT': 'String',
  'maximum_2m_air_temperature': 'Float',
  'mean_2m_air_temperature': 'Float',
  'minimum_2m_air_temperature': 'Float',
  'system:index': 'String'},
 'properties': {'system:index': '0', 'date': 946684800000},
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-147.769284, 64.857436],
      [-147.769288, 64.857164],
      [-147.765503, 64.857165],
      [-147.765503, 64.857434],
      [-147.765503, 64.857614],
      [-147.766259, 64.857614],
      [-147.768527, 64.857615],
      [-147.769283, 64.857617],
      [-147.769284, 64.857436]]]},
   'id': '0',
   'properties': {'BLOCK': '2048',
    'BOROUGH': '090',
    'FIPS': '020900006002048',
    'HOUSEUNITS': 9,
    'NAME': 'Block 2048',
    'STATE': '02',
    'TOTALPOP': 17,
    'TRAC

In [369]:
import ee

ee.Initialize()

# Load the ERA5Daily dataset
era5daily = ee.ImageCollection("ECMWF/ERA5/DAILY")

# Define the date range you're interested in
start_date = '2014-01-01'
end_date = '2019-12-31'

# Filter the dataset by date range and select the temperature variables
filtered_data = era5daily \
    .filterDate(start_date, end_date) \
    .select(['minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature'])

# Load the Feature Collection of regions (fairbanks_block_features)
regions_fc = ee.FeatureCollection("your_fairbanks_block_features_id")

# Define a function to compute daily 5-year averages for each day
def compute_daily_averages_for_month_day(collection, year, month, day):
    # Filter data for the current month-day combination
    daily_data = collection.filter(ee.Filter.calendarRange(month, month, 'month')) \
                           .filter(ee.Filter.calendarRange(day, day, 'day_of_month'))
    
    # Calculate daily averages for the month-day combination
    daily_averages = daily_data.mean()
    
    # Set the 'system:time_start' property to the start date of the month-day combination
    start_date_str = '{:02d}-{:02d}-{:04d}'.format(month, day, year)
    daily_averages = daily_averages.set('system:time_start', ee.Date(start_date_str).millis())
    
    # Add the 'date' property to indicate the date for which the temperature is calculated
    daily_averages = daily_averages.set('date', ee.Date(start_date_str).format('YYYY-MM-dd'))
    
    return daily_averages

# Compute the daily averages for each month-day combination for years 2014 to 2019
daily_averages_list = []
for year in range(2014, 2020):  # Loop over each year from 2014 to 2019
    for month in range(1, 13):  # Loop over each month
        _, num_days_in_month = monthrange(year, month)  # Get the number of days in the current month
        for day in range(1, num_days_in_month + 1):
            daily_averages_list.append(compute_daily_averages_for_month_day(filtered_data, year, month, day))

# Create an ImageCollection from the list of daily averages
daily_averages_collection = ee.ImageCollection(daily_averages_list)

# Reduce the ImageCollection using reduceRegions to get the mean temperature for each region for each day
def reduce_to_region(image):
    reduced_image = image.reduceRegions(collection=regions_fc, reducer=ee.Reducer.mean(), scale=1000)
    return reduced_image

# Map over the ImageCollection to reduce to regions
reduced_by_region = daily_averages_collection.map(reduce_to_region)

# Flatten the ImageCollection to get a feature collection
flattened = reduced_by_region.flatten()

# Map over the FeatureCollection to set the 'date' property for each feature
def set_date_property(feature):
    date = ee.String(feature.get('date'))
    print(date)
    return feature.set('date', date)

flattened_with_date = flattened.map(set_date_property)

# Print the result (this will show the FeatureCollection with daily averages for each region and each day)
print(flattened_with_date)

# Check the number of outputs (should be 1828 * 365 * 6)
print(flattened_with_date.size().getInfo())

# Check a specific feature to see the 'date' property
print(flattened_with_date.filter(ee.Filter.eq('BLOCK', '2006')).first().getInfo())


print(flattened_with_date.first().propertyNames())

# Check a specific feature to see the 'date' property
print(flattened_with_date.filter(ee.Filter.eq('BLOCK', '2006')).first().getInfo())




EEException: Date: Bad date/time '01-02-2014'.

In [None]:
flattened_with_date.first().propertyNames()

In [321]:
#gpd = fairbanks_blocks.iloc[:2,:]
#few_blocks = geemap.geopandas_to_ee(gpd)
import geopandas as gpd

few_regions_temp = meanDaily5YearAvg.reduceRegions(collection = few_blocks,
                               reducer = ee.Reducer.mean().group(),
                               scale = 11132).getInfo()



zone_stats = gpd.GeoDataFrame.from_features(few_regions_temp)


In [331]:
daily5year = era5daily.filterDate('2014-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature')

def mosaicByDate(imcol):
  # imcol: An image collection
  # returns: An image collection
    def funcD(d):
        d = ee.Date(d)
        im = imcol.filterDate(d, d.advance(1, "day")).mosaic()
        return im.set("system:time_start", d.millis(), 
        "system:id", d.format("YYYY-MM-dd"))

    imlist = imcol.toList(imcol.size())

    unique_dates = imlist.map(lambda x: ee.Image(x).date().format("YYYY-MM-dd")).distinct()

    mosaic_imlist = unique_dates.map(funcD).mosaic()

    return ee.ImageCollection(mosaic_imlist)



mosaicByDate(daily5year)

AttributeError: 'List' object has no attribute 'mosaic'

In [328]:
daily5year = era5daily.filterDate('2014-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature')

daily5year.first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'minimum_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'dimensions': [1440, 721],
   'crs': 'EPSG:4326',
   'crs_transform': [0.25, 0, -180, 0, -0.25, 90]},
  {'id': 'mean_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'dimensions': [1440, 721],
   'crs': 'EPSG:4326',
   'crs_transform': [0.25, 0, -180, 0, -0.25, 90]},
  {'id': 'maximum_2m_air_temperature',
   'data_type': {'type': 'PixelType', 'precision': 'float'},
   'dimensions': [1440, 721],
   'crs': 'EPSG:4326',
   'crs_transform': [0.25, 0, -180, 0, -0.25, 90]}],
 'version': 1578408700275214,
 'id': 'ECMWF/ERA5/DAILY/20140101',
 'properties': {'system:time_start': 1388534400000,
  'month': 1,
  'year': 2014,
  'system:footprint': {'type': 'LinearRing',
   'coordinates': [[-180, -90],
    [180, -90],
    [180, 90],
    [-180, 90],
    [-180, -90]]},
  'system:time_end': 1388620800000,
  'system:asset_size': 36688976,


In [326]:
daily5year = era5daily.filterDate('2014-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature')

def reduceByFeature(image):
    return image.reduceRegions(collection = few_blocks,
                               reducer = ee.Reducer.mean().group().repeat(3),
                               scale = 11132)


daily5year.map(reduceByFeature)


2190

In [296]:
# small block is block 1080

small_block = fairbanks_block_features.first().geometry()
# print(ee.List(small_block.centroid()))

dataset = ee.ImageCollection('ECMWF/ERA5/DAILY').filter(ee.Filter.date('2020-07-01', '2020-07-02'));

visualization = {
  'bands': ['mean_2m_air_temperature'],
  'min': 250.0,
  'max': 320.0,
  'palette': [
    '000080', '0000d9', '4000ff', '8000ff', '0080ff', '00ffff',
    '00ff80', '80ff00', 'daff00', 'ffff00', 'fff500', 'ffda00',
    'ffb000', 'ffa400', 'ff4f00', 'ff2500', 'ff0a00', 'ff00ff',
  ]
}

map1 = geemap.Map(center = [64.86, -147.77], zoom = 20)

map1.addLayer(small_block, {'color': "red"}, 'how big are you?')
map1.addLayer(dataset, visualization, 'Air temperature [K] at 2m height');

map1

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



<class 'ee.featurecollection.FeatureCollection'>


### Get ERA5 hourly data by h3 hexagons, resolution 5

In [8]:
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
hourly2022 = era5.filterDate('2022-01-01', '2022-01-02').select('temperature_2m')

In [22]:
def get_ee_feature(geom):
    x,y = geom.exterior.coords.xy
    coords = np.dstack((x,y)).tolist()
    g = ee.Geometry.Polygon(coords)
    return ee.Feature(g)

Data source: https://catalog.data.gov/dataset/tiger-line-shapefile-2019-state-alaska-current-alaska-native-regional-corporation-anrc-state-ba

In [23]:
# ak shapefile
geodf_alaska = gpd.read_file('../Data/tl_2019_02_anrc/tl_2019_02_anrc.shp')
geodf_alaska = geodf_alaska.to_crs(epsg=3338) 
# to hexagons
hexgrid_ak = h3fy(geodf_alaska.buffer(0.01), resolution=5)
# to feature collection
features = ee.FeatureCollection([get_ee_feature(x[1].geometry) for x in hexgrid_ak.iterrows()])

In [152]:
# try running with full features
hex_ids = features.map(lambda x: x.set({'id': x.id()}))
hex_ids_right = filtered_features_AK_right.map(lambda x: x.set({'id': x.id()}))

In [153]:
hex_ids_list = hex_ids.aggregate_array('id')
hex_ids_right_list = hex_ids_right.aggregate_array('id')

In [154]:
hex_ids_list

In [155]:
# map that takes in an ID or 

# map a function that filters IDs over features
# run the aggregation for that subset



# filter features according to ID
# run the aggregation


In [206]:
# map1 = geemap.Map()

# rectangle_left = ee.Geometry.Rectangle([-200, 10, -150, 80])
# rectangle_right = ee.Geometry.Rectangle([-150, 10, -100, 80])
# rectangle_small = ee.Geometry.Rectangle([-190, 50, -180, 60])

# map1.addLayer(rectangle_left, {'color': "yellow"}, 'AK Left')
# map1.addLayer(rectangle_right, {'color': 'blue'}, 'AK Right')
# map1.addLayer(rectangle_small, {'color': 'black'}, 'AK Small')


# map1

TraitError: The 'east' trait of a Map instance expected a float, not the NoneType None.

In [90]:
filtered_features_AK_left = features.filterBounds(rectangle_left)
filtered_features_AK_right = features.filterBounds(rectangle_right)

left_size = filtered_features_AK_left.size().getInfo()
right_size = filtered_features_AK_right.size().getInfo()
print(left_size, right_size, left_size + right_size)

4260 4221 8481


In [107]:
filtered_features_AK_small = features.filterBounds(rectangle_small)

def reduceByFeatureSmall(image):
  return image.reduceRegions(collection = filtered_features_AK_small,
                             reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']),
                            scale = 11132)

In [108]:
hourlyClippedSmall = hourly2022.map(lambda image: image.clipToCollection(filtered_features_AK_small))

hexExportSmall = hourlyClippedLeft.map(reduceByFeatureSmall).flatten()

In [119]:
filtered_features_AK_small.size().getInfo()

2124

In [110]:
task = ee.batch.Export.table.toDrive(**{
  'collection': hexExportSmall,
  'description': 'Hexagons Average Temp by Hourly 2022 Try 7 Small',
  'folder': 'EarthEngine',
  'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
  'fileFormat': 'CSV'
})
task.start()

In [91]:
# reducer 
def reduceByFeatureLeft(image):
  return image.reduceRegions(collection = filtered_features_AK_left,
                             reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']),
                            scale = 11132)
def reduceByFeatureRight(image):
  return image.reduceRegions(collection = filtered_features_AK_right,
                             reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']),
                            scale = 11132)

In [92]:
hourlyClippedLeft = hourly2022.map(lambda image: image.clipToCollection(filtered_features_AK_left))

hexExportLeft = hourlyClippedLeft.map(reduceByFeatureLeft).flatten()

In [93]:
hourlyClippedRight = hourly2022.map(lambda image: image.clipToCollection(filtered_features_AK_right))

hexExportRight = hourlyClippedRight.map(reduceByFeatureRight).flatten()

In [94]:
task = ee.batch.Export.table.toDrive(**{
  'collection': hexExportRight,
  'description': 'Hexagons Average Temp by Hourly 2022 Try 6 Right',
  'folder': 'EarthEngine',
  'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
  'fileFormat': 'CSV'
})
task.start()

In [95]:
task = ee.batch.Export.table.toDrive(**{
  'collection': hexExportLeft,
  'description': 'Hexagons Average Temp by Hourly 2022 Try 6 Left',
  'folder': 'EarthEngine',
  'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
  'fileFormat': 'CSV'
})
task.start()

In [46]:
# # reducer 
# def reduceByFeature(image):
#   return image.reduceRegions(collection = features,
#                              reducer = ee.Reducer.mean().setOutputs(['avg_air_temp']),
#                             scale = 11132)

In [47]:
# hourlyClipped = hourly2022.map(lambda image: image.clipToCollection(features))

# hexExport = hourlyClipped.map(reduceByFeature).flatten()

In [48]:
# # Run with caution: expected runtime: 45m

# task = ee.batch.Export.table.toDrive(**{
#   'collection': hexExport,
#   'description': 'Hexagons Average Temp by Hourly 2022 Clipped Try 4',
#   'folder': 'EarthEngine',
#   'selectors': ['system:index','NAME', 'FIPS', 'avg_air_temp'], 
#   'fileFormat': 'CSV'
# })
# task.start()

In [5]:
# download the result from Google drive (later change to Github submodule)
url = "https://drive.google.com/file/d/1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO/view?usp=drive_link"
output_path = "../Data/"
gdown.download(url, output_path, quiet=False,fuzzy=True)

Downloading...
From (uriginal): https://drive.google.com/uc?id=1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO
From (redirected): https://drive.google.com/uc?id=1T1vRS8OU1S89sfaoDCynN9BxAKRlGRPO&confirm=t&uuid=7927446a-026c-4a3f-a776-b0f4f8d038e1
To: /Users/brianleung/Documents/Python-Projects/DSSG2023-Heating-Loads/Data/Places Average Temp by Hourly 2022.csv
100%|██████████| 226M/226M [00:10<00:00, 21.5MB/s] 


'../Data/Places Average Temp by Hourly 2022.csv'

In [None]:
# replicating above with the full hourly data
temp_data = pd.read_csv("../Data/Places Average Temp by Hourly 2022.csv")
temp_data.head()

In [None]:
temp_data['Month'] = temp_data['system:index'].str[4:6]
temp_data['db_temp'] = (1.8 * (temp_data['avg_air_temp']-273)) + 32
temp_data['timestamp'] = temp_data['system:index'].str[:11].str.replace("T","")
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format ='%Y%m%d%H')
data = temp_data[['FIPS','NAME','timestamp', 'db_temp', 'Month']]
data.tail(10)

In [None]:
df = data
df_dict = {str(g): d for g, d in df.groupby('FIPS')}

In [None]:
for i in df_dict:
    df_dict[i].to_csv ('../Data/ERA5/'+ i + '.csv', index = None, header=True) 

In [None]:
for i in df_dict:
    df_dict[i].to_pickle ('../Data/ERA5/'+ i + '.pkl') 