# ERA5 Data Extraction for Census Blocks

Exports are split by borough for export convenience.

In [2]:
import numpy as np
import re
import pandas as pd
import geopandas as gpd
import gdown
import ee
import google
import os
import geemap

from tobler.util import h3fy
from ee_jupyter.ipyleaflet import Map

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [91]:
ee.Authenticate()
ee.Initialize()

Enter verification code: 4/1Adeu5BVR_ytoRkmzRAaeXwDgHM0QbNvpSDfIFb-t8WemBa4bkjR40pDAwrg

Successfully saved authorization token.


## Load in the Block Data

In [16]:
os.path.exists('../DSSG2023-Heating-Loads-Data/shapefiles')

True

In [17]:
blocks = gpd.read_file('../DSSG2023-Heating-Loads-Data/shapefiles/Blocks2020/Blocks2020.shp')

In [18]:
blocks.shape

(28568, 21)

In [19]:
blocks = blocks[['NAME', 'STATE', 'BOROUGH', 'TRACT', 'BLOCK', 'FIPS', 'TOTALPOP', 'HOUSEUNITS', 'geometry']]

In [20]:
blocks.head()

Unnamed: 0,NAME,STATE,BOROUGH,TRACT,BLOCK,FIPS,TOTALPOP,HOUSEUNITS,geometry
0,Block 1224,2,282,100,1224,22820001001224,0.0,0.0,"POLYGON ((798058.221 1196684.982, 798238.898 1..."
1,Block 1080,2,185,200,1080,21850002001080,24.0,14.0,"POLYGON ((-526365.293 2093756.131, -526343.435..."
2,Block 2180,2,185,200,2180,21850002002180,50.0,15.0,"POLYGON ((-227333.603 2305244.729, -227247.314..."
3,Block 1018,2,185,300,1018,21850003001018,0.0,0.0,"POLYGON ((141825.171 2294747.938, 142101.298 2..."
4,Block 1074,2,185,100,1074,21850001001074,53.0,22.0,"POLYGON ((-101290.982 2368756.518, -101177.439..."


In [21]:
blocks_nonzero = blocks[blocks['TOTALPOP'] > 0]
blocks_nonzero.shape

(11765, 9)

## Filter down to Fairbanks North Star Borough

In [11]:
fairbanks_blocks = blocks_nonzero[blocks_nonzero['BOROUGH'] == '090']
fairbanks_blocks.shape

(1828, 9)

In [22]:
fairbanks_block_fc = geemap.geopandas_to_ee(fairbanks_blocks)

In [13]:
type(fairbanks_block_fc)

ee.featurecollection.FeatureCollection

In [14]:
fairbanks_block_fc.size().getInfo()

1828

Examine just one block as a Feature

In [91]:
one_block = fairbanks_block_fc.filter(ee.Filter.eq('FIPS', '020900001002006'))

In [92]:
one_block.getInfo()

{'type': 'FeatureCollection',
 'columns': {'BLOCK': 'String',
  'BOROUGH': 'String',
  'FIPS': 'String',
  'HOUSEUNITS': 'Integer',
  'NAME': 'String',
  'STATE': 'String',
  'TOTALPOP': 'Integer',
  'TRACT': 'String',
  'system:index': 'String'},
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-147.729781, 64.843503],
      [-147.729939, 64.843422],
      [-147.729345, 64.8432519999999],
      [-147.727565, 64.842743],
      [-147.727173, 64.842632],
      [-147.726966, 64.842606],
      [-147.726898, 64.8426979999999],
      [-147.726694, 64.84298],
      [-147.726627, 64.843074],
      [-147.726908, 64.843132],
      [-147.727137, 64.843203],
      [-147.728646, 64.843675],
      [-147.729149, 64.843833],
      [-147.729306, 64.84375],
      [-147.729781, 64.843503]]]},
   'id': '127',
   'properties': {'BLOCK': '2006',
    'BOROUGH': '090',
    'FIPS': '020900001002006',
    'HOUSEUNITS': 16,
    'NAME': 'Block 2006',
    'STATE': '02',
  

## Load in Temperature Data

In [None]:
era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
daily_land_10years = era5_land.filterDate('2010-01-01', '2019-12-31').select('temperature_2m', 'temperature_2m_min', 'temperature_2m_max')

In [None]:
era5_gen = ee.ImageCollection("ECMWF/ERA5/DAILY")
daily_gen_10years = era5_gen.filterDate('2010-01-01', '2019-12-31').select('minimum_2m_air_temperature', 'mean_2m_air_temperature', 'maximum_2m_air_temperature')

## Reductions and Export for Fairbanks

Get 10 year averages for temperature

In [21]:
JulianDayList = ee.List.sequence(1, 365)

def julianDayMeansLand(day):
    means = daily_land_10years.filter(ee.Filter.dayOfYear(day, day)).mean()
    ret_means = means.set('julian_date', day)
    return ret_means

def julianDayMeansGen(day):
    means = daily_gen_10years.filter(ee.Filter.dayOfYear(day, day)).mean() #temporal reduction
    ret_means = means.set('julian_date', day)
    return ret_means

mappedListLand = JulianDayList.map(julianDayMeansLand)
mappedListGen = JulianDayList.map(julianDayMeansGen)
daily_averages_collection_land = ee.ImageCollection(mappedListLand)
daily_averages_collection_gen = ee.ImageCollection(mappedListGen)

Get temperature data by block

In [23]:
# Reduce the ImageCollection using reduceRegions to get the mean temperature for each region for each day
def reduce_to_region(image):
    # use the scale of the image (ERA5)
    reduced_image = image.reduceRegions(collection=fairbanks_block_fc, reducer=ee.Reducer.mean(), scale=11132) #spatial reduction
    reduced_image = reduced_image.set('date', image.get('julian_date'))
    return reduced_image

# Map over the ImageCollection to reduce to regions
#reduced_by_region = daily_averages_collection.map(reduce_to_region)
reduced_by_region_land = daily_averages_collection_land.map(reduce_to_region)
reduced_by_region_gen = daily_averages_collection_gen.map(reduce_to_region)

def doublemapOuter(fc):
    jd = fc.get('date')
    def doublemapInner(feature):
        return feature.set('date', jd)
    return ee.FeatureCollection(fc).map(doublemapInner)
    

reduced_by_region_with_date_land = reduced_by_region_land.map(doublemapOuter)
reduced_by_region_with_date_gen = reduced_by_region_gen.map(doublemapOuter)

Check the sizes. These might be problematic given how GEE handles compute and memory.

In [231]:
print('Land:')
print(reduced_by_region_with_date_land.size().getInfo())
print(ee.FeatureCollection(reduced_by_region_with_date_land.first()).size().getInfo())

print('Gen:')
print(reduced_by_region_with_date_gen.size().getInfo())
print(ee.FeatureCollection(reduced_by_region_with_date_gen.first()).size().getInfo())

In [26]:
# Flatten the ImageCollection to get a feature collection
flattened_land = reduced_by_region_with_date_land.flatten()
flattened_gen = reduced_by_region_with_date_gen.flatten()

# Check the number of outputs (should be 1828 * 365)
print(flattened_land.size().getInfo())
print(flattened_gen.size().getInfo())
#print(1828 * 365)

667220


filter flattened_land to remove blocks with empty temperature data

In [None]:
flattened_land_null = flattened_land.filter(ee.Filter.Not(ee.Filter.notNull(['temperature_2m', 'temperature_2m_max', 'temperature_2m_min'])))
## the following line will cause a memory error, yay!
# flattened_land_null.size().getInfo()

## instead export
task = ee.batch.Export.table.toDrive(**{
  'collection': flattened_land_null,
  'description': 'ERA5 LAND Data for Empty Temperature Try 1',
  'folder': 'EarthEngine',
  'fileFormat': 'CSV'
})
task.start()

In [73]:
flattened_gen_null = flattened_gen.filter(ee.Filter.Not(ee.Filter.notNull(['mean_2m_air_temperature', 'maximum_2m_air_temperature', 'minimum_2m_air_temperature'])))

# divide the following by 365 to get the number of blocks with empty temperature data
flattened_gen_null.size().getInfo()

470850

In [103]:
flattened_gen_null_distinct = flattened_gen_null.distinct('.geo')
flattened_gen_null_distinct.size().getInfo()

1290

Try decreasing the scale of the reduction because the blocks are significantly smaller than the temperature data 

In [None]:
def reduce_to_region_gen_scale_1000(image):
    # use the scale of the image (ERA5)
    reduced_image = image.reduceRegions(collection=flattened_gen_null_distinct, reducer=ee.Reducer.mean(), scale=1000) #spatial reduction
    reduced_image = reduced_image.set('date', image.get('julian_date'))
    return reduced_image

reduced_with_date_gen_scale1000 = daily_averages_collection_gen.map(reduce_to_region_gen_scale_1000)
reduced_with_date_gen_scale1000 = reduced_with_date_gen_scale1000.map(doublemapOuter)

In [150]:
reduced_with_date_gen_scale1000.flatten().first().getInfo()

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-147.729781, 64.843503],
    [-147.729939, 64.843422],
    [-147.729345, 64.8432519999999],
    [-147.727565, 64.842743],
    [-147.727173, 64.842632],
    [-147.726966, 64.842606],
    [-147.726898, 64.8426979999999],
    [-147.726694, 64.84298],
    [-147.726627, 64.843074],
    [-147.726908, 64.843132],
    [-147.727137, 64.843203],
    [-147.728646, 64.843675],
    [-147.729149, 64.843833],
    [-147.729306, 64.84375],
    [-147.729781, 64.843503]]]},
 'id': '0_127',
 'properties': {'BLOCK': '2006',
  'BOROUGH': '090',
  'FIPS': '020900001002006',
  'HOUSEUNITS': 16,
  'NAME': 'Block 2006',
  'STATE': '02',
  'TOTALPOP': 45,
  'TRACT': '000100',
  'date': 1,
  'maximum_2m_air_temperature': 264.2896728515625,
  'mean_2m_air_temperature': 260.2138366699219,
  'minimum_2m_air_temperature': 255.99856567382812}}

In [105]:
# should be 365 for each day of the year
reduced_with_date_gen_scale1000.size().getInfo()

365

In [106]:
# should match the number of distinct blocks without temperature data from the first try
ee.FeatureCollection(reduced_with_date_gen_scale1000.first()).size().getInfo()

1290

In [113]:
flattened_gen_scale1000 = reduced_with_date_gen_scale1000.flatten()
# flattened_gen_scale1000.size().getInfo()

Export the temperature data that was previously empty

In [110]:
flattened_gen_scale1000_with_temp = flattened_gen_scale1000.filter(ee.Filter.notNull(['mean_2m_air_temperature', 'maximum_2m_air_temperature', 'minimum_2m_air_temperature']))

In [111]:
task = ee.batch.Export.table.toDrive(**{
  'collection': flattened_gen_scale1000_with_temp,
  'description': 'ERA5 GEN Data for Originally Empty Blocks with Scale 1000 Try 1',
  'folder': 'EarthEngine',
  'selectors': ['BLOCK','FIPS', 'date', 'mean_2m_air_temperature', 'maximum_2m_air_temperature', 'minimum_2m_air_temperature'], 
  'fileFormat': 'CSV'
})
task.start()

In [121]:
flattened_land_null_distinct = flattened_land_null.distinct('.geo')

def reduce_to_region_empty_temp_block_land(image):
    reduced_image = image.reduceRegions(collection=flattened_land_null_distinct, reducer=ee.Reducer.mean(), scale=5000) #spatial reduction
    reduced_image = reduced_image.set('date', image.get('julian_date'))
    return reduced_image

reduced_by_region_land_scale1000 = daily_averages_collection_land.map(reduce_to_region_empty_temp_block_land)
reduced_with_date_land_scale1000 = reduced_by_region_land_scale1000.map(doublemapOuter)
flattened_land_scale1000 = reduced_with_date_land_scale1000.flatten()

In [122]:
flattened_land_scale1000_with_temp = flattened_land_scale1000.filter(ee.Filter.notNull(['temperature_2m',
                                                                                      'temperature_2m_max', 
                                                                                      'temperature_2m_min']))


In [123]:
task = ee.batch.Export.table.toDrive(**{
  'collection': flattened_land_scale1000_with_temp,
  'description': 'ERA5 LAND Data for Originally Empty Blocks with Scale 5000 Try 1',
  'folder': 'EarthEngine',
  'selectors': ['BLOCK','FIPS', 'date', 'temperature_2m', 'temperature_2m_max', 'temperature_2m_min'], 
  'fileFormat': 'CSV'
})
task.start()

## Repeat the above process for other boroughs

Make a master function that does all the above in one step.

Process:
1. Load in the land temperature data
2. Produce 10 year temperature averages for each day in the year (ignoring leap years)
3. Reduce the temperature data according to the blocks in `blocks_fc`
4. Determine if the land temperature data has any missing values for the block.
5. If there are missing values, repeat steps 1 to 3 for the more general temperature data that covers non-land area and has lower resolution.
6. Export data to drive

In [92]:
def full_process(block_fc, export_description):
    '''
    block_fc: a FeatureCollection for some blocks
    
    returns: nothing but will export data to drive
    '''

    ##############
    #### LAND ####
    ##############
    
    # load the temperature data for ERA5 land
    era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
    daily_land_10years = era5_land.filterDate('2010-01-01', '2019-12-31').select('temperature_2m', 
                                                                                 'temperature_2m_min', 
                                                                                 'temperature_2m_max')
    
    # get the 10 year temp averages (temporal reduction)
    JulianDayList = ee.List.sequence(1, 365)

    def julianDayMeansLand(day):
        means = daily_land_10years.filter(ee.Filter.dayOfYear(day, day)).mean()
        ret_means = means.set('julian_date', day)
        return ret_means

    mappedListLand = JulianDayList.map(julianDayMeansLand)
    daily_averages_collection_land = ee.ImageCollection(mappedListLand)
    
    # spatial reduction according to the blocks
    def reduce_to_region(image):
        reduced_image = image.reduceRegions(collection=block_fc, reducer=ee.Reducer.mean(), scale=1000)
        reduced_image = reduced_image.set('date', image.get('julian_date'))
        return reduced_image

    reduced_by_region_land = daily_averages_collection_land.map(reduce_to_region)

    # keep track of date
    def doublemapOuter(fc):
        jd = fc.get('date')
        def doublemapInner(feature):
            return feature.set('date', jd)
        return ee.FeatureCollection(fc).map(doublemapInner)
    
    reduced_by_region_with_date_land = reduced_by_region_land.map(doublemapOuter)

    # flatten
    flattened_land = reduced_by_region_with_date_land.flatten()
    
    # determine which blocks don't have temperature data
    flattened_land_null = flattened_land.filter(ee.Filter.Not(ee.Filter.notNull(['temperature_2m', 'temperature_2m_max', 'temperature_2m_min'])))
    
    # if null temperature values exist, then merge with the more general ERA5 data
    if flattened_land_null.size().neq(ee.Number(0)):
        
        ##############
        #### GEN #####
        ##############

        # load in temperature data for ERA5 daily aggregates
        era5_gen = ee.ImageCollection("ECMWF/ERA5/DAILY")
        daily_gen_10years = era5_gen.filterDate('2010-01-01', '2019-12-31').select('minimum_2m_air_temperature', 
                                                                                   'mean_2m_air_temperature', 
                                                                                   'maximum_2m_air_temperature')
        
        # get the 10 year temp averages (temporal reduction)
        def julianDayMeansGen(day):
            means = daily_gen_10years.filter(ee.Filter.dayOfYear(day, day)).mean() 
            ret_means = means.set('julian_date', day)
            return ret_means

        mappedListGen = JulianDayList.map(julianDayMeansGen)
        daily_averages_collection_gen = ee.ImageCollection(mappedListGen)

        # spatial reduction according to the empty temp data blocks
        empty_block_fc = flattened_land_null.distinct('FIPS')
        
        def reduce_to_empty_regions(image):
            reduced_image = image.reduceRegions(collection=empty_block_fc, reducer=ee.Reducer.mean(), scale=1000)
            reduced_image = reduced_image.set('date', image.get('julian_date'))
            return reduced_image
        
        reduced_by_region_gen = daily_averages_collection_gen.map(reduce_to_empty_regions)
        
        # keep track of date
        reduced_by_region_with_date_gen = reduced_by_region_gen.map(doublemapOuter)
        
        # flatten
        flattened_gen = reduced_by_region_with_date_gen.flatten()
        
        
        # note: there are two possible problems with merging
        # 1. the 'FIPS' property for the gen data has a leading zero where the land data does not
        # 2. the names of the temperature properties do not match between the land and the gen data
        
        # combine land and gen
        flattened_land_non_empty = flattened_land.filter(ee.Filter.notNull(['temperature_2m', 'temperature_2m_max', 'temperature_2m_min']))
        merged = flattened_land_non_empty.merge(flattened_gen)
        print('exporting merged')
        
        # export merged data
        task = ee.batch.Export.table.toDrive(**{
          'collection': merged,
          'description': export_description,
          'folder': 'EarthEngine',
          'selectors': ['BLOCK','FIPS', 'date', 'mean_2m_air_temperature', 'maximum_2m_air_temperature', 'minimum_2m_air_temperature', 'temperature_2m', 'temperature_2m_max', 'temperature_2m_min'], 
          'fileFormat': 'CSV'
        })
        task.start()
        return
    # if the land data gets everything, just export it
    print('exporting just land')
    task = ee.batch.Export.table.toDrive(**{
      'collection': flattened_land,
      'description': export_description,
      'folder': 'EarthEngine',
      'selectors': ['BLOCK','FIPS', 'date', 'temperature_2m', 'temperature_2m_max', 'temperature_2m_min'], 
      'fileFormat': 'CSV'
    })
    task.start()


In [165]:
smallest_blocks1 = blocks_nonzero[(blocks_nonzero['BOROUGH'] == '016') | 
               (blocks_nonzero['BOROUGH'] == '230') | 
               (blocks_nonzero['BOROUGH'] == '195') |
               (blocks_nonzero['BOROUGH'] == '013') |
               (blocks_nonzero['BOROUGH'] == '275')]

smallest_blocks1_fc = geemap.geopandas_to_ee(smallest_blocks1)

full_process(smallest_blocks1_fc, "export for smallest boroughs 016 230 195 013 and 275 Try 1")

exporting merged


In [166]:
smallest_blocks2 = blocks_nonzero[(blocks_nonzero['BOROUGH'] == '282') | 
               (blocks_nonzero['BOROUGH'] == '105') | 
               (blocks_nonzero['BOROUGH'] == '100') |
               (blocks_nonzero['BOROUGH'] == '164') |
               (blocks_nonzero['BOROUGH'] == '060')]

smallest_blocks2_fc = geemap.geopandas_to_ee(smallest_blocks2)

full_process(smallest_blocks2_fc, "export for smallest boroughs 282 105 100 164 060 Try 1")

exporting merged


In [168]:
blocks185 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '185']

blocks185_fc = geemap.geopandas_to_ee(blocks185)
full_process(blocks185_fc, "export for borough 185 Try 1")

exporting merged


In [169]:
blocks150 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '150']

blocks150_fc = geemap.geopandas_to_ee(blocks150)
full_process(blocks150_fc, "export for borough 150 Try 1")

exporting merged


In [170]:
blocks198 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '198']

blocks198_fc = geemap.geopandas_to_ee(blocks198)
full_process(blocks198_fc, "export for borough 198 Try 1")

exporting merged


In [171]:
blocks110 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '110']

blocks110_fc = geemap.geopandas_to_ee(blocks110)
full_process(blocks110_fc, "export for borough 110 Try 1")

exporting merged


In [172]:
blocks188 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '188']

blocks188_fc = geemap.geopandas_to_ee(blocks188)
full_process(blocks188_fc, "export for borough 188 Try 1")

exporting merged


In [173]:
blocks063 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '063']

blocks063_fc = geemap.geopandas_to_ee(blocks063)
full_process(blocks063_fc, "export for borough 063 Try 1")

exporting merged


In [174]:
blocks068 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '068']

blocks068_fc = geemap.geopandas_to_ee(blocks068)
full_process(blocks068_fc, "export for borough 068 Try 1")

exporting merged


In [175]:
blocks220 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '220']

blocks220_fc = geemap.geopandas_to_ee(blocks220)
full_process(blocks220_fc, "export for borough 220 Try 1")

exporting merged


In [215]:
blocks290_1 = blocks290[blocks290['TOTALPOP'] < 8]

blocks290_1_fc = geemap.geopandas_to_ee(blocks290_1)
full_process(blocks290_1_fc, "export for borough 290 part 1 Try 1")

exporting merged


In [216]:
blocks290_2 = blocks290[blocks290['TOTALPOP'] >= 8]

blocks290_2_fc = geemap.geopandas_to_ee(blocks290_2)
full_process(blocks290_2_fc, "export for borough 290 part 2 Try 1")

exporting merged


In [181]:
blocks180 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '180']

blocks180_fc = geemap.geopandas_to_ee(blocks180)
full_process(blocks180_fc, "export for borough 180 Try 1")

exporting merged


In [182]:
blocks240 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '240']

blocks240_fc = geemap.geopandas_to_ee(blocks240)
full_process(blocks240_fc, "export for borough 240 Try 1")

exporting merged


In [183]:
blocks070 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '070']

blocks070_fc = geemap.geopandas_to_ee(blocks070)
full_process(blocks070_fc, "export for borough 070 Try 1")

exporting merged


In [184]:
blocks050 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '050']

blocks050_fc = geemap.geopandas_to_ee(blocks050)
full_process(blocks068_fc, "export for borough 050 Try 1")

exporting merged


In [185]:
blocks130 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '130']

blocks130_fc = geemap.geopandas_to_ee(blocks130)
full_process(blocks130_fc, "export for borough 130 Try 1")

exporting merged


In [186]:
blocks066 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '066']

blocks066_fc = geemap.geopandas_to_ee(blocks066)
full_process(blocks066_fc, "export for borough 066 Try 1")

exporting merged


In [187]:
blocks158 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '158']

blocks158_fc = geemap.geopandas_to_ee(blocks158)
full_process(blocks158_fc, "export for borough 158 Try 1")

exporting merged


In [222]:
blocks122_1 = blocks122[blocks['TOTALPOP'] < 18]

blocks122_1_fc = geemap.geopandas_to_ee(blocks122_1)
full_process(blocks122_1_fc, "export for borough 122 part 1 Try 1")

exporting merged


In [224]:
blocks122_2 = blocks122[blocks['TOTALPOP'] >= 18]

blocks122_2_fc = geemap.geopandas_to_ee(blocks122_2)
full_process(blocks122_2_fc, "export for borough 122 part 2 Try 1")

exporting merged


In [180]:
blocks020 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '020']

blocks020_fc = geemap.geopandas_to_ee(blocks020)
full_process(blocks020_fc, "export for borough 020 Try 1")

exporting merged


In [226]:
blocks170.shape

(1659, 9)

In [228]:
blocks170_1 = blocks170[blocks170['TOTALPOP'] < 30]

blocks170_1_fc = geemap.geopandas_to_ee(blocks170_1)
full_process(blocks170_1_fc, "export for borough 170 part 1 Try 1")

exporting merged


In [229]:
blocks170_2 = blocks170[blocks170['TOTALPOP'] >= 30]

blocks170_2_fc = geemap.geopandas_to_ee(blocks170_2)
full_process(blocks170_2_fc, "export for borough 170 part 2 Try 1")

exporting merged


In [280]:
blocks090 = blocks_nonzero[blocks_nonzero['BOROUGH'] == '090']

blocks090_fc = geemap.geopandas_to_ee(blocks090)
full_process(blocks090_fc, "export for borough 090 Try 1")

exporting merged


## Put the extracted temperature Data into one file

Problems to address:
1. Land and general temperature data refer to the temperature bands differently; need to combine the two different columns
2. Many different CSV export files need to be combined
3. There are still some empty temperature values :(

### Start by looking at just one of the exported temperature files

In [239]:
smallest_boroughs1_temp = pd.read_csv('../Data/smallest_boroughs_016_230_195_013_and_275.csv')
# address problem 1 for this CSV
mean_temp = smallest_boroughs1_temp['temperature_2m'].fillna(smallest_boroughs1_temp['mean_2m_air_temperature'])
min_temp = smallest_boroughs1_temp['temperature_2m_min'].fillna(smallest_boroughs1_temp['minimum_2m_air_temperature'])
max_temp = smallest_boroughs1_temp['temperature_2m_max'].fillna(smallest_boroughs1_temp['maximum_2m_air_temperature'])

# format a dataframe
smallest_boroughs1_formatted = smallest_boroughs1_temp[['BLOCK', "FIPS", 'date']]

smallest_boroughs1_formatted['min_temp'] = min_temp
smallest_boroughs1_formatted['mean_temp'] = mean_temp
smallest_boroughs1_formatted['max_temp'] = max_temp

smallest_boroughs1_formatted.head()

Unnamed: 0,BLOCK,FIPS,date,min_temp,mean_temp,max_temp
0,1061,22300001001061,1.0,263.855821,265.55679,267.09093
1,1058,22300001001058,1.0,263.855821,265.55679,267.09093
2,4102,21950002004102,1.0,269.532498,271.327224,273.511868
3,4193,21950002004193,1.0,270.038328,271.782951,274.020266
4,1134,22750003001134,1.0,271.091117,272.701396,274.896804


Check to see if there are still any empty temperature cells

In [253]:
smallest_boroughs1_empty = smallest_boroughs1_formatted[smallest_boroughs1_formatted['min_temp'].isnull()]
len(smallest_boroughs1_empty['FIPS'].unique())

23

There are empty temperature values so pull out the ones that don't have empty values and check that sizes match up (i.e. a day-block combination is missing all three temp values)

In [266]:
smallest_boroughs1_formatted_temp = smallest_boroughs1_formatted.dropna()

assert(smallest_boroughs1_empty.shape[0] + smallest_boroughs1_formatted_temp.shape[0] == smallest_boroughs1_temp.shape[0])

### Whole process

In [46]:
export_dir = "../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/"

file_list = []
for root, dirs, files in os.walk(export_dir):
    for filename in files:
        if filename.lower().endswith(('.csv')):
            file_list.append(export_dir + filename)
print(len(file_list))

28


In [5]:
def process_exports(file_list):
    
    temp_df = pd.DataFrame()
    empty_df = pd.DataFrame()
    
    total_empty = 0
    
    for f in file_list:
        df = pd.read_csv(f)
        
        mean_temp = df['temperature_2m'].fillna(df['mean_2m_air_temperature'])
        min_temp = df['temperature_2m_min'].fillna(df['minimum_2m_air_temperature'])
        max_temp = df['temperature_2m_max'].fillna(df['maximum_2m_air_temperature'])
        
        df_formatted = df[['BLOCK', "FIPS", 'date']]

        df_formatted['min_temp'] = min_temp
        df_formatted['mean_temp'] = mean_temp
        df_formatted['max_temp'] = max_temp
        
        empty = df_formatted[df_formatted['min_temp'].isnull()]
        total_empty += empty.shape[0] / 365
        
        full = df_formatted.dropna()
        
        #assert(empty.shape[0] + full.shape[0] == df.shape[0])
        
        temp_df = pd.concat([temp_df, full])
        empty_df = pd.concat([empty_df, empty])
        
    print('total empty', total_empty)
    return temp_df, empty_df

temp, empty = process_exports(file_list)

total empty 1044.0


In [6]:
temp.head()

Unnamed: 0,BLOCK,FIPS,date,min_temp,mean_temp,max_temp
0,1245.0,20700001001245,1.0,266.462904,269.285745,271.749924
1,1001.0,20700002001001,1.0,266.558705,269.145364,271.637401
2,1018.0,20700002001018,1.0,266.973592,269.518289,272.001239
3,1251.0,20700001001251,1.0,266.462904,269.285745,271.749924
4,1015.0,20700002001015,1.0,266.973592,269.518289,272.001239


In [7]:
empty.head()

Unnamed: 0,BLOCK,FIPS,date,min_temp,mean_temp,max_temp
35770,1249.0,20700001001249,1.0,,,
35771,1213.0,20700001001213,1.0,,,
35772,1249.0,20700001001249,2.0,,,
35773,1213.0,20700001001213,2.0,,,
35774,1249.0,20700001001249,3.0,,,


In [11]:
print('there are', len(temp['FIPS'].unique()), 'blocks with temperature data and', len(set(empty['FIPS'].unique()).difference(set(temp['FIPS'].unique()))), 'blocks without')


there are 11165 blocks with temperature data and 300 blocks without


In [12]:
empty['FIPS'] = '0' + empty['FIPS'].astype('string') 

Join the `empty` dataframe with the `blocks_nonzero` geopandas dataframe to regain the `geometry` column

In [22]:
empty_with_geo = pd.merge(
    blocks_nonzero,
    empty,
    how="inner",
    on='FIPS'
).drop(['BLOCK_x', 'date'], axis=1).drop_duplicates()

print(empty_with_geo.shape[0])

796


In [23]:
empty_with_geo.head()

Unnamed: 0,NAME,STATE,BOROUGH,TRACT,FIPS,TOTALPOP,HOUSEUNITS,geometry,BLOCK_y,min_temp,mean_temp,max_temp
0,Block 3050,2,110,500,21100005003050,21.0,5.0,"POLYGON ((1127807.264 1089892.478, 1127854.373...",3050.0,,,
730,Block 3050,2,110,500,21100005003050,21.0,5.0,"POLYGON ((1127807.264 1089892.478, 1127854.373...",,,,
1095,Block 2048,2,90,600,20900006002048,17.0,9.0,"POLYGON ((294995.554 1669006.359, 294993.710 1...",2048.0,,,
1460,Block 2048,2,90,600,20900006002048,17.0,9.0,"POLYGON ((294995.554 1669006.359, 294993.710 1...",,,,
2190,Block 1070,2,90,980000,20909800001070,11.0,4.0,"POLYGON ((298850.160 1666140.734, 299144.306 1...",1070.0,,,


Make a feature collection, simplify the geometries, and repeat the process of exporting

In [343]:
empty_fc = geemap.geopandas_to_ee(empty_with_geo)

In [96]:
def simplify_map(feature):
    return feature.simplify(1000)

def simplify_map2500(feature):
    return feature.simplify(2500)

def simplify_map5000(feature):
    return feature.simplify(5000)

In [346]:
simple_empty_fc = empty_fc.map(simplify_map)

In [347]:
full_process(simple_empty_fc, 'export previously empty features Try 1')

exporting merged


In [25]:
prev_empty = pd.read_csv('../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/previously_empty_features1.csv')

In [26]:
mean_temp = prev_empty['temperature_2m'].fillna(prev_empty['mean_2m_air_temperature'])
min_temp = prev_empty['temperature_2m_min'].fillna(prev_empty['minimum_2m_air_temperature'])
max_temp = prev_empty['temperature_2m_max'].fillna(prev_empty['maximum_2m_air_temperature'])

# format a dataframe
prev_empty_formatted = prev_empty[['BLOCK', "FIPS", 'date']]

prev_empty_formatted['min_temp'] = min_temp
prev_empty_formatted['mean_temp'] = mean_temp
prev_empty_formatted['max_temp'] = max_temp

prev_empty_formatted.head()

Unnamed: 0,BLOCK,FIPS,date,min_temp,mean_temp,max_temp
0,,20200002061019,1.0,258.07323,260.929655,263.593041
1,,20200010002035,1.0,266.794147,268.383954,270.239012
2,,20900003003016,1.0,258.07323,260.929655,263.593041
3,,20900002002021,1.0,258.07323,260.929655,263.593041
4,,20900002002025,1.0,258.07323,260.929655,263.593041


In [28]:
# check to see if there's still some empty blocks
prev_empty_formatted[prev_empty_formatted['min_temp'].isnull()].shape[0] / 365

248.0

Erin said to just try again with the same ones that didn't come out in case it was a backend thing.

In [29]:
really_empty = prev_empty_formatted[prev_empty_formatted['min_temp'].isnull()]

In [30]:
really_empty = really_empty.drop(['BLOCK', 'date'], axis = 1).drop_duplicates()
really_empty['FIPS'] = '0' + really_empty['FIPS'].astype('string') 

In [31]:
really_empty_with_geo = pd.merge(
    blocks_nonzero,
    really_empty,
    how="inner",
    on='FIPS'
)

print(really_empty_with_geo.shape[0])

248


In [370]:
really_empty_fc = geemap.geopandas_to_ee(really_empty_with_geo)

In [371]:
full_process(really_empty_fc, 'export previously empty features for 248 blocks Try 2')

exporting merged


In [33]:
empty_export2 = pd.read_csv('../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/previously_empty_features2.csv')

In [34]:
mean_temp = empty_export2['temperature_2m'].fillna(empty_export2['mean_2m_air_temperature'])
min_temp = empty_export2['temperature_2m_min'].fillna(empty_export2['minimum_2m_air_temperature'])
max_temp = empty_export2['temperature_2m_max'].fillna(empty_export2['maximum_2m_air_temperature'])

# format a dataframe
empty_export2_formatted = empty_export2[['BLOCK', "FIPS", 'date']]

empty_export2_formatted['min_temp'] = min_temp
empty_export2_formatted['mean_temp'] = mean_temp
empty_export2_formatted['max_temp'] = max_temp

empty_export2_formatted.shape

(90520, 6)

In [35]:
empty_export2_formatted[empty_export2_formatted['min_temp'].isnull()].shape[0] / 365

248.0

But again, all previously empty blocks came out empty again. **Now try the centroid approach.**

In [40]:
really_empty_with_geo['centroid'] = really_empty_with_geo['geometry'].centroid
really_empty_centroid = really_empty_with_geo.drop(['geometry'], axis = 1)
really_empty_centroid['geometry'] = really_empty_centroid['centroid']
really_empty_centroid = really_empty_centroid.drop('centroid', axis = 1)

In [41]:
really_empty_centroid.shape

(248, 12)

In [None]:
centroid_fc = geemap.geopandas_to_ee(really_empty_centroid)

In [400]:
full_process(centroid_fc, 'export previously empty features with centroids try 1')

exporting merged


In [42]:
centroids = pd.read_csv('../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/centroids.csv')

In [43]:
mean_temp = centroids['temperature_2m'].fillna(centroids['mean_2m_air_temperature'])
min_temp = centroids['temperature_2m_min'].fillna(centroids['minimum_2m_air_temperature'])
max_temp = centroids['temperature_2m_max'].fillna(centroids['maximum_2m_air_temperature'])

# format a dataframe
centroids_formatted = centroids[['BLOCK', "FIPS", 'date']]

centroids_formatted['min_temp'] = min_temp
centroids_formatted['mean_temp'] = mean_temp
centroids_formatted['max_temp'] = max_temp

print('with centroids, there are', centroids_formatted[centroids_formatted['min_temp'].isnull()].shape[0], 'empty temperature values still')

with centroids, there are 0 empty temperature values still


In [44]:
centroids_formatted.head()

Unnamed: 0,BLOCK,FIPS,date,min_temp,mean_temp,max_temp
0,3050,21100005003050,1.0,267.862251,269.377729,270.988893
1,2048,20900006002048,1.0,255.829561,260.244257,263.99305
2,1070,20909800001070,1.0,256.018942,260.589024,264.7845
3,2045,20900006002045,1.0,255.829561,260.244257,263.99305
4,5002,20200008015002,1.0,266.288834,267.837755,269.563901


In [290]:
def process_exports_full(file_list):
    
    temp_df = pd.DataFrame(columns=['FIPS', 'date', 'min_temp', 'mean_temp', 'max_temp'])
    total_empty = set()
    
    for f in file_list:
        df = pd.read_csv(f)
        
        mean_temp = df['temperature_2m'].fillna(df['mean_2m_air_temperature'])
        min_temp = df['temperature_2m_min'].fillna(df['minimum_2m_air_temperature'])
        max_temp = df['temperature_2m_max'].fillna(df['maximum_2m_air_temperature'])
        
        df_formatted = df[["FIPS", 'date']]

        df_formatted['min_temp'] = min_temp
        df_formatted['mean_temp'] = mean_temp
        df_formatted['max_temp'] = max_temp
        
        empty = df_formatted[df_formatted['min_temp'].isnull()]
        
        total_empty.update(empty['FIPS'].unique())
        
        full = df_formatted.dropna()
        full_unique = full.loc[~full['FIPS'].isin(temp_df['FIPS'])]
        
        temp_df = pd.concat([temp_df, full_unique])
        
    return temp_df, total_empty

The above doesn't check if the empty temperature data has actually already been processed (thanks to `centroids.csv` and `previously_empty_features1.csv` and `previously_empty_features2.csv`). Do so now:

In [55]:
emptyFIPS = set(empty['FIPS'].unique()) 
tempFIPS = set(temp['FIPS'].unique())
exportFIPS = emptyFIPS.union(tempFIPS)
trulyEmptyFIPS = emptyFIPS.difference(tempFIPS)

print("the number of blocks in both empty and temp is", len(emptyFIPS.intersection(tempFIPS)), "(intersection).")
print("the exports are for a total of", len(exportFIPS), "blocks regardless of if temperature data was grabbed or not (union).")
print("the number of blocks we have temperature data for is", len(tempFIPS), ".")
print("the number of blocks still with empty data is", len(trulyEmptyFIPS), "(difference).")

the number of blocks in both empty and temp is 248 (intersection).
the exports are for a total of 11465 blocks regardless of if temperature data was grabbed or not (union).
the number of blocks we have temperature data for is 11165 .
the number of blocks still with empty data is 300 (difference).


PROBLEM: the union should have the same number of FIPS as is `blocks_nonzero`

In [534]:
len(blocks_nonzero['FIPS'].unique())

11765

In [71]:
allFIPS = set(blocks_nonzero['FIPS'].astype('int'))
FIPS_not_in_export = allFIPS.difference(exportFIPS)
len(FIPS_not_in_export)

300

Get all blocks with missing data and try another export

In [73]:
missingFIPS = FIPS_not_in_export.union(trulyEmptyFIPS)
len(missingFIPS)

600

In [82]:
missingFIPSseries = pd.Series(list(missingFIPS), name = 'FIPS')

In [86]:
blocks_nonzero['FIPS'] = blocks_nonzero['FIPS'].astype(int)

In [90]:
missing_blocks = pd.merge(blocks_nonzero, missingFIPSseries)
missing_blocks.head()

Unnamed: 0,NAME,STATE,BOROUGH,TRACT,BLOCK,FIPS,TOTALPOP,HOUSEUNITS,geometry
0,Block 1019,2,20,206,1019,20200002061019,45.0,21.0,"POLYGON ((238355.674 1266926.621, 238351.617 1..."
1,Block 2035,2,20,1000,2035,20200010002035,69.0,47.0,"POLYGON ((220636.702 1254570.568, 220632.594 1..."
2,Block 1027,2,50,100,1027,20500001001027,2.0,0.0,"POLYGON ((-606288.486 1226249.377, -604697.422..."
3,Block 3016,2,90,300,3016,20900003003016,37.0,15.0,"POLYGON ((297519.986 1666335.393, 297535.672 1..."
4,Block 3149,2,50,100,3149,20500001003149,56.0,9.0,"POLYGON ((-389675.008 1235440.223, -389642.063..."


In [94]:
missing_blocks_fc = geemap.geopandas_to_ee(missing_blocks)

type(missing_blocks_fc), missing_blocks_fc.size().getInfo()

(ee.featurecollection.FeatureCollection, 600)

In [97]:
missing_fc_simplified = missing_blocks_fc.map(simplify_map)

In [99]:
full_process(missing_fc_simplified, "600 missing blocks Try 1")

exporting merged


Of those, there are still 80 missing values...

In [163]:
missing600 = pd.read_csv('../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/600_missing_blocks.csv')
missing600.shape[0] / 365

600.0

In [131]:
mean_temp = missing600['temperature_2m'].fillna(missing600['mean_2m_air_temperature'])
min_temp = missing600['temperature_2m_min'].fillna(missing600['minimum_2m_air_temperature'])
max_temp = missing600['temperature_2m_max'].fillna(missing600['maximum_2m_air_temperature'])

missing600_formatted = missing600[['BLOCK', "FIPS", 'date']]

missing600_formatted['min_temp'] = min_temp
missing600_formatted['mean_temp'] = mean_temp
missing600_formatted['max_temp'] = max_temp

empty600 = missing600_formatted[missing600_formatted['min_temp'].isnull()]
len(empty600['FIPS'].unique())

80

In [141]:
empty600FIPS = empty600.drop('date', axis = 1).drop_duplicates()

empty600_with_geo = pd.merge(
    blocks_nonzero,
    empty600FIPS,
    how="inner",
    on='FIPS'
)

print(empty600_with_geo.shape[0])

80


Look at the areas for the remaining empty to determine if centroids is appropriate

In [143]:
empty600area = empty600_with_geo['geometry'].area
print(empty600area.min(), empty600area.median(), empty600area.max())

2643.267376923572 17006.94036563371 606924.3826917891


In [145]:
empty600_fc = geemap.geopandas_to_ee(empty600_with_geo)

In [147]:
empty600_fc.size().getInfo()

80

In [148]:
full_process(empty600_fc, "final 80 blocks")

exporting merged


In [161]:
df = pd.read_csv("../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/last_80_blocks.csv")

mean_temp = df['temperature_2m'].fillna(df['mean_2m_air_temperature'])
min_temp = df['temperature_2m_min'].fillna(df['minimum_2m_air_temperature'])
max_temp = df['temperature_2m_max'].fillna(df['maximum_2m_air_temperature'])

df_formatted = df[['BLOCK', "FIPS", 'date']]

df_formatted['min_temp'] = min_temp
df_formatted['mean_temp'] = mean_temp
df_formatted['max_temp'] = max_temp

empty = df_formatted[df_formatted['min_temp'].isnull()]
len(empty['FIPS'].unique())

6

Check if everything has exported. We ended up with some missing still... rinse and repeat

With Maddie's approval, export centroids for all blocks with area < 10,000 m and increase the error tolerated for all others

In [175]:
reallyEmptyFIPS2 = list(trulyEmptyFIPS)

reallyEmptyBlocks2 = blocks_nonzero.loc[blocks_nonzero['FIPS'].isin(reallyEmptyFIPS2)]
reallyEmptyBlocks2_fc = geemap.geopandas_to_ee(reallyEmptyBlocks2)
reallyEmptyBlocks2_fc_simple = reallyEmptyBlocks2_fc.map(simplify_map)
full_process(reallyEmptyBlocks2_fc_simple, "80 missing blocks Try 1")

exporting merged


In [209]:
reallyEmptyBlocks2['area'] = reallyEmptyBlocks2['geometry'].area
reallyEmptyBlocks2.head()

Unnamed: 0,NAME,STATE,BOROUGH,TRACT,BLOCK,FIPS,TOTALPOP,HOUSEUNITS,geometry,area
405,Block 3149,2,50,100,3149,20500001003149,56.0,9.0,"POLYGON ((-389675.008 1235440.223, -389642.063...",24234.919081
902,Block 1169,2,50,300,1169,20500003001169,5.0,4.0,"POLYGON ((-334080.574 1300186.741, -334063.036...",6173.081423
1796,Block 3060,2,50,100,3060,20500001003060,20.0,4.0,"POLYGON ((-373640.711 1255024.581, -373634.369...",43855.707222
2273,Block 1162,2,50,100,1162,20500001001162,13.0,10.0,"POLYGON ((-665953.223 1216901.416, -665862.180...",9025.848766
2581,Block 1158,2,50,100,1158,20500001001158,87.0,18.0,"POLYGON ((-604704.246 1222337.889, -604689.978...",14361.386888


In [216]:
empty_centroids_ok = reallyEmptyBlocks2[reallyEmptyBlocks2['area'] < 10000]
empty_centroids_ok['centroids'] = empty_centroids_ok['geometry'].centroid
empty_centroids_ok = empty_centroids_ok.drop(['geometry'], axis = 1)
empty_centroids_ok['geometry'] = empty_centroids_ok['centroids']
empty_centroids_ok = empty_centroids_ok.drop(['centroids'], axis = 1)
centroid18_fc = geemap.geopandas_to_ee(empty_centroids_ok)

In [217]:
full_process(centroid18_fc, "export 18 blocks as centroids")

exporting merged


In [218]:
empty_extra_simple = reallyEmptyBlocks2[reallyEmptyBlocks2['area'] >= 10000]
empty_extra_simple_fc = geemap.geopandas_to_ee(empty_extra_simple)

empty_extra_simple_fc_2500 = empty_extra_simple_fc.map(simplify_map2500)

In [219]:
full_process(empty_extra_simple_fc_2500, "export blocks with simplify 2500")

exporting merged


In [235]:
temp, empty = process_exports_full(file_list)
tempFIPS = set(temp['FIPS'].unique())
exportFIPS = emptyFIPS.union(tempFIPS)
trulyEmptyFIPS = empty.difference(tempFIPS)

print("the number of blocks in both empty and temp is", len(empty.intersection(tempFIPS)), "(intersection).")
print("the exports are for a total of", len(exportFIPS), "blocks regardless of if temperature data was grabbed or not (union).")
print("the number of blocks we have temperature data for is", len(tempFIPS), ".")
print("the number of blocks still with empty data is", len(trulyEmptyFIPS), "(difference).")

the number of blocks in both empty and temp is 627 (intersection).
the exports are for a total of 11765 blocks regardless of if temperature data was grabbed or not (union).
the number of blocks we have temperature data for is 11764 .
the number of blocks still with empty data is 1 (difference).


In [246]:
final_block = blocks_nonzero[blocks_nonzero['FIPS'] == list(trulyEmptyFIPS)[0]]
final_block_fc = geemap.geopandas_to_ee(final_block)
final_block_fc_simple = final_block_fc.map(simplify_map2500)

The following produces empty results

In [247]:
full_process(final_block_fc_simple, 'final block export Try 2 simplify 2500')

exporting merged


In [280]:
final_block = blocks_nonzero[blocks_nonzero['FIPS'] == list(trulyEmptyFIPS)[0]]
final_block['centroid'] = final_block['geometry'].centroid
final_block_centroid = final_block.drop('geometry', axis=1)
final_block_centroid['geometry'] = final_block_centroid['centroid']
final_block_centroid = final_block_centroid.drop('centroid', axis = 1)
final_block_centroid_fc = geemap.geopandas_to_ee(final_block_centroid)

The following produces results

In [281]:
full_process(final_block_centroid_fc, 'final block export Try 3 centroid')

exporting merged


The following does not produce results

In [294]:
final_block = blocks_nonzero[blocks_nonzero['FIPS'] == 20500001004073]
final_block_fc = geemap.geopandas_to_ee(final_block)

final_block_fc_simple5000 = final_block_fc.map(simplify_map5000)
full_process(final_block_fc_simple5000, "final block export Try 5 simplify 5000")

exporting merged


With the centroids for block 020500001004073 upload, check that we have all the data.

In [295]:
export_dir = "../DSSG2023-Heating-Loads-Data/Data/temp_export_raw/"

file_list = []
for root, dirs, files in os.walk(export_dir):
    for filename in files:
        if filename.lower().endswith(('.csv')):
            file_list.append(export_dir + filename)
print(len(file_list))

32


In [296]:
temp, empty = process_exports_full(file_list)

In [297]:
tempFIPS = set(temp['FIPS'].unique())
exportFIPS = emptyFIPS.union(tempFIPS)
trulyEmptyFIPS = empty.difference(tempFIPS)

print("the number of blocks in both empty and temp is", len(empty.intersection(tempFIPS)), "(intersection).")
print("the exports are for a total of", len(exportFIPS), "blocks regardless of if temperature data was grabbed or not (union).")
print("the number of blocks we have temperature data for is", len(tempFIPS), ".")
print("the number of blocks still with empty data is", len(trulyEmptyFIPS), "(difference).")

the number of blocks in both empty and temp is 628 (intersection).
the exports are for a total of 11765 blocks regardless of if temperature data was grabbed or not (union).
the number of blocks we have temperature data for is 11765 .
the number of blocks still with empty data is 0 (difference).


Export final CSV for all blocks

In [None]:
temp.to_csv('../DSSG2023-Heating-Loads-Data/Data/temp_export_clean/all_temp.csv', index=False)