<a href="https://colab.research.google.com/github/webb-e/S2_Landsat_Comparison/blob/main/Landsat_lake_wise_export.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Authenticate private account (only required for exporting to drive/gee/gcp)
from google.colab import auth
auth.authenticate_user()

# Earth Engine setup
import ee
ee.Authenticate()  # Trigger the authentication flow.
# Initialize the library.
# NOTE(review): the project string below is a blank placeholder — presumably it
# has to be replaced with your own Cloud project ID before running; confirm.
ee.Initialize(project=" ")

# Google Drive setup (mounts Drive at /content/drive inside the Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
### Load in Landsat products
## The Pickens collection is subset by position instead of by a metadata
## filter, since 2021 doesn't have the correct metadata.
pickenscollection = ee.ImageCollection("projects/glad/water/annual")
pickens_count = pickenscollection.size()
imageList = pickenscollection.toList(pickens_count)
## keep only the final six images of the collection
last6Images = imageList.slice(pickens_count.subtract(6), pickens_count)
## give every image the same single band name, 'wp'
pickens_collection = ee.ImageCollection(last6Images).map(
    lambda img: img.rename(['wp'])
)

### pekel
## yearly history images selected with a 2016-01-01 .. 2021-12-31 date filter
pekel_years = ee.Filter.date('2016-01-01', '2021-12-31')
pekel_collection = ee.ImageCollection("JRC/GSW1_4/YearlyHistory").filter(pekel_years)

### define region info
lakeshp = None
region_label = None

## region codes that have a lake-extraction asset
_VALID_REGIONS = ('TUK', 'AND', 'MRD', 'AKCP', 'YKD', 'YKF')


def regionfun(region):
  """Select the study region used by the rest of the notebook.

  Sets the module-level ``lakeshp`` (the region's lake FeatureCollection) and
  ``region_label`` (the region code, used in export names).

  Args:
    region: one of 'TUK', 'AND', 'MRD', 'AKCP', 'YKD', 'YKF'.

  Raises:
    ValueError: if ``region`` is not a known code. Raising (instead of only
      printing) stops the cell, so a typo cannot silently reuse the lakes and
      label of a previously selected region.
  """
  global lakeshp, region_label
  if region not in _VALID_REGIONS:
    raise ValueError(
        "Invalid region: " + repr(region)
        + " (options: " + ", ".join(_VALID_REGIONS) + ")")
  ## every region's asset follows the same <CODE>_extraction naming pattern
  lakeshp = ee.FeatureCollection(
      "projects/alpod-412314/assets/Lake_extractions/" + region + "_extraction")
  region_label = region

Run this code for all regions except the MRD, which will time out due to memory issues; MRD code is below.

In [None]:
## choose region (sets the module-level lakeshp and region_label)
regionfun('TUK') # options = TUK, AND, AKCP, YKD, YKF (the MRD has its own cells further down)

############
###### PICKENS
############
def pickens_fun(image):
  """Sum water, masked, and total area per lake for one Pickens annual image.

  Reads the module-level ``lakeshp`` FeatureCollection.

  Args:
    image: a single-band ('wp') image from ``pickens_collection``.

  Returns:
    The lake features with ``year``, ``Landsat_Pickens`` (summed water pixel
    area) and ``percent_masked`` set, keeping only the lakes whose rounded
    masked percentage is 0.
  """
  ## get image year: the fifth '/'-separated token of the image id
  year = ee.String(image.get('system:id')).split('/').get(4)
  pixel_area = ee.Image.pixelArea()
  ## fill masked pixels with -1 so they can be told apart from valid data
  filled = image.unmask(-1)
  ## area of water: pixels with a value above 0, clipped to the lakes
  water_area_img = image.clip(lakeshp).gt(0).multiply(pixel_area)
  ## area of the pixels that were masked in the input image
  masked_area_img = filled.eq(-1).clip(lakeshp).multiply(pixel_area)
  ## area of every pixel in the region, masked or not
  total_area_img = filled.gte(-1).clip(lakeshp).multiply(pixel_area)
  ## band order matters: the summed bands are read back as wp, wp_1, wp_2
  area_image = water_area_img.addBands(masked_area_img).addBands(total_area_img)
  ## sum water, masked, and total areas over each lake
  lake_sums = area_image.reduceRegions(collection=lakeshp,
                                       scale=30,
                                       reducer=ee.Reducer.sum())

  ## add feature properties to output
  def pickens_dealwithoutput(f):
    water = ee.Number(f.get('wp'))
    masked = ee.Number(f.get('wp_1')).round()
    total = ee.Number(f.get('wp_2')).round()
    percent_masked = masked.divide(total).multiply(100).round()
    return f.set({"year": year, "Landsat_Pickens": water, "percent_masked": percent_masked})

  results = lake_sums.map(pickens_dealwithoutput)
  return results.filter(ee.Filter.eq('percent_masked', 0))

## apply pickens_fun to every image in pickens_collection
pickens_intermediate = pickens_collection.map(pickens_fun)
## flatten the per-image outputs into a single collection of features
pickens_results = ee.FeatureCollection(pickens_intermediate).flatten()


  #############
  ###### PEKEL
  ############
def pekel_fun(image):
  """Sum water, no-observation, and total area per lake for one Pekel yearly image.

  Reads the module-level ``lakeshp`` FeatureCollection.

  Args:
    image: one image from ``pekel_collection``.

  Returns:
    The lake features with ``year``, ``Landsat_Pekel`` (summed water pixel
    area) and ``percent_masked`` set, keeping only the lakes whose rounded
    no-observation percentage is 0.
  """
  ## get image year: the fourth '/'-separated token of the image id
  year = ee.String(image.get('system:id')).split('/').get(3)
  pixel_area = ee.Image.pixelArea()
  ## pixel classes:
  ##   0 = no observations
  ##   1 = not water
  ##   2 = seasonal water
  ##   3 = permanent water
  clipped = image.clip(lakeshp)
  ## area of water: seasonal or permanent
  water_area_img = clipped.eq(2).Or(clipped.eq(3)).multiply(pixel_area)
  ## area with no observations
  noobs_area_img = image.eq(0).clip(lakeshp).multiply(pixel_area)
  ## total area of all valid (>= 0) pixels
  total_area_img = image.gte(0).clip(lakeshp).multiply(pixel_area)
  ## band order matters: the summed bands are read back as
  ## waterClass, waterClass_1, waterClass_2
  area_image = water_area_img.addBands(noobs_area_img).addBands(total_area_img)
  lake_sums = area_image.reduceRegions(collection=lakeshp,
                                       scale=30,
                                       reducer=ee.Reducer.sum())

  ## add feature properties to output
  def pekeloutput(f):
    water = ee.Number(f.get('waterClass'))
    ## Keep masked and total in the same unit. Previously only the total was
    ## divided by 1e6 and rounded, which made it 0 for any lake under half a
    ## square kilometre; Earth Engine's divide is documented to return 0 on
    ## division by 0, so such lakes always passed the percent_masked filter.
    masked = ee.Number(f.get('waterClass_1')).round()
    total = ee.Number(f.get('waterClass_2')).round()
    percent_masked = masked.divide(total).multiply(100).round()
    return f.set({"year": year, "Landsat_Pekel": water, "percent_masked": percent_masked})

  results = lake_sums.map(pekeloutput)
  return results.filter(ee.Filter.eq('percent_masked', 0))

## apply pekel_fun to every image in pekel_collection
pekel_intermediate = pekel_collection.map(pekel_fun)
## flatten the per-image outputs into a single collection of features
pekel_results = ee.FeatureCollection(pekel_intermediate).flatten()

  #############
  ###### PUT TOGETHER AND EXPORT
  ############
## merge the Pekel and Pickens features into one collection
allresults = ee.FeatureCollection([pekel_results, pickens_results]).flatten()

## one CSV per region, named after the current region_label
description = f"Landsat_lake_areas_{region_label}"
task = ee.batch.Export.table.toDrive(
    collection=allresults,
    selectors=['year', 'Landsat_Pekel', 'Landsat_Pickens', 'lake_id'],
    folder='Lakewise_csvs',
    description=description,
    fileFormat='CSV',
)
task.start()

# RUN THIS FOR THE MRD
If you use the above code for the MRD, it will run into memory issues, so the MRD lakes are split into chunks and exported chunk by chunk instead.

In [None]:
# Select the MRD lakes and define the chunk size
regionfun('MRD')
chunk_size = 1000

def split_feature_collection(fc, chunk_size):
    """Split ``fc`` into consecutive FeatureCollections of at most ``chunk_size`` features.

    The feature count is fetched with ``getInfo()``; the function returns a
    Python list of ``ee.FeatureCollection`` objects.
    """
    num_features = fc.size().getInfo()

    # Ceiling division, so a final partial chunk is still included
    num_chunks = (num_features + chunk_size - 1) // chunk_size

    # Every chunk is a slice of the same feature list
    features = fc.toList(num_features)
    return [
        ee.FeatureCollection(features.slice(i * chunk_size, (i + 1) * chunk_size))
        for i in range(num_chunks)
    ]

# Split the FeatureCollection into chunks
chunks = split_feature_collection(lakeshp, chunk_size)

# Print how many chunks were created
print(len(chunks))

10


In [None]:
# Re-split two of the first-level chunks (indices 6 and 7) into smaller pieces
# NOTE(review): presumably these are the chunks that still failed to export — confirm
sub_list = chunks[6:8]
sub_list = ee.FeatureCollection(sub_list).flatten()
chunksize = 500

def split_feature_collection(fc, chunk_size):
    """Split ``fc`` into consecutive FeatureCollections of at most ``chunk_size`` features.

    The feature count is fetched with ``getInfo()``; the function returns a
    Python list of ``ee.FeatureCollection`` objects.
    """
    num_features = fc.size().getInfo()

    # Ceiling division, so a final partial chunk is still included.
    # Uses the chunk_size argument; the previous body ignored it and read the
    # module-level `chunksize` instead.
    num_chunks = (num_features + chunk_size - 1) // chunk_size

    # Every chunk is a slice of the same feature list
    features = fc.toList(num_features)
    return [
        ee.FeatureCollection(features.slice(i * chunk_size, (i + 1) * chunk_size))
        for i in range(num_chunks)
    ]

# Split the FeatureCollection into chunks
chunks2 = split_feature_collection(sub_list, chunksize)


In [None]:
# Re-split two of the second-level chunks (indices 1 and 2) into smaller pieces
sub_list2 = chunks2[1:3]
sub_list2 = ee.FeatureCollection(sub_list2).flatten()
chunksize2 = 50

def split_feature_collection(fc, chunksize2):
    """Split ``fc`` into consecutive FeatureCollections of at most ``chunksize2`` features.

    The feature count is fetched with ``getInfo()``; the function returns a
    Python list of ``ee.FeatureCollection`` objects.
    """
    num_features = fc.size().getInfo()

    # Ceiling division, so a final partial chunk is still included
    num_chunks = (num_features + chunksize2 - 1) // chunksize2

    # Every chunk is a slice of the same feature list
    features = fc.toList(num_features)
    return [
        ee.FeatureCollection(features.slice(i * chunksize2, (i + 1) * chunksize2))
        for i in range(num_chunks)
    ]

# Split the FeatureCollection into chunks
chunks3 = split_feature_collection(sub_list2, chunksize2)

In [None]:
# Re-split two of the third-level chunks (indices 11 and 12) into smaller pieces
sub_list3 = chunks3[11:13]
sub_list3 = ee.FeatureCollection(sub_list3).flatten()
chunksize3 = 10

def split_feature_collection(fc, chunksize3):
    """Split ``fc`` into consecutive FeatureCollections of at most ``chunksize3`` features.

    The feature count is fetched with ``getInfo()``; the function returns a
    Python list of ``ee.FeatureCollection`` objects.
    """
    num_features = fc.size().getInfo()

    # Ceiling division, so a final partial chunk is still included
    num_chunks = (num_features + chunksize3 - 1) // chunksize3

    # Every chunk is a slice of the same feature list
    features = fc.toList(num_features)
    return [
        ee.FeatureCollection(features.slice(i * chunksize3, (i + 1) * chunksize3))
        for i in range(num_chunks)
    ]

# Split the FeatureCollection into chunks
chunks4 = split_feature_collection(sub_list3, chunksize3)

In [None]:
# Re-split one fourth-level chunk (index 6) into single-feature pieces
sub_list4 = chunks4[6:7]
sub_list4 = ee.FeatureCollection(sub_list4).flatten()
chunksize4 = 1

def split_feature_collection(fc, chunksize4):
    """Split ``fc`` into consecutive FeatureCollections of at most ``chunksize4`` features.

    The feature count is fetched with ``getInfo()``; the function returns a
    Python list of ``ee.FeatureCollection`` objects.
    """
    num_features = fc.size().getInfo()

    # Ceiling division, so a final partial chunk is still included
    num_chunks = (num_features + chunksize4 - 1) // chunksize4

    # Every chunk is a slice of the same feature list
    features = fc.toList(num_features)
    return [
        ee.FeatureCollection(features.slice(i * chunksize4, (i + 1) * chunksize4))
        for i in range(num_chunks)
    ]

# Split the FeatureCollection into chunks
chunks5 = split_feature_collection(sub_list4, chunksize4)

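Run the cell below once for each chunk list (chunks, chunks2, chunks3, chunks4, chunks5), changing the list named in the `for` loop and the `_chunk5_` label in the export description to match.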

In [None]:
## choose region
#regionfun('MRD') # options = TUK, MRD, AND, AKCP, YKD, YKF

## pickens_fun and pekel_fun are defined once, outside the loop. Both look up the
## module-level name `lakeshp` when they are called, and the loop below rebinds
## that name to the current chunk before mapping them.

#############
###### PICKENS
############
def pickens_fun(image):
  """Sum water, masked, and total area per lake for one Pickens annual image.

  Returns the features of the current ``lakeshp`` with ``year``,
  ``Landsat_Pickens`` and ``percent_masked`` set, keeping only the lakes whose
  rounded masked percentage is 0.
  """
  ## get image year: the fifth '/'-separated token of the image id
  year = ee.String(image.get('system:id')).split('/').get(4)
  pixel_area = ee.Image.pixelArea()
  ## fill masked pixels with -1 so they can be told apart from valid data
  filled = image.unmask(-1)
  ## area of water: pixels with a value above 0, clipped to the lakes
  water_area_img = image.clip(lakeshp).gt(0).multiply(pixel_area)
  ## area of the pixels that were masked in the input image
  masked_area_img = filled.eq(-1).clip(lakeshp).multiply(pixel_area)
  ## area of every pixel in the region, masked or not
  total_area_img = filled.gte(-1).clip(lakeshp).multiply(pixel_area)
  ## band order matters: the summed bands are read back as wp, wp_1, wp_2
  area_image = water_area_img.addBands(masked_area_img).addBands(total_area_img)
  ## sum water, masked, and total areas over each lake
  lake_sums = area_image.reduceRegions(collection=lakeshp,
                                       scale=30,
                                       reducer=ee.Reducer.sum())

  ## add feature properties to output
  def pickens_dealwithoutput(f):
    water = ee.Number(f.get('wp'))
    masked = ee.Number(f.get('wp_1')).round()
    total = ee.Number(f.get('wp_2')).round()
    percent_masked = masked.divide(total).multiply(100).round()
    return f.set({"year": year, "Landsat_Pickens": water, "percent_masked": percent_masked})

  results = lake_sums.map(pickens_dealwithoutput)
  return results.filter(ee.Filter.eq('percent_masked', 0))


#############
###### PEKEL
############
def pekel_fun(image):
  """Sum water, no-observation, and total area per lake for one Pekel yearly image.

  Returns the features of the current ``lakeshp`` with ``year``,
  ``Landsat_Pekel`` and ``percent_masked`` set, keeping only the lakes whose
  rounded no-observation percentage is 0.
  """
  ## get image year: the fourth '/'-separated token of the image id
  year = ee.String(image.get('system:id')).split('/').get(3)
  pixel_area = ee.Image.pixelArea()
  ## pixel classes:
  ##   0 = no observations
  ##   1 = not water
  ##   2 = seasonal water
  ##   3 = permanent water
  clipped = image.clip(lakeshp)
  ## area of water: seasonal or permanent
  water_area_img = clipped.eq(2).Or(clipped.eq(3)).multiply(pixel_area)
  ## area with no observations
  noobs_area_img = image.eq(0).clip(lakeshp).multiply(pixel_area)
  ## total area of all valid (>= 0) pixels
  total_area_img = image.gte(0).clip(lakeshp).multiply(pixel_area)
  ## band order matters: the summed bands are read back as
  ## waterClass, waterClass_1, waterClass_2
  area_image = water_area_img.addBands(noobs_area_img).addBands(total_area_img)
  lake_sums = area_image.reduceRegions(collection=lakeshp,
                                       scale=30,
                                       reducer=ee.Reducer.sum())

  ## add feature properties to output
  def pekeloutput(f):
    water = ee.Number(f.get('waterClass'))
    ## Keep masked and total in the same unit. Previously only the total was
    ## divided by 1e6 and rounded, which made it 0 for any lake under half a
    ## square kilometre; Earth Engine's divide is documented to return 0 on
    ## division by 0, so such lakes always passed the percent_masked filter.
    masked = ee.Number(f.get('waterClass_1')).round()
    total = ee.Number(f.get('waterClass_2')).round()
    percent_masked = masked.divide(total).multiply(100).round()
    return f.set({"year": year, "Landsat_Pekel": water, "percent_masked": percent_masked})

  results = lake_sums.map(pekeloutput)
  return results.filter(ee.Filter.eq('percent_masked', 0))


for index, lakeshp in enumerate(chunks5):
  ## per-image reductions for this chunk, flattened to one collection each
  pickens_intermediate = pickens_collection.map(pickens_fun)
  pickens_results = ee.FeatureCollection(pickens_intermediate).flatten()

  pekel_intermediate = pekel_collection.map(pekel_fun)
  pekel_results = ee.FeatureCollection(pekel_intermediate).flatten()

  #############
  ###### PUT TOGETHER AND EXPORT
  ############
  allresults = ee.FeatureCollection([pekel_results, pickens_results]).flatten()

  ## one export task per chunk; the index keeps the file names distinct
  description = f"Landsat_lake_areas_{region_label}_chunk5_{index}"
  task = ee.batch.Export.table.toDrive(
      collection=allresults,
      selectors=['year', 'Landsat_Pekel', 'Landsat_Pickens', 'lake_id'],
      folder='Landsat_MRD_chunks',
      description=description,
      fileFormat='CSV',
  )
  task.start()