# Getting started with authentication and package installation

In [None]:
# Authenticate the current user through Colab's auth helper.
# NOTE(review): presumably this is what lets the later `gsutil cp` calls write
# to the Cloud Storage bucket -- confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Run the Earth Engine command-line authentication flow (shell command), then
# import the Earth Engine Python API and initialize it for this session.
!earthengine authenticate
import ee
ee.Initialize()

In [None]:
# Make sure geemap is available; install it with pip when the import fails.
import subprocess
import sys
try:
    import geemap
except ImportError:
    print('geemap package not installed. Installing ...')
    # Invoke pip through the interpreter that is running this notebook, so the
    # package is installed into the environment the kernel imports from rather
    # than whatever "python" happens to be first on PATH.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'geemap'])

In [None]:
# Install geopandas with pip via a shell command (imported below as `gpd`).
!pip install geopandas

In [None]:
import geemap
import os
import geopandas as gpd
import pandas as pd
import math

# Prepare datasets for quality check

In [None]:
# Import remote sensing datasets that can be quality controlled and masked with bitmasks.
# MODISGPP: collection whose "Psn_QC" band is inspected by maskgpp.
# MODISLST: collection whose "QC_Day" band is inspected by masklst.
MODISGPP = ee.ImageCollection("MODIS/006/MOD17A2H")
MODISLST = ee.ImageCollection('MODIS/006/MOD11A1')

In [None]:
# Import record dataset for quality control.
# The processing loop filters this collection on its 'Num' property and reads
# the 'Date' property (parsed as MM/dd/YYYY) of each record.
#siteyear = ee.FeatureCollection("users/username/Moisture/site_year")
sitedate = ee.FeatureCollection("users/username/Moisture/Site_date")

# Define quality control masks


In [None]:
def maskgpp(image):
    """Keep only the pixels of `image` that its "Psn_QC" band flags.

    A pixel stays unmasked when at least one of these holds for its
    "Psn_QC" value:
      * bit 0 is set,
      * bits 3-4 hold 1 or 2,
      * bits 5-7 hold a value from 2 through 6.

    Args:
        image: an image exposing a "Psn_QC" band.

    Returns:
        The result of `image.updateMask` with the combined flag mask.
    """
    qa_band = image.select("Psn_QC")

    def bit_field(band, first_bit, last_bit):
        # Shift the field down to bit 0, then drop the higher bits with a
        # modulo by 2**width.
        width = last_bit - first_bit + 1
        return band.rightShift(first_bit).mod(math.pow(2, width))

    qc_hit = bit_field(qa_band, 0, 0).gte(1)

    cloud_bits = bit_field(qa_band, 3, 4)
    cloud_hit = cloud_bits.gte(1).And(cloud_bits.lt(3))

    qs_bits = bit_field(qa_band, 5, 7)
    qs_hit = qs_bits.gte(2).And(qs_bits.lt(7))

    flagged = qc_hit.Or(cloud_hit).Or(qs_hit)
    return image.updateMask(flagged)

In [None]:
def masklst(image):
    """Keep only the pixels of `image` whose "QC_Day" bits 0-1 equal 1.

    Args:
        image: an image exposing a "QC_Day" band.

    Returns:
        The result of `image.updateMask` with that mask applied.
    """
    qa_band = image.select("QC_Day")

    # Isolate bits 0-1: shift by the first bit, then modulo 2**width.
    first_bit, last_bit = 0, 1
    width = last_bit - first_bit + 1
    low_bits = qa_band.rightShift(first_bit).mod(math.pow(2, width))

    return image.updateMask(low_bits.eq(1))

# Create list with low-quality remote sensing covariates

In [None]:
# Define output directory and create it if it is missing.
out_dir = os.path.expanduser('.')
# exist_ok=True replaces the separate existence check, so there is no window
# between "check" and "create" in which the directory can appear.
os.makedirs(out_dir, exist_ok=True)

In [None]:
for k in range (0, 500000, 5000): # Customize the value based on the total record number; set a reasonable step size for intermediate exports
  gppfail_list = []
  lstfail_list = []
  low = k;
  high = k+5000 # Set the value to the step size
  for j in range (low, high, 5): # Define a step size for processing subsample; smaller value usually corresponds to faster processing but can take more memory space
    lb = j;
    hb = j+5;
    subset = sitedate.filter(ee.Filter.And(ee.Filter.gte('Num', lb),ee.Filter.lt('Num', hb)))
    site_date = geemap.ee_to_geopandas(subset, selectors = ['Num','Site','Water_year','Date'])
    site = geemap.ee_to_geopandas(subset, selectors = ['Site', 'Num', 'ID', 'Network', 'Depth', 'Date', 'Water_day', 'Water_year'])
    site_buffer = site.buffer(0.0008084837557075693617); #90m 
    site_Buffer = gpd.GeoDataFrame(geometry=gpd.GeoSeries(site_buffer),crs="EPSG:4326")
    ROI_Buffer = geemap.geopandas_to_ee(site_Buffer)
    good_list = []
    for i in range(0,subset.size().getInfo(),1):
      modisgpp_test = MODISGPP.filter(ee.Filter.date(ee.Date.parse('MM/dd/YYYY',site_date['Date'].get(i)) \
                               .advance(-8, 'day'), ee.Date.parse('MM/dd/YYYY',site_date['Date'] \
                               .get(i)).advance(8, 'day'))).size().getInfo();
      modislst_test = MODISLST.filter(ee.Filter.date(ee.Date.parse('MM/dd/YYYY',site_date['Date'].get(i)))) \
                               .size().getInfo(); 
      if (modisgpp_test != 0) and (modislst_test != 0):
        good_list.append(i)
    site_date_filtered = site_date[site_date.index.isin(good_list)].reset_index() 
    for i in range(0,len(good_list),1):
      ROI_Buffer = geemap.geopandas_to_ee(site_Buffer.iloc[[i]]);
      modisgppmasked = MODISGPP.filter(ee.Filter.date(ee.Date.parse('MM/dd/YYYY',site_date_filtered['Date'].get(i)) \
                              .advance(-8, 'day'), ee.Date.parse('MM/dd/YYYY',site_date_filtered['Date'].get(i)) \
                              .advance(8, 'day'))).map(maskgpp) \
                              .select("Psn_QC").first().clip(ROI_Buffer.geometry())
      modislstmasked = MODISLST.filter(ee.Filter.date(ee.Date.parse('MM/dd/YYYY',site_date_filtered['Date'].get(i)))) \
                                    .map(masklst).select("QC_Day").first().clip(ROI_Buffer.geometry())
      gppmasked = modisgppmasked.reduceRegion(reducer = ee.Reducer.toList(), scale=90).get('Psn_QC').getInfo();  
      lstmasked = modislstmasked.reduceRegion(reducer = ee.Reducer.toList(), scale=90).get('QC_Day').getInfo();    
      if (len(gppmasked) != 0):
         gppfail_list.append(site_date_filtered['Num'].get(i)) 
      if (len(lstmasked) != 0):
         lstfail_list.append(site_date_filtered['Num'].get(i))
  gppfail_list = pd.DataFrame(gppfail_list)
  gppfail_list.to_csv('gppfail_list.csv')
  lstfail_list = pd.DataFrame(lstfail_list)
  lstfail_list.to_csv('lstfail_list.csv')   
  !gsutil cp gppfail_list.csv gs://bucket/Moisture/Test3/"failgppset_"$k".csv"
  !gsutil cp lstfail_list.csv gs://bucket/Moisture/Test3/"faillstset_"$k".csv"
  !rm *.csv                           