In [3]:
# Setup
!pip install --quiet --upgrade pip

# Install the dependencies.
!pip install --quiet -r requirements.txt

# Restart the runtime by ending the process.
exit()

In [1]:
from __future__ import annotations

import functools
import json
import logging
import os
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from importlib import reload

import apache_beam as beam
import ee
import folium
import google.auth
import pandas as pd
from apache_beam.options.pipeline_options import PipelineOptions
from branca.element import Figure
from folium import plugins
from google.cloud import storage

from serving import data
from serving.data import get_input_image_ee, SCALE
from utils.constants import BUCKET, PROJECT

In [2]:
# Add <cwd>/serving to the module search path. The membership check keeps
# re-runs of this cell from stacking duplicate entries on sys.path.
current_dir = os.getcwd()
serving_dir = os.path.join(current_dir, 'serving')
if serving_dir not in sys.path:
    sys.path.append(serving_dir)

# Timestamped INFO-level logging for the rest of the notebook.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [2]:
# Google Cloud parameters (project and bucket come from utils.constants).
project = PROJECT
bucket = BUCKET
# NOTE(review): "us-west2-a" has the form of a zone rather than a region, and
# `location` is only validated here, not used, in the visible cells — confirm
# which form downstream code expects.
location = "us-west2-a"  # @param {type:"string"}

# Quick input validations.
assert project, "⚠️ Please provide a Google Cloud project ID"
assert bucket, "⚠️ Please provide a Cloud Storage bucket name"
assert not bucket.startswith(
    "gs://"
), f"⚠️ Please remove the gs:// prefix from the bucket name: {bucket}"
assert location, "⚠️ Please provide a Google Cloud location"

# Set GOOGLE_CLOUD_PROJECT for google.auth.default().
os.environ["GOOGLE_CLOUD_PROJECT"] = project

# Set the gcloud project for other gcloud commands.
!gcloud config set project {project}

# Initialise Google Earth Engine with the application-default credentials,
# pointing the client at the high-volume endpoint.
credentials, _ = google.auth.default()
ee.Initialize(
    credentials.with_quota_project(None),
    project=project,
    opt_url="https://earthengine-highvolume.googleapis.com",
)

Updated property [core/project].


In [5]:
# CONSTANTS
# First and last year of interest.
# NOTE(review): YEAR_* and SEASON_* are not referenced in the visible cells;
# confirm they are used further down or remove them.
YEAR_START=2017
YEAR_END=2023

# First and last month of the season (presumably the growing season — confirm).
SEASON_START=5
SEASON_END=10

CROP=1 #Corn

# Visualisation parameters passed to ee.Image.getMapId: three bands rendered
# as RGB with values stretched between 0 and 3000.
# NOTE(review): B4/B3/B2 look like the Sentinel-2 red/green/blue bands
# (true colour) — confirm against serving.data.
vis_params = {
    "min": 0,
    "max": 3000,
    "bands": ["B4", "B3", "B2"],}

In [5]:
# reload(data)  # uncomment to re-import serving.data after editing it

# Show sample filter:
# fetch the input image for one county at two different months of the same
# year so the two can be compared visually.

start_year = 2017
start_month = 4
end_month = 7
crop_type = 1 # Corn
county = "Lancaster"

# Geometry of every TIGER 2018 county feature whose NAME equals `county`.
# NOTE(review): the filter uses the county name only, so same-named counties
# in different states would all be included — confirm whether a state filter
# is needed.
county_geom = (
    ee.FeatureCollection("TIGER/2018/Counties")
    .filter(ee.Filter.eq("NAME", county))
).geometry()

# Centroid coordinates pulled client-side; used to centre the map.
coords = county_geom.centroid().coordinates().getInfo()

# get_input_image_ee returns a mapping; only its "image" entry is needed here.
s2_img_start = get_input_image_ee(county, crop_type, start_year, start_month)["image"]
s2_img_end = get_input_image_ee(county, crop_type, start_year, end_month)["image"]

In [6]:
# Create the map, centred on the county centroid. The coordinate pair is
# reversed because folium expects [lat, lon].
m1 = folium.Map(coords[::-1])


def _ee_tile_layer(image, name):
    """Wrap an Earth Engine image as a folium tile layer rendered with ``vis_params``.

    Args:
        image: ``ee.Image`` to display.
        name: Label shown for the layer in the map's layer control.

    Returns:
        A ``folium.TileLayer`` backed by the Earth Engine tile URL of ``image``.
    """
    return folium.TileLayer(
        tiles=image.getMapId(vis_params)["tile_fetcher"].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        name=name,
        overlay=True,
        control=True,
    )


# One layer per month. Each layer gets its own descriptive name: previously
# both were labelled 'Cropland Data Layer', which neither matched what is
# rendered (the RGB input images) nor distinguished the two sides.
layer_left = _ee_tile_layer(s2_img_start, f"Input image {start_year}-{start_month:02d}")
layer_right = _ee_tile_layer(s2_img_end, f"Input image {start_year}-{end_month:02d}")

# Swipe control: earlier month on the left, later month on the right.
sbs = plugins.SideBySideLayers(layer_left=layer_left, layer_right=layer_right)

layer_left.add_to(m1)
layer_right.add_to(m1)
sbs.add_to(m1)

# Display the map
m1

In [7]:
"""
Changes in crops are observable in the satellite images. One can hope that the colour saturation and temporal change hold predictive power
"""

'\nChanges in crops can be observable in the satellite images. Once can hope that the colour saturation and temporal change would hold predictive power\n'

In [None]:
# DATA RETRIEVAL

In [33]:
# Be very careful not to retrieve the same images more than once:
# checking for existence is quite expensive.
counties = [
    "Lancaster",
    "Crawford",
    "Orleans",
    "Tulare",
    "Wharton",
    "Story",
    "Canyon",
    "Kit Carson",
]
# years = range(2018,2023)
years = range(2017, 2019)
months = [5, 7, 9]

# One keyword-argument set per (county, year, month) combination, all for
# the same crop.
get_input_img_params = [
    dict(county=county_name, crop=CROP, year=yr, month=mo)
    for county_name in counties
    for yr in years
    for mo in months
]


In [13]:

def check_blob_prefix_exists(bucket_name, prefix, client=None):
    """Return True when at least one blob in the bucket starts with ``prefix``.

    Args:
        bucket_name: Name of the Cloud Storage bucket.
        prefix: Object-name prefix to look for.
        client: Optional storage client to reuse. When omitted a new
            ``storage.Client`` is created for this call.

    Returns:
        bool: True if the listing yields at least one blob, else False.
    """
    storage_client = client if client is not None else storage.Client()
    # max_results=1: only existence matters, so stop after the first hit.
    blobs = storage_client.bucket(bucket_name).list_blobs(prefix=prefix, max_results=1)
    return any(True for _ in blobs)


def batch_check_blobs(bucket_name, prefixes, client=None, max_workers=10):
    """Check many prefixes concurrently.

    Args:
        bucket_name: Name of the Cloud Storage bucket.
        prefixes: Iterable of object-name prefixes to check.
        client: Optional storage client shared by all checks. When omitted a
            single ``storage.Client`` is created and reused, instead of one
            client per prefix.
        max_workers: Size of the thread pool.

    Returns:
        dict mapping each prefix to True (something exists) or False.

    Raises:
        Whatever an individual check raises; it is re-raised by ``future.result()``.
    """
    prefixes = list(prefixes)
    if not prefixes:
        # Nothing to check: avoid creating a client or a thread pool.
        return {}

    if client is None:
        client = storage.Client()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_prefix = {
            executor.submit(check_blob_prefix_exists, bucket_name, prefix, client): prefix
            for prefix in prefixes
        }
        return {
            future_to_prefix[future]: future.result()
            for future in as_completed(future_to_prefix)
        }

# Work out which (county, year, month) exports are still missing from the bucket.
bucket_name = bucket
counties = ["Lancaster", "Crawford", "Orleans", "Tulare", "Wharton", "Story", "Canyon", "Kit Carson"]
years = range(2017, 2019)
months = [5, 7, 9]


def _export_prefix(params):
    """Object-name prefix under which the export for ``params`` is stored.

    The trailing underscore mirrors the ``_{SCALE}_`` suffix that export_img
    appends, so that e.g. a scale of 10 cannot match files written at 100.
    """
    county, year, month = params["county"], params["year"], params["month"]
    return f'images/{county}_{year}_{month}-{month+1}_{SCALE}_'


# Every candidate parameter set, paired index-for-index with its prefix.
candidate_params = [
    {"county": county, "crop": CROP, "year": year, "month": month}
    for county in counties
    for year in years
    for month in months
]
prefixes = [_export_prefix(params) for params in candidate_params]

# Batch check all prefixes
results = batch_check_blobs(bucket_name, prefixes)

# Keep only the parameter sets whose export is not in the bucket yet.
get_input_img_params = [
    params
    for params, prefix in zip(candidate_params, prefixes)
    if not results[prefix]
]

print(f"Number of items to process: {len(get_input_img_params)}")

Number of items to process: 0


In [11]:
# Show the parameter sets that still need exporting (empty when every prefix already exists).
get_input_img_params

[]

In [3]:
def export_img(image: ee.Image, image_name: str, county_geom: ee.Geometry):
    """Start an Earth Engine batch export of ``image`` to Cloud Storage as GeoTIFF.

    Files are written to the notebook-level ``bucket`` under
    ``images/<image_name>_<SCALE>_``.

    Args:
        image: Image to export.
        image_name: Base name of the export; ``_<SCALE>_`` is appended.
        county_geom: Region to export.

    Returns:
        None. The task is started and a log line is emitted.
    """
    image_name += f"_{SCALE}_"

    # Task descriptions only accept a restricted character set and a limited
    # length, while county names can contain spaces (e.g. "Kit Carson").
    # Only the description is sanitised; the file name prefix is left alone
    # so it still matches the names looked up in the bucket.
    description = re.sub(r"[^a-zA-Z0-9.,:;_-]", "-", image_name)[:100]

    img_task = ee.batch.Export.image.toCloudStorage(
        image=image,
        description=description,
        bucket=bucket,
        fileNamePrefix=f"images/{image_name}",
        scale=SCALE,
        region=county_geom,
        fileDimensions=4 * 2048,  # output tiles of 8192 px per side
        skipEmptyTiles=True,
        fileFormat="GeoTIFF",
        maxPixels=1e9,
    )

    img_task.start()
    logging.info(f"Export task started for: {image_name}")

def apply_get_input_image(params: dict):
    """Call ``get_input_image_ee`` with ``params`` expanded as keyword arguments.

    Exists so a single dict element can be fed through ``beam.Map``.

    Args:
        params: Keyword arguments for ``get_input_image_ee``; the cells above
            build them with the keys "county", "crop", "year" and "month".

    Returns:
        Whatever ``get_input_image_ee`` returns.
    """

    return get_input_image_ee(**params)

def unpack_for_export(list_input):
    """Export one image record produced by the previous pipeline step.

    ``list_input`` is a mapping with an "image" and an "image_name" entry.
    The county is taken from the part of the image name before the first
    underscore, its geometry is looked up in the TIGER 2018 counties
    collection, and the image is handed to ``export_img``.
    """
    image_name = list_input["image_name"]
    county, _, _ = image_name.partition("_")

    all_counties = ee.FeatureCollection("TIGER/2018/Counties")
    name_matches = ee.Filter.eq("NAME", county)
    county_geom = all_counties.filter(name_matches).geometry()

    return export_img(list_input["image"], image_name, county_geom)

In [10]:
# Fetch each missing image and start its export, one pipeline element per
# parameter set. The pipeline runs when the `with` block exits.
with beam.Pipeline() as pipeline:
    param_sets = pipeline | "Create parameter sets" >> beam.Create(get_input_img_params)
    images = param_sets | "Sample counties" >> beam.Map(apply_get_input_image)
    images | "Export image to GCS bucket" >> beam.Map(unpack_for_export)

In [67]:
# Show the parameter sets again.
# NOTE(review): the saved output lists year 2022, which the
# `years = range(2017, 2019)` cells above cannot produce — it presumably comes
# from an earlier session; re-run the notebook top to bottom to refresh it.
get_input_img_params

[{'county': 'Lancaster', 'crop': 1, 'year': 2022, 'month': 9},
 {'county': 'Crawford', 'crop': 1, 'year': 2022, 'month': 9},
 {'county': 'Orleans', 'crop': 1, 'year': 2022, 'month': 9},
 {'county': 'Tulare', 'crop': 1, 'year': 2022, 'month': 9}]

In [8]:
from google.cloud import storage
import time
import re

def time_operation(func):
    """Decorator that prints how long each call to ``func`` took.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper with the same name, docstring and signature metadata as
        ``func`` that forwards all arguments, prints the elapsed time and
        returns ``func``'s result. Nothing is printed if ``func`` raises.
    """
    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"Operation took {elapsed:.4f} seconds")
        return result
    return wrapper

@time_operation
def list_blobs_with_prefix(bucket_name, prefix):
    """Print the name of every blob in ``bucket_name`` that starts with ``prefix``.

    The elapsed time is reported by the ``time_operation`` decorator.
    """
    gcs_bucket = storage.Client().bucket(bucket_name)

    # Filter by prefix in the listing call and print one name per line.
    for blob in gcs_bucket.list_blobs(prefix=prefix):
        print(blob.name)

# Usage examples: narrow the prefix step by step.
bucket_name = bucket
image_name = 'images/'

# List every exported file for one county.
image_name += "Orleans"
list_blobs_with_prefix(bucket_name, image_name)

# Narrow to a single year.
image_name += "_2017"
list_blobs_with_prefix(bucket_name, image_name)

# Narrow to a single month window and scale. Exported names join the two
# months with a hyphen (e.g. images/Orleans_2017_9-10_100_...), so the suffix
# must be "_9-10_100"; the previous "_9_10_100" could not match them.
image_name += "_9-10_100"
list_blobs_with_prefix(bucket_name, image_name)

images/Orleans_2017_5-6_100_0000000000-0000008192.tif
images/Orleans_2017_5-6_100_0000000000-0000016384.tif
images/Orleans_2017_5-6_100_0000016384-0000000000.tif
images/Orleans_2017_7-8_100_0000000000-0000008192.tif
images/Orleans_2017_7-8_100_0000000000-0000016384.tif
images/Orleans_2017_7-8_100_0000016384-0000000000.tif
images/Orleans_2017_9-10_100_0000000000-0000008192.tif
images/Orleans_2017_9-10_100_0000000000-0000016384.tif
images/Orleans_2017_9-10_100_0000016384-0000000000.tif
images/Orleans_2018_5-6_100_0000000000-0000008192.tif
images/Orleans_2018_5-6_100_0000000000-0000016384.tif
images/Orleans_2018_7-8_100_0000000000-0000008192.tif
images/Orleans_2018_7-8_100_0000000000-0000016384.tif
images/Orleans_2018_9-10_100_0000000000-0000008192.tif
images/Orleans_2018_9-10_100_0000000000-0000016384.tif
Operation took 0.0665 seconds
images/Orleans_2017_5-6_100_0000000000-0000008192.tif
images/Orleans_2017_5-6_100_0000000000-0000016384.tif
images/Orleans_2017_5-6_100_0000016384-00000000