In [1]:
import ee
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [2]:
ee.Authenticate()

True

In [4]:
# Initialize Earth Engine
ee.Initialize()


In [94]:
# Define a small date range in August 2022
start_date = '2022-08-01'
end_date = '2022-08-05'

# Define a smaller sampling region within the Midwest (e.g., around Iowa)
midwest = ee.FeatureCollection("TIGER/2018/States") \
    .filter(ee.Filter.eq('NAME', 'Iowa'))  # Limit to Iowa to reduce complexity

# Load the 2022 Cropland Data Layer (CDL) and clip it to Iowa
cdl = ee.ImageCollection("USDA/NASS/CDL").filterDate('2022-01-01', '2022-12-31').first()
cdl = cdl.select(['cropland']).clip(midwest.geometry())  # Use only cropland

# Filter Sentinel-2 imagery for the specified date range with low cloud cover
sentinel = ee.ImageCollection('COPERNICUS/S2_SR') \
    .filterDate(start_date, end_date) \
    .filterBounds(midwest.geometry()) \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    .first()

# Check if a valid image is available
if sentinel is None:
    raise ValueError("No suitable Sentinel-2 image found within the date range.")

# Calculate only NDVI to reduce computational load
ndvi = sentinel.normalizedDifference(['B8', 'B4']).rename('NDVI')

# Mask NDVI with the CDL to focus only on cropland areas
masked_ndvi = ndvi.updateMask(cdl.gt(0))

# Convert CDL cropland areas to polygons with simplified geometries
fields = cdl.reduceToVectors(
    geometry=midwest.geometry(),
    scale=300,  # Use a larger scale to reduce computational load
    geometryType='polygon',
    maxPixels=1e12  # Lower maxPixels to avoid memory issues
)

# Randomly sample 100 fields
random_fields = fields.randomColumn('random').sort('random').limit(100)

# Compute field-level mean NDVI for each field
def compute_field_stats(field):
    stats = masked_ndvi.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=field.geometry(),
        scale=30,
        maxPixels=1e12,  # Adjust maxPixels to avoid memory issues
        bestEffort=True
    )
    return field.set(stats)

# Map the statistics function over the random fields
field_data = random_fields.map(compute_field_stats)

# Convert the EE FeatureCollection to a Pandas DataFrame
def ee_to_pandas(fc):
    """Convert an Earth Engine FeatureCollection to a Pandas DataFrame."""
    features = fc.getInfo()['features']
    data = [{**f['properties'], 'geometry': f['geometry']} for f in features]
    return pd.DataFrame(data)

# Convert the field data to a DataFrame and display it
data = ee_to_pandas(field_data)

In [95]:
data

Unnamed: 0,count,label,random,geometry,NDVI
0,2,141,1.399752e-08,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
1,1,123,3.780494e-06,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
2,2,5,9.724878e-06,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
3,2,5,1.640998e-05,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
4,1,1,1.830877e-05,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
...,...,...,...,...,...
95,1,176,5.747692e-04,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
96,3,36,5.771594e-04,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
97,1,176,5.779030e-04,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
98,1,1,5.794768e-04,"{'geodesic': False, 'type': 'Polygon', 'coordi...",
