In [5]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Set display options to show all columns
pd.set_option('display.max_columns', None)

# Define the points of interest (coordinates for which data will be extracted)
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# Create EE geometry points (server-side objects)
ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer (server-side object)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)  # 30-meter buffer around the central point

# Setup image and cloud score collections (server-side objects)
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10  # Spatial resolution in meters
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

# Fetch pixel and cloud values from Earth Engine and bring them to the client side
try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert the extracted data to pandas DataFrames and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Verify the columns to ensure 'id' is present
print("Pixel Values DataFrame Columns:", pixel_values_df.columns)
print("Cloud Values DataFrame Columns:", cloud_values_df.columns)

# Function to extract sun angles and tile ID from image metadata
def extract_sun_angles(image):
    # Extract the sun zenith and azimuth angles as ee.Number objects
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    # Extract the tile ID
    tile_id = image.get('system:index')
    # Return an ee.Feature with these properties for standardized handling
    return ee.Feature(None, {
        'system:time_start': image.get('system:time_start'), 
        'sun_zenith': sun_zenith, 
        'sun_azimuth': sun_azimuth,
        'tile_id': tile_id
    })

# Apply the function to each image in the collection and fetch the data to the client side
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert the sun angles information to a pandas DataFrame
sun_angles_df = pd.DataFrame([{
    'tile_id': image['properties']['tile_id'],
    'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'),
    'sun_zenith': image['properties']['sun_zenith'],
    'sun_azimuth': image['properties']['sun_azimuth']
} for image in sun_angles['features']])

# Verify the sun_angles_df columns
print("Sun Angles DataFrame Columns:", sun_angles_df.columns)

# Function to find the nearest points
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points and find the nearest POIs for each GEE returned point
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Extract tile IDs for pixel values and cloud values
pixel_values_df['tile_id'] = pixel_values_df['id'].apply(lambda x: x.split('_')[0])
cloud_values_df['tile_id'] = cloud_values_df['id'].apply(lambda x: x.split('_')[0])

# Verify the addition of the tile_id column
print("Pixel Values DataFrame with Tile ID Columns:", pixel_values_df.columns)
print("Cloud Values DataFrame with Tile ID Columns:", cloud_values_df.columns)

# Print sample rows to inspect the tile_id values
print("Sample rows from Pixel Values DataFrame:")
print(pixel_values_df[['id', 'tile_id']].head())
print("Sample rows from Cloud Values DataFrame:")
print(cloud_values_df[['id', 'tile_id']].head())
print("Sample rows from Sun Angles DataFrame:")
print(sun_angles_df.head())

# Merge DataFrames based on the tile ID and include sun angles
def merge_dataframes_by_tile_id(pixels_df, clouds_df, sun_df):
    merged_dfs = []
    for i in range(len(points_of_interest)):
        # Select rows corresponding to the current point of interest
        point_pixels = pixels_df[pixels_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index', 'time'])
        point_clouds = clouds_df[clouds_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index', 'time'])
        
        # Merge the pixel and cloud dataframes based on the tile ID
        point_df = pd.merge(point_pixels, point_clouds, on='tile_id', how='outer', suffixes=('_pixel', '_cloud'))
        point_df = point_df.add_prefix(f'point_{i+1}_').rename(columns={f'point_{i+1}_timestamp': 'timestamp'})
        
        # Drop redundant columns
        point_df = point_df.drop(columns=[f'point_{i+1}_id_cloud'])
        merged_dfs.append(point_df)
    
    # Concatenate all point DataFrames and remove duplicate columns
    merged_df = pd.concat(merged_dfs, axis=1)
    merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
    
    # Merge with sun angles DataFrame based on the tile ID
    merged_df = pd.merge(merged_df, sun_df, on='tile_id', how='left')
    
    return merged_df

# Merge dataframes and print the final result
final_df = merge_dataframes_by_tile_id(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)

# Save the final DataFrame to a CSV file without including the index
final_df.to_csv('s2222.csv', index=False)


Pixel Values DataFrame Columns: Index(['id', 'longitude', 'latitude', 'time', 'B2', 'B3', 'B4', 'B8', 'B8A',
       'B11', 'B12', 'timestamp'],
      dtype='object')
Cloud Values DataFrame Columns: Index(['id', 'longitude', 'latitude', 'time', 'cs', 'cs_cdf', 'timestamp'], dtype='object')
Sun Angles DataFrame Columns: Index(['tile_id', 'timestamp', 'sun_zenith', 'sun_azimuth'], dtype='object')
Pixel Values DataFrame with Tile ID Columns: Index(['id', 'longitude', 'latitude', 'time', 'B2', 'B3', 'B4', 'B8', 'B8A',
       'B11', 'B12', 'timestamp', 'poi_index', 'tile_id'],
      dtype='object')
Cloud Values DataFrame with Tile ID Columns: Index(['id', 'longitude', 'latitude', 'time', 'cs', 'cs_cdf', 'timestamp',
       'poi_index', 'tile_id'],
      dtype='object')
Sample rows from Pixel Values DataFrame:
                                       id          tile_id
0  20180827T222541_20180827T222807_T59GMM  20180827T222541
1  20181213T223659_20181213T223656_T59GMM  20181213T223659
2  20181

KeyError: 'tile_id'

In [8]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Set display options to show all columns
pd.set_option('display.max_columns', None)

# Define the points of interest (coordinates for which data will be extracted)
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# Create EE geometry points (server-side objects)
ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer (server-side object)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)  # 30-meter buffer around the central point

# Setup image and cloud score collections (server-side objects)
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10  # Spatial resolution in meters
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

# Fetch pixel and cloud values from Earth Engine and bring them to the client side
try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert the extracted data to pandas DataFrames and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Verify the columns to ensure 'id' is present
print("Pixel Values DataFrame Columns:", pixel_values_df.columns)
print("Cloud Values DataFrame Columns:", cloud_values_df.columns)

# Function to extract sun angles and tile ID from image metadata
def extract_sun_angles(image):
    # Extract the sun zenith and azimuth angles as ee.Number objects
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    # Extract the tile ID
    tile_id = image.get('system:index')
    # Return an ee.Feature with these properties for standardized handling
    return ee.Feature(None, {
        'system:time_start': image.get('system:time_start'), 
        'sun_zenith': sun_zenith, 
        'sun_azimuth': sun_azimuth,
        'tile_id': tile_id
    })

# Apply the function to each image in the collection and fetch the data to the client side
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert the sun angles information to a pandas DataFrame
sun_angles_df = pd.DataFrame([{
    'tile_id': image['properties']['tile_id'],
    'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'),
    'sun_zenith': image['properties']['sun_zenith'],
    'sun_azimuth': image['properties']['sun_azimuth']
} for image in sun_angles['features']])

# Verify the sun_angles_df columns
print("Sun Angles DataFrame Columns:", sun_angles_df.columns)

# Function to find the nearest points
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points and find the nearest POIs for each GEE returned point
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Extract tile IDs for pixel values and cloud values
pixel_values_df['tile_id'] = pixel_values_df['id'].apply(lambda x: x.split('_')[0])
cloud_values_df['tile_id'] = cloud_values_df['id'].apply(lambda x: x.split('_')[0])

# Verify the addition of the tile_id column
print("Pixel Values DataFrame with Tile ID Columns:", pixel_values_df.columns)
print("Cloud Values DataFrame with Tile ID Columns:", cloud_values_df.columns)



# Merge the dataframes on 'tile_id' and 'timestamp'
merged_df = pd.merge(pixel_values_df, cloud_values_df, on=['tile_id', 'timestamp'], suffixes=('_pixel', '_cloud'))
final_df = pd.merge(merged_df, sun_angles_df, on=['tile_id', 'timestamp'])

# Save the final DataFrame to a CSV file without including the index
final_df.to_csv('s2222.csv', index=False)



# Save the final DataFrame to a CSV file without including the index
final_df.to_csv('s2222.csv', index=False)

Pixel Values DataFrame Columns: Index(['id', 'longitude', 'latitude', 'time', 'B2', 'B3', 'B4', 'B8', 'B8A',
       'B11', 'B12', 'timestamp'],
      dtype='object')
Cloud Values DataFrame Columns: Index(['id', 'longitude', 'latitude', 'time', 'cs', 'cs_cdf', 'timestamp'], dtype='object')
Sun Angles DataFrame Columns: Index(['tile_id', 'timestamp', 'sun_zenith', 'sun_azimuth'], dtype='object')
Pixel Values DataFrame with Tile ID Columns: Index(['id', 'longitude', 'latitude', 'time', 'B2', 'B3', 'B4', 'B8', 'B8A',
       'B11', 'B12', 'timestamp', 'poi_index', 'tile_id'],
      dtype='object')
Cloud Values DataFrame with Tile ID Columns: Index(['id', 'longitude', 'latitude', 'time', 'cs', 'cs_cdf', 'timestamp',
       'poi_index', 'tile_id'],
      dtype='object')
