In [1]:
import ee
import pandas as pd
import pytz
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Map GEE points to POIs
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Merge DataFrames based on Nearest POI
def merge_dataframes_by_poi(pixels_df, clouds_df):
    merged_dfs = []
    for i in range(len(points_of_interest)):
        point_pixels = pixels_df[pixels_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        point_clouds = clouds_df[clouds_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        
        point_df = pd.merge(point_pixels, point_clouds, on='timestamp', how='outer', suffixes=('', '_cloud'))
        point_df = point_df.rename(columns=lambda x: f'point_{i+1}_{x}' if x not in ['timestamp'] else x)
        merged_dfs.append(point_df)
    
    # Concatenate all point DataFrames
    merged_df = pd.concat(merged_dfs, axis=1)
    return merged_df

# Example usage
final_df = merge_dataframes_by_poi(pixel_values_df, cloud_values_df)
print(final_df)


                                 point_1_id  point_1_time  point_1_B2  \
0                                       NaN           NaN         NaN   
1                                       NaN           NaN         NaN   
2                                       NaN           NaN         NaN   
3                                       NaN           NaN         NaN   
4                                       NaN           NaN         NaN   
..                                      ...           ...         ...   
931  20240606T222551_20240606T222547_T59GMM  1.717713e+12        75.0   
932  20240609T223711_20240609T223714_T59GMM  1.717973e+12      9368.0   
933  20240611T222549_20240611T222545_T59GMM  1.718145e+12      2002.0   
934  20240614T223709_20240614T223711_T59GMM  1.718405e+12       484.0   
935  20240616T222551_20240616T222548_T59GMM  1.718577e+12      2908.0   

     point_1_B3  point_1_B4  point_1_B8  point_1_B8A  point_1_B11  \
0           NaN         NaN         NaN          NaN  

In [4]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Extract unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Create comparison table
comparison_data = {
    "POI_Lon": [points_of_interest[i][0] for i in poi_indices],
    "POI_Lat": [points_of_interest[i][1] for i in poi_indices],
    "GEE_Lon": [lon for lon, lat in gee_points],
    "GEE_Lat": [lat for lon, lat in gee_points]
}

comparison_df = pd.DataFrame(comparison_data)

# Print comparison table
print(comparison_df)


      POI_Lon    POI_Lat     GEE_Lon    GEE_Lat
0  170.891426 -43.999248  170.891412 -43.999258
1  170.891551 -43.999248  170.891592 -43.999168
2  170.891676 -43.999248  170.891682 -43.999078


In [5]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# Convert points of interest to ee.Geometry.Point objects
ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Extract unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(gee_points)
    distances, indices = tree.query(poi_points)
    return indices

# Find nearest GEE points for each POI
gee_indices = find_nearest_points(gee_points, points_of_interest)

# Create comparison table
comparison_data = {
    "POI_Lon": [points_of_interest[i][0] for i in range(len(points_of_interest))],
    "POI_Lat": [points_of_interest[i][1] for i in range(len(points_of_interest))],
    "GEE_Lon": [gee_points[gee_indices[i]][0] for i in range(len(points_of_interest))],
    "GEE_Lat": [gee_points[gee_indices[i]][1] for i in range(len(points_of_interest))]
}

comparison_df = pd.DataFrame(comparison_data)

# Print comparison table
print(comparison_df)


      POI_Lon    POI_Lat     GEE_Lon    GEE_Lat
0  170.891426 -43.999248  170.891412 -43.999258
1  170.891551 -43.999248  170.891592 -43.999168
2  170.891676 -43.999248  170.891592 -43.999168
3  170.891801 -43.999248  170.891682 -43.999078
4  170.891926 -43.999248  170.891682 -43.999078
5  170.891426 -43.999158  170.891412 -43.999258
6  170.891926 -43.999158  170.891682 -43.999078
7  170.891426 -43.999068  170.891412 -43.999258
8  170.891926 -43.999068  170.891682 -43.999078


In [1]:
import ee
import pandas as pd
import pytz
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Map GEE points to POIs
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Merge DataFrames based on Nearest POI
def merge_dataframes_by_poi(pixels_df, clouds_df, sun_df):
    merged_dfs = []
    for i in range(len(points_of_interest)):
        point_pixels = pixels_df[pixels_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        point_clouds = clouds_df[clouds_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        
        point_df = pd.merge(point_pixels, point_clouds, on='timestamp', how='outer', suffixes=('', '_cloud'))
        point_df = pd.merge(point_df, sun_df, on='timestamp', how='outer')
        point_df = point_df.rename(columns=lambda x: f'point_{i+1}_{x}' if x not in ['timestamp'] else x)
        merged_dfs.append(point_df)
    
    # Concatenate all point DataFrames
    merged_df = pd.concat(merged_dfs, axis=1)
    return merged_df

# Example usage
final_df = merge_dataframes_by_poi(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)


KeyError: 'system:time_start'

In [2]:
import ee
import pandas as pd
import pytz
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'system:time_start': image.get('system:time_start'), 'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Merge DataFrames based on timestamp
def merge_dataframes_by_timestamp(pixels_df, clouds_df, sun_df):
    merged_df = pd.merge(pixels_df, clouds_df, on=['timestamp', 'longitude', 'latitude'], how='outer', suffixes=('', '_cloud'))
    merged_df = pd.merge(merged_df, sun_df, on='timestamp', how='outer')
    return merged_df

# Example usage
final_df = merge_dataframes_by_timestamp(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)


                                          id   longitude   latitude  \
0                                        NaN  170.891412 -43.999258   
1                                        NaN  170.891412 -43.999168   
2                                        NaN  170.891412 -43.999078   
3                                        NaN  170.891592 -43.999258   
4                                        NaN  170.891682 -43.999258   
...                                      ...         ...        ...   
8419  20240616T222551_20240616T222548_T59GMM  170.891682 -43.999258   
8420  20240616T222551_20240616T222548_T59GMM  170.891772 -43.999258   
8421  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999258   
8422  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999168   
8423  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999078   

              time      B2      B3      B4      B8     B8A     B11     B12  \
0              NaN     NaN     NaN     NaN     NaN     NaN     NaN   

In [3]:
import ee
import pandas as pd
import pytz

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'system:time_start': image.get('system:time_start'), 'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Merge DataFrames based on timestamp and coordinates
def merge_dataframes(pixels_df, clouds_df, sun_df):
    merged_df = pd.merge(pixels_df, clouds_df, on=['timestamp', 'longitude', 'latitude'], how='outer', suffixes=('', '_cloud'))
    merged_df = pd.merge(merged_df, sun_df, on='timestamp', how='outer')
    return merged_df

# Example usage
final_df = merge_dataframes(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)


                                          id   longitude   latitude  \
0                                        NaN  170.891412 -43.999258   
1                                        NaN  170.891412 -43.999168   
2                                        NaN  170.891412 -43.999078   
3                                        NaN  170.891592 -43.999258   
4                                        NaN  170.891682 -43.999258   
...                                      ...         ...        ...   
8419  20240616T222551_20240616T222548_T59GMM  170.891682 -43.999258   
8420  20240616T222551_20240616T222548_T59GMM  170.891772 -43.999258   
8421  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999258   
8422  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999168   
8423  20240616T222551_20240616T222548_T59GMM  170.891951 -43.999078   

              time      B2      B3      B4      B8     B8A     B11     B12  \
0              NaN     NaN     NaN     NaN     NaN     NaN     NaN   

In [5]:
import ee
import pandas as pd

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'system:time_start': image.get('system:time_start'), 'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Merge DataFrames based on timestamp and coordinates
def merge_dataframes(pixels_df, clouds_df, sun_df):
    merged_df = pd.merge(pixels_df, clouds_df, on=['timestamp', 'longitude', 'latitude'], how='outer', suffixes=('', '_cloud'))
    merged_df = pd.merge(merged_df, sun_df, on='timestamp', how='outer')
    return merged_df

# Ensure each timestamp has one row with all columns
def ensure_single_row_per_timestamp(df):
    df = df.sort_values(by='timestamp')
    df = df.groupby('timestamp').first().reset_index()
    return df

# Example usage
final_df = merge_dataframes(pixel_values_df, cloud_values_df, sun_angles_df)
final_df = ensure_single_row_per_timestamp(final_df)

print(final_df)


               timestamp                                      id   longitude  \
0    2018-01-03 10:36:55                                    None  170.891412   
1    2018-01-05 10:25:28                                    None  170.891951   
2    2018-01-08 10:36:53                                    None  170.891951   
3    2018-01-10 10:25:29                                    None  170.891772   
4    2018-01-13 10:36:54                                    None  170.891412   
..                   ...                                     ...         ...   
931  2024-06-07 10:28:31  20240606T222551_20240606T222547_T59GMM  170.891951   
932  2024-06-10 10:38:28  20240609T223711_20240609T223714_T59GMM  170.891951   
933  2024-06-12 10:28:28  20240611T222549_20240611T222545_T59GMM  170.891951   
934  2024-06-15 10:38:25  20240614T223709_20240614T223711_T59GMM  170.891951   
935  2024-06-17 10:28:31  20240616T222551_20240616T222548_T59GMM  170.891951   

      latitude          time      B2   

In [15]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'system:time_start': image.get('system:time_start'), 'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Map GEE points to POIs
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Merge DataFrames based on Nearest POI and include sun angles
def merge_dataframes_by_poi(pixels_df, clouds_df, sun_df):
    merged_dfs = []
    for i in range(len(points_of_interest)):
        point_pixels = pixels_df[pixels_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        point_clouds = clouds_df[clouds_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        
        point_df = pd.merge(point_pixels, point_clouds, on='timestamp', how='outer', suffixes=('', '_cloud'))
        point_df = pd.merge(point_df, sun_df, on='timestamp', how='outer')
        point_df = point_df.rename(columns=lambda x: f'point_{i+1}_{x}' if x not in ['timestamp'] else x)
        merged_dfs.append(point_df)
    
    # Concatenate all point DataFrames
    merged_df = pd.concat(merged_dfs, axis=1)
    return merged_df

# Example usage
final_df = merge_dataframes_by_poi(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)


KeyboardInterrupt: 

In [None]:
final_df

Unnamed: 0,point_1_id,point_1_time,point_1_B2,point_1_B3,point_1_B4,point_1_B8,point_1_B8A,point_1_B11,point_1_B12,timestamp,...,point_9_B8A,point_9_B11,point_9_B12,timestamp.1,point_9_id_cloud,point_9_time_cloud,point_9_cs,point_9_cs_cdf,point_9_sun_zenith,point_9_sun_azimuth
0,,,,,,,,,,2018-01-03 10:36:55,...,,,,2018-01-03 10:36:55,20180102T223701_20180102T223655_T59GMM,1.514933e+12,0.427451,0.674510,,
1,,,,,,,,,,2018-01-05 10:25:28,...,,,,2018-01-05 10:25:28,20180104T222529_20180104T222528_T59GMM,1.515105e+12,0.027451,0.117647,,
2,,,,,,,,,,2018-01-08 10:36:53,...,,,,2018-01-08 10:36:53,20180107T223659_20180107T223653_T59GMM,1.515365e+12,0.054902,0.113725,,
3,,,,,,,,,,2018-01-10 10:25:29,...,,,,2018-01-10 10:25:29,20180109T222531_20180109T222529_T59GMM,1.515537e+12,0.000000,0.074510,,
4,,,,,,,,,,2018-01-13 10:36:54,...,,,,2018-01-13 10:36:54,20180112T223651_20180112T223654_T59GMM,1.515797e+12,0.007843,0.047059,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,20240606T222551_20240606T222547_T59GMM,1.717713e+12,75.0,110.0,79.0,32.0,21.0,141.0,133.0,2024-06-07 10:28:31,...,105.0,245.0,173.0,2024-06-07 10:28:31,20240606T222551_20240606T222547_T59GMM,1.717713e+12,0.831373,0.929412,72.853098,30.895219
932,20240609T223711_20240609T223714_T59GMM,1.717973e+12,9368.0,8528.0,8352.0,8304.0,7926.0,3071.0,3138.0,2024-06-10 10:38:28,...,7843.0,2985.0,3066.0,2024-06-10 10:38:28,20240609T223711_20240609T223714_T59GMM,1.717973e+12,0.011765,0.062745,72.248076,28.705300
933,20240611T222549_20240611T222545_T59GMM,1.718145e+12,2002.0,2026.0,2002.0,2544.0,2420.0,1666.0,1487.0,2024-06-12 10:28:28,...,2495.0,1687.0,1528.0,2024-06-12 10:28:28,20240611T222549_20240611T222545_T59GMM,1.718145e+12,0.341176,0.564706,73.307209,30.958897
934,20240614T223709_20240614T223711_T59GMM,1.718405e+12,484.0,453.0,511.0,752.0,1011.0,792.0,715.0,2024-06-15 10:38:25,...,541.0,440.0,370.0,2024-06-15 10:38:25,20240614T223709_20240614T223711_T59GMM,1.718405e+12,0.356863,0.635294,72.610559,28.834850


In [16]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Initialize Earth Engine
ee.Initialize(project='data690-zhouhaomatt')

# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the ROI using a central point and buffer
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Setup image and cloud score collections
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED').filterDate('2018-01-01', '2024-06-19').filterBounds(roi)

# Data extraction setup
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    exit()

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    df = pd.DataFrame(data[1:], columns=data[0])
    df['timestamp'] = pd.to_datetime(df['time'], unit='ms').dt.tz_localize('UTC').dt.tz_convert(timezone)
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

pixel_values_df = convert_df(pixel_values)
cloud_values_df = convert_df(cloud_values)

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    sun_zenith = ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE'))
    sun_azimuth = ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    return ee.Feature(None, {'system:time_start': image.get('system:time_start'), 'sun_zenith': sun_zenith, 'sun_azimuth': sun_azimuth})

# Apply the function to each image in the collection
sun_angles = images.map(extract_sun_angles).getInfo()

# Convert sun angles to dataframe
sun_angles_df = pd.DataFrame([{'timestamp': pd.to_datetime(image['properties']['system:time_start'], unit='ms').tz_localize('UTC').tz_convert('Etc/GMT-12').strftime('%Y-%m-%d %H:%M:%S'), 
                               'sun_zenith': image['properties']['sun_zenith'], 
                               'sun_azimuth': image['properties']['sun_azimuth']} 
                              for image in sun_angles['features']])

# Check the unique coordinates returned
unique_coords = pixel_values_df[['longitude', 'latitude']].drop_duplicates()
print("Unique coordinates returned by GEE:")
print(unique_coords)

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    tree = cKDTree(poi_points)
    distances, indices = tree.query(gee_points)
    return indices

# Get unique GEE returned points
gee_points = list(zip(pixel_values_df['longitude'].unique(), pixel_values_df['latitude'].unique()))

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Map GEE points to POIs
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Merge DataFrames based on Nearest POI and include sun angles
def merge_dataframes_by_poi(pixels_df, clouds_df, sun_df):
    merged_dfs = []
    for i in range(len(points_of_interest)):
        point_pixels = pixels_df[pixels_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        point_clouds = clouds_df[clouds_df['poi_index'] == i].drop(columns=['longitude', 'latitude', 'poi_index'])
        
        point_df = pd.merge(point_pixels, point_clouds, on='timestamp', how='outer', suffixes=('', '_cloud'))
        point_df = pd.merge(point_df, sun_df, on='timestamp', how='outer')
        point_df = point_df.rename(columns=lambda x: f'point_{i+1}_{x}' if x not in ['timestamp'] else x)
        merged_dfs.append(point_df)
    
    # Concatenate all point DataFrames
    merged_df = pd.concat(merged_dfs, axis=1)
    return merged_df

# Example usage
final_df = merge_dataframes_by_poi(pixel_values_df, cloud_values_df, sun_angles_df)
print(final_df)


Unique coordinates returned by GEE:
       longitude   latitude
0     170.891412 -43.999258
798   170.891592 -43.999258
1596  170.891682 -43.999258
2394  170.891772 -43.999258
3192  170.891951 -43.999258
3990  170.891412 -43.999168
4788  170.891951 -43.999168
5586  170.891412 -43.999078
6384  170.891951 -43.999078
                                 point_1_id  point_1_time  point_1_B2  \
0                                       NaN           NaN         NaN   
1                                       NaN           NaN         NaN   
2                                       NaN           NaN         NaN   
3                                       NaN           NaN         NaN   
4                                       NaN           NaN         NaN   
..                                      ...           ...         ...   
931  20240606T222551_20240606T222547_T59GMM  1.717713e+12        75.0   
932  20240609T223711_20240609T223714_T59GMM  1.717973e+12      9368.0   
933  20240611T222549_202406

In [20]:
final_df=final_df.dropna()

In [22]:
final_df.to_csv('improved_sentinel2_extraction.csv', index=False)