In [1]:
import ee
import pandas as pd
from scipy.spatial import cKDTree

# Open the Earth Engine session under the project used by this notebook
ee.Initialize(project='data690-zhouhaomatt')

# Points of interest, each written as a (longitude, latitude) pair
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# One Earth Engine point geometry per (longitude, latitude) pair
ee_points = list(map(lambda lon_lat: ee.Geometry.Point(*lon_lat), points_of_interest))

# Region of interest: a buffer of 30 around a central point
# (NOTE(review): buffer distance is presumably metres, the Earth Engine default - confirm)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Both collections share the same date window and the same spatial filter
date_start, date_end = '2018-01-01', '2024-06-19'
images = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filterDate(date_start, date_end)
    .filterBounds(roi)
)
clouds = (
    ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')
    .filterDate(date_start, date_end)
    .filterBounds(roi)
)

# Sampling configuration used by the getRegion calls below
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

# Download the sampled band values and cloud scores for every point.
# Each getInfo() call is a blocking request to Earth Engine and can fail
# (quota, network, too many values requested).
try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    # Re-raise rather than call exit(): inside an IPython/Jupyter kernel,
    # exit() asks the kernel to shut down, which throws away all notebook
    # state and hides the traceback. Raising stops this cell (and a
    # "Run All") while leaving the kernel alive for debugging.
    raise

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    """Turn a header-plus-rows table into a DataFrame with a local-time string column.

    Parameters
    ----------
    data : list of lists
        First element is the list of column names, the remaining elements
        are the rows. Must contain a ``time`` column holding epoch
        milliseconds.
    timezone : str
        Time zone the UTC instants are converted to before formatting.
        The default ``'Etc/GMT-12'`` is a fixed UTC+12 offset (the ``Etc/``
        names use an inverted sign).

    Returns
    -------
    pandas.DataFrame
        All original columns plus ``timestamp`` formatted as
        ``YYYY-MM-DD HH:MM:SS`` in the requested zone.
    """
    header, rows = data[0], data[1:]
    frame = pd.DataFrame(rows, columns=header)

    # Epoch milliseconds -> tz-aware UTC -> requested zone -> plain string
    utc_times = pd.to_datetime(frame['time'], unit='ms').dt.tz_localize('UTC')
    local_times = utc_times.dt.tz_convert(timezone)
    frame['timestamp'] = local_times.dt.strftime('%Y-%m-%d %H:%M:%S')
    return frame

# Tabulate the band values first, then the cloud scores
pixel_values_df, cloud_values_df = (
    convert_df(raw_table) for raw_table in (pixel_values, cloud_values)
)

# Extract sun's zenith and azimuth angles from image metadata
def extract_sun_angles(image):
    """Build a geometry-less feature carrying one image's time and sun angles.

    Reads the image properties ``MEAN_SOLAR_ZENITH_ANGLE`` and
    ``MEAN_SOLAR_AZIMUTH_ANGLE`` and copies them, together with
    ``system:time_start``, onto a new ``ee.Feature`` under the keys
    ``sun_zenith``, ``sun_azimuth`` and ``system:time_start``.
    """
    properties = {
        'system:time_start': image.get('system:time_start'),
        'sun_zenith': ee.Number(image.get('MEAN_SOLAR_ZENITH_ANGLE')),
        'sun_azimuth': ee.Number(image.get('MEAN_SOLAR_AZIMUTH_ANGLE')),
    }
    # None as the first argument means the feature has no geometry
    return ee.Feature(None, properties)

# Run the extractor over every image and pull the result to the client
sun_angles = images.map(extract_sun_angles).getInfo()

# One record per image; the timestamp is formatted the same way as the
# 'timestamp' column of the pixel/cloud tables so the tables can be joined on it
sun_angle_records = []
for feature in sun_angles['features']:
    props = feature['properties']
    acquired = pd.to_datetime(props['system:time_start'], unit='ms')
    acquired = acquired.tz_localize('UTC').tz_convert('Etc/GMT-12')
    sun_angle_records.append({
        'timestamp': acquired.strftime('%Y-%m-%d %H:%M:%S'),
        'sun_zenith': props['sun_zenith'],
        'sun_azimuth': props['sun_azimuth'],
    })
sun_angles_df = pd.DataFrame(sun_angle_records)

# List the distinct (longitude, latitude) pairs present in the pixel table
unique_coords = pixel_values_df.loc[:, ['longitude', 'latitude']].drop_duplicates()
print("Unique coordinates returned by GEE:", unique_coords, sep="\n")

# Find the nearest point
def find_nearest_points(gee_points, poi_points):
    """Return, for each entry of ``gee_points``, the index of its nearest POI.

    A k-d tree is built over ``poi_points`` and queried with ``gee_points``;
    nearness is the tree's default Euclidean distance on the raw coordinate
    values. Only the index array is returned, the distances are discarded.
    """
    _, nearest_idx = cKDTree(poi_points).query(gee_points)
    return nearest_idx

# Get unique GEE returned points.
# Deduplicate (longitude, latitude) as PAIRS. Zipping the two columns'
# .unique() arrays independently would pair unrelated values and stop at the
# shorter array, so most real coordinates would be missing or wrong.
unique_pairs = pixel_values_df[['longitude', 'latitude']].drop_duplicates()
gee_points = list(zip(unique_pairs['longitude'], unique_pairs['latitude']))

# Find nearest POIs for each GEE returned point
poi_indices = find_nearest_points(gee_points, points_of_interest)

# Map GEE points to POIs: tag every row of both tables with the index
# (into points_of_interest) of the POI closest to that row's coordinates
pixel_values_df['poi_index'] = find_nearest_points(list(zip(pixel_values_df['longitude'], pixel_values_df['latitude'])), points_of_interest)
cloud_values_df['poi_index'] = find_nearest_points(list(zip(cloud_values_df['longitude'], cloud_values_df['latitude'])), points_of_interest)

# Merge DataFrames based on Nearest POI and include sun angles
def merge_dataframes_by_poi(pixels_df, clouds_df, sun_df):
    """Build one wide table with a group of columns per point of interest.

    For every index of the module-level ``points_of_interest`` list, the rows
    of ``pixels_df`` and ``clouds_df`` tagged with that ``poi_index`` are
    outer-joined on ``timestamp`` (clashing cloud columns get a ``_cloud``
    suffix), then outer-joined with ``sun_df`` on ``timestamp``. Every column
    except ``timestamp`` is prefixed with ``point_<n>_`` (n starts at 1).

    The per-point tables are then placed side by side with
    ``pd.concat(axis=1)``, so the result contains one ``timestamp`` column
    per point and rows are matched on each table's row index.
    """
    helper_cols = ['longitude', 'latitude', 'poi_index']
    per_point_frames = []

    for poi_number, _ in enumerate(points_of_interest, start=1):
        poi_idx = poi_number - 1
        pixels_here = pixels_df.loc[pixels_df['poi_index'] == poi_idx].drop(columns=helper_cols)
        clouds_here = clouds_df.loc[clouds_df['poi_index'] == poi_idx].drop(columns=helper_cols)

        combined = (
            pixels_here
            .merge(clouds_here, on='timestamp', how='outer', suffixes=('', '_cloud'))
            .merge(sun_df, on='timestamp', how='outer')
        )
        # Prefix everything but the join key with this point's label
        combined.columns = [
            col if col in ['timestamp'] else f'point_{poi_number}_{col}'
            for col in combined.columns
        ]
        per_point_frames.append(combined)

    # Side-by-side concatenation of all per-point tables
    return pd.concat(per_point_frames, axis=1)

# Assemble the wide per-point table and print it
final_df = merge_dataframes_by_poi(
    pixels_df=pixel_values_df,
    clouds_df=cloud_values_df,
    sun_df=sun_angles_df,
)
print(final_df)


Unique coordinates returned by GEE:
       longitude   latitude
0     170.891412 -43.999258
798   170.891592 -43.999258
1596  170.891682 -43.999258
2394  170.891772 -43.999258
3192  170.891951 -43.999258
3990  170.891412 -43.999168
4788  170.891951 -43.999168
5586  170.891412 -43.999078
6384  170.891951 -43.999078
                                 point_1_id  point_1_time  point_1_B2  \
0                                       NaN           NaN         NaN   
1                                       NaN           NaN         NaN   
2                                       NaN           NaN         NaN   
3                                       NaN           NaN         NaN   
4                                       NaN           NaN         NaN   
..                                      ...           ...         ...   
931  20240606T222551_20240606T222547_T59GMM  1.717713e+12        75.0   
932  20240609T223711_20240609T223714_T59GMM  1.717973e+12      9368.0   
933  20240611T222549_202406

In [2]:
# Keep only the rows in which every column has a value
final_df = final_df.dropna(axis=0, how='any')

In [3]:
# Display the table left after the dropna() cell as this cell's output
final_df

Unnamed: 0,point_1_id,point_1_time,point_1_B2,point_1_B3,point_1_B4,point_1_B8,point_1_B8A,point_1_B11,point_1_B12,timestamp,...,point_9_B8A,point_9_B11,point_9_B12,timestamp.1,point_9_id_cloud,point_9_time_cloud,point_9_cs,point_9_cs_cdf,point_9_sun_zenith,point_9_sun_azimuth
139,20181213T223659_20181213T223656_T59GMM,1.544741e+12,2264.0,2252.0,2136.0,3318.0,3803.0,3274.0,2938.0,2018-12-14 10:38:15,...,3837.0,3379.0,2901.0,2018-12-14 10:38:15,20181213T223659_20181213T223656_T59GMM,1.544741e+12,0.203922,0.235294,31.254377,57.960822
140,20181215T222531_20181215T222536_T59GMM,1.544913e+12,8392.0,7808.0,7360.0,7536.0,7384.0,5487.0,4050.0,2018-12-16 10:28:16,...,7365.0,5410.0,3990.0,2018-12-16 10:28:16,20181215T222531_20181215T222536_T59GMM,1.544913e+12,0.003922,0.039216,32.880865,61.583752
141,20181218T223651_20181218T223734_T59GMM,1.545173e+12,6812.0,6896.0,6976.0,7540.0,7385.0,3879.0,2740.0,2018-12-19 10:38:13,...,7437.0,3919.0,2792.0,2018-12-19 10:38:13,20181218T223651_20181218T223734_T59GMM,1.545173e+12,0.011765,0.054902,31.473882,59.043502
142,20181220T222539_20181220T222533_T59GMM,1.545345e+12,2152.0,2330.0,2066.0,3628.0,3513.0,2676.0,2317.0,2018-12-21 10:28:20,...,3761.0,2801.0,2477.0,2018-12-21 10:28:20,20181220T222539_20181220T222533_T59GMM,1.545345e+12,0.090196,0.196078,33.162840,62.481786
144,20181225T222531_20181225T222541_T59GMM,1.545777e+12,11640.0,10616.0,9968.0,9464.0,9040.0,5518.0,4178.0,2018-12-26 10:28:18,...,9267.0,5845.0,4379.0,2018-12-26 10:28:18,20181225T222531_20181225T222541_T59GMM,1.545777e+12,0.003922,0.043137,33.609343,63.157576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,20240606T222551_20240606T222547_T59GMM,1.717713e+12,75.0,110.0,79.0,32.0,21.0,141.0,133.0,2024-06-07 10:28:31,...,105.0,245.0,173.0,2024-06-07 10:28:31,20240606T222551_20240606T222547_T59GMM,1.717713e+12,0.831373,0.929412,72.853098,30.895219
932,20240609T223711_20240609T223714_T59GMM,1.717973e+12,9368.0,8528.0,8352.0,8304.0,7926.0,3071.0,3138.0,2024-06-10 10:38:28,...,7843.0,2985.0,3066.0,2024-06-10 10:38:28,20240609T223711_20240609T223714_T59GMM,1.717973e+12,0.011765,0.062745,72.248076,28.705300
933,20240611T222549_20240611T222545_T59GMM,1.718145e+12,2002.0,2026.0,2002.0,2544.0,2420.0,1666.0,1487.0,2024-06-12 10:28:28,...,2495.0,1687.0,1528.0,2024-06-12 10:28:28,20240611T222549_20240611T222545_T59GMM,1.718145e+12,0.341176,0.564706,73.307209,30.958897
934,20240614T223709_20240614T223711_T59GMM,1.718405e+12,484.0,453.0,511.0,752.0,1011.0,792.0,715.0,2024-06-15 10:38:25,...,541.0,440.0,370.0,2024-06-15 10:38:25,20240614T223709_20240614T223711_T59GMM,1.718405e+12,0.356863,0.635294,72.610559,28.834850


In [4]:
# Write the table to CSV; index=False leaves the row labels out of the file
final_df.to_csv(path_or_buf='improved_sentinel2_extraction.csv', index=False)

In [5]:
# Columns to discard, generated instead of listed by hand.
# For every point 1-9: the raw epoch 'time' column and the cloud table's
# copies of 'time' and 'id'.
point_numbers = range(1, 10)
redundant_columns = [
    f'point_{n}_{suffix}'
    for suffix in ('time', 'time_cloud', 'id_cloud')
    for n in point_numbers
]
# Sun angles are dropped for points 2-9 only, so point 1 keeps its copy
redundant_columns += [
    f'point_{n}_{suffix}'
    for suffix in ('sun_zenith', 'sun_azimuth')
    for n in point_numbers[1:]
]

cleaned_df = final_df.drop(columns=redundant_columns)


In [6]:
# Display the table after the redundant columns were dropped
cleaned_df

Unnamed: 0,point_1_id,point_1_B2,point_1_B3,point_1_B4,point_1_B8,point_1_B8A,point_1_B11,point_1_B12,timestamp,point_1_cs,...,point_9_B2,point_9_B3,point_9_B4,point_9_B8,point_9_B8A,point_9_B11,point_9_B12,timestamp.1,point_9_cs,point_9_cs_cdf
139,20181213T223659_20181213T223656_T59GMM,2264.0,2252.0,2136.0,3318.0,3803.0,3274.0,2938.0,2018-12-14 10:38:15,0.196078,...,2620.0,2732.0,2564.0,3980.0,3837.0,3379.0,2901.0,2018-12-14 10:38:15,0.203922,0.235294
140,20181215T222531_20181215T222536_T59GMM,8392.0,7808.0,7360.0,7536.0,7384.0,5487.0,4050.0,2018-12-16 10:28:16,0.003922,...,8304.0,7736.0,7284.0,7480.0,7365.0,5410.0,3990.0,2018-12-16 10:28:16,0.003922,0.039216
141,20181218T223651_20181218T223734_T59GMM,6812.0,6896.0,6976.0,7540.0,7385.0,3879.0,2740.0,2018-12-19 10:38:13,0.011765,...,6828.0,6964.0,7032.0,7568.0,7437.0,3919.0,2792.0,2018-12-19 10:38:13,0.011765,0.054902
142,20181220T222539_20181220T222533_T59GMM,2152.0,2330.0,2066.0,3628.0,3513.0,2676.0,2317.0,2018-12-21 10:28:20,0.047059,...,1614.0,1868.0,2014.0,3034.0,3761.0,2801.0,2477.0,2018-12-21 10:28:20,0.090196,0.196078
144,20181225T222531_20181225T222541_T59GMM,11640.0,10616.0,9968.0,9464.0,9040.0,5518.0,4178.0,2018-12-26 10:28:18,0.003922,...,11784.0,10736.0,10032.0,9568.0,9267.0,5845.0,4379.0,2018-12-26 10:28:18,0.003922,0.043137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,20240606T222551_20240606T222547_T59GMM,75.0,110.0,79.0,32.0,21.0,141.0,133.0,2024-06-07 10:28:31,0.839216,...,42.0,148.0,118.0,21.0,105.0,245.0,173.0,2024-06-07 10:28:31,0.831373,0.929412
932,20240609T223711_20240609T223714_T59GMM,9368.0,8528.0,8352.0,8304.0,7926.0,3071.0,3138.0,2024-06-10 10:38:28,0.011765,...,9416.0,8520.0,8168.0,8448.0,7843.0,2985.0,3066.0,2024-06-10 10:38:28,0.011765,0.062745
933,20240611T222549_20240611T222545_T59GMM,2002.0,2026.0,2002.0,2544.0,2420.0,1666.0,1487.0,2024-06-12 10:28:28,0.349020,...,1990.0,2090.0,2002.0,2668.0,2495.0,1687.0,1528.0,2024-06-12 10:28:28,0.341176,0.564706
934,20240614T223709_20240614T223711_T59GMM,484.0,453.0,511.0,752.0,1011.0,792.0,715.0,2024-06-15 10:38:25,0.325490,...,295.0,288.0,248.0,430.0,541.0,440.0,370.0,2024-06-15 10:38:25,0.356863,0.635294


In [7]:
# Write the reduced table to CSV without the row labels
cleaned_df.to_csv(path_or_buf='cleaned_improved_sentinel2_extraction.csv', index=False)

In [16]:
# Column positions 21, 32, ..., 98 (a step of 11) selected by position.
# NOTE(review): these are presumably the 'timestamp' columns of points 2-9;
# the positions are hard-coded and depend on the exact column layout.
timestamp_positions = list(range(21, 99, 11))
cols_to_drop = cleaned_df.columns[timestamp_positions]


In [17]:
# Display the labels found at the selected column positions
cols_to_drop

Index(['timestamp', 'timestamp', 'timestamp', 'timestamp', 'timestamp',
       'timestamp', 'timestamp', 'timestamp'],
      dtype='object')

In [18]:
# Keep exactly one 'timestamp' column (the first) and remove the repeats.
# cols_to_drop holds LABELS, and every one of them is 'timestamp', so
# drop(columns=cols_to_drop) would remove every column with that name,
# including the first one that is meant to stay. Masking on duplicated
# column names keeps the first occurrence, and re-running the cell is a no-op.
cleaned_df = cleaned_df.loc[:, ~cleaned_df.columns.duplicated(keep='first')]

In [20]:
# Write the final table to CSV without the row labels
cleaned_df.to_csv(
    path_or_buf='final_cleaned_improved_sentinel2_extraction_with_1_timestamp.csv',
    index=False,
)