In [3]:
import ee
import pandas as pd
import pytz

# Start an Earth Engine session bound to the project used by this notebook
ee.Initialize(project='data690-zhouhaomatt')

# Sampling locations, each given as a (longitude, latitude) pair
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817),
]

# One Earth Engine point geometry per sampling location
ee_points = [ee.Geometry.Point(*lon_lat) for lon_lat in points_of_interest]

# Region of interest: a central point buffered by 30
# (presumably metres, Earth Engine's default buffer unit -- TODO confirm)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)

# Image and cloud-score collections, both limited to the same date range and ROI
images = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filterDate('2018-01-01', '2024-06-19')
    .filterBounds(roi)
)
clouds = (
    ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')
    .filterDate('2018-01-01', '2024-06-19')
    .filterBounds(roi)
)

# Extraction parameters: sample every point of interest at the given scale
region = ee.Geometry.MultiPoint(ee_points)
scale = 10
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

# Request the per-point band values and cloud scores for every image in each
# collection. getInfo() is where the request is actually sent, so any
# Earth Engine or network failure surfaces inside this try block.
try:
    pixel_values = images.select(band_list).getRegion(region, scale).getInfo()
    cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()
except Exception as e:
    print(f"Failed to retrieve data: {e}")
    # Re-raise rather than calling exit(): in a notebook exit() shuts the
    # kernel down (losing all state), and in a script it exits with status 0,
    # hiding the failure. Re-raising stops the cell and keeps the traceback.
    raise

# Convert to pandas dataframes and adjust timestamps
def convert_df(data, timezone='Etc/GMT-12'):
    """Turn a header-plus-rows table into a DataFrame with a local-time string column.

    Args:
        data: Sequence whose first element is the list of column names and
            whose remaining elements are the data rows. Must contain a
            'time' column holding epoch milliseconds.
        timezone: Time zone name the UTC instants are converted to. The
            default 'Etc/GMT-12' is a fixed UTC+12 offset (the Etc/GMT names
            use an inverted sign), so no daylight-saving shift is applied.

    Returns:
        A new DataFrame with the original columns plus 'timestamp', formatted
        as 'YYYY-MM-DD HH:MM:SS' in the requested time zone.
    """
    frame = pd.DataFrame(data=data[1:], columns=data[0])
    # Epoch milliseconds are naive UTC instants; mark them as UTC before converting.
    utc_times = pd.to_datetime(frame['time'], unit='ms').dt.tz_localize('UTC')
    local_times = utc_times.dt.tz_convert(timezone)
    frame['timestamp'] = local_times.dt.strftime('%Y-%m-%d %H:%M:%S')
    return frame

# Convert both Earth Engine responses (band values, then cloud scores) to DataFrames
pixel_values_df, cloud_values_df = (
    convert_df(table) for table in (pixel_values, cloud_values)
)





In [5]:
# Display the extracted band-value table (pandas truncates the middle rows)
pixel_values_df

Unnamed: 0,id,longitude,latitude,time,B2,B3,B4,B8,B8A,B11,B12,timestamp
0,20180827T222541_20180827T222807_T59GMM,170.891412,-43.999258,1535408903936,4720.0,4724.0,4792.0,5224.0,4954.0,2263,2195,2018-08-28 10:28:23
1,20181213T223659_20181213T223656_T59GMM,170.891412,-43.999258,1544740695731,2264.0,2252.0,2136.0,3318.0,3803.0,3274,2938,2018-12-14 10:38:15
2,20181215T222531_20181215T222536_T59GMM,170.891412,-43.999258,1544912896000,8392.0,7808.0,7360.0,7536.0,7384.0,5487,4050,2018-12-16 10:28:16
3,20181218T223651_20181218T223734_T59GMM,170.891412,-43.999258,1545172693000,6812.0,6896.0,6976.0,7540.0,7385.0,3879,2740,2018-12-19 10:38:13
4,20181220T222539_20181220T222533_T59GMM,170.891412,-43.999258,1545344900000,2152.0,2330.0,2066.0,3628.0,3513.0,2676,2317,2018-12-21 10:28:20
...,...,...,...,...,...,...,...,...,...,...,...,...
7177,20240606T222551_20240606T222547_T59GMM,170.891951,-43.999078,1717712911071,42.0,148.0,118.0,21.0,105.0,245,173,2024-06-07 10:28:31
7178,20240609T223711_20240609T223714_T59GMM,170.891951,-43.999078,1717972708111,9416.0,8520.0,8168.0,8448.0,7843.0,2985,3066,2024-06-10 10:38:28
7179,20240611T222549_20240611T222545_T59GMM,170.891951,-43.999078,1718144908758,1990.0,2090.0,2002.0,2668.0,2495.0,1687,1528,2024-06-12 10:28:28
7180,20240614T223709_20240614T223711_T59GMM,170.891951,-43.999078,1718404705157,295.0,288.0,248.0,430.0,541.0,440,370,2024-06-15 10:38:25


In [6]:
# Outer-join band values with cloud scores on timestamp and sampled coordinate;
# cloud-side columns that clash with band-side names get a '_cloud' suffix.
merged_df = pixel_values_df.merge(
    cloud_values_df,
    on=['timestamp', 'longitude', 'latitude'],
    how='outer',
    suffixes=('', '_cloud'),
)

In [31]:
# Persist both tables as CSV (row index omitted) at paths relative to the
# working directory, so later cells can reload them without querying Earth Engine.
pixel_values_df.to_csv('pixel_values.csv', index=False)
cloud_values_df.to_csv('cloud_values.csv', index=False)

In [32]:
# Reload both tables from their CSV files (band values first, then cloud scores)
pixel_values_df, cloud_values_df = (
    pd.read_csv(csv_path) for csv_path in ('pixel_values.csv', 'cloud_values.csv')
)


In [33]:


def _rows_nearest_point(df, lon, lat, tolerance):
    """Return the rows of ``df`` recorded at the sampled coordinate closest to (lon, lat).

    Only coordinates within ``tolerance`` degrees of the point on both axes are
    candidates. An empty frame with the same columns is returned when there are none.
    """
    in_window = (df['longitude'].between(lon - tolerance, lon + tolerance)
                 & df['latitude'].between(lat - tolerance, lat + tolerance))
    window = df[in_window]
    if window.empty:
        return window
    # The window can contain several distinct sampled coordinates when the
    # points are closer together than the tolerance; keep only the closest one.
    dist_sq = ((window['longitude'] - lon) ** 2
               + (window['latitude'] - lat) ** 2).to_numpy()
    nearest = dist_sq.argmin()
    nearest_lon = window['longitude'].iloc[nearest]
    nearest_lat = window['latitude'].iloc[nearest]
    return window[(window['longitude'] == nearest_lon)
                  & (window['latitude'] == nearest_lat)]


def merge_dataframes_optimized(pixels_df, clouds_df, points, tolerance=0.0001):
    """Build one wide, timestamp-keyed table with a column group per point.

    For every point, the band rows and cloud rows sampled at the coordinate
    nearest to that point are outer-joined on 'timestamp'. The per-point
    results are then outer-joined on 'timestamp' so that each output row
    describes a single timestamp across all points.

    Args:
        pixels_df: DataFrame with 'longitude', 'latitude', 'timestamp' and
            band columns.
        clouds_df: DataFrame with 'longitude', 'latitude', 'timestamp' and
            cloud-score columns.
        points: Iterable of (longitude, latitude) pairs.
        tolerance: Maximum per-axis distance, in the same units as the
            coordinate columns, between a point and the sampled coordinate
            assigned to it. Defaults to 0.0001.

    Returns:
        DataFrame whose first column is 'timestamp', followed by
        'point_{i}_{column}' columns (i starts at 1, in ``points`` order).
        Columns coming from ``clouds_df`` that clash with ``pixels_df`` names
        carry a '_cloud' suffix. Rows are ordered by 'timestamp'. An empty
        frame with only a 'timestamp' column is returned when ``points`` is
        empty.

    Notes:
        Matching uses the nearest sampled coordinate rather than everything
        inside the tolerance box, because neighbouring points can be closer
        together than the tolerance and would otherwise leak rows into each
        other. Frames are aligned by 'timestamp', not by row position, so
        points with different sets of timestamps stay aligned.
    """
    points = list(points)
    if not points:
        return pd.DataFrame(columns=['timestamp'])

    # Cheap bounding-box pre-filter so the per-point searches scan fewer rows.
    lons, lats = zip(*points)
    lon_min, lon_max = min(lons) - tolerance, max(lons) + tolerance
    lat_min, lat_max = min(lats) - tolerance, max(lats) + tolerance
    pixels_filtered = pixels_df[pixels_df['longitude'].between(lon_min, lon_max)
                                & pixels_df['latitude'].between(lat_min, lat_max)]
    clouds_filtered = clouds_df[clouds_df['longitude'].between(lon_min, lon_max)
                                & clouds_df['latitude'].between(lat_min, lat_max)]

    point_frames = []
    for i, (lon, lat) in enumerate(points, start=1):
        point_pixels = _rows_nearest_point(pixels_filtered, lon, lat, tolerance)
        point_clouds = _rows_nearest_point(clouds_filtered, lon, lat, tolerance)
        # Coordinates are implied by the point number, so drop them before merging.
        point_pixels = point_pixels.drop(columns=['longitude', 'latitude'])
        point_clouds = point_clouds.drop(columns=['longitude', 'latitude'])

        point_df = pd.merge(point_pixels, point_clouds, on='timestamp',
                            how='outer', suffixes=('', '_cloud'))
        point_df = point_df.rename(
            columns=lambda col, i=i: col if col == 'timestamp' else f'point_{i}_{col}')
        point_frames.append(point_df)

    # Join on the timestamp key (not on row position) so rows line up even
    # when some points lack observations for some timestamps.
    merged_df = point_frames[0]
    for point_df in point_frames[1:]:
        merged_df = merged_df.merge(point_df, on='timestamp', how='outer')

    ordered = ['timestamp'] + [col for col in merged_df.columns if col != 'timestamp']
    return merged_df[ordered]

# Example usage
# Build the per-point wide table from the band and cloud-score frames
final_df = merge_dataframes_optimized(pixel_values_df, cloud_values_df, points_of_interest)
# Preview with the notebook's rich display instead of print()-ing the whole frame as text
final_df.head()

: 