In [100]:
import ee
import pandas as pd
# Initialise the Earth Engine client for the named Cloud project.
# NOTE(review): this presumably relies on credentials already being stored
# (e.g. from an earlier ee.Authenticate()) — confirm on a fresh machine.
ee.Initialize(project='data690-zhouhaomatt')
import pytz



In [101]:


# Sample locations as (longitude, latitude) pairs.
# Layout: five positions along the southernmost latitude, then only the
# western and eastern ends on each of the two rows north of it.
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817),
]

# One Earth Engine point geometry per coordinate pair, in the same order.
ee_points = [ee.Geometry.Point(*lon_lat) for lon_lat in points_of_interest]

# Region of interest (ROI): a point grown outwards by 30 (metres, since no
# projection is passed to buffer()).
# NOTE(review): despite its name, central_point uses the middle longitude of
# the grid but its northernmost latitude; the geometric centre would be at
# latitude -43.99915824976567 — confirm this placement is intended.
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)


In [102]:
# Both collections below get the same date window and the same ROI filter.
date_start, date_end = '2018-01-01', '2024-06-19'

# Sentinel-2 surface reflectance (harmonized) scenes
images = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filterDate(date_start, date_end)
    .filterBounds(roi)
)

# Cloud Score+ data for the Sentinel-2 harmonized scenes
clouds = (
    ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')
    .filterDate(date_start, date_end)
    .filterBounds(roi)
)




In [103]:
# Set up the data extraction pipeline

# Sampling geometry: all points of interest combined into one MultiPoint.
region = ee.Geometry.MultiPoint(ee_points)
scale = 10  # both collections are sampled at the same 10 meter scale
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']

# Sentinel-2 band values at the sample points.
# getInfo() pulls the result client-side as a list of rows; the first row
# holds the column names (it is used as the header when the frame is built).
pixel_values = (
    images
    .select(band_list)
    .getRegion(region, scale)
    .getInfo()
)

# Cloud score values at the same points, fetched the same way.
cloud_values = (
    clouds
    .select(cloud_bands)
    .getRegion(region, scale)
    .getInfo()
)





In [107]:
# Convert to pandas DataFrames

def _region_rows_to_frame(rows):
    """Build a DataFrame from a getRegion().getInfo() result.

    Parameters
    ----------
    rows : list
        Row list whose first element is the header (column names) and whose
        remaining elements are the data rows. Must include a 'time' column
        of epoch milliseconds.

    Returns
    -------
    pandas.DataFrame
        The rows as a frame, plus a 'timestamp' column holding the 'time'
        value rendered as a 'YYYY-MM-DD HH:MM:SS' string at UTC+12.
    """
    frame = pd.DataFrame(rows[1:], columns=rows[0])

    # Epoch milliseconds -> tz-aware UTC -> fixed UTC+12.
    # 'Etc/GMT-12' follows the POSIX sign convention, so the minus sign
    # means twelve hours *ahead* of UTC.
    # NOTE(review): this is a fixed offset with no daylight saving — confirm
    # the ground data is recorded at a constant UTC+12 and not in local
    # wall-clock time.
    stamp_utc = pd.to_datetime(frame['time'], unit='ms', utc=True)
    stamp_local = stamp_utc.dt.tz_convert('Etc/GMT-12')

    # Stored as a plain string so it matches the ground data format.
    frame['timestamp'] = stamp_local.dt.strftime('%Y-%m-%d %H:%M:%S')
    return frame


pixel_values_df = _region_rows_to_frame(pixel_values)
cloud_values_df = _region_rows_to_frame(cloud_values)


In [108]:
# Preview the Sentinel-2 band values; the rendered table elides the middle rows.
pixel_values_df

Unnamed: 0,id,longitude,latitude,time,B2,B3,B4,B8,B8A,B11,B12,timestamp
0,20180827T222541_20180827T222807_T59GMM,170.891412,-43.999258,1535408903936,4720.0,4724.0,4792.0,5224.0,4954.0,2263,2195,2018-08-28 10:28:23
1,20181213T223659_20181213T223656_T59GMM,170.891412,-43.999258,1544740695731,2264.0,2252.0,2136.0,3318.0,3803.0,3274,2938,2018-12-14 10:38:15
2,20181215T222531_20181215T222536_T59GMM,170.891412,-43.999258,1544912896000,8392.0,7808.0,7360.0,7536.0,7384.0,5487,4050,2018-12-16 10:28:16
3,20181218T223651_20181218T223734_T59GMM,170.891412,-43.999258,1545172693000,6812.0,6896.0,6976.0,7540.0,7385.0,3879,2740,2018-12-19 10:38:13
4,20181220T222539_20181220T222533_T59GMM,170.891412,-43.999258,1545344900000,2152.0,2330.0,2066.0,3628.0,3513.0,2676,2317,2018-12-21 10:28:20
...,...,...,...,...,...,...,...,...,...,...,...,...
7177,20240606T222551_20240606T222547_T59GMM,170.891951,-43.999078,1717712911071,42.0,148.0,118.0,21.0,105.0,245,173,2024-06-07 10:28:31
7178,20240609T223711_20240609T223714_T59GMM,170.891951,-43.999078,1717972708111,9416.0,8520.0,8168.0,8448.0,7843.0,2985,3066,2024-06-10 10:38:28
7179,20240611T222549_20240611T222545_T59GMM,170.891951,-43.999078,1718144908758,1990.0,2090.0,2002.0,2668.0,2495.0,1687,1528,2024-06-12 10:28:28
7180,20240614T223709_20240614T223711_T59GMM,170.891951,-43.999078,1718404705157,295.0,288.0,248.0,430.0,541.0,440,370,2024-06-15 10:38:25


In [109]:
# Preview the cloud score values; the rendered table elides the middle rows.
cloud_values_df

Unnamed: 0,id,longitude,latitude,time,cs,cs_cdf,timestamp
0,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999258,1514932615460,0.431373,0.682353,2018-01-03 10:36:55
1,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999258,1515104728460,0.027451,0.129412,2018-01-05 10:25:28
2,20180107T223659_20180107T223653_T59GMM,170.891412,-43.999258,1515364613460,0.050980,0.101961,2018-01-08 10:36:53
3,20180109T222531_20180109T222529_T59GMM,170.891412,-43.999258,1515536729460,0.003922,0.078431,2018-01-10 10:25:29
4,20180112T223651_20180112T223654_T59GMM,170.891412,-43.999258,1515796614460,0.011765,0.050980,2018-01-13 10:36:54
...,...,...,...,...,...,...,...
8401,20240606T222551_20240606T222547_T59GMM,170.891951,-43.999078,1717712911071,0.831373,0.929412,2024-06-07 10:28:31
8402,20240609T223711_20240609T223714_T59GMM,170.891951,-43.999078,1717972708111,0.011765,0.062745,2024-06-10 10:38:28
8403,20240611T222549_20240611T222545_T59GMM,170.891951,-43.999078,1718144908758,0.341176,0.564706,2024-06-12 10:28:28
8404,20240614T223709_20240614T223711_T59GMM,170.891951,-43.999078,1718404705157,0.356863,0.635294,2024-06-15 10:38:25
