In [1]:
import ee
import pandas as pd
ee.Initialize(project='data690-zhouhaomatt')
import pytz



In [2]:


# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# Convert points to EE objects
ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the central point and buffer for the region of interest (ROI)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)


In [3]:
# Get the image collection
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') \
    .filterDate('2018-01-01', '2024-06-19') \
    .filterBounds(roi)
# Get the cloud score data
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED') \
    .filterDate('2018-01-01', '2024-06-19') \
    .filterBounds(roi)


In [4]:
# Setup the data extration pipline

region = ee.Geometry.MultiPoint(ee_points)
scale = 10  # the two data sources remain the same 10 meter resolutions
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']


# Extract sentinel 2 pixel values 
pixel_values = images.select(band_list).getRegion(region, scale).getInfo()

# Extract cloud score values
cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()





In [12]:
pixel_values[:3]

[['id',
  'longitude',
  'latitude',
  'time',
  'B2',
  'B3',
  'B4',
  'B8',
  'B8A',
  'B11',
  'B12'],
 ['20180827T222541_20180827T222807_T59GMM',
  170.89141231635503,
  -43.99925803735313,
  1535408903936,
  4720,
  4724,
  4792,
  5224,
  4954,
  2263,
  2195],
 ['20181213T223659_20181213T223656_T59GMM',
  170.89141231635503,
  -43.99925803735313,
  1544740695731,
  2264,
  2252,
  2136,
  3318,
  3803,
  3274,
  2938]]

In [5]:
# Convert to panda dataframes

pixel_values_df = pd.DataFrame(pixel_values[1:], columns=pixel_values[0])

cloud_values_df = pd.DataFrame(cloud_values[1:], columns=cloud_values[0])

# Convert the original timestamp to UTC and then to UTC+12
pixel_values_df['timestamp'] = pd.to_datetime(pixel_values_df['time'], unit='ms', utc=True).dt.tz_convert('Etc/GMT-12')
cloud_values_df['timestamp'] = pd.to_datetime(cloud_values_df['time'], unit='ms', utc=True).dt.tz_convert('Etc/GMT-12')

# Ensure the timestamp format matches the ground data format
pixel_values_df['timestamp'] = pixel_values_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
cloud_values_df['timestamp'] = cloud_values_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')


In [13]:
pixel_values_df[:3]

Unnamed: 0,id,longitude,latitude,time,B2,B3,B4,B8,B8A,B11,B12,timestamp
0,20180827T222541_20180827T222807_T59GMM,170.891412,-43.999258,1535408903936,4720.0,4724.0,4792.0,5224.0,4954.0,2263,2195,2018-08-28 10:28:23
1,20181213T223659_20181213T223656_T59GMM,170.891412,-43.999258,1544740695731,2264.0,2252.0,2136.0,3318.0,3803.0,3274,2938,2018-12-14 10:38:15
2,20181215T222531_20181215T222536_T59GMM,170.891412,-43.999258,1544912896000,8392.0,7808.0,7360.0,7536.0,7384.0,5487,4050,2018-12-16 10:28:16


In [14]:
cloud_values_df[:3]

Unnamed: 0,id,longitude,latitude,time,cs,cs_cdf,timestamp
0,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999258,1514932615460,0.431373,0.682353,2018-01-03 10:36:55
1,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999258,1515104728460,0.027451,0.129412,2018-01-05 10:25:28
2,20180107T223659_20180107T223653_T59GMM,170.891412,-43.999258,1515364613460,0.05098,0.101961,2018-01-08 10:36:53


In [6]:
for i, point in enumerate(points_of_interest):
    # Filter rows for each point
    selected_rows = pixel_values_df[(pixel_values_df['longitude'].between(point[0]-0.0001, point[0]+0.0001)) & 
                                    (pixel_values_df['latitude'].between(point[1]-0.0001, point[1]+0.0001))]
    
    # Save the selected rows as a CSV file with a structured name
    selected_rows.to_csv(f'point_{i+1}_pixel_values.csv', index=False)

In [7]:
cloud_values_df

Unnamed: 0,id,longitude,latitude,time,cs,cs_cdf,timestamp
0,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999258,1514932615460,0.431373,0.682353,2018-01-03 10:36:55
1,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999258,1515104728460,0.027451,0.129412,2018-01-05 10:25:28
2,20180107T223659_20180107T223653_T59GMM,170.891412,-43.999258,1515364613460,0.050980,0.101961,2018-01-08 10:36:53
3,20180109T222531_20180109T222529_T59GMM,170.891412,-43.999258,1515536729460,0.003922,0.078431,2018-01-10 10:25:29
4,20180112T223651_20180112T223654_T59GMM,170.891412,-43.999258,1515796614460,0.011765,0.050980,2018-01-13 10:36:54
...,...,...,...,...,...,...,...
8401,20240606T222551_20240606T222547_T59GMM,170.891951,-43.999078,1717712911071,0.831373,0.929412,2024-06-07 10:28:31
8402,20240609T223711_20240609T223714_T59GMM,170.891951,-43.999078,1717972708111,0.011765,0.062745,2024-06-10 10:38:28
8403,20240611T222549_20240611T222545_T59GMM,170.891951,-43.999078,1718144908758,0.341176,0.564706,2024-06-12 10:28:28
8404,20240614T223709_20240614T223711_T59GMM,170.891951,-43.999078,1718404705157,0.356863,0.635294,2024-06-15 10:38:25


In [8]:
for i, point in enumerate(points_of_interest):
    # Filter rows for each point
    selected_rows = cloud_values_df[(cloud_values_df['longitude'].between(point[0]-0.0001, point[0]+0.0001)) & 
                                    (cloud_values_df['latitude'].between(point[1]-0.0001, point[1]+0.0001))]
    
    # Save the selected rows as a CSV file with a structured name
    selected_rows.to_csv(f'point_{i+1}_cloud_scores.csv', index=False)

In [31]:
selected_rows

Unnamed: 0,id,longitude,latitude,time,B2,B3,B4,B8,B8A,B11,B12,timestamp


In [44]:
pt1_merge

Unnamed: 0_level_0,id_x,longitude_x,latitude_x,time_x,B2,B3,B4,B8,B8A,B11,B12,id_y,longitude_y,latitude_y,time_y,cs,cs_cdf
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-03 10:36:55,,,,,,,,,,,,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999258,1.514933e+12,0.431373,0.682353
2018-01-03 10:36:55,,,,,,,,,,,,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999168,1.514933e+12,0.427451,0.678431
2018-01-05 10:25:28,,,,,,,,,,,,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999258,1.515105e+12,0.027451,0.129412
2018-01-05 10:25:28,,,,,,,,,,,,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999168,1.515105e+12,0.027451,0.129412
2018-01-08 10:36:53,,,,,,,,,,,,20180107T223659_20180107T223653_T59GMM,170.891412,-43.999258,1.515365e+12,0.050980,0.101961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-15 10:38:25,20240614T223709_20240614T223711_T59GMM,170.891412,-43.999168,1.718405e+12,482.0,464.0,421.0,812.0,1011.0,792.0,715.0,20240614T223709_20240614T223711_T59GMM,170.891412,-43.999168,1.718405e+12,0.329412,0.568627
2024-06-17 10:28:31,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999258,1.718577e+12,2908.0,2866.0,3016.0,3536.0,3378.0,1287.0,1397.0,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999258,1.718577e+12,0.019608,0.090196
2024-06-17 10:28:31,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999258,1.718577e+12,2908.0,2866.0,3016.0,3536.0,3378.0,1287.0,1397.0,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999168,1.718577e+12,0.019608,0.090196
2024-06-17 10:28:31,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999168,1.718577e+12,2886.0,2908.0,2990.0,3440.0,3378.0,1287.0,1397.0,20240616T222551_20240616T222548_T59GMM,170.891412,-43.999258,1.718577e+12,0.019608,0.090196


In [None]:


# Define the points of interest
points_of_interest = [
    (170.89142642028511, -43.99924808088317),
    (170.89155129827327, -43.99924808088317),
    (170.89167617626143, -43.99924808088317),
    (170.89180105424958, -43.99924808088317),
    (170.89192593223774, -43.99924808088317),
    (170.89142642028511, -43.99915824976567),
    (170.89192593223774, -43.99915824976567),
    (170.89142642028511, -43.99906841864817),
    (170.89192593223774, -43.99906841864817)
]

# Convert points to EE objects
ee_points = [ee.Geometry.Point(lon, lat) for lon, lat in points_of_interest]

# Define the central point and buffer for the region of interest (ROI)
central_point = ee.Geometry.Point(170.89167617626143, -43.99906841864817)
roi = central_point.buffer(30)


In [None]:
# Get the image collection
images = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') \
    .filterDate('2018-01-01', '2024-06-19') \
    .filterBounds(roi)
# Get the cloud score data
clouds = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED') \
    .filterDate('2018-01-01', '2024-06-19') \
    .filterBounds(roi)




In [None]:
# Setup the data extration pipline

region = ee.Geometry.MultiPoint(ee_points)
scale = 10  # the two data sources remain the same 10 meter resolutions
band_list = ['B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']
cloud_bands = ['cs', 'cs_cdf']



# Extract sentinel 2 pixel values 

pixel_values = images.select(band_list).getRegion(region, scale).getInfo()


# Extract cloud score values

cloud_values = clouds.select(cloud_bands).getRegion(region, scale).getInfo()





In [None]:
# Convert to panda dataframes

pixel_values_df = pd.DataFrame(pixel_values[1:], columns=pixel_values[0])

cloud_values_df = pd.DataFrame(cloud_values[1:], columns=cloud_values[0])

# Convert the original timestamp to UTC and then to UTC+12
pixel_values_df['timestamp'] = pd.to_datetime(pixel_values_df['time'], unit='ms', utc=True).dt.tz_convert('Etc/GMT-12')
cloud_values_df['timestamp'] = pd.to_datetime(cloud_values_df['time'], unit='ms', utc=True).dt.tz_convert('Etc/GMT-12')

# Ensure the timestamp format matches the ground data format
pixel_values_df['timestamp'] = pixel_values_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
cloud_values_df['timestamp'] = cloud_values_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')


In [None]:
for i, point in enumerate(points_of_interest):
    # Filter rows for each point
    selected_rows = pixel_values_df[(pixel_values_df['longitude'].between(point[0]-0.0001, point[0]+0.0001)) & 
                                    (pixel_values_df['latitude'].between(point[1]-0.0001, point[1]+0.0001))]
    
    # Save the selected rows as a CSV file with a structured name
    selected_rows.to_csv(f'point_{i+1}_pixel_values.csv', index=False)

In [None]:
cloud_values_df

Unnamed: 0,id,longitude,latitude,time,cs,cs_cdf,timestamp
0,20180102T223701_20180102T223655_T59GMM,170.891412,-43.999258,1514932615460,0.431373,0.682353,2018-01-03 10:36:55
1,20180104T222529_20180104T222528_T59GMM,170.891412,-43.999258,1515104728460,0.027451,0.129412,2018-01-05 10:25:28
2,20180107T223659_20180107T223653_T59GMM,170.891412,-43.999258,1515364613460,0.050980,0.101961,2018-01-08 10:36:53
3,20180109T222531_20180109T222529_T59GMM,170.891412,-43.999258,1515536729460,0.003922,0.078431,2018-01-10 10:25:29
4,20180112T223651_20180112T223654_T59GMM,170.891412,-43.999258,1515796614460,0.011765,0.050980,2018-01-13 10:36:54
...,...,...,...,...,...,...,...
8401,20240606T222551_20240606T222547_T59GMM,170.891951,-43.999078,1717712911071,0.831373,0.929412,2024-06-07 10:28:31
8402,20240609T223711_20240609T223714_T59GMM,170.891951,-43.999078,1717972708111,0.011765,0.062745,2024-06-10 10:38:28
8403,20240611T222549_20240611T222545_T59GMM,170.891951,-43.999078,1718144908758,0.341176,0.564706,2024-06-12 10:28:28
8404,20240614T223709_20240614T223711_T59GMM,170.891951,-43.999078,1718404705157,0.356863,0.635294,2024-06-15 10:38:25


In [None]:
for i, point in enumerate(points_of_interest):
    # Filter rows for each point
    selected_rows = cloud_values_df[(cloud_values_df['longitude'].between(point[0]-0.0001, point[0]+0.0001)) & 
                                    (cloud_values_df['latitude'].between(point[1]-0.0001, point[1]+0.0001))]
    
    # Save the selected rows as a CSV file with a structured name
    selected_rows.to_csv(f'point_{i+1}_cloud_scores.csv', index=False)

In [None]:
selected_rows

Unnamed: 0,id,longitude,latitude,time,B2,B3,B4,B8,B8A,B11,B12,timestamp


In [None]:
pt1_value  = pd.read_csv('point_1_pixel_values.csv')
pt1_cloud = pd.read_csv('point_1_cloud_scores.csv')

pt1_merge= pd.merge(pt1_value, pt1_cloud, on='timestamp', how='outer')