In [7]:
import io
import sys
import boto3
import h3pandas
import h3
import fiona
import time
import os
import shapely
import geopandas as gpd
import pandas as pd
from unfolded.map_sdk import UnfoldedMap
# from sidecar import Sidecar
from uuid import uuid4
from pyproj import Geod
from shapely import wkt

In [9]:
# Name of the S3 bucket; passed as Bucket= when individual objects are downloaded.
bucket = 'ffdp-data-general-stage'
# Substring an object key must contain for the object to be processed.
folder_path = "geospatial/field_boundary_detection/kml/" 

In [None]:
def get_aws_creds():
    """Return the credentials resolved by a default boto3 session.

    Returns:
        tuple: ``(access_key, secret_key, token)`` taken from the object that
        ``boto3.Session().get_credentials()`` returns.
    """
    credentials = boto3.Session().get_credentials()
    return credentials.access_key, credentials.secret_key, credentials.token

In [10]:
def initiate_s3_connection(bucket_name=None):
    """Create the S3 handles used by the rest of the notebook.

    Args:
        bucket_name: Name of the bucket to wrap. When omitted, the
            notebook-level ``bucket`` setting is used, so that object listing
            and ``get_object`` calls always target the same bucket (the name
            used to be hard-coded a second time here).

    Returns:
        tuple: ``(my_bucket, client)`` - a boto3 ``Bucket`` resource and a
        low-level S3 client built from the credentials of ``get_aws_creds``.
    """
    if bucket_name is None:
        bucket_name = bucket
    aws_access_key_id, aws_secret_access_key, aws_session_token = get_aws_creds()
    client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
    )
    my_bucket = boto3.resource('s3').Bucket(bucket_name)
    return my_bucket, client

In [11]:
def get_ind_h3_indexed_fr(df, h3_resolution):
    """Fill one file's geometries with H3 cells and attach each cell's area.

    Args:
        df: GeoDataFrame with a ``geometry`` column (as returned by
            ``read_file_to_fr``). The caller's frame is not modified.
        h3_resolution: H3 resolution passed to ``h3.polyfill``.

    Returns:
        The exploded polyfill result with ``h3_polyfill`` as a regular column
        plus ``h3_cell_area`` (requested with unit ``m^2``), ``geometry_str``
        (``str()`` of each geometry) and ``area(hectares)``
        (``h3_cell_area / 10000``). Rows without a cell are dropped.
    """
    # Work on a copy: the geometry column is rewritten below and the caller's
    # frame must not change as a side effect.
    df = df.copy()
    # Drop the Z coordinate where present by round-tripping through 2-D WKB.
    df['geometry'] = df['geometry'].apply(
        lambda geom: shapely.wkb.loads(shapely.wkb.dumps(geom, output_dimension=2))
        if geom.has_z else geom
    )
    # Point geometries are excluded before polyfilling.
    df = df.loc[df.geometry.geom_type != 'Point']
    # Fill polygons with hexagon cells at the given resolution, one row per cell.
    gdf_h3 = df.h3.polyfill(resolution=h3_resolution, explode=True)
    gdf_h3_area = gdf_h3.dropna(subset=['h3_polyfill']).set_index('h3_polyfill')
    # cell_area is called on a frame indexed by the H3 cell id.
    gdf_h3_area = gdf_h3_area.h3.cell_area(unit='m^2').reset_index()
    gdf_h3_area['geometry_str'] = gdf_h3_area['geometry'].apply(str)
    gdf_h3_area['area(hectares)'] = gdf_h3_area['h3_cell_area'] / 10000
    return gdf_h3_area

In [12]:
def get_gross_area(final_fr, resolution):
    """Print and return the gross and net H3-covered area in hectares.

    Gross area sums ``h3_cell_area`` over every row, so a cell that occurs in
    several files is counted once per occurrence. Net area counts every H3
    cell once.

    Args:
        final_fr: Frame with an ``h3_cell_area`` column (the /10000 conversion
            assumes square metres) and normally an ``h3_polyfill`` column
            holding the cell id.
        resolution: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(gross_area, actual_area)``, both rounded to 2 decimals.
        ``(0.0, 0.0)`` for an empty frame.
    """
    if final_fr.empty:
        # Nothing was indexed (e.g. every input file was empty); such a frame
        # may not even have an ``h3_cell_area`` column to sum.
        gross_area = actual_area = 0.0
    else:
        gross_area = round(final_fr.h3_cell_area.sum() / 10000, 2)
        # Deduplicate on the cell id rather than on the area value: two
        # different cells can share the same area and must both be counted.
        # Fall back to the area column only when no cell id column exists.
        dedup_key = 'h3_polyfill' if 'h3_polyfill' in final_fr.columns else 'h3_cell_area'
        unique_fr = final_fr.drop_duplicates(subset=[dedup_key])
        actual_area = round(unique_fr.h3_cell_area.sum() / 10000, 2)
    print(f'Gross area covered: {gross_area} hectares')
    print(f'Net area covered: {actual_area} hectares')
    return gross_area, actual_area

In [13]:
# Empty module-level placeholder frame; none of the functions defined in this cell assign to it.
final_fr = pd.DataFrame()

def main(resolution):
    """Run the whole pipeline for one H3 resolution.

    Opens the S3 handles, builds the combined H3-indexed frame, lets
    ``get_gross_area`` report the areas, then prints the elapsed wall-clock
    time and the first rows of the frame.

    Args:
        resolution: H3 resolution forwarded to the indexing step.

    Returns:
        The combined H3-indexed frame.
    """
    started_at = time.time()
    s3_bucket, s3_client = initiate_s3_connection()
    h3_frame = get_combined_h3_indexed_frame(s3_bucket, s3_client, resolution)
    # The areas are printed by get_gross_area; the returned values are not used here.
    _gross, _net = get_gross_area(h3_frame, resolution)
    finished_at = time.time()
    print(f'total time taken to calculate area under management at resolution {resolution}: {finished_at-started_at} seconds ')
    print(h3_frame.head())
    return h3_frame
    
def get_combined_h3_indexed_frame(bucket_connection, client, resolution):
    """Build one H3-indexed frame from every matching object in the bucket.

    Every object whose key contains ``folder_path`` is downloaded, read with
    ``read_file_to_fr`` and, unless empty, indexed with
    ``get_ind_h3_indexed_fr``.

    Args:
        bucket_connection: boto3 ``Bucket`` resource used to list the objects.
        client: boto3 S3 client used to download each object from ``bucket``.
        resolution: H3 resolution forwarded to ``get_ind_h3_indexed_fr``.

    Returns:
        The per-file frames concatenated row-wise (each file's row labels are
        kept as they are), or an empty ``pd.DataFrame`` when no file produced
        a frame.
    """
    per_file_frames = []
    file_count = 0
    for file in bucket_connection.objects.all():
        file_key = file.key
        if folder_path not in file_key:
            continue
        obj = client.get_object(Bucket=bucket, Key=file_key)
        ind_fr = read_file_to_fr(obj, file_key)
        file_count += 1  # empty files count as read, too
        if ind_fr.empty:
            print(f"{file_key.split('/')[-1]} is empty")
            continue
        per_file_frames.append(get_ind_h3_indexed_fr(ind_fr, resolution))
    print(f'Total files read: {file_count}')
    # DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
    # frame on every iteration; collect the pieces and concatenate once.
    # pd.concat rejects an empty list, hence the explicit fallback.
    if not per_file_frames:
        return pd.DataFrame()
    return pd.concat(per_file_frames)
    
    
def read_file_to_fr(obj, file_key):
    """Read one downloaded S3 object into a GeoDataFrame.

    Args:
        obj: ``get_object`` response; its ``'Body'`` stream is read in full.
        file_key: Object key. Its extension (matched case-insensitively)
            selects the reader, and its last path component is stored in the
            ``file_name`` column.

    Returns:
        A frame with the columns ``geometry`` and ``file_name`` (rows without
        a geometry removed) when the file has rows. The frame is returned
        as read when it is empty, and an empty GeoDataFrame is returned when
        the key is neither ``.kml`` nor ``.geojson``.
    """
    # Enable read/write KML support in fiona's driver table.
    fiona.drvsupport.supported_drivers['KML'] = 'rw'
    lowered_key = file_key.lower()
    if lowered_key.endswith('.kml'):
        fr = gpd.read_file(io.BytesIO(obj['Body'].read()), driver='KML')
    elif lowered_key.endswith('.geojson'):
        fr = gpd.read_file(io.BytesIO(obj['Body'].read()))
    else:
        # `fr` used to stay unassigned on this path, which raised
        # UnboundLocalError (e.g. for a folder-marker key). Report "no data"
        # so the caller's emptiness check handles it.
        return gpd.GeoDataFrame()
    if not fr.empty:
        # .copy() so that adding file_name does not write into a slice of the
        # frame that was read.
        fr = fr.dropna(subset=['geometry'])[['geometry']].copy()
        fr['file_name'] = file_key.split('/')[-1]
    return fr

# print(f'total number of files:{i}')
# print(f'gross area fr \n {gross_area_fr.head()}')

In [14]:
# Run the full pipeline at H3 resolution 10 and keep the combined frame for the cells below.
combined_h3_indexed_frame = main(10)

Бургуджи поле № 6-7 к, площадь 120 га (1).kml is empty
Бургуджи поле № 6-7 к, площадь 120 га.kml is empty
Total files read: 96
Gross area covered: 36316.02 hectares
Net area covered: 29064.37 hectares
total time taken to calculate area under management at resolution 10: 58.181480884552 seconds 
       h3_polyfill                                           geometry  \
0  8a2d659008affff  MULTIPOLYGON (((36.06552 49.62685, 36.07158 49...   
1  8a2d659008e7fff  MULTIPOLYGON (((36.06552 49.62685, 36.07158 49...   
2  8a2d659008dffff  MULTIPOLYGON (((36.06552 49.62685, 36.07158 49...   
3  8a2d65900d4ffff  MULTIPOLYGON (((36.06552 49.62685, 36.07158 49...   
4  8a2d65900ba7fff  MULTIPOLYGON (((36.06552 49.62685, 36.07158 49...   

  file_name  h3_cell_area                                       geometry_str  \
0     1.kml  13956.336395  MULTIPOLYGON (((36.06551576614341 49.626849498...   
1     1.kml  13955.730878  MULTIPOLYGON (((36.06551576614341 49.626849498...   
2     1.kml  13955.729263

In [15]:
# Show the rows ordered by H3 cell id, so rows that share a cell sit next to each other.
combined_h3_indexed_frame.sort_values('h3_polyfill')

Unnamed: 0,h3_polyfill,geometry,file_name,h3_cell_area,geometry_str,area(hectares)
210,8a1192409327fff,"MULTIPOLYGON (((37.88835 49.91300, 37.88916 49...",Ватутін+Гиринко+Зоря+Геліантус+Агроінвест.kml,14271.084387,MULTIPOLYGON (((37.888345854879 49.91300167798...,1.427108
146,8a1192409327fff,"MULTIPOLYGON (((37.88835 49.91300, 37.88916 49...",Зоря+Гиренко+Геліантус+Агроінвест.kml,14271.084387,MULTIPOLYGON (((37.888345854879 49.91300167798...,1.427108
118,8a1192409a07fff,"MULTIPOLYGON (((37.88916 49.91441, 37.88835 49...",Зоря+Гиренко+Геліантус+Агроінвест.kml,14270.183908,MULTIPOLYGON (((37.8891573359755 49.9144061644...,1.427018
175,8a1192409a07fff,"MULTIPOLYGON (((37.88916 49.91441, 37.88835 49...",Ватутін+Гиринко+Зоря+Геліантус+Агроінвест.kml,14270.183908,MULTIPOLYGON (((37.8891573359755 49.9144061644...,1.427018
209,8a1192409a1ffff,"MULTIPOLYGON (((37.88835 49.91300, 37.88916 49...",Ватутін+Гиринко+Зоря+Геліантус+Агроінвест.kml,14270.483803,MULTIPOLYGON (((37.888345854879 49.91300167798...,1.427048
...,...,...,...,...,...,...
523,8a2d65b6eb6ffff,"MULTIPOLYGON (((36.66914 50.01025, 36.66919 50...",Ватутін+Гиринко+Зоря+Геліантус+Агроінвест.kml,14025.354875,MULTIPOLYGON (((36.6691418297865 50.0102456974...,1.402535
528,8a2d65b6eb77fff,"MULTIPOLYGON (((36.66914 50.01025, 36.66919 50...",Ватутін+Гиринко+Зоря+Геліантус+Агроінвест.kml,14025.057351,MULTIPOLYGON (((36.6691418297865 50.0102456974...,1.402506
464,8a2d65b6eb77fff,"MULTIPOLYGON (((36.66914 50.01025, 36.66919 50...",Зоря+Гиренко+Геліантус+Агроінвест.kml,14025.057351,MULTIPOLYGON (((36.6691418297865 50.0102456974...,1.402506
477,8a2d65b6ebaffff,"MULTIPOLYGON (((36.66042 50.00342, 36.67056 50...",Зоря+Гиренко+Геліантус+Агроінвест.kml,14025.066408,MULTIPOLYGON (((36.6604199879793 50.0034233607...,1.402507


# Display in Unfolded

In [25]:
# UnfoldedMap is already imported in the imports cell at the top of the notebook.
widget = UnfoldedMap()

In [8]:
# Register a dataset on the map widget under a freshly generated UUID.
# NOTE(review): `gross_area_fr_13` is not defined in any visible cell - confirm
# which frame is meant before re-running on a fresh kernel.
dataset_id = uuid4()

# Alternatively, identify the dataset with a custom string:
# dataset_id = 'h3-data'
widget.add_dataset({
    'uuid': dataset_id,
    'label': 'h3_dataset',
    'data': gross_area_fr_13
})

<Future pending>

In [13]:
# Register a second dataset on the map widget under its own freshly generated UUID.
# NOTE(review): `duplicate` is not defined in any visible cell - confirm
# which frame is meant before re-running on a fresh kernel.
dup_dataset_id = uuid4()

# Alternatively, identify the dataset with a custom string:
# dataset_id = 'h3-data'
widget.add_dataset({
    'uuid': dup_dataset_id,
    'label': 'h3_dup_dataset',
    'data': duplicate
})

In [10]:
# Render the map widget as this cell's output.
widget

UnfoldedMap()

In [None]:
# `final_fr` is still the empty placeholder frame at this point (it has no
# 'file_name' column), so sort the frame returned by main() instead.
combined_h3_indexed_frame.sort_values('file_name')