This notebook transforms pre-computed area/GADM/natural lands zonal statistics into a usable format for the zeno tooling.

In [1]:
import pandas as pd
from dateutil.relativedelta import relativedelta
from datetime import date

In [2]:
# Lookup table: ISO 3166-1 numeric country code (int) -> ISO 3166-1 alpha-3 code (str).
# It is used to translate the numeric `countries` column of the zonal-stats table;
# codes that are missing from this table yield a null there, and those rows are
# dropped by the following cell.
numeric_to_alpha3 = {
    4: 'AFG', 248: 'ALA', 8: 'ALB', 12: 'DZA', 16: 'ASM', 20: 'AND', 24: 'AGO', 660: 'AIA',
    10: 'ATA', 28: 'ATG', 32: 'ARG', 51: 'ARM', 533: 'ABW', 36: 'AUS', 40: 'AUT', 31: 'AZE',
    44: 'BHS', 48: 'BHR', 50: 'BGD', 52: 'BRB', 112: 'BLR', 56: 'BEL', 84: 'BLZ', 204: 'BEN',
    60: 'BMU', 64: 'BTN', 68: 'BOL', 535: 'BES', 70: 'BIH', 72: 'BWA', 74: 'BVT', 76: 'BRA',
    86: 'IOT', 96: 'BRN', 100: 'BGR', 854: 'BFA', 108: 'BDI', 132: 'CPV', 116: 'KHM', 120: 'CMR',
    124: 'CAN', 136: 'CYM', 140: 'CAF', 148: 'TCD', 152: 'CHL', 156: 'CHN', 162: 'CXR', 166: 'CCK',
    170: 'COL', 174: 'COM', 178: 'COG', 180: 'COD', 184: 'COK', 188: 'CRI', 384: 'CIV', 191: 'HRV',
    192: 'CUB', 531: 'CUW', 196: 'CYP', 203: 'CZE', 208: 'DNK', 262: 'DJI', 212: 'DMA', 214: 'DOM',
    218: 'ECU', 818: 'EGY', 222: 'SLV', 226: 'GNQ', 232: 'ERI', 233: 'EST', 748: 'SWZ', 231: 'ETH',
    238: 'FLK', 234: 'FRO', 242: 'FJI', 246: 'FIN', 250: 'FRA', 254: 'GUF', 258: 'PYF', 260: 'ATF',
    266: 'GAB', 270: 'GMB', 268: 'GEO', 276: 'DEU', 288: 'GHA', 292: 'GIB', 300: 'GRC', 304: 'GRL',
    308: 'GRD', 312: 'GLP', 316: 'GUM', 320: 'GTM', 831: 'GGY', 324: 'GIN', 624: 'GNB', 328: 'GUY',
    332: 'HTI', 334: 'HMD', 336: 'VAT', 340: 'HND', 344: 'HKG', 348: 'HUN', 352: 'ISL', 356: 'IND',
    360: 'IDN', 364: 'IRN', 368: 'IRQ', 372: 'IRL', 833: 'IMN', 376: 'ISR', 380: 'ITA', 388: 'JAM',
    392: 'JPN', 832: 'JEY', 400: 'JOR', 398: 'KAZ', 404: 'KEN', 296: 'KIR', 408: 'PRK', 410: 'KOR',
    414: 'KWT', 417: 'KGZ', 418: 'LAO', 428: 'LVA', 422: 'LBN', 426: 'LSO', 430: 'LBR', 434: 'LBY',
    438: 'LIE', 440: 'LTU', 442: 'LUX', 446: 'MAC', 450: 'MDG', 454: 'MWI', 458: 'MYS', 462: 'MDV',
    466: 'MLI', 470: 'MLT', 584: 'MHL', 474: 'MTQ', 478: 'MRT', 480: 'MUS', 175: 'MYT', 484: 'MEX',
    583: 'FSM', 498: 'MDA', 492: 'MCO', 496: 'MNG', 499: 'MNE', 500: 'MSR', 504: 'MAR', 508: 'MOZ',
    104: 'MMR', 516: 'NAM', 520: 'NRU', 524: 'NPL', 528: 'NLD', 540: 'NCL', 554: 'NZL', 558: 'NIC',
    562: 'NER', 566: 'NGA', 570: 'NIU', 574: 'NFK', 807: 'MKD', 580: 'MNP', 578: 'NOR', 512: 'OMN',
    586: 'PAK', 585: 'PLW', 275: 'PSE', 591: 'PAN', 598: 'PNG', 600: 'PRY', 604: 'PER', 608: 'PHL',
    612: 'PCN', 616: 'POL', 620: 'PRT', 630: 'PRI', 634: 'QAT', 638: 'REU', 642: 'ROU', 643: 'RUS',
    646: 'RWA', 652: 'BLM', 654: 'SHN', 659: 'KNA', 662: 'LCA', 663: 'MAF', 666: 'SPM', 670: 'VCT',
    882: 'WSM', 674: 'SMR', 678: 'STP', 682: 'SAU', 686: 'SEN', 688: 'SRB', 690: 'SYC', 694: 'SLE',
    702: 'SGP', 534: 'SXM', 703: 'SVK', 705: 'SVN', 90: 'SLB', 706: 'SOM', 710: 'ZAF', 239: 'SGS',
    728: 'SSD', 724: 'ESP', 144: 'LKA', 729: 'SDN', 740: 'SUR', 744: 'SJM', 752: 'SWE', 756: 'CHE',
    760: 'SYR', 158: 'TWN', 762: 'TJK', 834: 'TZA', 764: 'THA', 626: 'TLS', 768: 'TGO', 772: 'TKL',
    776: 'TON', 780: 'TTO', 788: 'TUN', 792: 'TUR', 795: 'TKM', 796: 'TCA', 798: 'TUV', 800: 'UGA',
    804: 'UKR', 784: 'ARE', 826: 'GBR', 840: 'USA', 581: 'UMI', 858: 'URY', 860: 'UZB', 548: 'VUT',
    862: 'VEN', 704: 'VNM', 92: 'VGB', 850: 'VIR', 876: 'WLF', 732: 'ESH', 887: 'YEM', 894: 'ZMB',
    716: 'ZWE'
}

In [3]:
# Display label for each natural-lands class code (integer code -> label string).
# Codes that are not listed here are labelled 'unclassified' by the mapping cell.
# NOTE(review): several codes share one label (3/15, 4/16, 6/21, 10/19, 11/20), so
# rows for those codes cannot be told apart by label once the codes are replaced --
# confirm that merging them under a single name is intended.
sbtn_natural_lands_classes = dict(
    [
        (2, "Forest"),
        (3, "Short vegetation"),
        (4, "Water"),
        (5, "Mangroves"),
        (6, "Bare"),
        (7, "Snow/Ice"),
        (8, "Wetland forest"),
        (9, "Peat forest"),
        (10, "Wetland short vegetation"),
        (11, "Peat short vegetation"),
        (12, "Cropland"),
        (13, "Built-up"),
        (14, "Tree cover"),
        (15, "Short vegetation"),
        (16, "Water"),
        (17, "Wetland tree cover"),
        (18, "Peat tree cover"),
        (19, "Wetland short vegetation"),
        (20, "Peat short vegetation"),
        (21, "Bare"),
    ]
)

In [4]:
# Confidence label for each alert confidence code (2 -> "low", 3 -> "high").
# Not referenced by any of the cells visible in this notebook.
alerts_confidence = dict([(2, "low"), (3, "high")])

In [5]:
# Load the raw area-by-natural-lands table (adm2 level, going by the file name).
raw_adm2_uri = 's3://gfw-data-lake/sbtn_natural_lands/zarr/area_by_natural_lands_all_adm2_raw.parquet'
adm2 = pd.read_parquet(raw_adm2_uri)

In [6]:
# Translate the numeric country codes in `countries` into alpha-3 strings via the
# `numeric_to_alpha3` lookup. Series.map performs the dictionary lookup without a
# per-row Python lambda; codes missing from the lookup become NaN (null).
# The dtype guard keeps the cell idempotent: the column is overwritten in place, so
# running the lookup a second time on the alpha-3 strings would turn every value
# into a null (and the following dropna cell would then remove every row).
if pd.api.types.is_numeric_dtype(adm2['countries']):
    adm2['countries'] = adm2['countries'].map(numeric_to_alpha3)

In [7]:
# Remove, in place, every row whose `countries` value is null
# (i.e. the country code had no entry in the lookup table).
adm2.dropna(axis='index', how='any', subset=['countries'], inplace=True)

In [8]:
# Rename the `value` column to `area`, modifying the frame in place.
adm2.rename({'value': 'area'}, axis='columns', inplace=True)

In [9]:
# Show the frame after the country-code translation, null drop and column rename;
# `natural_lands` still holds the integer class codes at this point.
adm2

Unnamed: 0,countries,regions,subregions,natural_lands,area
0,AFG,1,1,2,8.022464e+04
1,AFG,1,1,3,1.931114e+09
2,AFG,1,1,4,2.087068e+06
3,AFG,1,1,6,9.407709e+08
4,AFG,1,1,7,1.962260e+06
...,...,...,...,...,...
259086,GUF,2,7,10,4.089954e+06
259087,GUF,2,7,11,2.964203e+05
259088,GUF,2,7,12,4.491208e+07
259089,GUF,2,7,13,2.111077e+07


In [10]:
# Replace the integer class codes in `natural_lands` with their display labels from
# `sbtn_natural_lands_classes`; any code without an entry is labelled 'unclassified'.
# The dtype guard keeps the cell idempotent: the column is overwritten in place, so
# running the lookup a second time on the label strings would relabel every row as
# 'unclassified'.
if pd.api.types.is_numeric_dtype(adm2['natural_lands']):
    adm2['natural_lands'] = (
        adm2['natural_lands']
        .map(sbtn_natural_lands_classes)
        .fillna('unclassified')
    )

In [11]:
# Show the frame again now that `natural_lands` holds label strings instead of codes.
adm2

Unnamed: 0,countries,regions,subregions,natural_lands,area
0,AFG,1,1,Forest,8.022464e+04
1,AFG,1,1,Short vegetation,1.931114e+09
2,AFG,1,1,Water,2.087068e+06
3,AFG,1,1,Bare,9.407709e+08
4,AFG,1,1,Snow/Ice,1.962260e+06
...,...,...,...,...,...
259086,GUF,2,7,Wetland short vegetation,4.089954e+06
259087,GUF,2,7,Peat short vegetation,2.964203e+05
259088,GUF,2,7,Cropland,4.491208e+07
259089,GUF,2,7,Built-up,2.111077e+07


In [13]:
# Write the labelled table back to the data lake, leaving the DataFrame index out of the file.
labelled_adm2_uri = 's3://gfw-data-lake/sbtn_natural_lands/zarr/area_by_natural_lands_all_adm2.parquet'
adm2.to_parquet(labelled_adm2_uri, index=False)

In [15]:
# Preview the first five rows of the frame.
# NOTE(review): the saved output for this cell starts at index 1481 (subregions 0,
# first row 'unclassified'), which does not match the frame displayed earlier
# (index 0, subregions 1). Execution counts 12 and 14 are also missing, so a cell
# that changed `adm2` may have been removed -- confirm with Restart Kernel -> Run All.
adm2.head()

Unnamed: 0,countries,regions,subregions,natural_lands,area
1481,AFG,1,0,unclassified,30537040.0
1482,AFG,1,0,Forest,1370876.0
1483,AFG,1,0,Short vegetation,69729320000.0
1484,AFG,1,0,Water,177844100.0
1485,AFG,1,0,Bare,90772250000.0
