# Compile Landuse stats into Admin Polygons
For land use we use the downloaded ESA WorldCover land-cover raster. We also have district (admin level 2) polygons for Ghana. For each district we want to compile the area and the percentage of each land-cover class. To do this, we run zonal statistics with rasterstats.



In [60]:
import sys, os, importlib
import rasterio, geojson
import numpy as np
import pandas as pd
import geopandas as gpd

In [61]:
# Put a local GOSTRocks checkout on the import path.
# NOTE(review): this absolute path is machine-specific; adjust it when running elsewhere.
sys.path.append(r"/Users/tomgertin/repos/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
# Re-execute the module; presumably so edits to the local checkout are picked
# up without restarting the kernel.
importlib.reload(rMisc)

<module 'GOSTRocks.rasterMisc' from '/Users/tomgertin/repos/gostrocks/src/GOSTRocks/rasterMisc.py'>

In [62]:
from rasterstats import zonal_stats

# Input District Admin Areas

In [63]:
# District (admin level 2) polygons, loaded from the shapefile into a GeoDataFrame.
admin_shp_path = r"/Users/tomgertin/Documents/world_bank/CityScan/ghana_landuse_product/input/ghana_districts_mollweide.shp"
inAdmin = gpd.read_file(admin_shp_path)


In [64]:
inAdmin.crs

<Projected CRS: ESRI:54009>
Name: World_Mollweide
Axis Info [cartesian]:
- [east]: Easting (metre)
- [north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Mollweide
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Import WorldCover raster
In QGIS I merged the separate rasters into one raster, then saved it in the Mollweide projection (an equal-area projection) because we will calculate areas.

In [65]:
# Path (relative to the working directory) of the merged land-cover raster.
merged_world_cover = "input/world_cover_ghana_merged_mollweide2.tif"


## Use zonal stats to get the per-category pixel counts for each admin area

In [68]:
# Tally raster values inside every admin polygon. With categorical=True each
# polygon yields a dict of {raster value: pixel count}; the result is a list
# with one dict per row of inAdmin.
stats = zonal_stats(inAdmin, merged_world_cover, categorical=True)

















In [69]:
stats

[{10: 23181,
  20: 52621,
  30: 141501,
  40: 5205,
  50: 684592,
  60: 6215,
  80: 2088,
  90: 511},
 {10: 3557,
  20: 338,
  30: 25123,
  40: 744,
  50: 307812,
  60: 5917,
  80: 1123,
  90: 12},
 {10: 47372,
  20: 711989,
  30: 3943481,
  40: 11385600,
  50: 66906,
  60: 7067,
  80: 10643,
  90: 9039},
 {10: 1455696,
  20: 20317985,
  30: 24388333,
  40: 4326864,
  50: 90190,
  60: 11088,
  80: 1646,
  90: 3231},
 {10: 64269,
  20: 1371856,
  30: 3003332,
  40: 1929210,
  50: 430038,
  60: 16082,
  80: 4093,
  90: 2010},
 {10: 162466,
  20: 1369186,
  30: 6028960,
  40: 12904081,
  50: 77849,
  60: 8015,
  80: 59423,
  90: 1806},
 {10: 86542,
  20: 2101905,
  30: 7769490,
  40: 3856225,
  50: 138716,
  60: 23269,
  80: 10914,
  90: 6774},
 {10: 46255,
  20: 2347743,
  30: 9850556,
  40: 19125781,
  50: 113437,
  60: 9462,
  80: 430,
  90: 894},
 {10: 306454,
  20: 5382264,
  30: 9561568,
  40: 8744700,
  50: 105915,
  60: 16630,
  80: 9199,
  90: 8213},
 {10: 328259,
  20: 4971401,


In [70]:
inAdmin

Unnamed: 0,shapeName,shapeISO,shapeID,shapeGroup,shapeType,geometry
0,Adenta Municipal,,2480657B31663195588056,GHA,ADM2,"POLYGON ((-9732.475 702346.917, -8374.825 7016..."
1,Ledzokuku Municipal,,2480657B81218626769676,GHA,ADM2,"POLYGON ((-14032.270 696375.455, -12774.748 69..."
2,Chereponi,,2480657B59683408463573,GHA,ADM2,"POLYGON ((7115.078 1229905.419, 7125.975 12312..."
3,Wa East,,2480657B86087610655767,GHA,ADM2,"POLYGON ((-165103.465 1255389.061, -164705.267..."
4,Wa Municipal,,2480657B6168445767624,GHA,ADM2,"POLYGON ((-238618.736 1254044.427, -238555.285..."
...,...,...,...,...,...,...
256,Akwapem North,,2480657B49720933786015,GHA,ADM2,"POLYGON ((-1260.243 740845.856, -812.879 74053..."
257,Fanteakwa South,,2480657B97880290767835,GHA,ADM2,"POLYGON ((-20445.994 800147.817, -20450.484 80..."
258,Birim Central Municipal,,2480657B13510680910521,GHA,ADM2,"POLYGON ((-94840.785 735523.423, -93923.241 73..."
259,Hohoe Municipal,,2480657B21040345642773,GHA,ADM2,"POLYGON ((39032.925 892526.691, 39611.139 8826..."


In [71]:
# One row per admin polygon, one column per raster value; a value that is
# absent from a polygon's dict becomes NaN in that row.
stats_df = pd.DataFrame(data=stats)

In [72]:
stats_df

Unnamed: 0,10,20,30,40,50,60,80,90,95
0,23181,52621.0,141501,5205.0,684592,6215.0,2088.0,511.0,
1,3557,338.0,25123,744.0,307812,5917.0,1123.0,12.0,
2,47372,711989.0,3943481,11385600.0,66906,7067.0,10643.0,9039.0,
3,1455696,20317985.0,24388333,4326864.0,90190,11088.0,1646.0,3231.0,
4,64269,1371856.0,3003332,1929210.0,430038,16082.0,4093.0,2010.0,
...,...,...,...,...,...,...,...,...,...
256,3064704,231865.0,147982,323.0,164851,1551.0,7.0,,
257,2450674,70201.0,445701,3045.0,85657,26169.0,3265.0,9.0,
258,918434,87485.0,112920,312.0,130759,1169.0,5601.0,2.0,
259,2833985,571426.0,286034,37975.0,157590,555.0,,4.0,


In [None]:
# These are the ESA WorldCover class codes
# 10 Tree cover
# 20 Shrubland
# 30 Grassland
# 40 Cropland
# 50 Built-up
# 60 Bare / sparse vegetation
# 70 Snow and Ice
# 80 Permanent water bodies
# 90 Herbaceous wetland
# 95 Mangroves
# 100 Moss and lichen

In [75]:
# Map each numeric land-cover code that appears as a column to a short name.
rename_dict = {
    10: 'tree',
    20: 'shrub',
    30: 'grass',
    40: 'crop',
    50: 'built',
    60: 'bare',
    80: 'water',
    90: 'wetland',
    95: 'mangrove',
}

# Relabel the columns; codes that are not present as columns are ignored.
stats_df = stats_df.rename(columns=rename_dict)


In [76]:
stats_df

Unnamed: 0,tree,shrub,grass,crop,built,bare,water,wetland,mangrove
0,23181,52621.0,141501,5205.0,684592,6215.0,2088.0,511.0,
1,3557,338.0,25123,744.0,307812,5917.0,1123.0,12.0,
2,47372,711989.0,3943481,11385600.0,66906,7067.0,10643.0,9039.0,
3,1455696,20317985.0,24388333,4326864.0,90190,11088.0,1646.0,3231.0,
4,64269,1371856.0,3003332,1929210.0,430038,16082.0,4093.0,2010.0,
...,...,...,...,...,...,...,...,...,...
256,3064704,231865.0,147982,323.0,164851,1551.0,7.0,,
257,2450674,70201.0,445701,3045.0,85657,26169.0,3265.0,9.0,
258,918434,87485.0,112920,312.0,130759,1169.0,5601.0,2.0,
259,2833985,571426.0,286034,37975.0,157590,555.0,,4.0,


## Add new columns to the DataFrame for the area in square kilometers

In [77]:
# Work on an independent copy so stats_df keeps only the raw pixel counts.
stats_df_w_sqkm = stats_df.copy(deep=True)

In [78]:
# Convert pixel counts to square kilometres. The area of one pixel is read
# from the raster itself instead of being hard-coded, so the conversion stays
# correct if the raster is re-exported at a different resolution.
# NOTE(review): assumes the raster CRS is in metres (the notebook text says it
# was saved in Mollweide) — confirm.
with rasterio.open(merged_world_cover) as src:
    pixel_width, pixel_height = src.res
sqkm_per_pixel = pixel_width * pixel_height / 1e6  # m^2 -> km^2 (0.0001 for 10 m pixels)

# .columns is evaluated once when the loop starts, so the *_sqkm columns added
# inside the loop are not themselves revisited.
for col in stats_df_w_sqkm.columns:
    stats_df_w_sqkm[col + '_sqkm'] = stats_df_w_sqkm[col] * sqkm_per_pixel

In [79]:
stats_df_w_sqkm

Unnamed: 0,tree,shrub,grass,crop,built,bare,water,wetland,mangrove,tree_sqkm,shrub_sqkm,grass_sqkm,crop_sqkm,built_sqkm,bare_sqkm,water_sqkm,wetland_sqkm,mangrove_sqkm
0,23181,52621.0,141501,5205.0,684592,6215.0,2088.0,511.0,,2.3181,5.2621,14.1501,0.5205,68.4592,0.6215,0.2088,0.0511,
1,3557,338.0,25123,744.0,307812,5917.0,1123.0,12.0,,0.3557,0.0338,2.5123,0.0744,30.7812,0.5917,0.1123,0.0012,
2,47372,711989.0,3943481,11385600.0,66906,7067.0,10643.0,9039.0,,4.7372,71.1989,394.3481,1138.5600,6.6906,0.7067,1.0643,0.9039,
3,1455696,20317985.0,24388333,4326864.0,90190,11088.0,1646.0,3231.0,,145.5696,2031.7985,2438.8333,432.6864,9.0190,1.1088,0.1646,0.3231,
4,64269,1371856.0,3003332,1929210.0,430038,16082.0,4093.0,2010.0,,6.4269,137.1856,300.3332,192.9210,43.0038,1.6082,0.4093,0.2010,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,3064704,231865.0,147982,323.0,164851,1551.0,7.0,,,306.4704,23.1865,14.7982,0.0323,16.4851,0.1551,0.0007,,
257,2450674,70201.0,445701,3045.0,85657,26169.0,3265.0,9.0,,245.0674,7.0201,44.5701,0.3045,8.5657,2.6169,0.3265,0.0009,
258,918434,87485.0,112920,312.0,130759,1169.0,5601.0,2.0,,91.8434,8.7485,11.2920,0.0312,13.0759,0.1169,0.5601,0.0002,
259,2833985,571426.0,286034,37975.0,157590,555.0,,4.0,,283.3985,57.1426,28.6034,3.7975,15.7590,0.0555,,0.0004,


In [80]:
# Substitute NaN values with 0 (a class missing from a polygon means zero pixels).
stats_df_w_sqkm = stats_df_w_sqkm.fillna(value=0)


In [81]:
stats_df_w_sqkm

Unnamed: 0,tree,shrub,grass,crop,built,bare,water,wetland,mangrove,tree_sqkm,shrub_sqkm,grass_sqkm,crop_sqkm,built_sqkm,bare_sqkm,water_sqkm,wetland_sqkm,mangrove_sqkm
0,23181,52621.0,141501,5205.0,684592,6215.0,2088.0,511.0,0.0,2.3181,5.2621,14.1501,0.5205,68.4592,0.6215,0.2088,0.0511,0.0
1,3557,338.0,25123,744.0,307812,5917.0,1123.0,12.0,0.0,0.3557,0.0338,2.5123,0.0744,30.7812,0.5917,0.1123,0.0012,0.0
2,47372,711989.0,3943481,11385600.0,66906,7067.0,10643.0,9039.0,0.0,4.7372,71.1989,394.3481,1138.5600,6.6906,0.7067,1.0643,0.9039,0.0
3,1455696,20317985.0,24388333,4326864.0,90190,11088.0,1646.0,3231.0,0.0,145.5696,2031.7985,2438.8333,432.6864,9.0190,1.1088,0.1646,0.3231,0.0
4,64269,1371856.0,3003332,1929210.0,430038,16082.0,4093.0,2010.0,0.0,6.4269,137.1856,300.3332,192.9210,43.0038,1.6082,0.4093,0.2010,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,3064704,231865.0,147982,323.0,164851,1551.0,7.0,0.0,0.0,306.4704,23.1865,14.7982,0.0323,16.4851,0.1551,0.0007,0.0000,0.0
257,2450674,70201.0,445701,3045.0,85657,26169.0,3265.0,9.0,0.0,245.0674,7.0201,44.5701,0.3045,8.5657,2.6169,0.3265,0.0009,0.0
258,918434,87485.0,112920,312.0,130759,1169.0,5601.0,2.0,0.0,91.8434,8.7485,11.2920,0.0312,13.0759,0.1169,0.5601,0.0002,0.0
259,2833985,571426.0,286034,37975.0,157590,555.0,0.0,4.0,0.0,283.3985,57.1426,28.6034,3.7975,15.7590,0.0555,0.0000,0.0004,0.0


In [82]:
# Independent copy that will receive the percentage columns.
stats_df_final = stats_df_w_sqkm.copy(deep=True)

In [85]:
# Land-cover class columns that hold raw pixel counts.
cols_to_convert = ['tree', 'shrub', 'grass', 'crop', 'built', 'bare', 'water', 'wetland', 'mangrove']

# Total counted pixels per row. Computed once, outside the loop: it depends
# only on the count columns, which the loop never modifies.
row_totals = stats_df_final[cols_to_convert].sum(axis=1)

# Add one <class>_pct column per class: that class's share of the row total.
# A row whose total is 0 yields NaN (0 / 0) rather than raising.
for col in cols_to_convert:
    stats_df_final[col + '_pct'] = stats_df_final[col] / row_totals * 100


In [86]:
stats_df_final

Unnamed: 0,tree,shrub,grass,crop,built,bare,water,wetland,mangrove,tree_sqkm,...,mangrove_sqkm,tree_pct,shrub_pct,grass_pct,crop_pct,built_pct,bare_pct,water_pct,wetland_pct,mangrove_pct
0,23181,52621.0,141501,5205.0,684592,6215.0,2088.0,511.0,0.0,2.3181,...,0.0,2.530914,5.745190,15.449158,0.568285,74.744135,0.678557,0.227969,0.055791,0.0
1,3557,338.0,25123,744.0,307812,5917.0,1123.0,12.0,0.0,0.3557,...,0.0,1.032133,0.098077,7.289932,0.215886,89.317695,1.716934,0.325860,0.003482,0.0
2,47372,711989.0,3943481,11385600.0,66906,7067.0,10643.0,9039.0,0.0,4.7372,...,0.0,0.292743,4.399856,24.369407,70.359237,0.413457,0.043672,0.065770,0.055858,0.0
3,1455696,20317985.0,24388333,4326864.0,90190,11088.0,1646.0,3231.0,0.0,145.5696,...,0.0,2.877152,40.158063,48.203018,8.551954,0.178259,0.021915,0.003253,0.006386,0.0
4,64269,1371856.0,3003332,1929210.0,430038,16082.0,4093.0,2010.0,0.0,6.4269,...,0.0,0.942238,20.112566,44.031380,28.283846,6.304720,0.235776,0.060007,0.029468,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,3064704,231865.0,147982,323.0,164851,1551.0,7.0,0.0,0.0,306.4704,...,0.0,84.864687,6.420571,4.097768,0.008944,4.564887,0.042949,0.000194,0.000000,0.0
257,2450674,70201.0,445701,3045.0,85657,26169.0,3265.0,9.0,0.0,245.0674,...,0.0,79.445564,2.275765,14.448665,0.098712,2.776815,0.848343,0.105844,0.000292,0.0
258,918434,87485.0,112920,312.0,130759,1169.0,5601.0,2.0,0.0,91.8434,...,0.0,73.084042,6.961586,8.985567,0.024827,10.405099,0.093023,0.445697,0.000159,0.0
259,2833985,571426.0,286034,37975.0,157590,555.0,0.0,4.0,0.0,283.3985,...,0.0,72.898642,14.698800,7.357657,0.976832,4.053690,0.014276,0.000000,0.000103,0.0


In [87]:
# Place the statistics columns next to the admin attributes. Rows are matched
# on the index, so both frames must share the same row labels.
admin_stats_df = pd.concat(objs=[inAdmin, stats_df_final], axis="columns")


In [88]:
admin_stats_df

Unnamed: 0,shapeName,shapeISO,shapeID,shapeGroup,shapeType,geometry,tree,shrub,grass,crop,...,mangrove_sqkm,tree_pct,shrub_pct,grass_pct,crop_pct,built_pct,bare_pct,water_pct,wetland_pct,mangrove_pct
0,Adenta Municipal,,2480657B31663195588056,GHA,ADM2,"POLYGON ((-9732.475 702346.917, -8374.825 7016...",23181,52621.0,141501,5205.0,...,0.0,2.530914,5.745190,15.449158,0.568285,74.744135,0.678557,0.227969,0.055791,0.0
1,Ledzokuku Municipal,,2480657B81218626769676,GHA,ADM2,"POLYGON ((-14032.270 696375.455, -12774.748 69...",3557,338.0,25123,744.0,...,0.0,1.032133,0.098077,7.289932,0.215886,89.317695,1.716934,0.325860,0.003482,0.0
2,Chereponi,,2480657B59683408463573,GHA,ADM2,"POLYGON ((7115.078 1229905.419, 7125.975 12312...",47372,711989.0,3943481,11385600.0,...,0.0,0.292743,4.399856,24.369407,70.359237,0.413457,0.043672,0.065770,0.055858,0.0
3,Wa East,,2480657B86087610655767,GHA,ADM2,"POLYGON ((-165103.465 1255389.061, -164705.267...",1455696,20317985.0,24388333,4326864.0,...,0.0,2.877152,40.158063,48.203018,8.551954,0.178259,0.021915,0.003253,0.006386,0.0
4,Wa Municipal,,2480657B6168445767624,GHA,ADM2,"POLYGON ((-238618.736 1254044.427, -238555.285...",64269,1371856.0,3003332,1929210.0,...,0.0,0.942238,20.112566,44.031380,28.283846,6.304720,0.235776,0.060007,0.029468,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,Akwapem North,,2480657B49720933786015,GHA,ADM2,"POLYGON ((-1260.243 740845.856, -812.879 74053...",3064704,231865.0,147982,323.0,...,0.0,84.864687,6.420571,4.097768,0.008944,4.564887,0.042949,0.000194,0.000000,0.0
257,Fanteakwa South,,2480657B97880290767835,GHA,ADM2,"POLYGON ((-20445.994 800147.817, -20450.484 80...",2450674,70201.0,445701,3045.0,...,0.0,79.445564,2.275765,14.448665,0.098712,2.776815,0.848343,0.105844,0.000292,0.0
258,Birim Central Municipal,,2480657B13510680910521,GHA,ADM2,"POLYGON ((-94840.785 735523.423, -93923.241 73...",918434,87485.0,112920,312.0,...,0.0,73.084042,6.961586,8.985567,0.024827,10.405099,0.093023,0.445697,0.000159,0.0
259,Hohoe Municipal,,2480657B21040345642773,GHA,ADM2,"POLYGON ((39032.925 892526.691, 39611.139 8826...",2833985,571426.0,286034,37975.0,...,0.0,72.898642,14.698800,7.357657,0.976832,4.053690,0.014276,0.000000,0.000103,0.0


### export admins with stats

In [90]:
# Write the admin polygons together with their statistics to a shapefile.
# NOTE(review): Shapefile field names are limited to 10 characters, so longer
# columns such as 'mangrove_sqkm' or 'wetland_pct' are likely truncated on
# disk — verify the exported field names.
shp_out = r"output/admin_stats_ghana.shp"
admin_stats_df.to_file(shp_out)


  admin_stats_df.to_file(r"output/admin_stats_ghana.shp")


### Save a CSV file

In [93]:
# Leave the geometry column out of the tabular export; it was causing the CSV
# to have bad formatting.
admin_stats_df_csv = admin_stats_df.drop(columns='geometry')

In [94]:
# Write the attribute table (without geometry) to CSV; the index is written
# as the first column.
admin_stats_df_csv.to_csv(path_or_buf=r"output/admin_stats_ghana2.csv")