In [1]:
import os, sys, time, pandas as pd, geopandas as gpd, rasterio as rt, numpy as np

In [2]:
import affine
from rasterio.warp import reproject
from rasterio.warp import Resampling

In [52]:
# File name of the input raster.
infil = r'pop15.tif'
# Presumably the nodata value of the input raster -- TODO confirm.
# NOTE(review): `ndv` is not referenced anywhere else in the visible notebook.
ndv = -1

# Open the raster and keep the dataset handle around: later cells read the
# profile, transform and CRS from `src`.
src = rt.open(
    os.path.join(r'C:\Users\charl\Documents\GOST\Yemen\worldpop', infil)
)

In [53]:
# Load every band as a masked array, then display the raw stored value of the
# top-left pixel of the first band (read from `.data`, so the mask is bypassed).
arr = src.read(masked=True)
arr.data[0, 0, 0]

-3.402823e+38

#### Remove insane values

In [38]:
# Sentinel written into implausibly large cells before they are masked.
max_int32 = 2147483647
# Tiny negative sentinel written into negative cells before they are masked.
small = -0.000001
# Overwrite every cell above one million with the large sentinel ...
arr[arr > 1E6] = max_int32
# ... and every negative cell with the small negative sentinel.
arr[arr < 0] = small
# Mask both sentinel ranges so they are excluded from the sum below.
arr = np.ma.masked_where(arr <= small, arr)
arr = np.ma.masked_where(arr >= max_int32, arr)
# Displayed result: sum over the cells that remain unmasked.
arr.sum()

28956732.0

In [39]:
# Write the cleaned array to "<input>_norm.tif". The output profile starts
# from the source raster's profile; only nodata and dtype are overridden.
profile = src.profile

d_type = rt.uint32
out_nodata = 0

profile.update(nodata = out_nodata, dtype = d_type)

# Fill masked cells with the declared nodata value before casting. Without
# this, a rasterio version that ignores the mask on write stores the sentinel
# values held underneath the mask as if they were valid data.
# NOTE(review): the cast to an unsigned integer type truncates fractional cell
# values -- confirm that integer counts are what is wanted here.
norm_data = arr.filled(out_nodata).astype(d_type)

with rt.open(os.path.join(r'C:\Users\charl\Documents\GOST\Yemen\worldpop',infil.replace('.tif','_norm.tif')), 'w', **profile) as dst:
    dst.write(norm_data)

In [40]:
# Resampling factor applied to both spatial axes (0.1 -> one tenth as many
# rows and columns in the output).
factah = 0.1

# Destination buffer: same band count as the source, spatial dimensions
# scaled by the factor and rounded to whole cells.
band_count, src_rows, src_cols = arr.shape
newarr = np.empty(
    shape=(band_count, round(src_rows * factah), round(src_cols * factah))
)

In [41]:
# Build the affine transform of the coarser output grid: the first and fifth
# coefficients of the source transform are divided by the factor (so they grow
# when the factor is below 1); the other four coefficients are carried over
# unchanged.
aff = src.transform

coarse_x_step = aff[0] / factah
coarse_y_step = aff[4] / factah
newaff = affine.Affine(
    coarse_x_step, aff[1], aff[2],
    aff[3], coarse_y_step, aff[5],
)

# Resample onto the coarse grid within the same CRS, averaging source cells.
reproject(
    arr,
    newarr,
    src_transform=aff,
    dst_transform=newaff,
    src_crs=src.crs,
    dst_crs=src.crs,
    resampling=Resampling.average,
)

# Multiply the averaged values by (1 / factor) squared, i.e. the nominal number
# of source cells per output cell, turning the mean back into a total.
cells_per_output_cell = (1 / factah) ** 2
newarr = newarr * cells_per_output_cell

### Remove insane values from the resampled array

In [42]:
# Clamp the resampled values in place: anything at or above the large sentinel
# is pinned to the sentinel, anything at or below the small negative sentinel
# is reset to zero.
at_or_above_max = newarr >= max_int32
at_or_below_small = newarr <= small
newarr[at_or_above_max] = max_int32
newarr[at_or_below_small] = 0

In [43]:
# Mask cells at or below the small negative sentinel, then cells at or above
# the large sentinel, and display the sum of what remains unmasked.
newarr = np.ma.masked_less_equal(newarr, small)
newarr = np.ma.masked_greater_equal(newarr, max_int32)
newarr.sum()

28711247.37282433

In [44]:
# Relative difference between the total before and after resampling,
# expressed as a fraction of the original total.
total_before = arr.sum()
total_after = newarr.sum()
(total_before - total_after) / total_before

0.008477635776567263

In [45]:
# Write the resampled array to "<input>_resampled.tif". The output profile
# starts from the source raster's profile.
profile = src.profile

d_type = rt.int32

# Override only what differs from the source: the grid size, the transform of
# the resampled grid, the data type and the nodata value (the large sentinel).
out_height, out_width = newarr.shape[1:]
profile.update(
    width=out_width,
    height=out_height,
    transform=newaff,
    dtype=d_type,
    nodata=max_int32,
)
outfil = infil.replace('.tif','_resampled.tif')

out_path = os.path.join(r'C:\Users\charl\Documents\GOST\Yemen\worldpop',outfil)
with rt.open(out_path, 'w', **profile) as dst:
    dst.write(newarr.astype(d_type))

### Convert to .csv
Use the QGIS toolbox function 'Vector creation --> Raster pixels to points' to turn the resulting raster into a point layer. Then run the step below.

In [55]:
# Convert a point shapefile to CSV with explicit coordinate columns.
pth = r'C:\Users\charl\Documents\GOST\Yemen\origins'
fil = r'origins_1km_2018.shp'
loc = os.path.join(pth, fil)

points_shp = gpd.read_file(loc)

# Use the vectorised GeoSeries coordinate accessors instead of a per-row
# Python lambda.
# NOTE(review): the column names assume the layer is in a geographic CRS
# (x = longitude, y = latitude) -- confirm against the shapefile's CRS.
points_shp['Longitude'] = points_shp['geometry'].x
points_shp['Latitude'] = points_shp['geometry'].y

# The CSV is written next to the shapefile, with the same base name.
points_shp.to_csv(os.path.join(pth, fil.replace('.shp','.csv')))