<a href="https://colab.research.google.com/github/yodavo/moja-global-analysis/blob/main/2021_12_Task1-Worldclim%5BColombia-Belize%5D/Wordclim_clips.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install GDAL and Geopandas
!apt install gdal-bin python-gdal python3-gdal -q
!apt install python3-rtree -q
!pip install geopandas -q
!pip install descartes -q

# Folium 
!pip install folium

# for rasters
!pip install Rasterio
!pip install georasters -q

# Install Pysal - Spatial Statistics
!pip install pysal -q

!pip install splot -q

In [195]:
# for download and decompress the dataset
import os, zipfile, glob, io, requests

# control
import numpy as np

import pandas as pd
import geopandas as gpd
from osgeo import gdal
import georasters as gr

#plotting
import matplotlib.pyplot as plt


# **Arangements**

In [181]:
# creating folder to store information
%mkdir clip_shapefiles
%mkdir outcomes
%mkdir geodataframes

mkdir: cannot create directory ‘clip_shapefiles’: File exists
mkdir: cannot create directory ‘outcomes’: File exists


In [160]:
# Function to clip rasters
def clip_raster(list_raster, inshp, wordclim_dataset):
  ''' Get a clipped raster from the Worldclim data for specified countries:
  - list_raster : geotiff fetched from Worldclim
  - inshp : path clipped shapefile with the name of the country
  - wordclim_dataset: name of the dataset for outraster file name '''
  for i in list_raster:
    name, raster = i
    outraster = '/content/outcomes/' + str(wordclim_dataset) + '_' + str.upper(name) + '.tif'
    dsClip = gdal.Warp(outraster, raster, cutlineDSName=inshp, cropToCutline=True, dstNodata=np.nan)
    
    # visualization
    # plt.imshow(dsClip.GetRasterBand(1).ReadAsArray())
    # plt.colorbar()
    # plt.title(str.capitalize(wordclim_dataset) + ' - ' + str.upper(name))

# **Belize and Colombi Boundaries**

In [100]:
# importing the data
world_df = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# subseting
world_df = world_df[['continent','name', 'iso_a3','geometry']]

# croppping boundaries
colombia_bd = world_df.loc[world_df.name == 'Colombia', ].reset_index(drop=True)
colombia_bd.to_file("/content/clip_shapefiles/colombia.shp")
belize_bd = world_df.loc[world_df.name == 'Belize', ].reset_index(drop=True)
belize_bd.to_file("/content/clip_shapefiles/belize.shp")

In [144]:
# path to shapefiles
colombia = '/content/clip_shapefiles/colombia.shp'
belize = '/content/clip_shapefiles/belize.shp'

# **Getting clipped tiffs**

## **1. Wordclim 10m**

In [140]:
# fetching geotif form worldclime
! wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_bio.zip -q --show-progress
with zipfile.ZipFile('wc2.1_10m_bio.zip', 'r') as worldclim10m:
  worldclim10m.extractall("worldclim10")

# removing zip folder
%rm "wc2.1_10m_bio.zip"

# path to the folder
files = glob.glob(os.path.join('/content/worldclim10', '*tif'))
files.sort(key=lambda x:int(x[x.rfind('_')+1:-4]))

###### list of tiff files ######
# Chosing BIO2 and BIO12
subset = files[1], files[11]

# Rename and arrange the information
names = ['BIO'+ i[i.rfind('_')+1:-4] for i in subset]

# merging all the metadata
tiff_bioclim10 = list(zip(names, [gdal.Open(f) for f in subset]))
tiff_bioclim10.sort(key=lambda x:int(x[0][3:]))

# print the results
print('%2d tiff files were imported.' %(len(tiff_bioclim10)))

 2 tiff files were imported.


### **1.1.Clip the tiff files**

In [161]:
# Colombia
clip_raster(tiff_bioclim10, colombia, 'Colombia-BioClim10')

# Belize
clip_raster(tiff_bioclim10, belize, 'Belize-BioClim10')



---



## **2. Wordclim 5m**

In [170]:
# fetching geotif form worldclime
! wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_5m_bio.zip -q --show-progress
with zipfile.ZipFile('wc2.1_5m_bio.zip', 'r') as worldclim5m:
  worldclim5m.extractall("worldclim5")

# removing zip folder
%rm "wc2.1_5m_bio.zip"

# path to the folder
files = glob.glob(os.path.join('/content/worldclim5', '*tif'))
files.sort(key=lambda x:int(x[x.rfind('_')+1:-4]))

###### list of tiff files ######
# Chosing BIO2 and BIO12
subset = files[1], files[11]

# Rename and arrange the information
names = ['BIO'+ i[i.rfind('_')+1:-4] for i in subset]

# merging all the metadata
tiff_bioclim5 = list(zip(names, [gdal.Open(f) for f in subset]))
tiff_bioclim5.sort(key=lambda x:int(x[0][3:]))

# print the results
print('%2d tiff files were imported.' %(len(tiff_bioclim5)))

 2 tiff files were imported.


### **2.1.Clip the tiff files**

In [172]:
# Colombia
clip_raster(tiff_bioclim5, colombia, 'Colombia-BioClim5')

# Belize
clip_raster(tiff_bioclim5, belize, 'Belize-BioClim5')



---



## **3. Wordclim 2.5m**

In [174]:
# fetching geotif form worldclime
! wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_2.5m_bio.zip -q --show-progress
with zipfile.ZipFile('wc2.1_2.5m_bio.zip', 'r') as worldclim2_5m:
  worldclim2_5m.extractall("worldclim2_5")

# removing zip folder
%rm "wc2.1_2.5m_bio.zip"

# path to the folder
files = glob.glob(os.path.join('/content/worldclim2_5', '*tif'))
files.sort(key=lambda x:int(x[x.rfind('_')+1:-4]))

###### list of tiff files ######
# Chosing BIO2 and BIO12
subset = files[1], files[11]

# Rename and arrange the information
names = ['BIO'+ i[i.rfind('_')+1:-4] for i in subset]

# merging all the metadata
tiff_bioclim2_5 = list(zip(names, [gdal.Open(f) for f in subset]))
tiff_bioclim2_5.sort(key=lambda x:int(x[0][3:]))

# print the results
print('%2d tiff files were imported.' %(len(tiff_bioclim2_5)))

 2 tiff files were imported.


### **3.1.Clip the tiff files**

In [175]:
# Colombia
clip_raster(tiff_bioclim2_5, colombia, 'Colombia-BioClim2_5')

# Belize
clip_raster(tiff_bioclim2_5, belize, 'Belize-BioClim2_5')



---



## **4. Wordclim 30s**

In [176]:
# fetching geotif form worldclime
! wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_bio.zip -q --show-progress
with zipfile.ZipFile('wc2.1_30s_bio.zip', 'r') as worldclim30:
  worldclim30.extractall("worldclim30s")

# removing zip folder
%rm "wc2.1_30s_bio.zip"

# path to the folder
files = glob.glob(os.path.join('/content/worldclim30s', '*tif'))
files.sort(key=lambda x:int(x[x.rfind('_')+1:-4]))

###### list of tiff files ######
# Chosing BIO2 and BIO12
subset = files[1], files[11]

# Rename and arrange the information
names = ['BIO'+ i[i.rfind('_')+1:-4] for i in subset]

# merging all the metadata
tiff_bioclim30s = list(zip(names, [gdal.Open(f) for f in subset]))
tiff_bioclim30s.sort(key=lambda x:int(x[0][3:]))

# print the results
print('%2d tiff files were imported.' %(len(tiff_bioclim30s)))

 2 tiff files were imported.


### **4.1.Clip the tiff files**

In [177]:
# Colombia
clip_raster(tiff_bioclim30s, colombia, 'Colombia-BioClim30s')

# Belize
clip_raster(tiff_bioclim30s, belize, 'Belize-BioClim30s')

## **5. Exporting clips for GDrive**

In [178]:
# compressing the data in zip files
!zip -r clip_shapefiles[Colombia-Belize].zip clip_shapefiles/
!zip -r clip_tiffWorldclim[Colombia-Belize].zip outcomes/

  adding: clip_shapefiles/ (stored 0%)
  adding: clip_shapefiles/belize.shx (deflated 46%)
  adding: clip_shapefiles/belize.prj (deflated 17%)
  adding: clip_shapefiles/colombia.dbf (deflated 77%)
  adding: clip_shapefiles/belize.shp (deflated 27%)
  adding: clip_shapefiles/belize.cpg (stored 0%)
  adding: clip_shapefiles/colombia.cpg (stored 0%)
  adding: clip_shapefiles/colombia.shx (deflated 45%)
  adding: clip_shapefiles/colombia.shp (deflated 8%)
  adding: clip_shapefiles/colombia.prj (deflated 17%)
  adding: clip_shapefiles/belize.dbf (deflated 77%)
  adding: outcomes/ (stored 0%)
  adding: outcomes/Colombia-BioClim30s_BIO12.tif (deflated 79%)
  adding: outcomes/Colombia-BioClim2_5_BIO12.tif (deflated 78%)
  adding: outcomes/Colombia-BioClim2_5_BIO2.tif (deflated 67%)
  adding: outcomes/Colombia-BioClim30s_BIO2.tif (deflated 84%)
  adding: outcomes/Colombia-BioClim10_BIO12.tif (deflated 73%)
  adding: outcomes/Colombia-BioClim10_BIO2.tif (deflated 60%)
  adding: outcomes/Belize-B

In [None]:
# download files
from google.colab import files
files.download("/content/clip_shapefiles[Colombia-Belize].zip")
files.download("/content/clip_tiffWorldclim[Colombia-Belize].zip")

# **Getting GeoDataframes**

In [185]:
###### list of geodataframes ######
tiffs = glob.glob(os.path.join('/content/outcomes', '*tif'))

for i in tiffs:
  name = i[i.rfind('/')+1:-4]
  geo_df = gr.from_file(i).to_pandas()
  geo_df = geo_df.loc[:, ['value', 'x', 'y']]
  geo_df = geo_df.rename(columns  ={'x':'Latitude', 'y':'Longitude'})
  geo_df.to_csv('/content/geodataframes/'+ str(name) + '.csv', index=False)

In [194]:
# compressing the data in zip files
!zip -r clip_geodataframes[Colombia-Belize].zip geodataframes/

# download files
from google.colab import files
files.download("/content/clip_geodataframes[Colombia-Belize].zip")

updating: geodataframes/ (stored 0%)
updating: geodataframes/Belize-BioClim30s_BIO2.csv (deflated 89%)
updating: geodataframes/Colombia-BioClim2_5_BIO2.csv (deflated 85%)
updating: geodataframes/Colombia-BioClim10_BIO2.csv (deflated 83%)
updating: geodataframes/Belize-BioClim10_BIO12.csv (deflated 82%)
updating: geodataframes/Colombia-BioClim10_BIO12.csv (deflated 86%)
updating: geodataframes/Belize-BioClim5_BIO12.csv (deflated 85%)
updating: geodataframes/Belize-BioClim2_5_BIO2.csv (deflated 85%)
updating: geodataframes/Colombia-BioClim5_BIO2.csv (deflated 85%)
updating: geodataframes/Colombia-BioClim5_BIO12.csv (deflated 87%)
updating: geodataframes/Belize-BioClim2_5_BIO12.csv (deflated 88%)
updating: geodataframes/Colombia-BioClim2_5_BIO12.csv (deflated 87%)
updating: geodataframes/Belize-BioClim10_BIO2.csv (deflated 79%)
updating: geodataframes/Colombia-BioClim30s_BIO2.csv (deflated 79%)
updating: geodataframes/Colombia-BioClim30s_BIO12.csv (deflated 78%)
updating: geodataframes/Be

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>