# Capao Urban Rate

This notebook estimates the current urban growth rate of Vale do Capão, Palmeiras, BA, Brasil. By using Google Earth Engine satellite images and analyzing RGB pixel data, we can determine the growth rate in areas featuring houses, roads, construction sites, or where forests have been cleared for human use. For this case study, we have chosen USGS Landsat satellite images.

In [None]:
import ee
import geemap as geemap
import pandas as pd

from pprint import pprint
from utils.utils import mileseconds_to_date
from utils.features import get_coordinates
from utils.contants import (
  PROJECT, 
  GEO_POINT,
  GEO_PLACE,
  BANDS_STANDARD,
  BANDS_LANDSET_7,
  BANDS,
)

# Authenticate with Earth Engine first, then initialize the client for PROJECT.
ee.Authenticate()
ee.Initialize(project=PROJECT)
# geemap's own initialization helper runs after the explicit ee setup above.
geemap.ee_initialize()

# Feature Collection 

In [None]:
# Urban samples: read the CSV, turn each '.geo' entry into coordinates,
# and wrap every coordinate pair in a point feature labelled class 1.
df = pd.read_csv('./data/urban_features.csv')
raw_urban = df['.geo'].tolist()
urban = [get_coordinates(geo) for geo in raw_urban]

urban_features_list = [
  ee.Feature(ee.Geometry.Point(coords[0], coords[1]), {'class': 1})
  for coords in urban
]

# Vegetation samples: same pipeline, labelled class 0.
df = pd.read_csv('./data/vegetation_feature.csv')
raw_vegetation = df['.geo'].tolist()
vegetation = [get_coordinates(geo) for geo in raw_vegetation]

vegetation_features_list = [
  ee.Feature(ee.Geometry.Point(coords[0], coords[1]), {'class': 0})
  for coords in vegetation
]

# One collection per class, then a single merged collection of labelled points.
urban_features = ee.FeatureCollection(urban_features_list, 'urban')
vegetation_features = ee.FeatureCollection(vegetation_features_list, 'vegetation')

feature = urban_features.merge(vegetation_features)

# Dataset

In [None]:
# Reference geometries built from the project constants: a point used to
# filter scenes and a polygon delimiting the study area.
geo_point = ee.Geometry.Point(GEO_POINT)
geo_place = ee.Geometry.Polygon(GEO_PLACE)

# Least-cloudy Landsat 9 scene of 2023 that covers the point.
# filterDate's end date is exclusive, so the range ends on 2024-01-01
# to keep the last days of December in the search.
raw_image = (
    ee.ImageCollection("LANDSAT/LC09/C02/T1")
      .filterBounds(geo_point)
      .filterDate('2023-01-01', '2024-01-01')
      .sort('CLOUD_COVER')
      .first()
)

# Keep only the standard bands and give them the common BANDS names.
image = raw_image.select(BANDS_STANDARD).rename(BANDS)

# Property on each sample feature that carries the class label.
label = 'class'

# Sample the band values under every labelled point; geometries are kept
# on the output features.
feature_collection = image.select(BANDS).sampleRegions(
  collection = feature,
  properties = [label],
  scale = 10,
  geometries = True
)

# Random Forest Model

In [None]:
# Split the samples 80/20 on a random column. The two filters must be
# complementary so no validation sample is also used for training.
dataset = feature_collection.randomColumn()
training = dataset.filter('random <= 0.8')
validation = dataset.filter('random > 0.8')

# Random forest with 10 trees, trained on the BANDS values to predict `label`.
cls = ee.Classifier.smileRandomForest(10).train(training, label, BANDS)

# Resubstitution matrix on the training data, and an error matrix on the
# held-out validation samples.
train_accuracy = cls.confusionMatrix()
validation_sample = validation.classify(cls)
validation_accuracy = validation_sample.errorMatrix(label, 'classification')

# getInfo() fetches the computed values; without it the accuracy entries
# would be shown as unevaluated server-side objects.
display('Confusion Matrix', train_accuracy.getInfo())
display('Training overall accuracy', train_accuracy.accuracy().getInfo())
display('Validation error matrix', validation_accuracy.getInfo())
display('Validation accuracy', validation_accuracy.accuracy().getInfo())



# Image Series
Using Google Earth Engine image collections to extract a time series of images that represents the urban progression over the years.

- USGS Landsat 7 Collection 2 Tier 1 Raw Scenes. Image collection started from 1997
- USGS Landsat 8 Collection 2 Tier 1 Raw Scenes. Image collection started from 2013
- USGS Landsat 9 Collection 2 Tier 1 Raw Scenes. Image collection started from 2021

Images from 1997 and 1998 aren't available in the dataset

In [None]:

# Per-year record: source collection, collection size, selected image,
# acquisition date and the classified land cover.
image_series = {}
satelite = ''
cloud_cover = 50  # only referenced by the commented-out cloud-mask option below

for year in range(1999, 2024):
    # Pick the Landsat collection for the year together with its band list.
    if year < 2013:
        satelite = 'LANDSAT/LE07/C02/T1'
        current_bands = BANDS_LANDSET_7
    elif year < 2021:
        satelite = 'LANDSAT/LC08/C02/T1'
        current_bands = BANDS_STANDARD
    else:
        satelite = 'LANDSAT/LC09/C02/T1'
        current_bands = BANDS_STANDARD

    # filterDate's end date is exclusive, so end on Jan 1 of the next year
    # to cover the whole calendar year.
    image_collection = (ee.ImageCollection(satelite)
        .filterBounds(geo_point)
        .filterDate(f'{year}-01-01', f'{year + 1}-01-01')
    )

    # SUPPORT: Get a mosaic from the first 3 images to get better image result
    # filtered_colletion = image_collection.limit(3)
    # raw_image = ee.ImageCollection(filtered_colletion).mosaic()

    # SUPPORT: Get the first image from the collection sorted by cloud cover
    raw_image = image_collection.sort('CLOUD_COVER').first()

    # Get image infos. The date must come from this year's raw_image;
    # `image` still holds the previous iteration's image at this point.
    collection_size = image_collection.size().getInfo()
    raw_date = raw_image.date().getInfo().get('value')
    image_date = mileseconds_to_date(raw_date)

    # SUPPORT: Cloud mask to fix the cloud cover incidence
    # image = raw_image.select(current_bands).mask(raw_image.select('CLOUD_COVER').lte(cloud_cover))

    # Changing the band names so every sensor exposes the same BANDS
    image = raw_image.select(current_bands).rename(BANDS)

    # Store data
    image_series[year] = {
        'satelite': satelite,
        'collection size':collection_size,
        'image': image,
        'date': image_date,
        'landcover_classified': image.clip(geo_place).select(BANDS).classify(cls)
    }

# Urban rate 

- The area is computed in m² and then converted to km²

In [None]:

#TODO: Create a dataset with the urban area amount per year
#TODO: Create a regression model to predict the urban area amount for the next 10 years.

for year in range(1999, 2024):
    classified_image = image_series[year]['landcover_classified']
    # Per-pixel area image with the classification attached as a second band.
    areaImage = ee.Image.pixelArea().addBands(classified_image)

    # Sum the area of the pixels classified as 0 (vegetation). The region is
    # passed explicitly instead of relying on the default image footprint.
    area_class_0 = areaImage.updateMask(classified_image.eq(0)).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=geo_place,
        scale=30,
        maxPixels=1e13
    )

    # Same reduction for the pixels classified as 1 (urban).
    area_class_1 = areaImage.updateMask(classified_image.eq(1)).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=geo_place,
        scale=30,
        maxPixels=1e13
    )

    # Fetch the summed 'area' band of each class and store it on the record.
    image_series[year]["vegetation_area"] = area_class_0.get('area').getInfo()
    image_series[year]["urban_area"] = area_class_1.get('area').getInfo()

In [27]:
# Tabulate the classified areas per year.
# Areas are divided by 1e6 and rounded to two decimals before tabulating.

years = list(image_series)
satelites = [entry['satelite'] for entry in image_series.values()]
vegetation_areas = [
    round(entry['vegetation_area'] / 1e6, 2) for entry in image_series.values()
]
urban_areas = [
    round(entry['urban_area'] / 1e6, 2) for entry in image_series.values()
]

df_classified = pd.DataFrame({
    'year': years,
    'satelite': satelites,
    'vegetation_area': vegetation_areas,
    'urban_area': urban_areas,
})

df_classified

Unnamed: 0,year,satelite,vegetation_area,urban_area
0,1999,LANDSAT/LE07/C02/T1,21.84,0.0
1,2000,LANDSAT/LE07/C02/T1,21.84,0.0
2,2001,LANDSAT/LE07/C02/T1,21.84,0.0
3,2002,LANDSAT/LE07/C02/T1,21.84,0.0
4,2003,LANDSAT/LE07/C02/T1,16.87,0.0
5,2004,LANDSAT/LE07/C02/T1,16.7,0.0
6,2005,LANDSAT/LE07/C02/T1,16.38,0.0
7,2006,LANDSAT/LE07/C02/T1,16.7,0.0
8,2007,LANDSAT/LE07/C02/T1,17.13,0.0
9,2008,LANDSAT/LE07/C02/T1,16.6,0.0


# Map view

In [31]:
map = geemap.Map(center=[-12.609558240448216,-41.501150593949305], zoom=13)

# Year of the image series shown on the map.
year = 2023

# The stretch keys must be the strings "min"/"max"; unquoted min/max are the
# Python builtins and would not be read as visualization parameters.
# Bands are listed in R, G, B order so the layer renders in natural colour.
vis_params = {"bands": ["Red", "Green", "Blue"], "min": 0, "max": 2000, "gamma": 2.0}

# The classification is binary (0 = vegetation, 1 = urban), so the stretch
# and palette cover exactly those two values.
class_vis = {'min': 0, 'max': 1, 'palette': ['green', 'red']}

map.add_layer(image_series[year]['image'], vis_params, 'view')
map.add_layer(image_series[year]['landcover_classified'], class_vis, 'classified', opacity=0.25)

# Display the map
map


Map(center=[-12.609558240448216, -41.501150593949305], controls=(WidgetControl(options=['position', 'transpare…