# Publication Analyses 
This notebook contains analyses and statistics for use in the publication. It is organized by section, and indicates where the output will be used in the paper.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import sys
#sys.path.append('scripts/')
#import statistical_analyses as st

%load_ext autoreload
%autoreload 2

# Aggregate Regional Statistics
Combines country-level statistics spreadsheets by region.

In [9]:
# Names used by the regional aggregation cells below, gathered into one flat
# list and grouped by the aggregate each name belongs to.
files = [
    # Brazil
    'Acre', 'Alagoas', 'Amapa', 'Amazonas', 'Bahia', 'Ceara', 'Distrito Federal',
    'Espirito Santo', 'Goias', 'Maranhao', 'Mato Grosso', 'Mato Grosso do Sul',
    'Minas Gerais', 'Para', 'Paraiba', 'Parana', 'Pernambuco', 'Piaui', 'Rio de Janeiro',
    'Rio Grande do Norte', 'Rondonia', 'Roraima', 'Sao Paulo', 'Sergipe', 'Tocantins',

    # Indonesia
    'Papua', 'Celebes', 'Sumatra', 'Kalimantan', 'Java',

    # Latin America (Brazil itself is covered by the entries above)
    'Argentina', 'Belize', 'Bolivia', 'Costa Rica', 'Colombia', 'Chile',
    'Ecuador', 'El Salvador', 'French Guiana', 'Guatemala', 'Honduras', 'Mexico',
    'Nicaragua', 'Panama', 'Paraguay', 'Peru', 'Suriname', 'Venezuela',

    # Caribbean
    'Caribbean', 'Cuba', 'Dominican Republic', 'Haiti', 'Guyana', 'Jamaica',

    # West Africa
    'Benin', 'Burkina Faso', 'Cameroon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau',
    'Ivory Coast', 'Liberia', 'Mali', 'Mauritania', 'Niger', 'Nigeria',
    'Senegal', 'Sierra Leone', 'Togo',

    # East Africa
    'Burundi', 'Ethiopia', 'Eritrea', 'Kenya', 'Madagascar', 'Malawi', 'Mozambique',
    'Rwanda', 'Somalia', 'Tanzania', 'Uganda', 'Zambia', 'Zimbabwe',

    # Central / South Africa
    'Botswana', 'Chad', 'Central African Republic', 'Democratic Republic of the Congo',
    'Equatorial Guinea', 'Swaziland', 'Gabon', 'Republic of Congo', 'Sudan', 'South Sudan',
    'Angola', 'Lesotho', 'Namibia', 'South Africa',

    # Asia (Indonesia itself is covered by the entries above)
    'Australia', 'Bangladesh', 'Brunei', 'Cambodia', 'China', 'East Timor', 'Hong Kong',
    'India', 'India islands', 'Laos', 'Malaysia', 'Myanmar', 'New Caledonia',
    'Solomon Islands', 'Singapore', 'Sri Lanka', 'Taiwan', 'Thailand', 'Philippines',
    'Vanuatu', 'Vietnam', 'Saudi Arabia', 'Yemen',
]
# TODO: Fiji does not appear in this list -- is it missing?
len(files)

120

In [8]:
# Create Brazil (admins will be a mix of states and admin2 districts).
# NOTE(review): `st` is only defined if the commented-out
# `import statistical_analyses as st` in the import cell is restored;
# on a fresh kernel this cell raises NameError as the notebook stands.
brazil_admins = [
    'Acre', 'Alagoas', 'Amapa', 'Amazonas', 'Bahia', 'Ceara', 'Distrito Federal',
    'Espirito Santo', 'Goias', 'Maranhao', 'Mato Grosso', 'Mato Grosso do Sul',
    'Minas Gerais', 'Para', 'Paraiba', 'Parana', 'Pernambuco', 'Piaui', 'Rio de Janeiro',
    'Rio Grande do Norte', 'Rondonia', 'Roraima', 'Sao Paulo', 'Sergipe', 'Tocantins',
]
st.create_regional_csv(brazil_admins, 'Brazil')

In [205]:
# Create Indonesia from its island-level entries.
indonesia_islands = ['Papua', 'Celebes', 'Sumatra', 'Kalimantan', 'Java']
st.create_regional_csv(indonesia_islands, 'Indonesia')

In [13]:
# Latin America (uses the 'Brazil' aggregate created in an earlier cell).
lat_am_countries = [
    'Argentina', 'Belize', 'Bolivia', 'Brazil', 'Costa Rica', 'Colombia', 'Chile',
    'Ecuador', 'El Salvador', 'French Guiana', 'Guatemala', 'Honduras', 'Mexico',
    'Nicaragua', 'Panama', 'Paraguay', 'Peru', 'Suriname', 'Venezuela',
]
st.create_regional_csv(lat_am_countries, 'lat_am')

In [15]:
# Caribbean
caribbean_countries = [
    'Caribbean', 'Cuba', 'Dominican Republic', 'Haiti', 'Guyana', 'Jamaica',
]
st.create_regional_csv(caribbean_countries, 'caribbean')

In [16]:
# West Africa
west_af_countries = [
    'Benin', 'Burkina Faso', 'Cameroon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau',
    'Ivory Coast', 'Liberia', 'Mali', 'Mauritania', 'Niger', 'Nigeria',
    'Senegal', 'Sierra Leone', 'Togo',
]
st.create_regional_csv(west_af_countries, 'west_af')

In [91]:
# East Africa
east_af_countries = [
    'Burundi', 'Ethiopia', 'Eritrea', 'Kenya', 'Madagascar', 'Malawi', 'Mozambique',
    'Rwanda', 'Somalia', 'Tanzania', 'Uganda', 'Zambia', 'Zimbabwe',
]
st.create_regional_csv(east_af_countries, 'east_af')

In [92]:
# Central / South Africa
central_south_af_countries = [
    'Botswana', 'Chad', 'Central African Republic', 'Democratic Republic of the Congo',
    'Equatorial Guinea', 'Swaziland', 'Gabon', 'Republic of Congo', 'Sudan', 'South Sudan',
    'Angola', 'Lesotho', 'Namibia', 'South Africa',
]
st.create_regional_csv(central_south_af_countries, 'central_south_af')

In [13]:
# Asia (uses the 'Indonesia' aggregate created in an earlier cell).
asia_countries = [
    'Australia', 'Bangladesh', 'Brunei', 'Cambodia', 'China', 'East Timor', 'Hong Kong',
    'India', 'India islands', 'Indonesia', 'Laos', 'Malaysia', 'Myanmar', 'New Caledonia',
    'Solomon Islands', 'Singapore', 'Sri Lanka', 'Taiwan', 'Thailand', 'Philippines',
    'Vanuatu', 'Vietnam', 'Saudi Arabia', 'Yemen',
]
st.create_regional_csv(asia_countries, 'asia')

# Supplementary Index

## Table 1

In [18]:
def _lcc_stats(country_df, esa_ids, country, label):

    '''
    Summarises one land cover class for a single country.

    country_df: rows of a regional statistics table for one country
    esa_ids: esa_id values that make up the land cover class
    country: country name (only used in warning messages)
    label: short class name used in warning messages ('ag' or 'urban')

    Returns (perc_over10, avg), both rounded to 2 decimals:
    perc_over10 is the % of the class's tof_ha that falls outside the '0-9'
    tree cover class; avg is the mean of tof_mean over the unique
    (country, admin, tof_mean) rows. Both are NaN if the country has no rows
    for the class.
    '''

    nan = float('nan')
    lcc_df = country_df[country_df.esa_id.isin(esa_ids)]

    # A country without this land cover class used to crash the whole table;
    # report it and leave the cells empty instead.
    if lcc_df.empty:
        print(f'Warning: {country} has no {label} rows')
        return nan, nan

    # ha per tree cover class; select ">10%" by label rather than by position
    # so a country with no '0-9' rows does not lose its first real class.
    ha_by_class = lcc_df.groupby('tree_cover_class')['tof_ha'].sum()
    ha_over10 = ha_by_class.drop('0-9', errors='ignore').sum()

    # total ha (sum of tof ha == sum of esa sampled)
    sampled = ha_by_class.sum()

    # quick check that the total is accurate
    sampled_check = (lcc_df[['country', 'admin', 'esa_id', 'esa_sampled_ha']]
                     .drop_duplicates()
                     .esa_sampled_ha
                     .sum())
    if sampled != sampled_check:
        print(f'Warning: {country} does not pass {label} check')

    # % land with >10% cover (undefined when nothing was sampled)
    perc_over10 = (ha_over10 / sampled) * 100 if sampled else nan

    # avg tree cover: select the numeric column explicitly so the string
    # 'admin' column never reaches the aggregation
    avg = (lcc_df[['country', 'admin', 'tof_mean']]
           .drop_duplicates()
           .tof_mean
           .mean())

    return round(perc_over10, 2), round(avg, 2)


def si_table1(dst_file, regions):

    '''
    For insertion in the supplementary index. Creates a table illustrating the following
    metrics for cropland and urban areas in each country:

    1) Average tree cover
    2) % land containing >10% tree cover.

    dst_file: path the table is written to as csv
    regions: names of regional tables, each read from statistics/{region}.csv
             (NOTE(review): total_ha reads from stats/ instead -- confirm which
             directory is current)

    Cropland is esa_id 10, 11, 12, 20, 30, 40; urban is esa_id 190.
    Countries with no rows for a class get NaN in perc_over10 and avg.

    Saves table as csv file and returns as df with columns
    region, country, lcc, perc_over10, avg (two rows per country).

    '''

    # (table label, warning label, esa ids)
    lcc_classes = [('Cropland', 'ag', [10.0, 11.0, 12.0, 20.0, 30.0, 40.0]),
                   ('Urban', 'urban', [190.0])]
    columns = ['region', 'country', 'lcc', 'perc_over10', 'avg']

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copies the whole table on every call.
    records = []

    for region in regions:

        region_df = pd.read_csv(f'statistics/{region}.csv')

        for country in sorted(region_df.country.unique()):

            country_df = region_df[region_df.country == country]

            for lcc, label, esa_ids in lcc_classes:
                perc_over10, avg = _lcc_stats(country_df, esa_ids, country, label)
                records.append({'region': region,
                                'country': country,
                                'lcc': lcc,
                                'perc_over10': perc_over10,
                                'avg': avg})

    table = pd.DataFrame(records, columns=columns)
    table.to_csv(dst_file, index=False)

    return table

In [15]:
si = si_table1(
    dst_file='statistics/si_table1.csv',
    regions=['lat_am', 'caribbean', 'west_af', 'east_af', 'central_south_af', 'asia'],
)



# Main Paper

## Results

In [8]:
def total_ha(dst_file, regions):

    '''
    For insertion of statistics in the "Results" section. Creates a table illustrating
    the following metrics for cropland and urban areas in each country:

    1) Total ha above 10%

    dst_file: path the table is written to as csv; pass None to skip saving
    regions: names of regional tables, each read from stats/{region}.csv
             (NOTE(review): si_table1 reads from statistics/ instead -- confirm
             which directory is current)

    Cropland is esa_id 10, 11, 12, 20, 30, 40; urban is esa_id 190. "Above 10%"
    means every tree_cover_class other than '0-9'.

    Prints the cropland, urban and combined totals and returns the table as df
    with columns region, country, lcc, ha_over10 (two rows per country).

    '''

    lcc_ids = {'Cropland': [10.0, 11.0, 12.0, 20.0, 30.0, 40.0],
               'Urban': [190.0]}

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copies the whole table on every call.
    records = []

    for region in regions:

        region_df = pd.read_csv(f'stats/{region}.csv')

        for country in sorted(region_df.country.unique()):

            country_df = region_df[region_df.country == country]
            over10_rows = country_df.tree_cover_class != '0-9'

            for lcc, esa_ids in lcc_ids.items():
                in_lcc = country_df.esa_id.isin(esa_ids)
                records.append({'region': region,
                                'country': country,
                                'lcc': lcc,
                                'ha_over10': country_df.loc[in_lcc & over10_rows, 'tof_ha'].sum()})

    table = pd.DataFrame(records, columns=['region', 'country', 'lcc', 'ha_over10'])

    # dst_file used to be ignored entirely; honour it, but keep None as "don't save".
    if dst_file is not None:
        table.to_csv(dst_file, index=False)

    # Look totals up by label, not position, and tolerate an empty table.
    totals = table.groupby('lcc')['ha_over10'].sum()
    cropland_total = totals.get('Cropland', 0)
    urban_total = totals.get('Urban', 0)
    print(f'Total ha of cropland >10%: {cropland_total}')
    print(f'Total ha of urban land >10%: {urban_total}')
    print(f'Total ha of land >10%: {table["ha_over10"].sum()}')

    return table


In [9]:
results = total_ha(dst_file=None, regions=['Guatemala'])

Total ha of cropland >10%: 1550405
Total ha of urban land >10%: 52494
Total ha of land >10%: 1602899


In [22]:
# NOTE(review): 'asia' is not in this region list although the SI table cell
# includes it -- confirm whether that is intentional.
results = total_ha(
    dst_file='statistics/results_totalha.csv',
    regions=['lat_am', 'caribbean', 'west_af', 'east_af', 'central_south_af'],
)

Total ha of cropland >10%: 321516190
Total ha of urban land >10%: 4489620


In [28]:
def total_ha_pantropics(regions):

    '''
    Total ha of land with >10% tree cover per country, across all land cover classes.

    regions: names of regional tables, each read from stats/{region}.csv

    For every country the tof_ha of all rows whose tree_cover_class is not '0-9'
    is summed. After each region its own total is printed, converted from ha to
    sq km (1 sq km = 100 ha).

    Returns a df with columns region, country, ha_over10 (one row per country).
    '''

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copies the whole table on every call.
    records = []

    for region in regions:

        region_df = pd.read_csv(f'stats/{region}.csv')

        # Running total for this region only; summing the whole table here
        # would report the cumulative total of every region processed so far.
        region_ha = 0

        for country in sorted(region_df.country.unique()):

            country_df = region_df[region_df.country == country]
            over10 = country_df.loc[country_df.tree_cover_class != '0-9', 'tof_ha'].sum()

            records.append({'region': region,
                            'country': country,
                            'ha_over10': over10})
            region_ha += over10

        print(f'Total sq km of land >10% in {region}: {region_ha / 1e2}')

    return pd.DataFrame(records, columns=['region', 'country', 'ha_over10'])

In [29]:
# Each call prints its own total; `total` keeps only the last table (Tanzania).
total = total_ha_pantropics(regions=['Mali'])
total = total_ha_pantropics(regions=['Guatemala'])
total = total_ha_pantropics(regions=['Tanzania'])

Total km of land >10% in Mali: 209734.66
Total km of land >10% in Guatemala: 86630.13
Total km of land >10% in Tanzania: 600981.16


## Table 3
The extent of trees on, and the tree cover of, urban land and cropland within the tropics. 

In [5]:
def main_table3(dst_file):

    '''
    Creates a table illustrating the following metrics for cropland and urban areas in each region:
    1) Average tree cover per land cover class.
    2) % cropland and urban land containing >10% tree cover.

    Both metrics are the unweighted mean of the country-level values in
    statistics/si_table1.csv, rounded to 2 decimals.

    dst_file: path the table is written to as csv

    Saves table as csv file and returns as df with columns
    region, lcc, perc_over10, avg.
    '''
    df = pd.read_csv('statistics/si_table1.csv')

    # Average only the numeric columns: the table also carries the string
    # 'country' column, which makes a bare groupby().mean() raise in pandas >= 2.0.
    region_avg = (df.groupby(['region', 'lcc'])[['perc_over10', 'avg']]
                    .mean()
                    .round(2)
                    .reset_index())

    region_avg.to_csv(dst_file, index=False)

    return region_avg

In [6]:
df = main_table3(dst_file='statistics/main_table3.csv')

In [7]:
df

Unnamed: 0,region,lcc,perc_over10,avg
0,asia,Cropland,65.86,37.4
1,asia,Urban,63.26,26.99
2,caribbean,Cropland,65.07,35.48
3,caribbean,Urban,71.45,27.23
4,central_south_af,Cropland,51.45,29.69
5,central_south_af,Urban,50.73,17.75
6,east_af,Cropland,52.2,23.01
7,east_af,Urban,44.61,14.96
8,lat_am,Cropland,55.23,29.53
9,lat_am,Urban,53.27,21.71


In [10]:
# Unweighted mean, across the regions in `df`, of the % of cropland with >10% tree cover
cropland = df.query("lcc == 'Cropland'")
mean_cropland = cropland['perc_over10'].mean()
mean_cropland

57.346666666666664

In [11]:
# Unweighted mean, across the regions in `df`, of the % of urban land with >10% tree cover
urban = df.query("lcc == 'Urban'")
mean_urban = urban['perc_over10'].mean()
mean_urban

54.803333333333335

In [21]:
def main_table1(region):

    '''
    Average tree cover per country and for the region as a whole.

    region: name of a regional table, read from statistics/{region}.csv

    The table is reduced to its unique (country, admin, esa_id, esa_class,
    tof_mean) rows, tof_mean is averaged per country, and the regional figure
    is the mean of those country averages rounded to 2 decimals.

    Returns (country_avg, regional_avg): a df with columns country, tof_mean
    and a single number. Nothing is written to disk.
    '''

    stats = pd.read_csv(f'statistics/{region}.csv')

    # one line per lcc, per admin, per country
    keep = ['country', 'admin', 'esa_id', 'esa_class', 'tof_mean']
    unique_rows = stats[keep].drop_duplicates(ignore_index=True)

    # average tree cover per country
    country_avg = unique_rows.groupby('country', as_index=False)['tof_mean'].mean()

    # regional average = mean of the country averages
    regional_avg = round(country_avg['tof_mean'].mean(), 2)

    return country_avg, regional_avg


In [54]:
# `avg_tree_cover_regional` is not defined anywhere in this notebook; the
# function defined above with the same signature and return value is main_table1.
la_country_avg, la_regional_avg = main_table1('lat_am')

In [55]:
la_regional_avg

37.52

In [56]:
la_country_avg

Unnamed: 0,country,tof_mean
0,Belize,33.171111
1,Costa Rica,38.20719
2,El Salvador,37.85995
3,Guatemala,41.923313
4,Honduras,39.987024
5,Nicaragua,36.550468
6,Panama,45.803624
7,Paraguay,26.634319


In [57]:
# `avg_tree_cover_regional` is not defined anywhere in this notebook; the
# function defined above with the same signature and return value is main_table1.
wa_country_avg, wa_regional_avg = main_table1('west_af')

In [58]:
wa_regional_avg

27.37

In [59]:
wa_country_avg

Unnamed: 0,country,tof_mean
0,Benin,19.830355
1,Burkina Faso,17.107922
2,Gambia,33.805464
3,Ghana,21.394469
4,Guinea,35.784328
5,Guinea Bissau,38.091484
6,Ivory Coast,39.294233
7,Liberia,55.8272
8,Mali,16.698218
9,Mauritania,9.144545


In [60]:
# `avg_tree_cover_regional` is not defined anywhere in this notebook; the
# function defined above with the same signature and return value is main_table1.
ea_country_avg, ea_regional_avg = main_table1('east_af')

In [61]:
ea_regional_avg

23.29

In [62]:
ea_country_avg

Unnamed: 0,country,tof_mean
0,Eritrea,15.137423
1,Ethiopia,23.946181
2,Kenya,29.861059
3,Madagascar,25.360762
4,Rwanda,27.78413
5,South Sudan,25.552124
6,Sudan,15.404814


## Discussion

### Urban tree cover

In [18]:
# Use the SI table to rank urban areas by % of land with >10% tree cover
# (top 20), then show the entries from Asia.
urban = si.loc[si.lcc == 'Urban']
top20 = urban.sort_values(by='perc_over10', ascending=False).head(20)
top20.loc[top20.region == 'asia']

Unnamed: 0,region,country,lcc,perc_over10,avg
173,asia,Sri Lanka,Urban,88.76,39.01
179,asia,Vanuatu,Urban,88.7,55.38
171,asia,Solomon Islands,Urban,85.94,57.13
141,asia,Brunei,Urban,79.88,29.8
153,asia,India Islands,Urban,78.16,40.1
157,asia,Laos,Urban,76.83,22.34
177,asia,Thailand,Urban,74.87,28.1
161,asia,Myanmar,Urban,74.67,31.57
139,asia,Bangladesh,Urban,73.69,31.35


In [19]:
# Top-20 entries from Latin America and the Caribbean
top20[top20.region.isin(['lat_am', 'caribbean'])]

Unnamed: 0,region,country,lcc,perc_over10,avg
31,lat_am,Paraguay,Urban,84.61,28.23
43,caribbean,Dominican Republic,Urban,79.94,28.34
41,caribbean,Cuba,Urban,77.45,25.01
39,caribbean,Caribbean,Urban,76.5,34.08
17,lat_am,El Salvador,Urban,76.29,32.49
49,caribbean,Jamaica,Urban,75.98,33.2
3,lat_am,Belize,Urban,73.91,23.01
27,lat_am,Nicaragua,Urban,73.23,24.47
