## Setup and define emission variables

In [None]:
from google.cloud import bigquery
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# BigQuery client bound to the project that hosts the emissions table.
client = bigquery.Client(project='code-for-planet')

# Columns profiled by the null / zero / positive queries in this notebook.
# The order here is the order of the derived columns in each result.
emission_vars = [
    'area_burned',
    'prefire_fuel',
    'consumed_fuel',
    'ECO2',
    'ECO',
    'ECH4',
    'EPM2_5',
    'cwd_frac',
    'duff_frac',
]

## Basic counts - total records, years covered, and unique fire events

In [None]:
# Table-wide profile: row count, number of distinct years and fire events,
# and the first and last year present.
basic_counts_query = """
SELECT
    COUNT(*) as total_records,
    COUNT(DISTINCT year) as years_covered,
    COUNT(DISTINCT global_fire_event_id) as unique_fire_events,
    MIN(year) as min_year,
    MAX(year) as max_year
FROM `code-for-planet.emission_db.emissions_db_final`
"""

# Run the query in BigQuery and materialize the result as a DataFrame.
basic_counts = (
    client
    .query(basic_counts_query)
    .to_dataframe()
)
basic_counts

Unnamed: 0,total_records,years_covered,unique_fire_events,min_year,max_year
0,7254534,13,1028764,2003,2015


## Null value analysis for each emission variable

In [None]:
# For every profiled variable, emit one NULL-count column and one
# NULL-percentage column (percentage is relative to the full row count).
null_count_cols = ', '.join(
    f'COUNTIF({var} IS NULL) as {var}_nulls' for var in emission_vars
)
null_pct_cols = ', '.join(
    f'COUNTIF({var} IS NULL) * 100.0 / COUNT(*) as {var}_null_pct'
    for var in emission_vars
)

null_analysis_query = f"""
SELECT
    {null_count_cols},
    {null_pct_cols},
    COUNT(*) as total_records
FROM `code-for-planet.emission_db.emissions_db_final`
"""

# Single-row result: all counts first, then all percentages, then the total.
null_analysis = (
    client
    .query(null_analysis_query)
    .to_dataframe()
)
null_analysis

Unnamed: 0,area_burned_nulls,prefire_fuel_nulls,consumed_fuel_nulls,ECO2_nulls,ECO_nulls,ECH4_nulls,EPM2_5_nulls,cwd_frac_nulls,duff_frac_nulls,area_burned_null_pct,prefire_fuel_null_pct,consumed_fuel_null_pct,ECO2_null_pct,ECO_null_pct,ECH4_null_pct,EPM2_5_null_pct,cwd_frac_null_pct,duff_frac_null_pct,total_records
0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7254534


## Zero value analysis for each emission variable

In [None]:
# For every profiled variable, emit one exact-zero count column and one
# exact-zero percentage column (percentage is relative to the full row count).
zero_count_cols = ', '.join(
    f'COUNTIF({var} = 0) as {var}_zeros' for var in emission_vars
)
zero_pct_cols = ', '.join(
    f'COUNTIF({var} = 0) * 100.0 / COUNT(*) as {var}_zero_pct'
    for var in emission_vars
)

zero_analysis_query = f"""
SELECT
    {zero_count_cols},
    {zero_pct_cols},
    COUNT(*) as total_records
FROM `code-for-planet.emission_db.emissions_db_final`
"""

# Single-row result: all counts first, then all percentages, then the total.
zero_analysis = (
    client
    .query(zero_analysis_query)
    .to_dataframe()
)
zero_analysis


Unnamed: 0,area_burned_zeros,prefire_fuel_zeros,consumed_fuel_zeros,ECO2_zeros,ECO_zeros,ECH4_zeros,EPM2_5_zeros,cwd_frac_zeros,duff_frac_zeros,area_burned_zero_pct,prefire_fuel_zero_pct,consumed_fuel_zero_pct,ECO2_zero_pct,ECO_zero_pct,ECH4_zero_pct,EPM2_5_zero_pct,cwd_frac_zero_pct,duff_frac_zero_pct,total_records
0,2312632,1293962,1293962,1293962,1293962,1293962,1293962,4472065,4472065,31.878436,17.836597,17.836597,17.836597,17.836597,17.836597,17.836597,61.645104,61.645104,7254534


## Combined null and zero analysis

In [None]:
# Three column groups per profiled variable:
#   * rows where the value is NULL or exactly zero (count),
#   * the same as a percentage of all rows,
#   * rows where the value is strictly positive (count).
null_or_zero_count_cols = ', '.join(
    f'COUNTIF({var} IS NULL OR {var} = 0) as {var}_null_or_zero'
    for var in emission_vars
)
null_or_zero_pct_cols = ', '.join(
    f'COUNTIF({var} IS NULL OR {var} = 0) * 100.0 / COUNT(*) as {var}_null_or_zero_pct'
    for var in emission_vars
)
positive_count_cols = ', '.join(
    f'COUNTIF({var} > 0) as {var}_positive_values' for var in emission_vars
)

combined_analysis_query = f"""
SELECT
    {null_or_zero_count_cols},
    {null_or_zero_pct_cols},
    {positive_count_cols},
    COUNT(*) as total_records
FROM `code-for-planet.emission_db.emissions_db_final`
"""

# Single-row summary over the whole table.
combined_analysis = (
    client
    .query(combined_analysis_query)
    .to_dataframe()
)
combined_analysis

Unnamed: 0,area_burned_null_or_zero,prefire_fuel_null_or_zero,consumed_fuel_null_or_zero,ECO2_null_or_zero,ECO_null_or_zero,ECH4_null_or_zero,EPM2_5_null_or_zero,cwd_frac_null_or_zero,duff_frac_null_or_zero,area_burned_null_or_zero_pct,...,area_burned_positive_values,prefire_fuel_positive_values,consumed_fuel_positive_values,ECO2_positive_values,ECO_positive_values,ECH4_positive_values,EPM2_5_positive_values,cwd_frac_positive_values,duff_frac_positive_values,total_records
0,2312632,1293962,1293962,1293962,1293962,1293962,1293962,4472065,4472065,31.878436,...,4941902,5960572,5960572,5960572,5960572,5960572,5960572,2782469,2782469,7254534


## Year-by-year patterns

In [None]:
# Per-year breakdown: for each profiled variable, count the rows that are
# exactly zero, NULL, and strictly positive.
yearly_zero_cols = ', '.join(
    f'COUNTIF({var} = 0) as {var}_zeros' for var in emission_vars
)
yearly_null_cols = ', '.join(
    f'COUNTIF({var} IS NULL) as {var}_nulls' for var in emission_vars
)
yearly_positive_cols = ', '.join(
    f'COUNTIF({var} > 0) as {var}_positive' for var in emission_vars
)

yearly_patterns_query = f"""
SELECT
    year,
    COUNT(*) as records_per_year,
    {yearly_zero_cols},
    {yearly_null_cols},
    {yearly_positive_cols}
FROM `code-for-planet.emission_db.emissions_db_final`
GROUP BY year
ORDER BY year
"""

# One row per year, in ascending year order.
yearly_patterns = (
    client
    .query(yearly_patterns_query)
    .to_dataframe()
)
yearly_patterns

Unnamed: 0,year,records_per_year,area_burned_zeros,prefire_fuel_zeros,consumed_fuel_zeros,ECO2_zeros,ECO_zeros,ECH4_zeros,EPM2_5_zeros,cwd_frac_zeros,...,duff_frac_nulls,area_burned_positive,prefire_fuel_positive,consumed_fuel_positive,ECO2_positive,ECO_positive,ECH4_positive,EPM2_5_positive,cwd_frac_positive,duff_frac_positive
0,2003,424715,167097,107362,107362,107362,107362,107362,107362,247882,...,0,257618,317353,317353,317353,317353,317353,317353,176833,176833
1,2004,239663,111449,75029,75029,75029,75029,75029,75029,132065,...,0,128214,164634,164634,164634,164634,164634,164634,107598,107598
2,2005,473253,156518,73425,73425,73425,73425,73425,73425,313665,...,0,316735,399828,399828,399828,399828,399828,399828,159588,159588
3,2006,813256,246124,97249,97249,97249,97249,97249,97249,581290,...,0,567132,716007,716007,716007,716007,716007,716007,231966,231966
4,2007,754083,253939,114044,114044,114044,114044,114044,114044,436808,...,0,500144,640039,640039,640039,640039,640039,640039,317275,317275
5,2008,523941,168336,83053,83053,83053,83053,83053,83053,318252,...,0,355605,440888,440888,440888,440888,440888,440888,205689,205689
6,2009,484933,155750,109092,109092,109092,109092,109092,109092,310850,...,0,329183,375841,375841,375841,375841,375841,375841,174083,174083
7,2010,378858,153504,108372,108372,108372,108372,108372,108372,226446,...,0,225354,270486,270486,270486,270486,270486,270486,152412,152412
8,2011,883243,204759,112988,112988,112988,112988,112988,112988,584428,...,0,678484,770255,770255,770255,770255,770255,770255,298815,298815
9,2012,731744,166475,64715,64715,64715,64715,64715,64715,431923,...,0,565269,667029,667029,667029,667029,667029,667029,299821,299821


## Statistical summary (positive-only min/mean/median, plus overall max and median)

In [None]:
# Distribution summary per profiled variable.
# A CASE with no ELSE yields NULL for non-positive rows, and the aggregates
# skip NULLs, so the "*_positive" statistics only see values > 0.
min_positive_cols = ', '.join(
    f'MIN(CASE WHEN {var} > 0 THEN {var} END) as {var}_min_positive'
    for var in emission_vars
)
max_cols = ', '.join(
    f'MAX({var}) as {var}_max' for var in emission_vars
)
mean_positive_cols = ', '.join(
    f'AVG(CASE WHEN {var} > 0 THEN {var} END) as {var}_mean_positive'
    for var in emission_vars
)
# APPROX_QUANTILES(x, 100) returns 101 boundaries; OFFSET(50) is the
# approximate median.
median_all_cols = ', '.join(
    f'APPROX_QUANTILES({var}, 100)[OFFSET(50)] as {var}_median_all'
    for var in emission_vars
)
median_positive_cols = ', '.join(
    f'APPROX_QUANTILES(CASE WHEN {var} > 0 THEN {var} END, 100)[OFFSET(50)] as {var}_median_positive'
    for var in emission_vars
)

positive_stats_query = f"""
SELECT
    {min_positive_cols},
    {max_cols},
    {mean_positive_cols},
    {median_all_cols},
    {median_positive_cols}
FROM `code-for-planet.emission_db.emissions_db_final`
"""

# Single-row result with five statistics per variable.
positive_stats = (
    client
    .query(positive_stats_query)
    .to_dataframe()
)
positive_stats

Unnamed: 0,area_burned_min_positive,prefire_fuel_min_positive,consumed_fuel_min_positive,ECO2_min_positive,ECO_min_positive,ECH4_min_positive,EPM2_5_min_positive,cwd_frac_min_positive,duff_frac_min_positive,area_burned_max,...,duff_frac_median_all,area_burned_median_positive,prefire_fuel_median_positive,consumed_fuel_median_positive,ECO2_median_positive,ECO_median_positive,ECH4_median_positive,EPM2_5_median_positive,cwd_frac_median_positive,duff_frac_median_positive
0,62500.0,3.810924,3.544159,5.943555,0.248091,0.009498,0.03615,0.002261,0.026048,62500.0,...,0.0,62500.0,1470.119976,1251.245086,1989.28649,92.652866,3.547281,13.500846,0.10764,0.173202


## Cross-variable zero patterns (which variables are zero together)

In [None]:
# Cross-tabulate the zero / positive status of several variables to see
# which of them are zero together.
#
# Each variable is classified with an explicit three-way CASE. A plain
# "= 0 ... ELSE 'positive'" would label NULL and negative values as
# 'positive'; here they land in their own 'null_or_negative' bucket, so the
# 'positive' label always means "strictly greater than zero".
cross_zero_vars = ['area_burned', 'ECO2', 'ECO', 'ECH4', 'consumed_fuel']

cross_zero_status_cols = ',\n    '.join(
    f"CASE WHEN {var} = 0 THEN 'zero' "
    f"WHEN {var} > 0 THEN 'positive' "
    f"ELSE 'null_or_negative' END as {var}_status"
    for var in cross_zero_vars
)
# Group by the ordinal positions of the status columns (1..N) so the
# GROUP BY always matches the number of generated columns.
cross_zero_group_by = ','.join(
    str(position) for position in range(1, len(cross_zero_vars) + 1)
)

cross_zero_patterns_query = f"""
SELECT
    {cross_zero_status_cols},
    COUNT(*) as record_count,
    COUNT(*) * 100.0 / SUM(COUNT(*)) OVER() as percentage
FROM `code-for-planet.emission_db.emissions_db_final`
GROUP BY {cross_zero_group_by}
ORDER BY record_count DESC
"""

# One row per observed status combination, most frequent first.
cross_zero_patterns = client.query(cross_zero_patterns_query).to_dataframe()
cross_zero_patterns

Unnamed: 0,area_burned_status,ECO2_status,ECO_status,ECH4_status,consumed_fuel_status,record_count,percentage
0,positive,positive,positive,positive,positive,4761421,65.633726
1,zero,positive,positive,positive,positive,1199151,16.529676
2,zero,zero,zero,zero,zero,1113481,15.34876
3,positive,zero,zero,zero,zero,180481,2.487837


## Regional patterns (by covertype and fuelcode)


In [None]:
# Zero-value rates and positive-only averages for ECO2 and area_burned,
# broken down by (covertype, fuelcode). Groups with fewer than 100 rows are
# dropped, and the groups with the highest ECO2 zero rate come first.
regional_patterns_query = """
SELECT
    covertype,
    fuelcode,
    COUNT(*) as total_records,
    COUNTIF(ECO2 = 0) as ECO2_zeros,
    COUNTIF(area_burned = 0) as area_burned_zeros,
    COUNTIF(ECO2 = 0) * 100.0 / COUNT(*) as ECO2_zero_pct,
    COUNTIF(area_burned = 0) * 100.0 / COUNT(*) as area_burned_zero_pct,
    AVG(CASE WHEN ECO2 > 0 THEN ECO2 END) as avg_ECO2_when_positive,
    AVG(CASE WHEN area_burned > 0 THEN area_burned END) as avg_area_burned_when_positive
FROM `code-for-planet.emission_db.emissions_db_final`
GROUP BY covertype, fuelcode
HAVING COUNT(*) >= 100
ORDER BY ECO2_zero_pct DESC
"""

# The positive-only averages come back as NULL (NaN in pandas) for groups
# that have no positive rows at all.
regional_patterns = (
    client
    .query(regional_patterns_query)
    .to_dataframe()
)
regional_patterns

Unnamed: 0,covertype,fuelcode,total_records,ECO2_zeros,area_burned_zeros,ECO2_zero_pct,area_burned_zero_pct,avg_ECO2_when_positive,avg_area_burned_when_positive
0,0,0,134288,134288,85127,100.0,63.391368,,62500.0
1,95,0,145703,145703,71562,100.0,49.11498,,62500.0
2,90,0,15677,15677,10503,100.0,66.996237,,62500.0
3,22,0,10864,10864,10093,100.0,92.903166,,62500.0
4,82,0,845208,845208,827580,100.0,97.91436,,62500.0
5,21,0,25416,25416,21007,100.0,82.65266,,62500.0
6,81,0,110375,110375,81475,100.0,73.816535,,62500.0
7,23,0,5036,5036,4783,100.0,94.976172,,62500.0
8,24,0,1369,1369,1347,100.0,98.392988,,62500.0
9,2,2,1523239,26,250994,0.001707,16.477651,1064.753764,62500.0


## Fire event level analysis

In [None]:
# Classify each fire event by the emission pattern of its rows, then count
# the events in each class.
#   all_zero_emissions     - every row has ECO2, ECO and ECH4 all equal to 0
#   all_positive_emissions - every row has ECO2, ECO and ECH4 all > 0
#   mixed_emissions        - at least one all-zero row AND at least one row
#                            with any of the three gases > 0
# NOTE: the three classes are not exhaustive by construction. An event
# matching none of them is counted only in total_fire_events.
fire_event_analysis_query = """
SELECT
    COUNTIF(all_zero_emissions) as events_with_all_zero_emissions,
    COUNTIF(mixed_emissions) as events_with_mixed_emissions,
    COUNTIF(all_positive_emissions) as events_with_all_positive_emissions,
    COUNT(*) as total_fire_events
FROM (
    SELECT
        global_fire_event_id,
        COUNTIF(ECO2 = 0 AND ECO = 0 AND ECH4 = 0) = COUNT(*) as all_zero_emissions,
        COUNTIF(ECO2 > 0 AND ECO > 0 AND ECH4 > 0) = COUNT(*) as all_positive_emissions,
        COUNTIF(ECO2 = 0 AND ECO = 0 AND ECH4 = 0) > 0
        AND COUNTIF(ECO2 > 0 OR ECO > 0 OR ECH4 > 0) > 0 as mixed_emissions
    FROM `code-for-planet.emission_db.emissions_db_final`
    WHERE global_fire_event_id IS NOT NULL
    GROUP BY global_fire_event_id
)
"""

# Single-row result with one count per class plus the event total.
fire_event_analysis = (
    client
    .query(fire_event_analysis_query)
    .to_dataframe()
)
fire_event_analysis

Unnamed: 0,events_with_all_zero_emissions,events_with_mixed_emissions,events_with_all_positive_emissions,total_fire_events
0,255751,70834,702179,1028764


## Temporal patterns within fire events

In [None]:
# Zero-value rates and the positive-only ECO2 average, grouped by the
# number of days since the fire event started. Only days 0 through 30
# (inclusive) are kept.
temporal_patterns_query = """
SELECT
    days_from_event_start,
    COUNT(*) as total_records,
    COUNTIF(ECO2 = 0) as ECO2_zeros,
    COUNTIF(area_burned = 0) as area_burned_zeros,
    COUNTIF(ECO2 = 0) * 100.0 / COUNT(*) as ECO2_zero_pct,
    AVG(CASE WHEN ECO2 > 0 THEN ECO2 END) as avg_ECO2_when_positive
FROM `code-for-planet.emission_db.emissions_db_final`
WHERE days_from_event_start IS NOT NULL
AND days_from_event_start BETWEEN 0 AND 30
GROUP BY days_from_event_start
ORDER BY days_from_event_start
"""

# One row per day offset, in ascending order.
temporal_patterns = (
    client
    .query(temporal_patterns_query)
    .to_dataframe()
)
temporal_patterns

Unnamed: 0,days_from_event_start,total_records,ECO2_zeros,area_burned_zeros,ECO2_zero_pct,avg_ECO2_when_positive
0,0,3592217,716706,1159770,19.951634,2502.14156
1,1,1000288,118936,261115,11.890176,2421.530388
2,2,643644,101787,197736,15.814177,2876.068565
3,3,426310,71218,134063,16.705684,3122.575203
4,4,329401,57008,102563,17.306566,3244.972253
5,5,237649,43217,81714,18.185223,3673.78525
6,6,169550,36296,61414,21.407254,4131.068915
7,7,140512,26934,51312,19.16847,4353.976923
8,8,108968,20935,40460,19.212062,4392.6513
9,9,86472,16912,32820,19.557776,4598.356631
