## trends in atmospheric carbon dioxide

Mauna Loa CO2 concentration.  
data from [NOAA](https://gml.noaa.gov/ccgg/trends/data.html)

In [None]:
#| code-summary: "load csv"
url = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_weekly_mlo.csv"
# Alternative: read straight from the url instead of a local copy
# df = pd.read_csv(url, header=47, na_values=[-999.99])

# Here the csv is read from a local file that was downloaded beforehand.
filename = "co2_weekly_mlo.csv"
df = pd.read_csv(
    filename,
    comment='#',          # rows starting with '#' are ignored
    na_values=[-999.99],  # -999.99 marks unavailable data; read it as NaN
)
# Combine the year/month/day columns into a single datetime and index by it.
df = df.assign(date=pd.to_datetime(df[['year', 'month', 'day']])).set_index('date')
df

The original data has a `decimal` column, showing the date as a decimal number instead of `YYYY-MM-DD`. This will turn out to be very useful to us in a little while. The code below computes our own column with the decimal representation of the date; it is useful to know how to calculate it ourselves.

In [None]:
#| code-fold: false

def toYearFraction(date):
    """
    Return the decimal-year representation of one date.

    Adapted from https://stackoverflow.com/a/6451892

    The result is ``year + elapsed / duration``, where ``elapsed`` is the time
    between the start of the date's year and the date itself, and ``duration``
    is the length of that year. Leap years are handled because the duration
    is measured between two consecutive January 1sts.

    The computation uses plain datetime arithmetic on the wall-clock value of
    ``date`` instead of ``time.mktime``. ``mktime`` interprets its argument in
    the local timezone of the machine, so daylight-saving shifts would skew
    the fraction and the result would depend on where the code runs.

    Parameters
    ----------
    date : datetime.datetime, pandas.Timestamp or datetime.date
        The date to convert. Any timezone information is ignored; only the
        wall-clock fields are used.

    Returns
    -------
    float
        The year plus the fraction of that year already elapsed.
    """
    year = date.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year+1, month=1, day=1)

    if isinstance(date, dt):
        # Drop tzinfo so the subtraction below is between naive values.
        moment = date.replace(tzinfo=None)
    else:
        # A plain calendar date has no time of day: treat it as midnight.
        moment = dt(year=year, month=date.month, day=date.day)

    yearElapsed = (moment - startOfThisYear).total_seconds()
    yearDuration = (startOfNextYear - startOfThisYear).total_seconds()
    fraction = yearElapsed/yearDuration

    return year + fraction

# decimal-year value for every timestamp in the index
df['decimal_date'] = list(map(toYearFraction, df.index))
# trim the table: everything except these columns is dropped
columns_to_keep = ['average', 'decimal_date']  # names of the columns that survive
df = (
    df.drop(columns=df.columns.difference(columns_to_keep))
      .rename(columns={'average': 'co2'})  # 'average' is called 'co2' from here on
)
df

In [None]:
# CO2 series plotted against its datetime index.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df['co2'])
ax.set_ylabel("CO2 concentration (ppm)")
ax.set_title("Mauna Loa CO2 concentration");

## trend

In [None]:
# Trend: mean over a centered 365-day rolling window.
df['trend365D'] = df['co2'].rolling(window='365D', center=True).mean()
# Detrended signal: what remains after subtracting the trend from the data.
df['co2_detrended'] = df['co2'].sub(df['trend365D'])

In [None]:
# Data and its 365-day rolling-mean trend on the same axes.
fig, ax = plt.subplots(figsize=(8, 5))
for column in ('co2', 'trend365D'):
    ax.plot(df[column])
ax.set_ylabel("CO2 concentration (ppm)")
ax.set_title("Mauna Loa CO2 concentration");

In [None]:
# The detrended series (data minus the 365-day trend).
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.loc[:, 'co2_detrended'])

In [None]:
# One group per calendar year, keyed by the year of each index timestamp.
gb_year = df.groupby(by=df.index.year)

In [None]:
# %matplotlib widget

# One curve per year, drawn against day-of-year so that the years overlay.
fig, ax = plt.subplots(1, figsize=(8,4))

# One colour per year, sampled from the 0.3-0.8 span of the reversed "hot" colormap.
colors = plt.cm.hot_r(np.linspace(0.3, 0.8, len(gb_year)))

# Plot 'co2' for each year
for color, (_, data) in zip(colors, gb_year):
    ax.plot(data.index.day_of_year, data['co2'].values, color=color)

# Label a few years just right of the curves (x=370), at the height of
# that year's last sample.
for y in [1974, 1984, 1994, 2004, 2014, 2023]:
    this_year = gb_year.get_group(y)
    # .iloc[-1] is an explicit positional lookup. A bare [-1] on a
    # datetime-indexed Series relies on a deprecated positional fallback.
    ax.text(370, this_year['co2'].iloc[-1], f"{y}")

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)


In [None]:
# %matplotlib widget

# Detrended CO2 for every year, drawn against day-of-year so that the
# years overlay.
fig, ax = plt.subplots(1, figsize=(8,4))

# One colour per year, sampled from the 0.3-0.8 span of the reversed "hot" colormap.
colors = plt.cm.hot_r(np.linspace(0.3, 0.8, len(gb_year)))

for color, (_, data) in zip(colors, gb_year):
    ax.plot(data.index.day_of_year, data['co2_detrended'], color=color)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)


In [None]:
# Mean of the detrended signal within each calendar year.
gb_year['co2_detrended'].agg('mean')

Fill in the missing data by interpolation, using the `'time'` method.  
[interpolation methods visualized](https://thepythonyouneed.com/how-to-interpolate-values-with-pandas/)

In [None]:
# Fill gaps in the CO2 series: upsample to a daily grid, then interpolate
# the missing values with the 'time' method.
# The measurements are read from 'co2': the original 'average' column was
# renamed to 'co2' when the table was trimmed, so 'average' no longer exists.
# Assigning the result back to df['co2'] aligns it on df's existing index.
df['co2'] = (df['co2'].resample("D")  # resample daily
                      .interpolate(method='time')  # interpolate by time
            )
df

In [None]:
# %matplotlib widget

decomposed = decomposed_m

# (series, colour, legend label) for each curve; both panels draw the same set.
curves = [
    (df['co2'], "tab:blue", "observed"),
    (decomposed.trend * decomposed.resid, "tab:orange", "trend*resid"),
    (decomposed.trend * decomposed.seasonal, "tab:red", "trend*seasonal"),
    (decomposed.trend, "black", "trend"),
]

fig, ax = plt.subplots(1, 2, figsize=(10,6))

# Left panel: every curve over its full extent.
for series, color, label in curves:
    ax[0].plot(series, color=color, label=label)
ax[0].set_ylabel("CO$_2$ concentration (ppm)")
ax[0].set_title("Mauna Loa CO$_2$ concentration")
ax[0].legend(frameon=False)

# Right panel: the same curves restricted to the start..end window.
start = "2000-01-01"
end = "2003-01-01"
zoom = slice(start, end)
for series, color, label in curves:
    ax[1].plot(series[zoom], color=color, label=label)
# Yearly ticks, labelled with the four-digit year.
date_form = DateFormatter("%Y")
ax[1].xaxis.set_major_formatter(date_form)
ax[1].xaxis.set_major_locator(mdates.YearLocator(1))
ax[1].set_title("Components, 2000--2003");
