In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

sns.set_style('whitegrid')

In [None]:
# Directory that holds the input data files, given relative to the notebook's
# working directory. It is listed by get_files_in_dir() and used as the prefix
# for the file names that are opened further down.
data_dir = '../data/'

In [None]:
def get_files_in_dir(directory, return_dirs=False, verbose=False):
    """List the immediate contents of ``directory`` (no recursion).

    Parameters
    ----------
    directory : str
        Path of the directory to list.
    return_dirs : bool, default False
        If True, return ``(files, dirs)`` instead of only ``files``.
    verbose : bool, default False
        If True, print the path, the sub-directory names and an enumerated
        list of the file names.

    Returns
    -------
    list of str, or (list of str, list of str)
        File names directly inside ``directory``; with ``return_dirs=True``
        also the names of the direct sub-directories. The order is whatever
        ``os.walk`` reports and is not sorted here.

    Raises
    ------
    Exception
        If ``directory`` does not exist.
    OSError
        If ``directory`` exists but cannot be scanned (for example because it
        is a regular file or is not readable).
    """
    if not os.path.exists(directory):
        raise Exception(f'{directory} does not exist!')

    def _reraise(error):
        # os.walk silently skips paths it cannot scan unless onerror is given;
        # surface the problem instead of falling through with nothing listed.
        raise error

    # Only the first triple (the top level) is needed.
    path, dirs, files = next(os.walk(directory, onerror=_reraise))

    if verbose:
        print('path: ', path)
        print('dirs', dirs)
        print('files')
        for i, file in enumerate(files):
            print('\t', i, file)

    return files if not return_dirs else (files, dirs)


In [None]:
# Show the available data files together with their list index, so that one
# of them can be picked by position.
files = get_files_in_dir(data_dir)
for position, file_name in enumerate(files):
    print(position, file_name)

## Berkeley Earth

http://berkeleyearth.org/data/

In [None]:
# Pick the data file by its position in `files`.
# NOTE(review): the position depends on the order in which the directory
# listing was returned, which is not sorted — confirm the index against the
# printed listing before trusting it.
ind = 1
# os.path.join works whether or not data_dir ends with a path separator.
fn = os.path.join(data_dir, files[ind])
fn

In [None]:
# Read the whole text file into a list with one string per line
# (line endings are kept at this stage).
with open(fn) as handle:
    lines = list(handle)

lines[:8]

In [None]:
len(lines)

In [None]:
# Human-readable column header; presumably mirrors the header of the data
# file read above — verify against the file.
raw_header = " Year, Month, Monthly Anomaly, Monthly Unc., Annual Anomaly, Annual Unc., Five-year Anomaly, Five-year Unc., Ten-year Anomaly, Ten-year Unc., Twenty-year Anomaly, Twenty-year Unc."

# Turn each header entry into a snake_case identifier.
cols = []
for raw_name in raw_header.split(','):
    name = raw_name[1:]                    # drop the single leading space
    name = name.replace('.', 'ertainty')   # "Unc." -> "Uncertainty"
    name = name.lower()
    name = name.replace(' ', '_').replace('-', '_')
    cols.append(name)
cols

In [None]:
# Keep only the data rows: strip surrounding whitespace, then drop blank lines
# and '%' comment/header lines. Testing the stripped text (instead of
# line[0]) keeps blank lines out of the result, also catches '%' lines that
# are indented, and makes the cell safe to re-run on already-cleaned input.
lines = [
    stripped
    for stripped in (line.strip() for line in lines)
    if stripped and not stripped.startswith('%')
]
lines[:8]

In [None]:
len(lines)

In [None]:
# Split every remaining line on single spaces and discard the empty strings
# produced by runs of consecutive spaces.
data_arr = []
for row_text in lines:
    fields = [token for token in row_text.split(' ') if token]
    data_arr.append(fields)

In [None]:
# Check every parsed row has exactly one entry per expected column.
# Comparing row lengths directly avoids building a NumPy array from possibly
# ragged rows: that raises ValueError on recent NumPy and, on older versions,
# yields an object array whose last dimension is the row count.
bad_rows = [idx for idx, row in enumerate(data_arr) if len(row) != len(cols)]
assert data_arr and not bad_rows, (
    'data was read incorrectly, not every column was read on every row'
    f' (first offending row indices: {bad_rows[:5]})'
)

In [None]:
# Target dtype per column, in column order: the first two columns are
# integers, the remaining ten are floats.
dts = [int]*2 + [float]*10

# Build the frame from the string fields and convert all columns in one call.
df_raw = pd.DataFrame(data_arr, columns=cols).astype(dict(zip(cols, dts)))
df_raw

In [None]:
# Columns that only make sense at monthly resolution; removed after averaging.
monthly_only = ['month', 'monthly_anomaly', 'monthly_uncertainty']

# One row per year: mean of every column over that year's rows.
df_annual = (
    df_raw
    .groupby('year')
    .mean()
    .drop(columns=monthly_only)
    .reset_index()
)
df_annual

In [None]:
# Line plot of the annual anomaly column against year.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=df_annual, x='year', y='annual_anomaly', ax=ax)

In [None]:
df_annual.iloc[:20]['twenty_year_anomaly'].dropna()

In [None]:
# First non-NaN twenty-year anomaly found within the first 20 rows of
# df_annual.
first_twenty = df_annual['twenty_year_anomaly'].iloc[:20]
preindustrial = first_twenty.dropna().iloc[0]
preindustrial

In [None]:
# Line plot of the twenty-year anomaly column against year.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=df_annual, x='year', y='twenty_year_anomaly', ax=ax)

In [None]:
# Bar chart of the annual anomaly column, one bar per year.
fig, ax = plt.subplots(figsize=(16, 8))
sns.barplot(data=df_annual, x='year', y='annual_anomaly', ax=ax)

## Met Office Hadley Centre observations datasets
https://www.metoffice.gov.uk/hadobs/hadcrut5/

In [None]:
# List the data directory again (with indices) to pick the next input file.
files = get_files_in_dir(data_dir)
for position, file_name in enumerate(files):
    print(position, file_name)

In [None]:
# Pick the data file by its position in `files`.
# NOTE(review): the position depends on the order in which the directory
# listing was returned, which is not sorted — confirm the index against the
# printed listing before trusting it.
ind = 0
# os.path.join works whether or not data_dir ends with a path separator.
fn = os.path.join(data_dir, files[ind])
fn

In [None]:
# Load the selected file as CSV with pandas defaults (first row = header).
# Presumably this is the HadCRUT5 series referenced in the heading above —
# confirm that `fn` points at the intended file.
df_moh = pd.read_csv(fn)
df_moh

In [None]:
# Shorten each column label to its first space-separated word, lower-cased,
# and install the shortened labels on the frame.
cols_moh = [label.lower().split(' ')[0] for label in df_moh.columns.tolist()]
df_moh.columns = cols_moh
df_moh

In [None]:
# Line plot of the `anomaly` column against the `time` column.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=df_moh, x='time', y='anomaly', ax=ax)

## Create climate stripes

In [None]:
# Colour name -> RGBA tuple with 0-255 channels (divided by 255 later when the
# per-row plotting colour is derived). The definition order matters: `colors`
# below is this key order reversed.
# NOTE(review): 'dark blue' has exactly the same RGBA as 'antique blue', and
# 'black' / 'black1' are identical too — confirm whether 'dark blue' was meant
# to have its own value.
color2rgb = {
    #'dark_red': (134,17,23,255),
    'dark red': (134,17,23,255),
    'red': (169,0,31,255),
    #'dark rose': (173,94,97,255),
    'pink': (240,36,99,255),
    'orange': (233,105,61,255),
    #'mustard yellow': (219,163,44,255),
    'coral': (228,106,109,255),
    'pastelpink': (238,197,218,255),
    'naturalwhite': (255,246,229,255),
    #'white': (255,255,255,255),
    'aqua': (169,214,209,255),
    #'light blue': (185,206,227,255),
    'turqoise': (76,172,197,255),
    'antique blue': (77,107,157,255),
    'denim': (0,52,98,255),
    'dark blue': (77,107,157,255),
    'black': (0,0,0,255),
    'black1': (0,0,0,255),
}

# Active colour names in reverse definition order, i.e. starting with
# 'black1' and ending with 'dark red'. Index 0 is assigned to the lowest
# anomalies and the last index to the highest.
colors = list(color2rgb.keys())[::-1]

# Colour name -> integer code that is printed next to each colour's width in
# the final table. Presumably a product/catalogue colour number — confirm.
# Contains a few names ('dark_red', 'dark rose', 'mustard yellow',
# 'light blue') that are commented out in color2rgb.
color2code = {
    'dark_red': 65,
    'dark red': 65,
    'red': 64,
    'dark rose': 63,
    'pink': 75,
    'orange': 58,
    'mustard yellow': 71,
    'coral': 62,
    'pastelpink': 90,
    'naturalwhite': 52,
    'aqua': 59,
    'light blue': 68,
    'turqoise': 76,
    'antique blue': 69,
    'denim': 72,
    'dark blue': 74,
    'black': 70,
    'black1': 70,
}

In [None]:
# Plot mode passed to get_stripes(): False -> equal-height stripes,
# True -> rectangles whose height follows the anomaly.
barplot = False
# When True, an extra 'preindustrial_anomaly' column is added to df_moh.
add_preindustrial = False

In [None]:
# Optionally add a column holding the anomaly offset by `preindustrial`
# (the first non-NaN twenty-year anomaly taken from df_annual).
# NOTE(review): re-expressing anomalies relative to an earlier baseline would
# normally subtract that baseline's anomaly, whereas this adds it — confirm
# the intended sign. Also verify that both datasets share a reference period.
if add_preindustrial:
    df_moh['preindustrial_anomaly'] = df_moh.anomaly + preindustrial

In [None]:
# Smallest and largest anomaly in the series.
anomaly_values = df_moh.anomaly
mi = anomaly_values.min()
ma = anomaly_values.max()
mi, ma

In [None]:
# Number of colours available for binning.
n_colors = len(colors)
# n_colors - 1 evenly spaced thresholds from -0.8 to 0.8; together they split
# the anomaly axis into n_colors bins (below the first threshold, between
# neighbouring thresholds, at/above the last threshold).
colspace = np.linspace(-.8, .8, n_colors-1)

In [None]:
# Assign a colour name to every row by walking the thresholds from the
# highest down to the lowest: each pass overwrites rows whose anomaly lies
# below the current threshold, so a row ends up with the colour of the
# lowest threshold it is still below.
color_col = df_moh.anomaly.copy()
for i in reversed(range(n_colors - 1)):
    threshold, color_name = colspace[i], colors[i]
    print(i, threshold, color_name)
    color_col = color_col.where(df_moh.anomaly >= threshold, color_name)

# Rows that are not below the last threshold get the final colour.
df_moh['color'] = color_col.where(df_moh.anomaly < colspace[-1], colors[-1])

df_moh

In [None]:
def _to_unit_rgba(color_name):
    """Look up a colour name in color2rgb and scale its channels by 1/255."""
    return tuple(np.array(color2rgb[color_name])/255)


# Per-row RGBA tuple for plotting, derived from the colour name.
df_moh['color_rgb'] = df_moh.color.apply(_to_unit_rgba)
df_moh

In [None]:
def get_stripes(barplot=False):
    """Create one matplotlib ``Rectangle`` per row of the global ``df_moh``.

    Each rectangle is one unit wide and centred on the row's ``time`` value;
    it is coloured with the row's ``color_rgb``.

    Parameters
    ----------
    barplot : bool, default False
        If False, every rectangle starts at y=0 and has the same fixed
        height. If True, the rectangle spans from 0 to the row's ``anomaly``
        (extending downwards for negative values).

    Returns
    -------
    list of matplotlib.patches.Rectangle
        Rectangles in row order; they are not added to any axes here.
    """
    half_width = 0.5
    # Fixed height used for the equal-height stripes.
    stripe_height = (2020-1850)/220*140

    def _rectangle_for(record):
        anomaly = record['anomaly']
        if barplot:
            # Bar between 0 and the anomaly, whichever side of 0 it is on.
            bottom, height = min(anomaly, 0), abs(anomaly)
        else:
            bottom, height = 0, stripe_height
        return Rectangle(
            (record['time'] - half_width, bottom), 2*half_width, height,
            color=record['color_rgb'],
            linewidth=0.2,
        )

    return [_rectangle_for(record) for _, record in df_moh.iterrows()]

In [None]:
fig, ax = plt.subplots(1,1,figsize=(16,8))

# Faint horizontal reference line at y=0 from x=1850 to x=2021.
ax.plot([1850, 2021], [0,0], color='k', lw=.4, alpha=0.5)

# One Rectangle per row of df_moh, in the mode selected by `barplot`.
stripes = get_stripes(barplot)

# Rectangles have to be attached to the axes one by one.
for stripe in stripes:
    ax.add_patch(stripe)

plt.show()

In [None]:
fig, ax = plt.subplots(1,1,figsize=(16,8))

# Faint horizontal reference line at y=0 from x=1850 to x=2021.
ax.plot([1850, 2021], [0,0], color='k', lw=.4, alpha=0.5)

# Same rectangles as in the previous figure, but in the opposite mode
# (`not barplot`).
stripes = get_stripes(not barplot)

# Vertical marker lines at 1961 and 1990, labelled for the legend.
ax.axvline(1961, color='k', lw=0.5, ls='--', label='1961')
ax.axvline(1990, color='k', lw=0.5, ls='-.', label='1990')

for stripe in stripes:
    ax.add_patch(stripe)
# y-range: 1.2 times the smallest and largest anomaly.
plt.ylim(mi*1.2, ma*1.2)
plt.legend()
plt.show()

In [None]:
# (colour name, colour code) for every row of df_moh, in row order.
colorcodes = [(name, color2code[name]) for name in df_moh['color']]

In [None]:
# Width assigned to a single row, rounded to one decimal (-> 1.3); the final
# table prints the resulting widths with a 'cm' suffix.
# NOTE(review): 220 looks like the intended total length and 170 the number
# of rows/years — confirm. Because of the rounding, 170 rows add up to about
# 221 rather than 220.
linew = round(220/170, 1)
linew

In [None]:
linew * 170

In [None]:
# Collapse consecutive rows with the same colour into one entry:
# (colour name, colour code, total width of the run).
colorwidths = []
run_color = None
run_length = 0

for c, code in colorcodes:
    if run_length and c != run_color:
        # Colour changed: store the run that just ended and start a new one.
        colorwidths.append((run_color, color2code[run_color], round(run_length*linew, 3)))
        run_length = 0
    run_color = c
    run_length += 1

# The last run is still open after the loop; store it as well.
if run_length:
    print(c, code, color2code[c])
    colorwidths.append((c, code, round(run_length*linew, 3)))

In [None]:
colorwidths[-20:]

In [None]:
sum([
    r[-1] for r in colorwidths
])

In [None]:
df_moh.iloc[-20:]

In [None]:
# Print one line per colour run; shorter names get more tabs so that the
# code column lines up.
for entry in colorwidths:
    color, code, w = entry
    pad = '\t' * (2 - len(color) // 8)
    print(f'{color}{pad}{code} - {w}cm')