In [None]:
import pandas as pd
from datetime import datetime, timedelta
# `display` and `HTML` belong to the public IPython.display module; importing
# them from IPython.core.display is deprecated and fails on recent IPython.
from IPython.display import display, HTML

# Project-local helpers used throughout the notebook are pulled in by this
# star import (presumably add_days, load_data, select_outbreak, ... — none of
# them are defined in the notebook itself).
from src import *
# Show every row when a frame is displayed instead of eliding the middle.
pd.set_option('display.max_rows', None)

# Parameters

These parameters configure various things such as the relative location of data files, which date is being analysed
and the assumed generation period for an infection.

In [None]:
# Input data files. The Victorian and quarantine extracts are pinned to
# specific archive dates.
ALL_VIC_CASES = "archive/2021-07-12/all-vic-cases.csv"
QUARANTINE = "archive/2021-07-16/quarantine.csv"

# Assumed generation period of an infection, in days.
GENERATION_DAYS = 5

# When False, the Melbourne 2020 animations are not regenerated.
ENABLE_MELBOURNE_ANIMATION = False

# Dates are handled as "YYYY-MM-DD" strings.
TODAY = f"{datetime.today():%Y-%m-%d}"
LAST_WEEK = add_days(TODAY, -7)

# The date being analysed. Assign a literal instead of TODAY to re-run a past day:
#RUN_DATE='2021-07-30'
RUN_DATE = TODAY
PREV_DAY = add_days(RUN_DATE, -1)
NEXT_WEEK = add_days(RUN_DATE, 7)
PREV_WEEK = add_days(RUN_DATE, -7)
PREV_FORTNIGHT = add_days(RUN_DATE, -14)

# The NSW extracts are read from the archive directory of the run date.
NSW_6_MONTHS = f"archive/{RUN_DATE}/last-6-months-nsw.csv"
NSW_14_DAYS = f"archive/{RUN_DATE}/last-14-days-nsw.csv"

# Data Preparation

**sweep_downloads()** moves files from the $HOME/Downloads directory into today's archive directory. These should first be
downloaded from the "Last 14 days (new)" and "Last 6 months (true)" panels of the NSW Transmission Sources section of [covid19data.com.au](https://www.covid19data.com.au/nsw)

In [None]:
# Move the freshly downloaded NSW extracts into the archive directory for TODAY.
# NOTE(review): the NSW files are later read from archive/{RUN_DATE}; if RUN_DATE
# is overridden to a past date the swept files land in a different directory
# from the one that is loaded — confirm this is intended.
sweep_downloads(TODAY)

Next, we load the data from the files and merge the 14-day data with the 6 month data.

In [None]:
# Merge the recent 14-day NSW extract with the 6-month history.
nsw_df = update_df(
    load_data(NSW_6_MONTHS),
    load_data(NSW_14_DAYS),
)
#nsw_df = append_recent_date(nsw_df, "2021-07-31", 210)

# Victorian case history and quarantine data come from their pinned archives.
vic_df = load_vic_data(ALL_VIC_CASES)
quarantine_df = load_quarantine(QUARANTINE)

Next, we truncate and reindex the data frames to each outbreak.

In [None]:
# Avalon (NSW): 2020-12-17 to 2021-01-16, both ends inclusive.
avalon = select_outbreak(
    nsw_df[nsw_df['date'].between('2020-12-17', '2021-01-16')],
    generation_days=GENERATION_DAYS,
)

# Bondi (NSW): everything from 2021-06-17 onwards.
bondi = select_outbreak(
    nsw_df[nsw_df['date'] >= '2021-06-17'],
    generation_days=GENERATION_DAYS,
)

# Melbourne second wave: 2020-05-27 to 2020-10-29, both ends inclusive.
vic_outbreak = select_outbreak(
    vic_df[vic_df['date'].between('2020-05-27', '2020-10-29')],
    generation_days=GENERATION_DAYS,
)

# Growth Model

- **date** - the reporting date
- **cumulative** - the cumulative cases to the reporting date. This is usually the day after the cases were "notified".
- **total** - the number of cases reported on the reporting date
- **ols-growth-rate** - growth rate obtained from exponential fit to previous 5-days cumulative amounts expressed as a percentage change of cumulative cases per day
- **ols-growth-rate-min** - minimum ols-growth-rate statistic calculated in previous 5 days
- **ols-growth-rate-decay** - an exponential fit to the ols-growth-rate statistic in the previous 5 days expressed as a percentage change of the ols-growth-rate per day
- **doubling-period** - doubling period (in days) implied by ols-growth-rate
- **Reff** - implied Reff, assuming a 5 day generation period

In [None]:
# Growth-model columns for the 21 most recent reporting dates.
display(
    bondi.tail(21)[
        [
            "date",
            "cumulative",
            "total",
            "ols-growth-rate",
            "ols-growth-rate-min",
            "ols-growth-rate-decay",
            "doubling-period",
            "Reff",
        ]
    ]
)

# 7 Day Model

This section presents the 7-day forward projection made 7 days ago, i.e. the prediction for the current date, and compares it with what actually happened.

- **date** - the reporting date
- **cumulative** - the cumulative cases to the reporting date. This is usually the day after the cases were "notified".
- **total** - the number of cases reported on the reporting date
- **7-day-delta** - 7-day change in cumulative cases
- **7-day-projection** - the 7-day old projection of the cumulative total for the reporting date
- **7-day-projection-error** - the error in 7-day old projection. +ve is projection overshoot.
- **7-day-projection-relative-error** - the relative error in the projected cumulative change vs the actual cumulative change expressed as a percentage

In [None]:
# 7-day projection columns for the 21 most recent reporting dates.
display(
    bondi.tail(21)[
        [
            "date",
            "cumulative",
            "total",
            "7-day-delta",
            "7-day-projection",
            "7-day-projection-error",
            "7-day-projection-relative-error",
        ]
    ]
)

# 1-day Projections (Past and Present)

- **date** - the reporting date
- **cumulative** - the cumulative cases to the reporting date. This is usually the day after the cases were "notified".
- **total** - the number of cases reported on the reporting date
- **one-day-projection-cumulative** - the 1-day cumulative projection
- **one-day-projection-total** - the 1-day total projection
- **one-day-error** - the difference between the 1-day projection and the actual
- **one-day-relative-error** - the error of the 1-day projection relative to the daily total, expressed as a percentage.


In [None]:
# 1-day projection columns for the 21 most recent reporting dates.
display(
    bondi.tail(21)[
        [
            "date",
            "cumulative",
            "total",
            "one-day-projection-cumulative",
            "one-day-projection-total",
            "one-day-error",
            "one-day-relative-error",
        ]
    ]
)

In [None]:
# Sydney actual ('cumulative') and model ('min') series, extended by 14 empty
# rows beyond the end of the Sydney data so the Melbourne curves can run past it.
output = bondi[['cumulative', 'min']].reindex(range(len(bondi) + 14))
output['vic'] = vic_outbreak['cumulative']

# Number of rows by which the Melbourne curve is shifted earlier for the overlay.
x = 11
output['vic-offset'] = vic_outbreak['cumulative'].shift(-x)

ax = output.plot(figsize=(10, 10))
ax.set_yscale('log')
ax.grid()
ax.set_title(f'Cumulative Case Growth Projections ({RUN_DATE})')
# Legend entries follow the column order: cumulative, min, vic, vic-offset.
ax.legend([
    'Sydney (2021) - actual',
    'Sydney (2021) - model',
    'Melbourne (2020)',
    f'Melbourne (2020) +{x} days'
])
ax.figure.savefig(f'archive/{RUN_DATE}/cumulative-partial.png')

In [None]:
# Melbourne model ('min') and actual ('cumulative') over the whole outbreak,
# with the Sydney actual and model series drawn on the same linear axes.
ax = vic_outbreak[['min', 'cumulative']].plot(figsize=(10, 10))
ax.grid()
ax.plot(bondi['cumulative'])
ax.plot(bondi['min'])
# Legend entries follow plotting order: the two Melbourne columns, then the
# two Sydney lines added above.
ax.legend(['model (Melbourne 2020) ', 'cumulative (Melbourne 2020)','cumulative (Sydney 2021)',  'model (Sydney 2021) '])
ax.set_title("7 Day Projection vs Actual (Melbourne 2020, Sydney 2021)")
ax.figure.savefig(f'archive/{RUN_DATE}/cumulative-full.png')

In [None]:
# Extra Melbourne rows, beyond the length of the Sydney series, to keep in vic_partial.
VIC_EXTRA_DAYS = 0

# Growth parameters fitted on Melbourne rows 70..119 of the outbreak index.
vic_growth_params = derive_growth_params(
    vic_outbreak[(vic_outbreak.index >= 70) & (vic_outbreak.index < 120)],
    generation_days=GENERATION_DAYS,
)

# Growth parameters from the latest 8 Sydney rows, and from the 8-row window
# ending N rows earlier.
bondi_growth_params = derive_growth_params(bondi.tail(8), generation_days=GENERATION_DAYS)
N = 1
bondi_growth_params_3 = derive_growth_params(
    bondi.tail(8 + N).head(8),
    generation_days=GENERATION_DAYS,
)

# Sydney fit over every row after index 15; gp[1] drives projections 2 and 3.
# NOTE(review): generation_days is not passed here or to the select_outbreak
# calls below, unlike the calls above — confirm the defaults agree with GENERATION_DAYS.
gp = derive_growth_params(bondi[bondi.index > 15])

# Sydney projected with the Melbourne-derived parameter ...
bondi_projection_1 = select_outbreak(
    project_ols_growth_rate_min(bondi, 84, vic_growth_params[1])
)
# ... with its own parameter ...
bondi_projection_2 = select_outbreak(
    project_ols_growth_rate_min(bondi, 110, gp[1])
)
# ... and from the data as it stood N rows ago, projected N rows further.
bondi_projection_3 = select_outbreak(
    project_ols_growth_rate_min(bondi.head(len(bondi) - N), 84 + N, gp[1])
)


# Melbourne truncated to the length of the Sydney series (plus any extra
# days), then projected forward over the remainder of the full outbreak.
vic_partial = vic_outbreak.head(len(bondi) + VIC_EXTRA_DAYS)
vic_partial_growth_params = derive_growth_params(vic_partial)
vic_projection = select_outbreak(
    project_ols_growth_rate_min(
        vic_partial,
        len(vic_outbreak) - len(bondi) - VIC_EXTRA_DAYS,
        vic_partial_growth_params[1],
    ),
    generation_days=GENERATION_DAYS,
)

In [None]:
# A dedicated name is used for the plot object so that `gp`, which other cells
# index as a growth-parameter tuple (gp[0], gp[1]), is not rebound to a
# GrowthPlot by this cell.
growth_plot = GrowthPlot(RUN_DATE)
growth_plot.add(bondi, offset=0, legend="Sydney 2021")
growth_plot.add(vic_outbreak, offset=0, legend="Melbourne 2020")

# Retrospective model curves for both outbreaks.
growth_plot.ax.plot(bondi['ols-growth-rate-min'], color="C3")
growth_plot.ax.plot(vic_outbreak['ols-growth-rate-min'], color="C4")

# Sydney projections: Melbourne decay, Sydney decay as of the run date, and
# Sydney decay as of the previous day.
growth_plot.ax.plot(bondi_projection_1['ols-growth-rate-min'], linestyle='dotted', color='C4')
growth_plot.ax.plot(bondi_projection_2['ols-growth-rate-min'], linestyle='dashed', color='C3')
growth_plot.ax.plot(bondi_projection_3['ols-growth-rate-min'], linestyle='dotted', color='C5')
growth_plot.ax.set_yscale('log')

# Labels are appended in the same order as the five ax.plot calls above.
# The closing parentheses of the last two labels were previously unbalanced.
growth_plot.legend = growth_plot.legend + [
    'Sydney 2021 (retrospective model)',
    'Melbourne 2020 (retrospective model)',
    'Sydney 2021 (projection - Melbourne 2020 decay)',
    f'Sydney 2021 (projection - Sydney 2021 decay ({RUN_DATE}))',
    f'Sydney 2021 (projection - Sydney 2021 decay ({PREV_DAY}))'
]
growth_plot.ax.legend(growth_plot.legend)
growth_plot.ax.figure.savefig(f'archive/{RUN_DATE}/cumulative-growth.png')

In [None]:
# Modelling error per day of outbreak; the frame takes its index from the
# Melbourne series and the Sydney series is aligned to it.
output = pd.DataFrame()
output['vic'] = modeling_errors(vic_outbreak)
output["bondi"] = modeling_errors(bondi)
#output["avalon"] = modeling_errors(avalon)

# Only rows from day 15 of the outbreak onwards are plotted.
ax = output.loc[output.index >= 15, ['bondi', 'vic']].plot(figsize=(10, 10))
ax.grid()
ax.set_title("Modelling Error % vs Day Of Outbreak")
ax.set_xlabel("Day Of Outbreak")
ax.set_ylabel("% overshoot of projection vs actual")
# Legend follows the column order above: bondi, then vic.
ax.legend(['Sydney 2021', 'Melbourne 2020'])
# The output file name (including its spelling) is kept unchanged so anything
# that links to the existing image keeps working.
ax.figure.savefig(f'archive/{RUN_DATE}/modellng-error.png')

In [None]:
# Set to False to leave the Melbourne 2020 trace and its labels off the plot.
plot_vic = True

pp = PhasePlot(f"New Cases vs Error Modeling % - Sydney 2021 vs Melbourne 2020 - ({RUN_DATE})")

# NOTE(review): the animation cell further down uses offset=15 for Sydney and
# offset=20 for Melbourne, the reverse of the values here — confirm which is intended.
bondi_idx = pp.add(bondi, offset=20, legend="Sydney 2021", color="C0")
if plot_vic:
    vic_idx = pp.add(vic_outbreak, offset=15, legend="Melbourne 2020", color="C1")

# 7-day horizons: one from the latest data, and one from the data as it stood
# seven rows ago, which targets the run date itself.
pp.add_horizon(horizon(bondi, 7), legend=f"7-Day Projection for {NEXT_WEEK}", color="blue")
pp.add_horizon(horizon(bondi.head(len(bondi)-7), 7), legend=f"7-Day Projection for {RUN_DATE}", color="red")
# for i in range(0,7):
#     pp.add_horizon(horizon(bondi.head(len(bondi)-7-i),7), legend=f"today - as projected {7+i} days ago", color=f"C{i}")


if plot_vic:
    first_case = pp.frames[vic_idx].head(1)['date']
    last_case = pp.frames[vic_idx].tail(1)[['date', 'total', '7-day-projection-relative-error']]

    pp.add_label(vic_idx, "2020-08-04", "peak daily new cases")
    pp.add_label(vic_idx, "2020-07-16", "last model undershoot, prior to recovery")
    pp.add_label(vic_idx, "2020-08-02", "stage 4 restrictions announced")
    pp.add_label(vic_idx, "2020-07-15", "similar state (VIC)")
    pp.add_label(vic_idx, "2020-07-09", "stage 3 restrictions announced")
    # last_case.values[0][0] is the 'date' of the last plotted Melbourne row.
    pp.add_label(vic_idx, last_case.values[0][0], "last day plotted (VIC)")
    pp.add_label(vic_idx, first_case.values[0], "first day plotted (VIC)")

    # pp.add_label(vic_idx, "2020-08-09", "1 week after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-08-16", "2 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-08-23", "3 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-08-30", "4 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-09-06", "5 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-09-13", "6 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-09-20", "7 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-09-27", "8 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-10-03", "9 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-10-10", "10 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-10-17", "11 weeks after stage 4 restrictions announced")
    # pp.add_label(vic_idx, "2020-10-24", "12 weeks after stage 4 restrictions announced")

# Sydney labels: the run date, one and two weeks before it, and the first plotted day.
first_case_nsw = pp.frames[bondi_idx].head(1)['date']
pp.add_label(bondi_idx, RUN_DATE, "current state (NSW)")
pp.add_label(bondi_idx, PREV_WEEK, "a week ago (NSW)")
pp.add_label(bondi_idx, PREV_FORTNIGHT, "two weeks ago (NSW)")
pp.add_label(bondi_idx, first_case_nsw.values[0], "first day plotted (NSW)")

pp.ax.figure.savefig(f'archive/{RUN_DATE}/hedgehog.png')

In [None]:
# Fit the trend on Sydney rows after index 15. `gp` is left in the namespace
# because the following cell reads gp[1].
gp = derive_growth_params(bondi[bondi.index > 15])

ax = bondi["ols-growth-rate"].plot(figsize=(10, 10))
# Fitted curve: gp[0] * gp[1] ** day, evaluated over the Sydney index.
ax.plot((gp[1] ** bondi.index) * gp[0])
ax.set(
    title=f"Daily Cumulative Growth Rate % (Sydney 2021) ({RUN_DATE})",
    xlabel="Day Of Outbreak",
    ylabel="Daily Cumulative Growth Rate %",
)
ax.legend(["observed", f"trend: y={round(gp[0],3)} * ({round(gp[1],3)}^x)"])
ax.grid()
ax.figure.savefig(f'archive/{RUN_DATE}/growth-rate-trend.png')

# Shown as the cell's output.
gp[1]

In [None]:
# Project the Sydney series forward by 200-len(bondi) rows (presumably taking
# it to 200 rows in total — TODO confirm against project_ols_growth_rate_min),
# using gp[1] as fitted in the preceding trend cell.
# NOTE: `gp` must still be the derive_growth_params(...) result when this runs;
# another cell binds the same name to a GrowthPlot.
df=select_outbreak(project_ols_growth_rate_min(bondi, 200-len(bondi), gp[1]))
# len(bondi) is presumably the row where observed data ends and the projection
# starts — TODO confirm against plot_derivatives.
ax=plot_derivatives(df, len(bondi), "Sydney 2021")
ax.figure.savefig(f'archive/{RUN_DATE}/derivatives-sydney-partial.png')

In [None]:
# Same trend fit as the Sydney cell, applied to the truncated Melbourne
# series. `gp` is left in the namespace because the following cell reads gp[1].
df = vic_partial
gp = derive_growth_params(df[df.index > 30])
ax = df["ols-growth-rate"].plot(figsize=(10, 10))
# Evaluate the fitted curve over the Melbourne index being plotted. The
# original used bondi.index, which only lines up while VIC_EXTRA_DAYS is 0.
ax.plot((gp[1] ** df.index) * gp[0])
# This is the 2020 Melbourne outbreak (the title previously said 2021).
ax.set_title(f"Daily Cumulative Growth Rate % (Melbourne 2020) ({RUN_DATE})")
ax.set_xlabel("Day Of Outbreak")
ax.set_ylabel("Daily Cumulative Growth Rate %")
ax.legend(["observed", f"trend: y={round(gp[0],3)} * ({round(gp[1],3)}^x)"])
# Not saved: this path is the one the Sydney trend cell writes to.
#ax.figure.savefig(f'archive/{RUN_DATE}/growth-rate-trend.png')

In [None]:
# Project the truncated Melbourne series forward by 110 rows using gp[1].
# NOTE: relies on `gp` from the Melbourne trend cell directly above; running
# this after the Sydney trend cell instead would silently use Sydney's value.
df=select_outbreak(project_ols_growth_rate_min(vic_partial, 110, gp[1]))
ax=plot_derivatives(df, len(vic_partial), dataset="Melbourne 2020")
# Saving is disabled for this figure.
#ax.figure.savefig(f'archive/{RUN_DATE}/derivatives-melbourne-partial.png')

In [None]:
# Derivatives plot over the full observed Melbourne outbreak; None is passed
# where the partial plots pass the length of the observed data.
ax=plot_derivatives(vic_outbreak, None, dataset="Melbourne 2020")
ax.figure.savefig(f'archive/{RUN_DATE}/derivatives-melbourne-full.png')

# Hedgehog Animation

In [None]:
%%capture phaseplot
# The Sydney animation is always regenerated for the run date.
animate_phaseplot(
    df=bondi,
    offset=15,
    outbreak="Sydney 2021",
    fn=f'archive/{RUN_DATE}/animated-hedgehog-sydney2021.gif',
)

# The Melbourne animation is regenerated only when the flag is switched on.
if ENABLE_MELBOURNE_ANIMATION:
    animate_phaseplot(
        df=vic_outbreak,
        offset=20,
        outbreak="Melbourne 2020",
        fn=f'archive/{RUN_DATE}/animated-hedgehog-melbourne2020.gif',
    )

In [None]:
# Embed the Sydney hedgehog animation by relative path.
display(HTML("<img src='animated-hedgehog-sydney2021.gif'>"))

In [None]:
# Embed the Melbourne hedgehog animation from the ../latest directory.
display(HTML("<img src='../latest/animated-hedgehog-melbourne2020.gif'>"))

# Derivatives Animation

In [None]:
%%capture derivatives
# Sydney: the last argument is element [1] of the fit over rows after index 15.
animate_derivatives(
    bondi,
    "Sydney 2021",
    f'archive/{RUN_DATE}/animated-derivatives-sydney.gif',
    derive_growth_params(bondi[bondi.index > 15])[1],
)

# Melbourne: regenerated only when enabled, with the fit taken after index 30.
if ENABLE_MELBOURNE_ANIMATION:
    animate_derivatives(
        vic_outbreak,
        "Melbourne 2020",
        f'archive/{RUN_DATE}/animated-derivatives-melbourne.gif',
        derive_growth_params(vic_outbreak[vic_outbreak.index > 30])[1],
    )
                    
                    

In [None]:
# Embed the Sydney derivatives animation by relative path.
display(HTML("<img src='animated-derivatives-sydney.gif'>"))

In [None]:
# Embed the Melbourne derivatives animation from the ../latest directory.
display(HTML("<img src='../latest/animated-derivatives-melbourne.gif'>"))