In [1]:
## imports
using CSV;
using DataFrames;
using Interact;
using Plots;
using Dates

In [2]:
url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

download(url, "covid_data.csv")

data = CSV.read("covid_data.csv", DataFrame)
rename!(data, 1 => "province", 2 => "country")

Unnamed: 0_level_0,province,country,Lat,Long,1/22/20
Unnamed: 0_level_1,String?,String,Float64?,Float64?,Int64
1,missing,Afghanistan,33.9391,67.71,0
2,missing,Albania,41.1533,20.1683,0
3,missing,Algeria,28.0339,1.6596,0
4,missing,Andorra,42.5063,1.5218,0
5,missing,Angola,-11.2027,17.8739,0
6,missing,Antigua and Barbuda,17.0608,-61.7964,0
7,missing,Argentina,-38.4161,-63.6167,0
8,missing,Armenia,40.0691,45.0382,0
9,Australian Capital Territory,Australia,-35.4735,149.012,0
10,New South Wales,Australia,-33.8688,151.209,0


In [3]:
all_countries = data[:, "country"];

In [42]:
#test that webio is working correctly
width, height = 700, 300
colors = ["black", "gray", "silver", "maroon", "red", "olive", "yellow", "green", "lime", "teal", "aqua", "navy", "blue", "purple", "fuchsia"]
color(i) = colors[i%length(colors)+1]
ui = @manipulate for nsamples in 1:200,
        sample_step in slider(0.01:0.01:1.0, value=0.1, label="sample step"),
        phase in slider(0:0.1:2pi, value=0.0, label="phase"),
        radii in 0.1:0.1:60
    cxs_unscaled = [i*sample_step + phase for i in 1:nsamples]
    cys = sin.(cxs_unscaled) .* height/3 .+ height/2
    cxs = cxs_unscaled .* width/4pi
    dom"svg:svg[width=$width, height=$height]"(
        (dom"svg:circle[cx=$(cxs[i]), cy=$(cys[i]), r=$radii, fill=$(color(i))]"()
            for i in 1:nsamples)...
    )
end

In [8]:
#did anyone find a built in julia thing to do this? couldn't find anything in the docs
function element_inclusion(item, tuple)
    if item in tuple
        return true
    else
        return false
    end
end

element_inclusion (generic function with 1 method)

In [9]:
#select countries
selected_countries = ["China", "Japan", "Korea, South", "US", "United Kingdom", "France", "Germany"]
countries_for_vis = data[element_inclusion.(all_countries, (selected_countries,)),:]

Unnamed: 0_level_0,province,country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20
Unnamed: 0_level_1,String?,String,Float64?,Float64?,Int64,Int64,Int64,Int64
1,Anhui,China,31.8257,117.226,1,9,15,39
2,Beijing,China,40.1824,116.414,14,22,36,41
3,Chongqing,China,30.0572,107.874,6,9,27,57
4,Fujian,China,26.0789,117.987,1,5,10,18
5,Gansu,China,35.7518,104.286,0,2,2,4
6,Guangdong,China,23.3417,113.424,26,32,53,78
7,Guangxi,China,23.8298,108.788,2,5,23,23
8,Guizhou,China,26.8154,106.875,1,3,3,4
9,Hainan,China,19.1959,109.745,4,5,8,19
10,Hebei,China,39.549,116.131,1,1,2,8


In [10]:
date_strings = String.(names(data))[5:end];
numdays = length(date_strings)

347

In [11]:
format = Dates.DateFormat("m/d/Y");
dates = parse.(Date, date_strings, format) .+ Year(2000)

347-element Array{Date,1}:
 2020-01-22
 2020-01-23
 2020-01-24
 2020-01-25
 2020-01-26
 2020-01-27
 2020-01-28
 2020-01-29
 2020-01-30
 2020-01-31
 2020-02-01
 2020-02-02
 2020-02-03
 ⋮
 2020-12-22
 2020-12-23
 2020-12-24
 2020-12-25
 2020-12-26
 2020-12-27
 2020-12-28
 2020-12-29
 2020-12-30
 2020-12-31
 2021-01-01
 2021-01-02

In [31]:
#combine states under each country 
summarised_covid_vis = combine(groupby(countries_for_vis, "country"), date_strings .=> sum .=> date_strings)

Unnamed: 0_level_0,country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20
Unnamed: 0_level_1,String,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64
1,China,548,643,920,1406,2075,2877,5509,6087
2,France,0,0,2,3,3,3,4,5
3,Germany,0,0,0,0,0,1,4,4
4,Japan,2,2,2,2,4,4,7,7
5,"Korea, South",1,1,2,2,3,4,4,4
6,US,1,1,2,2,5,5,5,6
7,United Kingdom,0,0,0,0,0,0,0,0


### Problem 1

In [47]:
@manipulate for range in 1:numdays
    p = plot(xlim=(dates[1],dates[end]))
    for country in selected_countries
        selected_country_data = summarised_covid_vis[summarised_covid_vis[:,1] .== country, 2:end]
        selected_country_vis = vec(convert(Matrix{Float64}, selected_country_data))
        replace!(selected_country_vis, 0=>NaN)

        plot!(dates[1:range], selected_country_vis[1:range], xticks=dates[1:30:end], xrotation=45, leg=:bottomright, 
        label=country*" data", yscale=:log10)
    end
    p
end


### Problem 2

In [145]:
#the problem suggested using dict so... 
all_country_vis = Dict()
for country in selected_countries
    selected_country_data = summarised_covid_vis[summarised_covid_vis[:,1] .== country, 2:end]
    #we convert directly to a float matrix of size n x 1 and then the vec will transpose to 1 x n
    selected_country_vis = vec(convert(Matrix{Float64}, selected_country_data))
    #get the last 7 days of cases
    new_cases = [selected_country_vis[i] - selected_country_vis[i-6] for i in 7:numdays]
    #we now need to remove the first7 days from data for plotting purposes
    filtered_totals = selected_country_vis[7:numdays]
    #so we need this filter below because for some reason france's total covid case counts decrease for a few days 
    new_cases[new_cases .< 0] .= 0
    replace!(new_cases, 0=>NaN)
    replace!(filtered_totals, 0=>NaN)
    #funny dict append syntax
    push!(all_country_vis, country => (filtered_totals, new_cases))
end
all_country_vis


Dict{Any,Any} with 7 entries:
  "Germany"        => ([4.0, 4.0, 4.0, 5.0, 8.0, 10.0, 12.0, 12.0, 12.0, 12.0  …
  "United Kingdom" => ([NaN, NaN, NaN, 2.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0  …  2.…
  "China"          => ([5509.0, 6087.0, 8141.0, 9802.0, 11891.0, 16630.0, 19716…
  "Korea, South"   => ([4.0, 4.0, 4.0, 11.0, 12.0, 15.0, 15.0, 16.0, 19.0, 23.0…
  "Japan"          => ([7.0, 7.0, 11.0, 15.0, 20.0, 20.0, 20.0, 22.0, 23.0, 23.…
  "France"         => ([4.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0  …  2.…
  "US"             => ([5.0, 6.0, 6.0, 8.0, 8.0, 8.0, 11.0, 11.0, 11.0, 12.0  ……

In [148]:
@manipulate for range in 7:numdays
    p = plot(legend=false)
    for country in selected_countries
        filtered_totals = all_country_vis[country][1]
        new_cases = all_country_vis[country][2]
        
        plot!(filtered_totals[1:range], new_cases[1:range], xticks=[10,1000,1e4,1e5,1e6, 1e7], xscale=:log10, yscale=:log10)
        scatter!([filtered_totals[range]], [new_cases[range]], markersize=1, xscale=:log10, yscale=:log10)
        #how to make the below country markers not ugly?
        annotate!(filtered_totals[range], new_cases[range], text(country, 8, :black))
    end
    p
end