# Visualizações

Criar visualizações que permitam obter bons insights e acompanhar a evolução da
pandemia de COVID-19.

## Carregamento de datasets

In [0]:
# Locations of the two parquet datasets under the data lake's "_ready" folder on DBFS.
data_by_country_path = "dbfs:/FileStore/_covid_data_lake/_ready/_data_by_country/data_by_country.parquet"
summary_country_path = "dbfs:/FileStore/_covid_data_lake/_ready/_summary/summary_country.parquet"

# Load both datasets as Spark DataFrames; every cell below reads from these two.
df_data_by_country = spark.read.parquet(data_by_country_path)
df_summary_country = spark.read.parquet(summary_country_path)

In [0]:
# Preview the per-country summary DataFrame to confirm the load and inspect its columns.
# NOTE(review): `display` is presumably the notebook environment's built-in table renderer — confirm.
display(df_summary_country)

NOME,SIGLA,NEWCONFIRMED,TOTALCONFIRMED,NEWDEATHS,TOTALDEATHS,NEWRECOVERED,TOTALRECOVERED,DATE
Afghanistan,AF,18,55894,0,2451,90,49499,2021-03-12
Albania,AL,631,114840,17,1986,714,77498,2021-03-12
Algeria,DZ,138,114681,4,3026,109,79428,2021-03-12
Andorra,AD,41,11130,0,112,23,10708,2021-03-12
Angola,AO,47,21161,0,516,84,19761,2021-03-12
Antigua and Barbuda,AG,20,882,1,23,34,489,2021-03-12
Argentina,AR,7693,2169694,107,53359,5049,1961640,2021-03-12
Armenia,AM,340,175538,5,3237,163,165259,2021-03-12
Australia,AU,16,29090,0,909,9,22945,2021-03-12
Austria,AT,2528,481919,19,8776,2012,449053,2021-03-12


# Análise exploratória dos dados

Aqui realizaremos uma série de consultas (queries) sobre os dados carregados acima.

## Países TOP 10 de casos confirmados de COVID-19

In [0]:
import pyspark.sql.functions as f

# Keep the ten countries with the largest TOTALCONFIRMED values, highest first.
df = (
    df_summary_country
    .select('SIGLA', 'NOME', 'TOTALCONFIRMED')
    .orderBy(f.col('TOTALCONFIRMED').desc())
    .limit(10)
)

display(df)

SIGLA,NOME,TOTALCONFIRMED
US,United States of America,29236735
IN,India,11285561
BR,Brazil,11202305
RU,Russian Federation,4302726
GB,United Kingdom,4247879
FR,France,4022429
ES,Spain,3172101
IT,Italy,3123368
TR,Turkey,2821943
DE,Germany,2541781


## Países TOP 10 em número de mortes por COVID-19

In [0]:
# Keep the ten countries with the largest TOTALDEATHS values, highest first.
# Note: this rebinds the notebook-level name `df` used by the previous ranking cell.
df = (
    df_summary_country
    .select('SIGLA', 'NOME', 'TOTALDEATHS')
    .orderBy(f.col('TOTALDEATHS').desc())
    .limit(10)
)

display(df)

SIGLA,NOME,TOTALDEATHS
US,United States of America,529263
BR,Brazil,270656
MX,Mexico,192491
IN,India,158189
GB,United Kingdom,125222
IT,Italy,100811
FR,France,89707
RU,Russian Federation,88773
DE,Germany,72858
ES,Spain,71961


## Evolução de casos e mortes no Brasil

In [0]:
# Rows for Brazil only, sorted by DATE ascending so the series reads oldest to newest.
cases_total_Brazil = (
    df_data_by_country
    .select("NOME", "CONFIRMED", "DEATHS", "RECOVERED", "DATE")
    .where("NOME in ('Brazil')")
    .orderBy(f.col('DATE').asc())
)

display(cases_total_Brazil)

NOME,CONFIRMED,DEATHS,RECOVERED,DATE
Brazil,0,0,0,2020-01-22
Brazil,0,0,0,2020-01-23
Brazil,0,0,0,2020-01-24
Brazil,0,0,0,2020-01-25
Brazil,0,0,0,2020-01-26
Brazil,0,0,0,2020-01-27
Brazil,0,0,0,2020-01-28
Brazil,0,0,0,2020-01-29
Brazil,0,0,0,2020-01-30
Brazil,0,0,0,2020-01-31


## Evolução de casos e mortes no Mundo

In [0]:
# Worldwide figures per DATE: sum each metric over all rows sharing the same DATE,
# keeping the original column names, then sort by DATE ascending.
cases_total_world = (
    df_data_by_country
    .select("CONFIRMED", "DEATHS", "RECOVERED", "DATE")
    .groupBy("DATE")
    .agg(*(
        f.sum(metric).alias(metric)
        for metric in ("CONFIRMED", "DEATHS", "RECOVERED")
    ))
    .orderBy(f.col('DATE').asc())
)

display(cases_total_world)

DATE,CONFIRMED,DEATHS,RECOVERED
2020-01-22,557,17,30
2020-01-23,655,18,32
2020-01-24,941,26,39
2020-01-25,1433,42,42
2020-01-26,2118,56,56
2020-01-27,2927,82,65
2020-01-28,5578,131,108
2020-01-29,6167,133,127
2020-01-30,8235,171,145
2020-01-31,9927,213,225
