# Import python libraries

In [None]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Lift the row-display cap so displayed frames/series are never truncated.
pd.options.display.max_rows = None

# Importing Data 

In [12]:
url = 'https://www.worldometers.info/coronavirus/country/us/#nav-yesterday'
# Fetch the page. The timeout stops the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page be parsed as if it were data.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
# Decoded HTML body of the response
data = resp.text
# Use BeautifulSoup to locate the target table by its element id
soup = BeautifulSoup(data, 'html.parser')
table = soup.find(id="usa_table_countries_yesterday")
if table is None:
    # soup.find() returns None when nothing matches; fail here with a clear
    # message rather than passing the string "None" on to pandas.
    raise ValueError(
        "Table 'usa_table_countries_yesterday' was not found in the page; "
        "the site layout may have changed or the request was blocked."
    )
# Build the DataFrame. read_html returns a list of frames, one per <table>;
# only one table is passed in, so take the first. The markup is wrapped in
# StringIO because passing literal HTML strings to read_html is deprecated in
# recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]
# Bare last expression: the notebook renders the preview as a formatted table.
df.head()

     #    USAState  TotalCases  NewCases  TotalDeaths  NewDeaths  \
0  NaN   USA Total    71728557  312314.0     888623.0      841.0   
1  1.0  California     7420876   43998.0      78806.0       76.0   
2  2.0       Texas     5919216   54691.0      78423.0      154.0   
3  3.0     Florida     5351391   22818.0      63576.0        2.0   
4  4.0    New York     4813820   23333.0      63619.0       56.0   

   TotalRecovered  ActiveCases  Tot Cases/1M pop  Deaths/1M pop   TotalTests  \
0      44329200.0   26510734.0          216701.0         2685.0  879598015.0   
1             NaN          NaN          187812.0         1994.0  131160392.0   
2             NaN          NaN          204140.0         2705.0   55606051.0   
3       3666925.0    1620890.0          249160.0         2960.0   50383801.0   
4       2597185.0    2153016.0          247452.0         3270.0   95256290.0   

   Tests/ 1M pop  Population    Projections  
0      2657375.0         NaN            NaN  
1      3319489.0  

# Clean Data

In [14]:
# Build the per-state frame in one chain:
#   1. keep row labels 1 through 63 (row 0 is the "USA Total" line);
#   2. keep the columns from "USAState" through "Population", which drops the
#      leading "#" column and every column after "Population";
#   3. index by state name so later cells can look states up directly.
# NOTE(review): the 1:63 bounds are hard-coded to the table layout at scrape
# time — confirm they still match if the source table gains or loses rows.
US_COVID = (
    df.loc[1:63]
    .loc[:, "USAState":"Population"]
    .set_index("USAState")
)

# Describe The Dataframe By Printing The First Few Rows

In [15]:
# Preview the first few rows of the cleaned DataFrame. A bare last expression
# is rendered by the notebook as a formatted table instead of the wrapped
# plain text that print() produces.
US_COVID.head()

            TotalCases  NewCases  TotalDeaths  NewDeaths  TotalRecovered  \
USAState                                                                   
California     7420876   43998.0      78806.0       76.0             NaN   
Texas          5919216   54691.0      78423.0      154.0             NaN   
Florida        5351391   22818.0      63576.0        2.0       3666925.0   
New York       4813820   23333.0      63619.0       56.0       2597185.0   
Illinois       2773362       NaN      33446.0        NaN       1924335.0   

            ActiveCases  Tot Cases/1M pop  Deaths/1M pop   TotalTests  \
USAState                                                                
California          NaN          187812.0         1994.0  131160392.0   
Texas               NaN          204140.0         2705.0   55606051.0   
Florida       1620890.0          249160.0         2960.0   50383801.0   
New York      2153016.0          247452.0         3270.0   95256290.0   
Illinois       815581.0      

# Print Dataframe Summary Statistics 

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric
# column. Left as the cell's last expression so the notebook renders it as a
# formatted table rather than wrapped plain text from print().
US_COVID.describe()

         TotalCases      NewCases   TotalDeaths   NewDeaths  TotalRecovered  \
count  6.300000e+01     26.000000     60.000000   20.000000    4.500000e+01   
mean   1.138549e+06  12012.076923  14810.383333   42.050000    6.702068e+05   
std    1.462813e+06  13577.744051  18291.937440   47.293568    7.791263e+05   
min    3.000000e+00     42.000000      7.000000    1.000000    3.000000e+00   
25%    2.135670e+05   2478.250000   2393.750000    2.750000    1.436660e+05   
50%    6.813820e+05   7309.000000   9590.500000   31.500000    4.221640e+05   
75%    1.455528e+06  16298.750000  19061.250000   58.500000    8.976670e+05   
max    7.420876e+06  54691.000000  78806.000000  154.000000    3.666925e+06   

        ActiveCases  Tot Cases/1M pop  Deaths/1M pop    TotalTests  \
count  4.500000e+01         52.000000      52.000000  6.000000e+01   
mean   2.782371e+05     213857.423077    2492.576923  1.465997e+07   
std    4.272421e+05      37873.778858     747.988790  2.237660e+07   
min    0

# Print The Top 5 States For Each Of These Metrics

## Top 5 States Of New Cases

In [18]:
# Five states with the most new cases, highest first.
print("New Cases top 5 states")
NewCases = US_COVID['NewCases']
new_cases_ranked = NewCases.sort_values(ascending=False)
print(new_cases_ranked.iloc[:5])

New Cases top 5 states
USAState
Texas         54691.0
California    43998.0
Arizona       27681.0
New York      23333.0
Florida       22818.0
Name: NewCases, dtype: float64


## Top 5 States Of Total Deaths

In [19]:
# Five states with the highest cumulative death counts, highest first.
print("Total Deaths top 5 states")
TotalDeaths = US_COVID['TotalDeaths']
total_deaths_ranked = TotalDeaths.sort_values(ascending=False)
print(total_deaths_ranked.iloc[:5])

Total Deaths top 5 states
USAState
California      78806.0
Texas           78423.0
New York        63619.0
Florida         63576.0
Pennsylvania    39487.0
Name: TotalDeaths, dtype: float64


## Top 5 States Of Total Cases / 1M People

In [45]:
# Five states with the most total cases per million people, highest first.
print("Total Cases / 1M People top 5 states")
# The column is picked by position (index 6 once USAState is the index); the
# printed series name shows it is "Tot Cases/1M pop".
# NOTE(review): presumably positional because the scraped header text is hard
# to match exactly (it may contain non-standard whitespace) — confirm. If the
# source table's column order changes, index 6 will silently select a
# different metric.
TotalCases_1M_People = US_COVID.iloc[:, 6]
cases_per_million_ranked = TotalCases_1M_People.sort_values(ascending=False)
print(cases_per_million_ranked.iloc[:5])

Total Cases / 1M People top 5 states
USAState
Rhode Island    308780.0
North Dakota    276195.0
Alaska          257827.0
Utah            257642.0
Tennessee       254234.0
Name: Tot Cases/1M pop, dtype: float64


## Top 5 States Of Deaths / 1M People

In [46]:
# Five states with the most deaths per million people, highest first.
print("Deaths/1M pop top 5 states")
TotalDeaths_1M_pop = US_COVID['Deaths/1M pop']
deaths_per_million_ranked = TotalDeaths_1M_pop.sort_values(ascending=False)
print(deaths_per_million_ranked.iloc[:5])

Deaths/1M pop top 5 states
USAState
Mississippi    3602.0
Arizona        3511.0
New Jersey     3462.0
Alabama        3432.0
Louisiana      3296.0
Name: Deaths/1M pop, dtype: float64
