<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Import-pandas-and-numpy,-and-load-the-Covid-and-land-temperature-data" data-toc-modified-id="Import-pandas-and-numpy,-and-load-the-Covid-and-land-temperature-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Import pandas and numpy, and load the Covid and land temperature data</a></span></li><li><span><a href="#Create-a-list-of-locations" data-toc-modified-id="Create-a-list-of-locations-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Create a list of locations</a></span></li><li><span><a href="#Use-a-NumPy-array-to-calculate-sums-by-location" data-toc-modified-id="Use-a-NumPy-array-to-calculate-sums-by-location-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Use a NumPy array to calculate sums by location</a></span></li><li><span><a href="#Sort-the-land-temperature-data-and-drop-rows-with-missing-values-for-temperature" data-toc-modified-id="Sort-the-land-temperature-data-and-drop-rows-with-missing-values-for-temperature-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Sort the land temperature data and drop rows with missing values for temperature</a></span></li><li><span><a href="#Use-a-NumPy-array-to-calculate-average-temperature-for-the-year" data-toc-modified-id="Use-a-NumPy-array-to-calculate-average-temperature-for-the-year-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Use a NumPy array to calculate average temperature for the year</a></span></li><li><span><a href="#Create-a-DataFrame-of-the-land-temperature-averages" data-toc-modified-id="Create-a-DataFrame-of-the-land-temperature-averages-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Create a DataFrame of the land temperature averages</a></span></li></ul></div>

# Import pandas and numpy, and load the Covid and land temperature data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Wider display settings, available but intentionally left disabled:
# pd.set_option('display.width', 200)
# pd.set_option('display.max_columns', 35)
# pd.set_option('display.max_rows', 200)
# Render floats with thousands separators and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [3]:
# Record the environment this notebook ran in. Loading the extension makes
# the %watermark line magic available.
import watermark
%load_ext watermark

# -iv reports the versions of the imported packages (see the output below).
# NOTE(review): -n and -i are presumably the date/time stamp flags; confirm
# against the watermark documentation for the installed version.
%watermark -n -i -iv

watermark: 2.1.0
pandas   : 1.2.1
numpy    : 1.19.2
json     : 2.0.9



In [4]:
# Covid data with one row per location and case date; casedate is parsed
# into datetimes while reading.
coviddaily = pd.read_csv(
    'data/coviddaily720.csv',
    parse_dates=['casedate'],
)
# Land temperature readings (station / month / temperature columns are used
# further down).
ltbrazil = pd.read_csv('data/ltbrazil.csv')

# Create a list of locations

In [5]:
# Distinct location labels, in order of first appearance, as a plain list.
loclist = pd.unique(coviddaily['location']).tolist()

In [8]:
type(loclist)

list

In [9]:
loclist[0:5]

['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola']

# Use a NumPy array to calculate sums by location

In [6]:
rowlist = []
casevalues = coviddaily[['location', 'new_cases']].to_numpy()

# Accumulate every location's total in one pass over the array instead of
# rescanning all rows once per location. Each total starts at 0 and grows in
# row order, which reproduces what sum() returned for that location's values.
totals = dict.fromkeys(loclist, 0)
for location, new_cases in casevalues:
    # Labels that are not in loclist contribute to no total.
    if location in totals:
        totals[location] += new_cases

# One total per entry of loclist, in the same order, so the two lists stay
# aligned position by position.
rowlist.extend(totals[locitem] for locitem in loclist)

In [10]:
type(casevalues)

numpy.ndarray

In [11]:
casevalues[0:5]

array([['Afghanistan', 0.0],
       ['Afghanistan', 0.0],
       ['Afghanistan', 0.0],
       ['Afghanistan', 0.0],
       ['Afghanistan', 0.0]], dtype=object)

In [12]:
len(rowlist)

209

In [13]:
len(loclist)

209

In [14]:
rowlist[0:5]

[34451.0, 3371.0, 18712.0, 855.0, 483.0]

In [15]:
# Pair each location with its total and turn the pairs into a two-column frame.
location_totals = list(zip(loclist, rowlist))
casetotals = pd.DataFrame(location_totals, columns=['location', 'casetotals'])

In [16]:
casetotals.head()

Unnamed: 0,location,casetotals
0,Afghanistan,34451.0
1,Albania,3371.0
2,Algeria,18712.0
3,Andorra,855.0
4,Angola,483.0


# Sort the land temperature data and drop rows with missing values for temperature

In [17]:
# Order rows by station, then month, so each station's readings are adjacent.
ltbrazil = ltbrazil.sort_values(by=['station', 'month'])

In [18]:
# Keep only the rows that actually have a temperature value.
has_temperature = ltbrazil['temperature'].notna()
ltbrazil = ltbrazil[has_temperature]

# Use a NumPy array to calculate average temperature for the year

In [19]:
# Walk the readings in order and build one summary row per station. This
# relies on all rows of a station being adjacent in ltbrazil. The last
# station is not stored inside the loop; it is appended after the loop ends.
prevstation = 'ZZZ'  # sentinel: no station has been seen yet
prevtemp = 0
rowlist = []

tempvalues = ltbrazil[['station', 'temperature']].to_numpy()

for station, temperature in tempvalues:
    if station != prevstation:
        # A new station begins: store the finished one (nothing to store
        # while the sentinel is still in place) and reset the running totals.
        if prevstation != 'ZZZ':
            rowlist.append(
                dict(station=prevstation,
                     avgtemp=temp_count / station_count,
                     station_count=station_count))
        temp_count, station_count = 0, 0
        prevstation = station
    # Count a reading when it is within 3 of the reading before it; the first
    # reading of each station is always counted.
    if abs(temperature - prevtemp) <= 3 or station_count == 0:
        temp_count += temperature
        station_count += 1
    # The comparison baseline advances even when the reading was not counted.
    prevtemp = temperature

In [20]:
# Store the summary for the last station, which the loop leaves pending
# because a row is only written when the next station starts.
# Guards: skip when no reading was processed (sentinel still in place), and
# skip when this station is already the last entry, so re-running the cell
# does not append a duplicate row.
if prevstation != 'ZZZ' and (not rowlist
                             or rowlist[-1]['station'] != prevstation):
    rowlist.append({
        'station': prevstation,
        'avgtemp': temp_count / station_count,
        'station_count': station_count
    })

In [21]:
rowlist[0:5]

[{'station': 'ALTAMIRA', 'avgtemp': 28.310000000000002, 'station_count': 5},
 {'station': 'ALTA_FLORESTA_AERO',
  'avgtemp': 29.433636363636367,
  'station_count': 11},
 {'station': 'ARAXA', 'avgtemp': 21.612499999999997, 'station_count': 4},
 {'station': 'BACABAL', 'avgtemp': 29.75, 'station_count': 4},
 {'station': 'BAGE', 'avgtemp': 20.366666666666664, 'station_count': 9}]

# Create a DataFrame of the land temperature averages

In [22]:
# One row per station summary; the dict keys become the column names.
ltbrazilavgs = pd.DataFrame(data=rowlist)

In [23]:
ltbrazilavgs.head()

Unnamed: 0,station,avgtemp,station_count
0,ALTAMIRA,28.31,5
1,ALTA_FLORESTA_AERO,29.43,11
2,ARAXA,21.61,4
3,BACABAL,29.75,4
4,BAGE,20.37,9
