https://courses.spatialthoughts.com/python-dataviz.html#setup-and-data-download

Steps:
1. Data Download/Extraction
2. Data Pre-processing
3. 

### 1. Data Download

#### 1.1. Natural Earth Populated Places

In [None]:
import pandas as pd
import geopandas as gpd
import rioxarray

In [None]:
import os

# Working directories used by the rest of the notebook.
data_folder = 'data'
output_folder = 'output'

# makedirs(exist_ok=True) is idempotent: re-running the cell is safe and there
# is no gap between an existence check and the actual directory creation.
os.makedirs(data_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

In [None]:
import requests

def download(url):
    """Download `url` into `data_folder` unless the file is already there.

    The local file name is the last path component of `url`. Nothing is
    returned; a message is printed after a successful download.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        Other `requests` exceptions (connection errors, timeouts) propagate
        unchanged.
    """
    filename = os.path.join(data_folder, os.path.basename(url))
    if os.path.exists(filename):
        return

    # Stream into a temporary sibling file and rename it only once the
    # transfer has finished, so an interrupted download is never mistaken
    # for a complete file on the next run.
    partial = filename + '.part'
    try:
        with requests.get(url, stream=True, allow_redirects=True,
                          timeout=60) as r:
            # Fail loudly instead of saving an HTML error page as data.
            r.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial, filename)
    finally:
        # Only present here if the download or rename failed.
        if os.path.exists(partial):
            os.remove(partial)
    print('Downloaded', filename)

In [None]:
# Fetch the zipped Natural Earth populated-places layer.
download(
    'https://naciscdn.org/naturalearth/10m/cultural/'
    'ne_10m_populated_places_simple.zip'
)

In [None]:
# Read the downloaded zip archive from the data folder with GeoPandas.
file = 'ne_10m_populated_places_simple.zip'
filepath = os.path.join(data_folder, file)
places = gpd.read_file(filepath)

In [None]:
# Show the column labels of `places`.
places.columns

#### 1.2. Police.uk Metropolitan Street Data (2020)

In [None]:
# One CSV per month of 2020: '2020-01-…' through '2020-12-…'.
files = [
    '2020-{:02d}-metropolitan-street.csv'.format(month)
    for month in range(1, 13)
]

data_url = 'https://github.com/spatialthoughts/python-dataviz-web/releases/' \
    'download/police.uk/'

for f in files:
    # URLs are built by plain concatenation: os.path.join is meant for
    # filesystem paths (and uses '\\' as separator on Windows).
    url = data_url + f
    download(url)

In [None]:
# Load every monthly CSV from the data folder, then stack them row-wise.
dataframe_list = [
    pd.read_csv(os.path.join(data_folder, name)) for name in files
]
merged_df = pd.concat(dataframe_list)

In [None]:
merged_df.columns

#### 1.3. Census Tracts and ACS Table

In [None]:
# Remote location and names of the census tract shapefile and the ACS table.
data_url = ('https://github.com/spatialthoughts/python-dataviz-web/releases/'
            'download/census/')
shapefile_name = 'tl_2019_06_tract'
shapefile_exts = ['.shp', '.shx', '.dbf', '.prj']
csv_name = 'ACSST5Y2019.S0101_data.csv'

# The shapefile is split across several files sharing one base name;
# fetch each of them.
for ext in shapefile_exts:
    url = ''.join([data_url, shapefile_name, ext])
    download(url)

download(''.join([data_url, csv_name]))

In [None]:
# Read the tract shapefile (via its .shp file) from the data folder.
shapefile_path = os.path.join(data_folder, shapefile_name + '.shp')
tracts = gpd.read_file(shapefile_path)

In [None]:
# Show the column labels of `tracts`.
tracts.columns

In [None]:
# Preview the first rows of `tracts`.
tracts.head()

In [None]:
# Load the ACS table. skiprows=[1] drops the second line of the file (index 1)
# and keeps the first line as the header.
# NOTE(review): presumably that second line holds descriptive column labels —
# confirm against the CSV.
csv_path = os.path.join(data_folder, csv_name)
table = pd.read_csv(csv_path, skiprows=[1])

In [None]:
# Show the column labels of `table`.
table.columns

In [None]:
# table['S0101_C01_003M']

In [None]:
# Preview the first rows of `table`.
table.head()

In [None]:
# Keep three columns, rename two of them, and trim GEOID to its last 11
# characters (presumably so it lines up with the GEOID key in `tracts`).
filtered = (
    table[['GEO_ID', 'NAME', 'S0101_C01_001E']]
    .rename(columns={'S0101_C01_001E': 'Population', 'GEO_ID': 'GEOID'})
    .assign(GEOID=lambda frame: frame['GEOID'].str[-11:])
)

In [None]:
# Attach the table columns to the tract geometries by matching GEOID.
# No `how` is given, so merge's default inner join applies: tracts without a
# matching table row are dropped.
gdf = tracts.merge(filtered, on='GEOID')

In [None]:
# Population divided by ALAND, scaled by 1e6.
# NOTE(review): assumes ALAND is land area in square metres, which would make
# this persons per sq. km — confirm. Rows with ALAND == 0 would produce
# inf/NaN here.
gdf['density'] = 1e6*gdf['Population']/gdf['ALAND']

### 2. Spatial Visualization

#### 2.1. Choropleth Map

In [None]:
import matplotlib.pyplot as plt

# Choropleth of the density column, classified into quantiles.
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(column='density', scheme='quantiles', cmap='RdYlGn_r', ax=ax)
plt.show()

In [None]:
# Legend options handed to GeoDataFrame.plot.
legend_kwds = dict(
    loc='upper right',
    bbox_to_anchor=(0.8, 0.9),
    fmt='{:<5.0f}',
    frameon=False,
    fontsize=8,
    title='persons/sq.km.',
)
# Hand-picked class breaks for the density values.
classification_kwds = dict(bins=[1, 10, 25, 50, 100, 250, 500, 1000, 5000])

fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(
    column='density',
    cmap='RdYlGn_r',
    scheme='User_Defined',
    classification_kwds=classification_kwds,
    legend=True,
    legend_kwds=legend_kwds,
    ax=ax,
)

ax.set_axis_off()
ax.set_title('California Population Density (2019)', size=18)

# Relabel the final legend entry as '> 5000'.
legend = ax.get_legend()
legend.get_texts()[-1].set_text('> 5000')

# Uncomment to write the figure to the output folder:
# output_path = os.path.join(output_folder, 'california_pop.png')
# plt.savefig(output_path, dpi=300)

plt.show()

In [None]:
# Draw the `tracts` geometries as outlines only (no fill colour).
fig, ax = plt.subplots(figsize=(7, 7))
tracts.plot(edgecolor='#ef2d26', facecolor='none', linewidth=0.5, ax=ax)
plt.show()

In [None]:
# San Francisco area, method 1: keep only the rows whose INTPTLAT / INTPTLON
# values fall between latitudes 37.71–37.82 and longitudes -122.53–-122.36,
# then plot that subset.

# Cast both columns to float so the numeric range test below works.
gdf["INTPTLAT"] = gdf["INTPTLAT"].astype(float)
gdf["INTPTLON"] = gdf["INTPTLON"].astype(float)

filtered = gdf.loc[
    gdf["INTPTLAT"].between(37.71, 37.82)
    & gdf["INTPTLON"].between(-122.53, -122.36)
]

fig, ax = plt.subplots(figsize=(7, 7))
filtered.plot(facecolor='none', linewidth=0.5, ax=ax)
plt.show()

In [None]:
# method 2

from shapely.geometry import box

# Rectangle given as (min lon, min lat, max lon, max lat).
bbox = box(-122.53, 37.71, -122.36, 37.82)

# Keep every row whose geometry intersects the rectangle.
filtered = gdf.loc[gdf.geometry.intersects(bbox)]

fig, ax = plt.subplots(figsize=(7, 7))
filtered.plot(facecolor='none', linewidth=0.5, ax=ax)
plt.show()

#### 2.2. Basemaps

#### 2.3. XArray

#### 2.4. Gridded Dataset

#### 2.5. CartoPy

#### 2.6. Visualizing Rasters