# Section 2: Moving Beyond Static Visualizations

1. Create a dataset of cumulative questions per library over time

In [1]:
import pandas as pd

# Read the question counts indexed by creation date, keep the columns from
# 'pandas' through 'bokeh', total them per month, and accumulate over time.
# Rows that are absent after reindexing to the full monthly range are set to 0.
questions_per_library = (
    pd.read_csv('stackoverflow.zip', parse_dates=True, index_col='creation_date')
    .loc[:, 'pandas':'bokeh']
    .resample('1M')
    .sum()
    .cumsum()
    .reindex(pd.date_range('2008-08', '2021-10', freq='M'))
    .fillna(0)
)

# Show the most recent rows as a sanity check.
questions_per_library.tail()

FileNotFoundError: [Errno 2] No such file or directory: 'stackoverflow.zip'

2. Import the FuncAnimation class

In [None]:
from matplotlib.animation import FuncAnimation

3. Write a function for generating the initial plot

In [None]:
import matplotlib.pyplot as plt
from matplotlib import ticker
def bar_plot(data):
    """
    Draw the initial horizontal bar chart (all bars at width 0) to be animated.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per bar; the final row determines the bar order.

    Returns
    -------
    tuple
        ``(fig, ax)``: the Matplotlib figure and the axes holding the bars.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Order the bars ascending by the values in the final row.
    # `DataFrame.last()` is deprecated in pandas >= 2.1; `iloc[-1]` selects the
    # same final row and also works when the index is not monthly.
    sort_order = data.iloc[-1].sort_values().index

    # Every bar starts at width 0 and is labelled with its column name so the
    # update function can look up the matching column via `get_label()`.
    for label, bar in zip(sort_order, ax.barh(sort_order, [0] * data.shape[1])):
        bar.set_label(label)

    ax.set_xlabel('total questions', fontweight='bold')
    # Upper limit is 250,000; the previous literal `250_00` evaluated to 25,000.
    ax.set_xlim(0, 250_000)
    ax.xaxis.set_major_formatter(ticker.EngFormatter())
    ax.xaxis.set_tick_params(labelsize=12)
    ax.yaxis.set_tick_params(labelsize=12)

    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)

    fig.tight_layout()
    return fig, ax

In [2]:
%config InlineBackend.figure_formats=['svg']
%matplotlib inline
# Draw the initial (all-zero) bar chart to check its layout before animating.
bar_plot(questions_per_library)

NameError: name 'bar_plot' is not defined

4. Write a function for generating annotations and plot text

In [None]:
def generate_plot_text(ax):
    """
    Create the empty text artists that the animation fills in later.

    One blank annotation is created per patch in ``ax.patches``, anchored at
    x=0 and vertically centered on that patch. A blank text artist is also
    placed at (0.9, 0.1) in axes coordinates.

    Returns
    -------
    tuple
        ``(annotations, time_text)``: the list of annotations (in patch order)
        and the single text artist.
    """
    annotations = []
    for patch in ax.patches:
        mid_y = patch.get_y() + patch.get_height() / 2
        annotations.append(
            ax.annotate('', xy=(0, mid_y), ha='left', va='center')
        )

    time_text = ax.text(
        0.9, 0.1, '',
        transform=ax.transAxes, fontsize=15, ha='center', va='center',
    )
    return annotations, time_text

5. Define the plot update function

In [None]:
def update(frame, *, ax, df, annotations, time_text):
    """
    Redraw the bar chart for a single animation frame.

    Parameters
    ----------
    frame
        Index label of the row in ``df`` to display; must support ``strftime``.
    ax
        Axes whose patches are the bars; each patch's label names its column.
    df : pandas.DataFrame
        Values to plot, one column per bar.
    annotations
        Text artists paired with ``ax.patches`` by position.
    time_text
        Text artist that shows the frame's month and year.
    """
    row = df.loc[frame, :]

    # Bars whose value is falsy (e.g. 0) are left untouched.
    for bar, label in zip(ax.patches, annotations):
        value = row[bar.get_label()]
        if not value:
            continue
        bar.set_width(value)
        label.set_x(value)
        label.set_text(f'{value:,.0f}')

    # Show the abbreviated month above the year.
    time_text.set_text(frame.strftime('%b\n%Y'))

6. Bind arguments to the update function

In [3]:
from functools import partial
def bar_plot_init(questions_per_library):
    """
    Build the starting bar chart and an update function bound to it.

    Returns
    -------
    tuple
        ``(fig, bar_plot_update)``: the figure, plus a callable that takes only
        the frame; the axes, data and text artists are already bound to it.
    """
    fig, ax = bar_plot(questions_per_library)
    annotations, time_text = generate_plot_text(ax)

    # Everything except the frame is fixed here, so FuncAnimation can call
    # the result with a single argument.
    bound_kwargs = dict(
        ax=ax,
        df=questions_per_library,
        annotations=annotations,
        time_text=time_text,
    )
    return fig, partial(update, **bound_kwargs)

7. Animate the plot

In [4]:
# Build the initial figure together with the update function bound to the data.
fig, update_func=bar_plot_init(questions_per_library)

# One frame per entry of the DataFrame's index; repeat=False disables looping.
ani =FuncAnimation(
    fig,update_func, frames=questions_per_library.index,repeat=False
)
# Write the animation to disk using the ffmpeg writer.
ani.save(
    'stackoverflow_questions.gif',
    writer='ffmpeg',fps=30,bitrate=100,dpi=300
)
# Close the current figure now that the animation has been saved.
plt.close()

NameError: name 'questions_per_library' is not defined

In [5]:
from IPython import display

# The animation is saved as 'stackoverflow_questions.gif'; the previous call
# pointed `display.Video` at a '.json' file that is never written. A GIF is an
# image, so it is shown with `display.Image` (an HTML5 <video> element cannot
# play GIFs). Only the width is set so the figure's aspect ratio is preserved.
display.Image(filename='stackoverflow_questions.gif', width=600)

# Animating distributions over time

1. Create a dataset of daily subway entries 

In [6]:
import pandas as pd
# Load the daily subway data with a two-level (Borough, Datetime) index.
subway = pd.read_csv(
    'NYC_subway_daily.csv',
    parse_dates=['Datetime'],
    index_col=['Borough', 'Datetime'],
)

# Move index level 0 (Borough) into the columns, leaving Datetime as the index.
subway_daily = subway.unstack(0)
subway_daily.head()

FileNotFoundError: [Errno 2] No such file or directory: 'NYC_subway_daily.csv'

In [7]:
# Select the 'Entries' columns and then borough 'M' (presumably Manhattan, per the variable name).
manhattan_entries = subway_daily['Entries']['M']

NameError: name 'subway_daily' is not defined

2. Determine the bin ranges for the histograms

In [8]:
import numpy as np
# Compute a 30-bin histogram over all of the data: the counts per bin and the bin edges.
count_per_bin, bin_ranges = np.histogram(manhattan_entries, bins=30)

NameError: name 'manhattan_entries' is not defined

3. Write a function for generating the initial histogram subplots

In [9]:
def subway_histogram(data, bins, date_range):
    """
    Draw side-by-side weekend and weekday histograms for a date range.

    Parameters
    ----------
    data : pandas.Series
        Values indexed by a ``DatetimeIndex``.
    bins
        Passed to ``np.histogram`` to compute bin edges over all of ``data``,
        so both subplots share the same bins.
    date_range
        Label or slice passed to ``.loc`` to select the period to plot.

    Returns
    -------
    tuple
        ``(fig, axes, bin_ranges, configs)``. Each dict in ``configs`` gains a
        ``'hist'`` entry holding the bar container returned by ``ax.hist``.
    """
    _, bin_ranges = np.histogram(data, bins=bins)

    # Monday-Friday are weekdays 0-4.
    weekday_mask = data.index.weekday < 5
    configs = [
        {'label': 'Weekend', 'mask': ~weekday_mask, 'ymax': 60},
        {'label': 'Weekday', 'mask': weekday_mask, 'ymax': 120},
    ]

    fig, axes = plt.subplots(1, 2, figsize=(8, 4), sharex=True)
    for ax, config in zip(axes, configs):
        _, _, config['hist'] = ax.hist(
            data[config['mask']].loc[date_range], bin_ranges, ec='black'
        )
        # The formatting below must run for every subplot, so it lives inside
        # the loop (previously only the last axes was styled).
        ax.xaxis.set_major_formatter(ticker.EngFormatter())
        ax.set(
            xlim=(0, None), ylim=(0, config['ymax']),
            xlabel=f'{config["label"]} Entries'
        )

        for spine in ['top', 'right']:
            ax.spines[spine].set_visible(False)

    axes[0].set_ylabel('Frequency')
    fig.suptitle('Histogram of daily subway entries in Manhattan')
    fig.tight_layout()
    return fig, axes, bin_ranges, configs


In [10]:
# Draw the histograms for 2017 only; the return value is discarded via `_`.
_ = subway_histogram(manhattan_entries, bins=30, date_range='2017')

NameError: name 'manhattan_entries' is not defined

4. Write a function for generating an annotation for the time period

In [11]:
def add_time_text(ax):
    """
    Add an empty text artist at (0.15, 0.9) in axes coordinates and return it.

    The text starts blank and is centered on its anchor point.
    """
    return ax.text(
        0.15, 0.9, '',
        transform=ax.transAxes, fontsize=15, ha='center', va='center',
    )

5. Define the plot update function

In [12]:
def update(frame, *, data, configs, time_text, bin_ranges):
    """
    Update the histograms for one animation frame and return changed artists.

    NOTE(review): this definition reuses the name ``update`` from the bar-chart
    section, so it replaces that function once this cell runs. Calling
    ``bar_plot_init`` afterwards would bind this version instead.

    Parameters
    ----------
    frame : pandas.Timestamp
        End of the window to display; the window covers the preceding 365 days.
    data : pandas.Series
        Values indexed by a ``DatetimeIndex``.
    configs : list of dict
        Each dict has a boolean ``'mask'`` aligned with ``data`` and a
        ``'hist'`` container whose ``patches`` are the histogram bars.
    time_text
        Text artist showing the frame's month and year.
    bin_ranges
        Bin edges passed to ``np.histogram``.

    Returns
    -------
    list
        Only the artists that were modified for this frame.
    """
    artists = []

    time = frame.strftime('%b\n%Y')
    if time != time_text.get_text():
        time_text.set_text(time)
        artists.append(time_text)

    # The window depends only on `frame`, so compute it once rather than once
    # per config.
    time_frame_mask = (
        (data.index > frame - pd.Timedelta(days=365)) & (data.index <= frame)
    )

    for config in configs:
        counts, _ = np.histogram(
            data[time_frame_mask & config['mask']],
            bin_ranges
        )
        # Only touch bars whose height actually changed.
        for count, rect in zip(counts, config['hist'].patches):
            if count != rect.get_height():
                rect.set_height(count)
                artists.append(rect)
    return artists

6. Bind arguments for the update function

In [13]:
def histogram_init(data, bins, initial_date_range):
    """
    Draw the starting histograms and return an update function bound to them.

    Returns
    -------
    tuple
        ``(fig, update_func)``: the figure, plus a callable that takes only the
        frame; the data, configs, time text and bin edges are already bound.
    """
    fig, axes, bin_ranges, configs = subway_histogram(
        data, bins, initial_date_range
    )

    # The time annotation is placed on the first subplot.
    time_text = add_time_text(axes[0])

    update_func = partial(
        update,
        data=data,
        configs=configs,
        time_text=time_text,
        bin_ranges=bin_ranges,
    )
    return fig, update_func

7. Animate the plot

In [14]:
# Draw the starting histograms from 2017 through July 2019 and get the bound update function.
fig, update_func = histogram_init(
    manhattan_entries, bins = 30, initial_date_range=slice('2017','2019-07')
)
# One frame per index entry from August 2019 through 2021; blit=True relies on
# the update function returning the artists it changed.
ani = FuncAnimation(
    fig, update_func, frames=manhattan_entries['2019-08':'2021'].index,
    repeat=False, blit=True
)
# Write the animation to disk using the ffmpeg writer.
ani.save(
    'subway_entries_subplots.gif',
    writer='ffmpeg',fps=30,bitrate=500,dpi=300
)
# Close the current figure now that the animation has been saved.
plt.close()

NameError: name 'manhattan_entries' is not defined

In [15]:
from IPython import display

# The animation is saved as 'subway_entries_subplots.gif'; the previous call
# pointed `display.Video` at a '.json' file that is never written. A GIF is an
# image, so it is shown with `display.Image` (an HTML5 <video> element cannot
# play GIFs). Only the width is set so the figure's aspect ratio is preserved.
display.Image(filename='subway_entries_subplots.gif', width=600)

# Animating geospatial data with HoloViz

1. Use GeoPandas to read in our data.

In [16]:
import geopandas as gpd

# Read the GeoJSON file, convert `time` from epoch milliseconds to datetimes,
# derive the calendar month from the converted time, and keep only the
# columns listed in the final selection.
earthquakes = gpd.read_file('earthquakes.geojson').assign(
    time=lambda x: pd.to_datetime(x.time, unit='ms'),
    month=lambda x: x.time.dt.month
)[['geometry', 'mag', 'time', 'month']]

# (rows, columns) of the resulting frame
earthquakes.shape

ModuleNotFoundError: No module named 'geopandas'

In [17]:
# Preview the first rows of the prepared data.
earthquakes.head()

NameError: name 'earthquakes' is not defined

2. Handle HoloViz imports and set up the Matplotlib backend.

In [18]:
import geoviews as gv
import geoviews.feature as gf
import holoviews as hv

# Load the GeoViews extension with Matplotlib as the plotting backend.
gv.extension('matplotlib')

ModuleNotFoundError: No module named 'geoviews'

3. Define a function for plotting earthquakes on a map using GeoViews.

In [19]:
import calendar

def plot_earthquakes(data, month_num):
    """
    Plot one month's earthquakes over land, coastline and border features.

    Parameters
    ----------
    data
        Table with a ``month`` column plus the ``longitude``, ``latitude`` and
        ``mag`` dimensions used by the points.
    month_num : int
        Month number to plot; also indexes ``calendar.month_name`` for the title.

    Returns
    -------
    The styled overlay of map features and points.
    """
    monthly = data.query(f'month == {month_num}')

    points = gv.Points(
        monthly,
        kdims=['longitude', 'latitude'],  # key dimensions: the point coordinates
        vdims=['mag'],  # value dimension: used to color the points
    )
    # Fix the magnitude and latitude ranges so every month is drawn on the same scale.
    points = points.redim.range(mag=(-2, 10), latitude=(-90, 90))

    # `*` stacks elements into an overlay; the map features go underneath.
    basemap = gf.land * gf.coastline * gf.borders
    overlay = basemap * points

    point_opts = gv.opts.Points(
        color='mag', cmap='fire_r', colorbar=True, alpha=0.75
    )
    overlay_opts = gv.opts.Overlay(
        global_extent=False, title=calendar.month_name[month_num], fontscale=2
    )
    return overlay.opts(point_opts, overlay_opts)

In [20]:
# Preview the map for month 1 (January) with explicit figure size and bounds options.
plot_earthquakes(earthquakes, 1).opts(
    fig_inches=(6, 3), aspect=2, fig_size=250, fig_bounds=(0.07, 0.05, 0.87, 0.95)
)

NameError: name 'earthquakes' is not defined

4. Create a mapping of frames to plots using HoloViews.

In [21]:
# Build one overlay per month number (1 through 12), keyed by month number.
frames = {
    month_num: plot_earthquakes(earthquakes, month_num)
    for month_num in range(1, 13)
}
# Wrap the month -> plot mapping in a HoloMap so it can be rendered as frames.
holomap = hv.HoloMap(frames)

NameError: name 'earthquakes' is not defined

5. Animate the plot.

In [22]:
# Render the HoloMap as a GIF at 5 frames per second, using the same figure
# size and bounds options as the single-month preview.
hv.output(
    holomap.opts(
        fig_inches=(6, 3), aspect=2, fig_size=250,
        fig_bounds=(0.07, 0.05, 0.87, 0.95)
    ), holomap='gif', fps=5
)

NameError: name 'hv' is not defined

# Section 3: Building Interactive Visualizations for Data Exploration

# Adding tooltips and sliders

1. Read in and prepare the data.

In [23]:
import geopandas as gpd
import pandas as pd

# Read the GeoJSON file, convert `time` from epoch milliseconds to datetimes,
# derive the calendar month from the converted time, and drop every row that
# contains a missing value. All columns are kept here.
earthquakes = gpd.read_file('earthquakes.geojson').assign(
    time=lambda x: pd.to_datetime(x.time, unit='ms'),
    month=lambda x: x.time.dt.month
).dropna()

# Preview the first rows of the prepared data.
earthquakes.head()

ModuleNotFoundError: No module named 'geopandas'

 2. Import the required libraries and set up the Bokeh backend.

In [24]:
from cartopy import crs
import geoviews as gv
import geoviews.feature as gf

# Load the GeoViews extension with Bokeh as the plotting backend.
gv.extension('bokeh')

ModuleNotFoundError: No module named 'cartopy'

3. Create an overlay with tooltips and a slider.

In [25]:
# Create points positioned by longitude/latitude. The value dimensions carry
# the extra columns attached to each point (presumably what the hover tooltips
# show; confirm against the rendered plot).
points = gv.Points(
    earthquakes,
    kdims=['longitude', 'latitude'],
    vdims=['month', 'place', 'tsunami', 'mag', 'magType']
)

# set colorbar limits for magnitude and axis limits
points = points.redim.range(
    mag=(-2, 10), longitude=(-180, 180), latitude=(-90, 90)
)

NameError: name 'gv' is not defined

In [26]:
# Overlay the points, grouped by month, on the land, coastline and border
# features. Grouping by month presumably drives the slider; confirm in the output.
overlay = gf.land * gf.coastline * gf.borders * points.groupby('month')

NameError: name 'gf' is not defined

In [27]:
# Style the overlay: Plate Carree projection for the map features, a 700x450
# plot, and points colored by magnitude with a colorbar and the hover tool.
interactive_map = overlay.opts(
    gv.opts.Feature(projection=crs.PlateCarree()),
    gv.opts.Overlay(width=700, height=450),
    gv.opts.Points(color='mag', cmap='fire_r', colorbar=True, tools=['hover'])
)

NameError: name 'overlay' is not defined

4. Render the visualization.

In [28]:
import panel as pn

# Wrap the map in a Panel object, placing its widget(s) below the plot.
earthquake_viz = pn.panel(interactive_map, widget_location='bottom')

NameError: name 'interactive_map' is not defined

In [None]:
# Render the visualization via Panel's `embed()`.
# NOTE(review): presumably this stores the widget states in the output so the
# slider works without a live kernel; confirm against the Panel docs.
earthquake_viz.embed()

# Additional plot types

In [None]:
import numpy as np

# Load only the listed columns, lowercase the column names, replace the
# single-letter region codes with readable names, and add a `route` column.
flight_stats = pd.read_csv(
    'T100_MARKET_ALL_CARRIER.zip',
    usecols=[
        'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN', 
        'DEST_CITY_NAME', 'DEST', 'PASSENGERS', 'FREIGHT', 'MAIL'
    ]
).rename(lambda x: x.lower(), axis=1).assign(
    region=lambda x: x.region.replace({
        'D': 'Domestic', 'I': 'International', 'A': 'Atlantic', 
        'L': 'Latin America', 'P': 'Pacific', 'S': 'System'
    }),
    # Put the two airport codes in sorted order so that both directions of
    # travel between a pair of airports share the same route string.
    route=lambda x: np.where(
        x.origin < x.dest,
        x.origin + '-' + x.dest,
        x.dest + '-' + x.origin
    )
)

In [None]:
# Preview the first rows of the prepared flight statistics.
flight_stats.head()

In [None]:
# City names used to filter origin and destination in the chord diagram query
# below (the chord title describes them as the top 10 cities).
cities = [
    'Atlanta, GA', 'Chicago, IL', 'New York, NY', 'Los Angeles, CA',
    'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX', 
    'San Francisco, CA', 'Seattle, WA', 'Orlando, FL'
]

# Carrier names of interest.
# NOTE(review): not referenced in the cells shown here; presumably used later.
top_airlines = [
    'American Airlines Inc.', 'Delta Air Lines Inc.', 'JetBlue Airways',
    'Southwest Airlines Co.', 'United Air Lines Inc.'
]

# Chord diagram

In [29]:
# Keep class "F" rows between two different cities that are both in `cities`,
# total passengers/freight/mail per origin-destination pair, and drop pairs
# with no passengers.
total_flight_stats = flight_stats.query(
    '`class` == "F" and origin_city_name != dest_city_name'
    f' and origin_city_name.isin({cities}) and dest_city_name.isin({cities})'
).groupby([
    'origin', 'origin_city_name', 'dest', 'dest_city_name'
])[['passengers', 'freight', 'mail']].sum().reset_index().query('passengers > 0')

NameError: name 'flight_stats' is not defined

In [30]:
# Show 10 randomly chosen rows; the fixed random_state makes the sample reproducible.
total_flight_stats.sample(10, random_state=1)

NameError: name 'total_flight_stats' is not defined

In [31]:
# Build the chord diagram: edges run from `origin` to `dest`, and the value
# dimensions carry the per-edge columns referenced by the hover tooltips.
chord = hv.Chord(
    total_flight_stats,
    kdims=['origin', 'dest'], 
    vdims=['passengers', 'origin_city_name', 'dest_city_name', 'mail', 'freight']
)

NameError: name 'hv' is not defined

In [32]:
from bokeh.models import HoverTool

# Tooltip label -> template. `@name` is replaced with the value of that column
# for the hovered item.
# NOTE(review): the `{0,.}` suffix is a number format, presumably adding
# thousands separators; confirm against Bokeh's formatting docs.
tooltips = {
    'Source': '@origin_city_name (@origin)',
    'Target': '@dest_city_name (@dest)',
    'Passengers': '@passengers{0,.}',
    'Mail': '@mail{0,.} lbs.',
    'Freight': '@freight{0,.} lbs.',
}
hover = HoverTool(tooltips=tooltips)

In [33]:
# Apply styling to the chord diagram: node and edge coloring, the custom hover
# tool plus tap selection, a square 500-pixel frame, and a title.
chord = chord.opts(
    labels='index', node_color='index', cmap='Category20', # node config
    edge_color='origin', edge_cmap='Category20', directed=True, # edge config
    inspection_policy='edges', tools=[hover, 'tap'], # tooltip config
    frame_width=500, aspect=1, # plot size config
    title='Total Passenger Service Travel Between Top 10 Cities in 2019'
)

NameError: name 'chord' is not defined

In [34]:
# Display the styled chord diagram as the cell output.
chord

NameError: name 'chord' is not defined

# Sankey plot