In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from utils import plot_horizontal_ballline_plot, setup_plotting_standards

# Apply the project's shared plotting setup (imported from `utils`) before any
# figure in this notebook is created.
# NOTE(review): presumably this configures global matplotlib defaults — confirm in utils.
setup_plotting_standards()

# Hex colour for each region label; handed to `plot_horizontal_ballline_plot`
# by every plotting cell below.
cmap = dict(
    South="#e41a1c",
    Midwest="#377eb8",
    NorthEast="#4daf4a",
    West="#984ea3",
)

### Figure X. Transmission lineage characteristics are different in the Eastern and Western US.
Using the discrete state reconstructions, we determined whether geographic clustering has affected transmission dynamics.

First we load in the transmission lineages. These were calculated as part of the rule `beast_analysis.identify_transmission_lineages`. Briefly, we follow Du Plessis et al. 2020 in defining a transmission lineage as two or more taxa belonging to the same state that descend from a shared, single importation of the virus into the state from any other state.

In [None]:
# Read the transmission-lineage table supplied by the Snakemake rule input.
tl = pd.read_csv( snakemake.input.transmission_lineages )

# Lineages with more than one child are the non-singleton ones.
has_multiple_children = tl["children"] > 1

print( f"{len( tl )} lineages found." )
print( f"{has_multiple_children.sum()} non-singleton lineages found." )
print()

# Keep only the non-singleton lineages (as an independent copy) and record how
# long each one spans: the gap between its `latest_date` and its `height`.
tlong = tl.loc[has_multiple_children].assign(
    length=lambda lineages: lineages["latest_date"] - lineages["height"]
)

tlong.head()

Here we plot the median number of transmission lineages for each state. This generates figure X.

In [None]:
# Count the non-singleton transmission lineages in every tree for each
# location (rows: trees, columns: locations).
# fill_value=0 records a tree/location pair with no lineage as an explicit
# zero. Without it those pairs are NaN, which `describe` ignores, so the
# quantiles would only reflect trees in which the location has at least one
# lineage and the median would be biased upwards.
lineage_counts = tlong.pivot_table(
    index="tree",
    columns="Location",
    values="par.Location",
    aggfunc="count",
    fill_value=0,
)

# Summarise the per-tree counts for each location (median and 95% interval);
# transpose so that each location is one row of the plotting table.
plot_df = lineage_counts.describe( percentiles=[0.025, 0.5, 0.975] ).T

fig, ax = plt.subplots( dpi=200, figsize=(4,6) )
plot_horizontal_ballline_plot( ax, plot_df, cmap, xlims=(0,25), xlabel="Transmission lineages", sortby=["50%", "97.5%"] )
plt.tight_layout()
fig.savefig( snakemake.output.tl_count_figure )
plt.show()

Here, we plot the median number of descendents for each state's transmission lineages. This generates figure X.

In [None]:
# Summarise the number of children per lineage for each location
# (median and 95% interval among the descriptive statistics).
children_by_location = tlong.groupby( "Location" )["children"]
plot_df = children_by_location.describe( percentiles=[0.025, 0.5, 0.975] )

fig, ax = plt.subplots( figsize=(4, 6), dpi=200 )
plot_horizontal_ballline_plot(
    ax,
    plot_df,
    cmap,
    xlims=(1, 1000),
    xlabel="Descendents",
    sortby=["50%", "97.5%"],
)

# Switch the x-axis to a log scale, then restyle its major and minor ticks.
ax.set_xscale( "log" )
ax.tick_params(
    axis="x",
    which="both",
    direction="in",
    labelbottom=True,
    pad=3,
)

plt.tight_layout()
fig.savefig( snakemake.output.tl_size_figure )
plt.show()

Here we plot the median length of transmission lineages from each state. This generates figure X.

In [None]:
# Summarise the lineage `length` values for each location
# (median and 95% interval among the descriptive statistics).
length_by_location = tlong.groupby( "Location" )["length"]
plot_df = length_by_location.describe( percentiles=[0.025, 0.5, 0.975] )

fig, ax = plt.subplots( figsize=(4, 6), dpi=200 )
plot_horizontal_ballline_plot(
    ax,
    plot_df,
    cmap,
    xlims=(0, 17),
    xlabel="Length (years)",
    sortby=["50%"],
)

plt.tight_layout()
fig.savefig( snakemake.output.tl_length_figure )
plt.show()

#### Post-2002
Dellicour et al. 2020 found that the dispersal of West Nile virus changed after its rapid expansion following its introduction in ~1999. Specifically, they found that the dispersal speed dropped after 2002, indicating the beginning of geographic clustering. Consequently, the massive polytomy resulting from its early spread, and its consistent assignment to New York and Connecticut, might be influencing our results. Thus, we compute the exact same graphs as above but only considering transmission lineages occurring after 2002.

In [None]:
# Restrict the analysis to lineages whose `height` is greater than 2002.
# NOTE: this rebinds `tlong`; the two cells below rely on the filtered frame,
# and re-running any earlier cell after this one would also see only the
# post-2002 subset.
tlong = tlong.loc[tlong["height"]>2002]

# Count the remaining transmission lineages in every tree for each location
# (rows: trees, columns: locations).
# fill_value=0 records a tree/location pair with no lineage as an explicit
# zero. Without it those pairs are NaN, which `describe` ignores, so the
# quantiles would only reflect trees in which the location has at least one
# lineage and the median would be biased upwards.
lineage_counts = tlong.pivot_table(
    index="tree",
    columns="Location",
    values="par.Location",
    aggfunc="count",
    fill_value=0,
)

# Summarise the per-tree counts for each location (median and 95% interval);
# transpose so that each location is one row of the plotting table.
plot_df = lineage_counts.describe( percentiles=[0.025, 0.5, 0.975] ).T

fig, ax = plt.subplots( dpi=200, figsize=(4,6) )
plot_horizontal_ballline_plot( ax, plot_df, cmap, xlims=(0,20), xlabel="Transmission lineages", sortby=["50%", "97.5%"] )
plt.tight_layout()
fig.savefig( snakemake.output.tl_post_count_figure )
plt.show()

In [None]:
# Summarise the number of children per lineage for each location, using
# whatever `tlong` currently holds (the post-2002 subset when run in order).
children_by_location = tlong.groupby( "Location" )["children"]
plot_df = children_by_location.describe( percentiles=[0.025, 0.5, 0.975] )

fig, ax = plt.subplots( figsize=(4, 6), dpi=200 )
plot_horizontal_ballline_plot(
    ax,
    plot_df,
    cmap,
    xlims=(1, 1000),
    xlabel="Descendents",
    sortby=["50%", "97.5%"],
)

# Switch the x-axis to a log scale, then restyle its major and minor ticks.
ax.set_xscale( "log" )
ax.tick_params(
    axis="x",
    which="both",
    direction="in",
    labelbottom=True,
    pad=3,
)

plt.tight_layout()
fig.savefig( snakemake.output.tl_post_size_figure )
plt.show()

In [None]:
# Summarise the lineage `length` values for each location, using whatever
# `tlong` currently holds (the post-2002 subset when run in order).
length_by_location = tlong.groupby( "Location" )["length"]
plot_df = length_by_location.describe( percentiles=[0.025, 0.5, 0.975] )

fig, ax = plt.subplots( figsize=(4, 6), dpi=200 )
plot_horizontal_ballline_plot(
    ax,
    plot_df,
    cmap,
    xlims=(0, 15),
    xlabel="Length (years)",
    sortby=["50%"],
)

plt.tight_layout()
fig.savefig( snakemake.output.tl_post_length_figure )
plt.show()