In [2]:
import pandas as pd
import geopandas as gpd
import networkx as nx
import plotly.graph_objects as go
import numpy as np

In [3]:
# --- State / province polygons for USA, Mexico and Canada -------------------
# Keep only the three countries, drop bookkeeping columns, and merge all
# polygons sharing a NAME into a single geometry per region.
states = gpd.read_file( "/Users/natem/Data/raw_data/shapefiles/NA_states/bound_p.shx" )
states = (
    states[states["COUNTRY"].isin( ["USA", "MEX", "CAN"] )]
    .drop( columns=["OBJECTID", "BOUND_P_", "UIDENT", "STATEABB", "Shape_Area", "Shape_Leng"] )
    .dissolve( by="NAME" )
    .reset_index()
)

# Collapse the bilingual Canadian names to a single label, then suffix every
# name with its country code so labels read "<region>, <COUNTRY>".
bilingual_names = {
    "British Columbia / Colombie-Britannique": "British Columbia",
    "Quebec / Québec" : "Québec",
    "Nova Scotia / Nouvelle-Écosse" : "Nova Scotia",
}
states["NAME"] = states["NAME"].replace( bilingual_names ) + ", " + states["COUNTRY"]

# California is dropped as a whole state because it is re-added below at
# county resolution; the water entry is dropped as well.
excluded_names = ["California, USA","water/agua/d'eau, CAN"]
states = states[~states["NAME"].isin( excluded_names )]

# --- California counties -----------------------------------------------------
# One dissolved geometry per county, labelled "<county>, CA".
ca = gpd.read_file( "/Users/natem/Data/raw_data/shapefiles/USA_county/gadm36_USA_2.shx" )
ca = (
    ca.loc[ca["NAME_1"]=="California", ["NAME_2", "geometry"]]
    .rename( columns={"NAME_2": "NAME"} )
    .assign( COUNTRY="USA" )
    .dissolve( by="NAME" )
    .reset_index()
)
ca["NAME"] = ca["NAME"] + ", CA"

# --- Combine and attach centroids --------------------------------------------
states = pd.concat( [states, ca], ignore_index=True )
centroids = states["geometry"].centroid
states["centroid_x"] = centroids.x
states["centroid_y"] = centroids.y

# Index by NAME so rows can be looked up directly by site label.
states = states.set_index( "NAME" )
states

DriverError: /Users/natem/Data/raw_data/shapefiles/NA_states/bound_p.shx: No such file or directory

In [11]:
# Load the pairwise PhyloSor results and keep only rows where (a) both sites
# have enough sequences and (b) both sites exist in the `states` table.

# Minimum value of countA / countB required for a row to be kept.
min_sequences = 30

res = pd.read_csv( "phylosor_results.csv", parse_dates=["date"] )
res = res.drop( columns=["kind", "num"] )

size = res.shape[0]
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the unfiltered data (avoids SettingWithCopyWarning).
res = res.loc[(res["countA"]>=min_sequences)&(res["countB"]>=min_sequences)].copy()
print( f"removed {size - res.shape[0]} entries for having too few sequences." )

# Rewrite "_" as ", " so site labels can be compared against the NAME labels
# of `states`. regex=False makes the literal replacement explicit.
res["siteA"] = res["siteA"].str.replace( "_", ", ", regex=False )
res["siteB"] = res["siteB"].str.replace( "_", ", ", regex=False )

size = res.shape[0]
# `states` is indexed by NAME (it is no longer a column), so membership must
# be tested against the index; states["NAME"] raises KeyError here.
known_sites = states.index
res = res.loc[res["siteA"].isin( known_sites ) & res["siteB"].isin( known_sites )]
print( f"removed {size - res.shape[0]} entries because either site was not present in shapefile" )

res.head()

removed 22850 entries for having too few sequences.
removed 4189 entries for not being present in shapefile


Unnamed: 0,blA,blB,blBoth,date,siteA,countA,siteB,countB,value,value_turn
0,342.0,1371.0,84.0,2020-03-01,"Texas, USA",299,"New York, USA",1951,0.098074,0.245614
1,654.0,1496.0,67.0,2020-04-01,"Texas, USA",591,"New York, USA",1110,0.062326,0.102446
2,753.0,496.0,24.0,2020-05-01,"Texas, USA",535,"New York, USA",260,0.038431,0.048387
3,4081.0,212.0,45.0,2020-06-01,"Texas, USA",3510,"New York, USA",61,0.020964,0.212264
4,10563.0,464.0,130.0,2020-07-01,"Texas, USA",6370,"New York, USA",144,0.023578,0.280172


In [103]:
# Every pairwise row in which San Diego appears on either side.
focal_site = "San Diego, CA"
sd = res.loc[(res["siteA"] == focal_site) | (res["siteB"] == focal_site)]

# Build an undirected graph from the rows of a single date; each edge carries
# the row's "value" as an attribute.
snapshot = sd.loc[sd["date"]=="2020-05-01"]
G = nx.from_pandas_edgelist(
    snapshot,
    source="siteA",
    target="siteB",
    edge_attr="value",
)

# Attach each node's [centroid_x, centroid_y] from `states` as its "pos".
node_positions = {
    site: states.loc[site, ["centroid_x", "centroid_y"]].to_list()
    for site in G.nodes()
}
nx.set_node_attributes( G, node_positions, name="pos" )

In [164]:
# Map of the graph G: one line per edge, with opacity proportional to the
# edge's "value", drawn over markers at every centroid in `states`.
fig = go.Figure()

# Opacity is scaled by the largest "value" among the rows of `sd` for the
# reference date. Computed once here rather than once per edge.
# NOTE(review): the reference date (2021-03-01) differs from the date used to
# build G elsewhere in the notebook (2020-05-01) - confirm this is intentional.
opacity_scale = sd.loc[sd["date"]=="2021-03-01", "value"].max()
if not ( np.isfinite( opacity_scale ) and opacity_scale > 0 ):
    # No usable reference (no rows for that date, or a zero maximum):
    # fall back to the largest value among the edges actually drawn.
    opacity_scale = max( ( value for _, _, value in G.edges( data="value" ) ), default=1.0 ) or 1.0

# A separate trace per edge is required because opacity is a per-trace
# property.
for site_a, site_b, value in G.edges( data="value" ):
    x0, y0 = G.nodes[site_a]['pos']
    x1, y1 = G.nodes[site_b]['pos']
    fig.add_trace(
        go.Scattergeo(
            lon=[x0,x1],
            lat=[y0,y1],
            hoverinfo='none',
            mode='lines',
            line=dict(width=1, color='#888'),
            # Plotly only accepts opacity in [0, 1]; an edge larger than the
            # reference maximum would otherwise raise a validation error.
            opacity=float( np.clip( value / opacity_scale, 0.0, 1.0 ) ),
            showlegend=False
        )
    )

# Markers for every region in `states`, labelled on hover with the index (NAME).
fig.add_trace( go.Scattergeo(
        lon=states["centroid_x"],
        lat=states["centroid_y"],
        hoverinfo="text",
        hovertext=states.index,
        showlegend=False
    )
)

fig.update_layout(
    margin=dict(b=20,l=5,r=5,t=40)
)

fig.update_geos(
    visible=False, resolution=110, scope="north america",
    showcountries=True, countrycolor="Black",
    showsubunits=True, subunitcolor="Grey", 
    countrywidth=1, subunitwidth=0.5,
    projection_type = 'azimuthal equal area'
)

fig.show()

In [163]:
# Overlaid histograms of "value": all rows of `sd` (faint grey) behind the
# rows of a single date (solid tan), sharing the same bins.
fig = go.Figure()

# Shared bin edges from 0 up to (not including) 0.5 in steps of 0.01; bars are
# placed at the midpoint of each bin.
bin_edges = np.arange( 0, 0.5, 0.01 )
bin_centers = 0.5 * ( bin_edges[:-1] + bin_edges[1:] )

# (values, trace styling) for each layer, drawn in this order.
layers = [
    ( sd["value"], dict( opacity=0.2, marker_color="#646464" ) ),
    ( sd.loc[sd["date"]=="2021-06-01", "value"], dict( marker_color="#D6A564" ) ),
]

for values, styling in layers:
    counts, _ = np.histogram( values, bins=bin_edges )
    fig.add_trace(
        go.Bar(
            x=bin_centers,
            y=counts,
            width=0.011,
            marker_line_width=0,
            **styling
        )
    )

# Draw the bars on top of each other instead of side by side.
fig.update_layout( barmode='overlay' )

fig.show()