## Phylogeny of Omicron viruses from travelers and locals

#### This notebook prepares additional metadata for Auspice: a per-strain source label (clade for local samples, "Traveler" otherwise) and its display color.

In [10]:
import os
import pandas as pd

In [11]:
# Sample metadata table, keyed by the 'seqName' column so rows can be
# looked up by sequence name in later cells.
meta = pd.read_csv(
    '../data/rawdata/nyc_omicron_metadata.csv',
    index_col='seqName',
)

In [12]:
# Nextclade results (tab-separated), keyed by the 'seqName' column.
nextclade = pd.read_csv(
    '../data/nextclade_nyc/nextclade.tsv',
    sep='\t',
    index_col='seqName',
)

In [13]:
# Marker substitutions for each clade. Order matters: a sequence is assigned
# to the FIRST clade whose markers are all present in its substitution list,
# so a sequence carrying markers of several clades lands in the earliest one.
clade_markers = {
    'A': {'G5515T'},
    'B': {'G5924A'},
    'C': {'T10135C', 'C25708T', 'A29301G'},
    'D': {'C2470T', 'G22599A'},
}

# clade name -> list of sequence names; 'Others' collects everything that
# matches none of the marker sets. Key order (A, B, C, D, Others) is kept
# because downstream cells iterate over this dict.
clades = {name: [] for name in clade_markers}
clades['Others'] = []

for seq_name, raw_subs in nextclade['substitutions'].items():
    # A blank 'substitutions' field is read by pandas as NaN (a float), on
    # which .split(',') would raise AttributeError. Treat it as "no
    # substitutions" so the sequence falls through to 'Others'.
    subs = set(raw_subs.split(',')) if isinstance(raw_subs, str) else set()

    for name, markers in clade_markers.items():
        if markers <= subs:  # every marker of this clade is present
            clades[name].append(seq_name)
            break
    else:
        clades['Others'].append(seq_name)

In [14]:
# States whose samples count as "local"; every other value of 'State'
# is labelled as a traveler.
local_states = ['NY', 'NJ']

# Display color (hex) for each possible value of the 'source' column.
cols = {
    'A': '#6098C5',
    'B': '#FD9E31',
    'C': '#9370DB',
    'D': '#EE6553',
    'Others': '#B0C4DE',
    'Traveler': '#FEBE64'
}

# Write one row per classified sequence: strain name, source label, color.
# Local samples are labelled with their clade, all others with 'Traveler'.
with open('../../nextstrain_workflow/auspice_metadata/travelers.csv', 'w') as out:
    out.write(','.join(['strain', 'source', 'source__color']) + '\n')
    for clade_name, members in clades.items():
        for strain in members:
            is_local = meta.loc[strain, 'State'] in local_states
            source = clade_name if is_local else 'Traveler'
            out.write(','.join([strain, source, cols[source]]) + '\n')

In [15]:
# List the state of every sample that is not from a local state, sorted by state.
meta.loc[~meta['State'].isin(local_states), 'State'].sort_values()

seqName
caomi001    CA
caomi002    CA
flomi001    FL
flomi002    FL
gaomi001    GA
mdomi001    MD
meomi001    ME
ncomi001    NC
ncomi002    NC
oromi001    OR
riomi001    RI
txomi001    TX
utomi001    UT
Name: State, dtype: object