# Review and Organize Sequences

Notebook objective:
- Split the large sequence file into smaller files, based on the species

# Setup


In [None]:
from ecutilities.ipython import nb_setup
# Run the project's notebook setup helper; the cell output reports that
# autoreload mode is set. NOTE(review): any other side effects are defined in
# `ecutilities` and are not visible from this notebook.
nb_setup()

Set autoreload mode


In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
from metagentools.art import ArtIllumina
from metagentools.cnn_virus.data import FastaFileReader, FastaFileIterator, parse_metadata_fasta_cov

# Explore fasta file sequence metadata

Explore the `cov_data` directory with sequence files

In [None]:
# Location of the ART Illumina executable used by the `ArtIllumina` wrapper
p2art = Path('/bin/art_illumina')
assert p2art.is_file(), f"ART Illumina executable not found: {p2art}"

# Directory with the reference sequence files; `resolve()` already yields an
# absolute path, so it can be printed directly
p2inputs = Path('../../data/cov_data').resolve()
print(p2inputs)
assert p2inputs.is_dir(), f"Input directory not found: {p2inputs}"

# Directory handed to ART as its output directory
p2outputs = Path('../../data/cov_simreads').resolve()
print(p2outputs)
assert p2outputs.is_dir(), f"Output directory not found: {p2outputs}"

/home/vtec/projects/bio/metagentools/data/cov_data
/home/vtec/projects/bio/metagentools/data/cov_simreads


In [None]:
# Wrap the ART executable together with the input and output directories
art = ArtIllumina(path2app=p2art, input_dir=p2inputs, output_dir=p2outputs)

Ready to operate with art: /bin/art_illumina
Input files from : /home/vtec/projects/bio/metagentools/data/cov_data
Output files to :  /home/vtec/projects/bio/metagentools/data/cov_simreads


In [None]:
# Show the files available in the input directory configured on `art`
art.list_all_input_files()

cov_virus_list.txt
cov_virus_sequence_one_1.fa
cov_virus_sequence_one_2.fa
cov_virus_sequences.fa
cov_virus_sequences_hundred.fa
cov_virus_sequences_ten.fa
cov_virus_sequences_twenty_five.fa
cov_virus_sequences_two.fa


## Create a reader for the fasta file and extract all metadata

In [None]:
# Full sequence file that the rest of the notebook explores
p2fasta = p2inputs / 'cov_virus_sequences.fa'
assert p2fasta.is_file(), f"Fasta file not found: {p2fasta}"

In [None]:
# Open the fasta file and parse it; the result is used in the following cells
# as a mapping from sequence id to a dict of metadata fields.
fasta = FastaFileReader(p2fasta)
seqs_metadata = fasta.parse_fasta()
# One entry per sequence, so the length is the number of sequences in the file
print(f"The file includes {len(seqs_metadata):,d} sequences")

The file includes 3,318 sequences


Display the metadata for the first few sequences

In [None]:
# Sequence ids in the order they appear in the parsed metadata;
# iterating a mapping yields its keys.
seqids = list(seqs_metadata)
# Pretty-print the metadata record of the first five sequences
for seqid in seqids[:5]:
    pprint(seqs_metadata[seqid])

{'accession': 'MK211378',
 'seqid': '2591237:ncbi:1',
 'seqnb': '1',
 'source': 'ncbi',
 'species': 'Coronavirus BtRs-BetaCoV/YN2018D  scientific name',
 'taxonomyid': '2591237'}
{'accession': 'LC494191',
 'seqid': '11128:ncbi:2',
 'seqnb': '2',
 'source': 'ncbi',
 'species': 'Bovine coronavirus  scientific name',
 'taxonomyid': '11128'}
{'accession': 'KY967361',
 'seqid': '31631:ncbi:3',
 'seqnb': '3',
 'source': 'ncbi',
 'species': 'Human coronavirus OC43  scientific name',
 'taxonomyid': '31631'}
{'accession': 'LC654455',
 'seqid': '277944:ncbi:4',
 'seqnb': '4',
 'source': 'ncbi',
 'species': 'Human coronavirus NL63  scientific name',
 'taxonomyid': '277944'}
{'accession': 'MN987231',
 'seqid': '11120:ncbi:5',
 'seqnb': '5',
 'source': 'ncbi',
 'species': 'Infectious bronchitis virus  scientific name',
 'taxonomyid': '11120'}


## Create a DataFrame with all metadata for analysis

In [None]:
# Build the frame in a single pass: one row per sequence id (index), one
# column per metadata field. Growing a DataFrame with `pd.concat` inside a
# loop re-copies every accumulated row on each iteration.
meta_df = pd.DataFrame(
    list(seqs_metadata.values()),
    index=list(seqs_metadata.keys()),
)
meta_df.shape

(3318, 6)

In [None]:
# Preview the first rows to check the columns and the index (sequence ids)
meta_df.head()

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
2591237:ncbi:1,MK211378,2591237:ncbi:1,1,ncbi,Coronavirus BtRs-BetaCoV/YN2018D scientific name,2591237
11128:ncbi:2,LC494191,11128:ncbi:2,2,ncbi,Bovine coronavirus scientific name,11128
31631:ncbi:3,KY967361,31631:ncbi:3,3,ncbi,Human coronavirus OC43 scientific name,31631
277944:ncbi:4,LC654455,277944:ncbi:4,4,ncbi,Human coronavirus NL63 scientific name,277944
11120:ncbi:5,MN987231,11120:ncbi:5,5,ncbi,Infectious bronchitis virus scientific name,11120


We will break down the 3k sequences by species. We know that some of the sequences do not have a species string.

In [None]:
# Index labels (sequence ids) of the rows whose `species` is missing
idx_nospecies = meta_df.index[meta_df['species'].isna()]
# Show those rows with all their columns
meta_df.loc[idx_nospecies, :]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
2877474:ncbi:73,MZ328303,2877474:ncbi:73,73,ncbi,,2877474
2872806:ncbi:125,MZ081378,2872806:ncbi:125,125,ncbi,,2872806
2891194:ncbi:184,MW924112,2891194:ncbi:184,184,ncbi,,2891194
2833184:ncbi:240,OK017818,2833184:ncbi:240,240,ncbi,,2833184
2877471:ncbi:243,MZ328300,2877471:ncbi:243,243,ncbi,,2877471
...,...,...,...,...,...,...
2833184:ncbi:3144,OK017826,2833184:ncbi:3144,3144,ncbi,,2833184
2833184:ncbi:3149,OK017833,2833184:ncbi:3149,3149,ncbi,,2833184
2877469:ncbi:3168,MZ328302,2877469:ncbi:3168,3168,ncbi,,2877469
2833184:ncbi:3185,OK017855,2833184:ncbi:3185,3185,ncbi,,2833184


First, we work with the sequences that have a value for `species`

In [None]:
# Keep only the rows that have a `species` value; `meta_df` itself is untouched
meta = meta_df[meta_df['species'].notna()]
meta.shape

(3210, 6)

Number of sequences for each unique `species` value

In [None]:
# Report the current display limits (kept so the cell still prints them)
max_rows = pd.options.display.max_rows
max_cols = pd.options.display.max_columns
print(max_cols, max_rows)
# Lift the row/column display limits only for this one display call.
# `option_context` restores the previous values on exit, even if the display
# raises or the cell is interrupted.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    # Sorting first and counting with sort=False lists species alphabetically
    display(meta.species.sort_values().value_counts(sort=False))

20 60


229E-related bat coronavirus  scientific name                                 6
Alpaca respiratory coronavirus  scientific name                               1
Alphacoronavirus 1  scientific name                                           1
Alphacoronavirus Bat-CoV/P                                                    4
Alphacoronavirus BtMs-AlphaCoV/GS2013  scientific name                        1
Alphacoronavirus Mink/China/1/2016  scientific name                           1
Alphacoronavirus UKRn3  scientific name                                       1
Alphacoronavirus sp                                                          56
Atlantic salmon bafinivirus  scientific name                                  1
Avian coronavirus  scientific name                                           24
Avian infectious bronchitis virus                                             1
Avian infectious bronchitis virus partridge/GD/S14/2003  scientific name      1
Ball python nidovirus 1  scientific name

In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Alphacoronavirus'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1906673:ncbi:113,MH687967,1906673:ncbi:113,113,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:137,MH687954,1906673:ncbi:137,137,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:204,MH687950,1906673:ncbi:204,204,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:368,MH687945,1906673:ncbi:368,368,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:409,MZ081391,1906673:ncbi:409,409,ncbi,Alphacoronavirus sp,1906673
...,...,...,...,...,...,...
2492658:ncbi:2983,NC_046964,2492658:ncbi:2983,2983,ncbi,Alphacoronavirus Bat-CoV/P,2492658
1906673:ncbi:3025,MZ081389,1906673:ncbi:3025,3025,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:3211,MZ081394,1906673:ncbi:3211,3211,ncbi,Alphacoronavirus sp,1906673
1906673:ncbi:3216,MH687965,1906673:ncbi:3216,3216,ncbi,Alphacoronavirus sp,1906673


In [None]:
# Show one table per "Bat ..." prefix: the sequences whose species name starts with it
for pattern in 'Bat coronavirus; Bat SARS-like; Bat SARS coronavirus; Bat SARS CoV; Bat CoV; Bat Hp'.split('; '):
    display(meta[meta['species'].str.startswith(pattern)])

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1244203:ncbi:18,MN477899,1244203:ncbi:18,18,ncbi,Bat coronavirus HKU10 scientific name,1244203
875613:ncbi:231,HM211101,875613:ncbi:231,231,ncbi,Bat coronavirus HKU9-10-2 scientific name,875613
424368:ncbi:256,EF065514,424368:ncbi:256,256,ncbi,Bat coronavirus HKU9-2 scientific name,424368
424361:ncbi:316,EF065507,424361:ncbi:316,316,ncbi,Bat coronavirus HKU4-3 scientific name,424361
389230:ncbi:454,DQ648794,389230:ncbi:454,454,ncbi,Bat coronavirus,389230
1244203:ncbi:535,MN477900,1244203:ncbi:535,535,ncbi,Bat coronavirus HKU10 scientific name,1244203
1244203:ncbi:691,MN477915,1244203:ncbi:691,691,ncbi,Bat coronavirus HKU10 scientific name,1244203
1244203:ncbi:735,MN477906,1244203:ncbi:735,735,ncbi,Bat coronavirus HKU10 scientific name,1244203
424362:ncbi:802,EF065508,424362:ncbi:802,802,ncbi,Bat coronavirus HKU4-4 scientific name,424362
424370:ncbi:896,EF065516,424370:ncbi:896,896,ncbi,Bat coronavirus HKU9-4 scientific name,424370


Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1415851:ncbi:3273,KC881005,1415851:ncbi:3273,3273,ncbi,Bat SARS-like coronavirus RsSHC014 scientific...,1415851
1415834:ncbi:3274,KC881006,1415834:ncbi:3274,3274,ncbi,Bat SARS-like coronavirus Rs3367 scientific name,1415834
1415852:ncbi:3275,KF367457,1415852:ncbi:3275,3275,ncbi,Bat SARS-like coronavirus WIV1 scientific name,1415852
1699360:ncbi:3277,KP886808,1699360:ncbi:3277,3277,ncbi,Bat SARS-like coronavirus YNLF_31C scientific...,1699360
1699361:ncbi:3278,KP886809,1699361:ncbi:3278,3278,ncbi,Bat SARS-like coronavirus YNLF_34C scientific...,1699361
1508227:ncbi:3280,KY417142,1508227:ncbi:3280,3280,ncbi,Bat SARS-like coronavirus scientific name,1508227
1508227:ncbi:3281,KY417143,1508227:ncbi:3281,3281,ncbi,Bat SARS-like coronavirus scientific name,1508227
1508227:ncbi:3282,KY417144,1508227:ncbi:3282,3282,ncbi,Bat SARS-like coronavirus scientific name,1508227
1508227:ncbi:3283,KY417145,1508227:ncbi:3283,3283,ncbi,Bat SARS-like coronavirus scientific name,1508227
1508227:ncbi:3284,KY417146,1508227:ncbi:3284,3284,ncbi,Bat SARS-like coronavirus scientific name,1508227


Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
338605:ncbi:3252,DQ084199,338605:ncbi:3252,3252,ncbi,Bat SARS coronavirus HKU3-2 scientific name,338605
338606:ncbi:3253,DQ084200,338606:ncbi:3253,3253,ncbi,Bat SARS coronavirus HKU3-3 scientific name,338606
333387:ncbi:3255,DQ022305,333387:ncbi:3255,3255,ncbi,Bat SARS coronavirus HKU3-1 scientific name,333387
742001:ncbi:3263,GQ153539,742001:ncbi:3263,3263,ncbi,Bat SARS coronavirus HKU3-4 scientific name,742001
742002:ncbi:3264,GQ153540,742002:ncbi:3264,3264,ncbi,Bat SARS coronavirus HKU3-5 scientific name,742002
742003:ncbi:3265,GQ153541,742003:ncbi:3265,3265,ncbi,Bat SARS coronavirus HKU3-6 scientific name,742003
742004:ncbi:3266,GQ153542,742004:ncbi:3266,3266,ncbi,Bat SARS coronavirus HKU3-7 scientific name,742004
742005:ncbi:3267,GQ153543,742005:ncbi:3267,3267,ncbi,Bat SARS coronavirus HKU3-8 scientific name,742005
742006:ncbi:3268,GQ153544,742006:ncbi:3268,3268,ncbi,Bat SARS coronavirus HKU3-9 scientific name,742006
741997:ncbi:3269,GQ153545,741997:ncbi:3269,3269,ncbi,Bat SARS coronavirus HKU3-10 scientific name,741997


Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
349344:ncbi:3254,DQ071615,349344:ncbi:3254,3254,ncbi,Bat SARS CoV Rp3/2004 scientific name,349344
347537:ncbi:3256,DQ412042,347537:ncbi:3256,3256,ncbi,Bat SARS CoV Rf1/2004 scientific name,347537
347536:ncbi:3257,DQ412043,347536:ncbi:3257,3257,ncbi,Bat SARS CoV Rm1/2004 scientific name,347536


Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
389166:ncbi:3259,DQ648856,389166:ncbi:3259,3259,ncbi,Bat CoV 273/2005 scientific name,389166
389167:ncbi:3260,DQ648857,389167:ncbi:3260,3260,ncbi,Bat CoV 279/2005 scientific name,389167


Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1541205:ncbi:1362,KF636752,1541205:ncbi:1362,1362,ncbi,Bat Hp-betacoronavirus/Zhejiang2013 scientifi...,1541205
1541205:ncbi:2738,NC_025217,1541205:ncbi:2738,2738,ncbi,Bat Hp-betacoronavirus/Zhejiang2013 scientifi...,1541205


In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Betacoronavirus'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1928434:ncbi:157,MH687973,1928434:ncbi:157,157,ncbi,Betacoronavirus sp,1928434
1385427:ncbi:197,KC545383,1385427:ncbi:197,197,ncbi,Betacoronavirus Erinaceus/VMC/DEU/2012 scient...,1385427
1928434:ncbi:225,MH687969,1928434:ncbi:225,225,ncbi,Betacoronavirus sp,1928434
2720538:ncbi:247,MW246800,2720538:ncbi:247,247,ncbi,Betacoronavirus Erinaceus scientific name,2720538
1385427:ncbi:319,NC_039207,1385427:ncbi:319,319,ncbi,Betacoronavirus Erinaceus/VMC/DEU/2012 scient...,1385427
2720538:ncbi:410,MW246802,2720538:ncbi:410,410,ncbi,Betacoronavirus Erinaceus scientific name,2720538
1590370:ncbi:442,NC_026011,1590370:ncbi:442,442,ncbi,Betacoronavirus HKU24 scientific name,1590370
1928434:ncbi:463,MH687971,1928434:ncbi:463,463,ncbi,Betacoronavirus sp,1928434
694003:ncbi:502,MW773844,694003:ncbi:502,502,ncbi,Betacoronavirus 1 scientific name,694003
1590370:ncbi:697,KM349744,1590370:ncbi:697,697,ncbi,Betacoronavirus HKU24 scientific name,1590370


In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Bovine'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
11128:ncbi:2,LC494191,11128:ncbi:2,2,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:63,LC494181,11128:ncbi:63,63,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:72,KU886219,11128:ncbi:72,72,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:74,AF391542,11128:ncbi:74,74,ncbi,Bovine coronavirus scientific name,11128
454963:ncbi:100,FJ938064,454963:ncbi:100,100,ncbi,Bovine coronavirus E-AH187-TC scientific name,454963
...,...,...,...,...,...,...
11128:ncbi:2998,LC494174,11128:ncbi:2998,2998,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:3054,LC494146,11128:ncbi:3054,3054,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:3132,MG757138,11128:ncbi:3132,3132,ncbi,Bovine coronavirus scientific name,11128
11128:ncbi:3219,LC494154,11128:ncbi:3219,3219,ncbi,Bovine coronavirus scientific name,11128


In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Bt'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1503291:ncbi:382,KJ473809,1503291:ncbi:382,382,ncbi,BtNv-AlphaCoV/SC2013 scientific name,1503291
1503302:ncbi:497,KJ473814,1503302:ncbi:497,497,ncbi,BtRs-BetaCoV/HuB2013 scientific name,1503302
1495253:ncbi:543,KJ473821,1495253:ncbi:543,543,ncbi,BtVs-BetaCoV/SC2013 scientific name,1495253
1503289:ncbi:887,NC_028811,1503289:ncbi:887,887,ncbi,BtMr-AlphaCoV/SAX2011 scientific name,1503289
1503299:ncbi:1168,KJ473811,1503299:ncbi:1168,1168,ncbi,BtRf-BetaCoV/JL2012 scientific name,1503299
1503286:ncbi:1252,KJ473798,1503286:ncbi:1252,1252,ncbi,BtMf-AlphaCoV/HuB2013 scientific name,1503286
1503278:ncbi:1503,KJ473795,1503278:ncbi:1503,1503,ncbi,BtMf-AlphaCoV/AH2011 scientific name,1503278
1503280:ncbi:1681,KJ473797,1503280:ncbi:1681,1681,ncbi,BtMf-AlphaCoV/GD2012 scientific name,1503280
1503291:ncbi:1766,NC_028833,1503291:ncbi:1766,1766,ncbi,BtNv-AlphaCoV/SC2013 scientific name,1503291
1503279:ncbi:1870,KJ473799,1503279:ncbi:1870,1870,ncbi,BtMf-AlphaCoV/FJ2012 scientific name,1503279


In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Coronavirus'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
2591237:ncbi:1,MK211378,2591237:ncbi:1,1,ncbi,Coronavirus BtRs-BetaCoV/YN2018D scientific name,2591237
2591233:ncbi:159,MK211374,2591233:ncbi:159,159,ncbi,Coronavirus BtRl-BetaCoV/SC2018 scientific name,2591233
2591236:ncbi:266,MK211377,2591236:ncbi:266,266,ncbi,Coronavirus BtRs-BetaCoV/YN2018C scientific name,2591236
1368314:ncbi:509,KC869678,1368314:ncbi:509,509,ncbi,Coronavirus Neoromicia/PML-PHE1/RSA/2011 scie...,1368314
1965089:ncbi:686,LC216915,1965089:ncbi:686,686,ncbi,Coronavirus HKU15 scientific name,1965089
2591230:ncbi:710,MK211371,2591230:ncbi:710,710,ncbi,Coronavirus BtSk-AlphaCoV/GX2018C scientific ...,2591230
2591229:ncbi:811,MK211370,2591229:ncbi:811,811,ncbi,Coronavirus BtSk-AlphaCoV/GX2018B scientific ...,2591229
1964806:ncbi:900,KX964649,1964806:ncbi:900,900,ncbi,Coronavirus AcCoV-JC34 scientific name,1964806
1965089:ncbi:1120,LC216914,1965089:ncbi:1120,1120,ncbi,Coronavirus HKU15 scientific name,1965089
2591231:ncbi:1217,MK211372,2591231:ncbi:1217,1217,ncbi,Coronavirus BtSk-AlphaCoV/GX2018D scientific ...,2591231


In [None]:
# Sequences whose species name starts with the given prefix
pattern = 'Deltacoronavirus'
meta[meta['species'].str.startswith(pattern)]

Unnamed: 0,accession,seqid,seqnb,source,species,taxonomyid
1477411:ncbi:104,KJ601777,1477411:ncbi:104,104,ncbi,Deltacoronavirus PDCoV/USA/Illinois133/2014 s...,1477411
1911231:ncbi:1399,MT138108,1911231:ncbi:1399,1399,ncbi,Deltacoronavirus sp,1911231
1477414:ncbi:1401,KJ601780,1477414:ncbi:1401,1401,ncbi,Deltacoronavirus PDCoV/USA/Ohio137/2014 scien...,1477414
1477413:ncbi:1611,KJ601779,1477413:ncbi:1611,1611,ncbi,Deltacoronavirus PDCoV/USA/Illinois136/2014 s...,1477413
1465644:ncbi:1639,KJ481931,1465644:ncbi:1639,1639,ncbi,Deltacoronavirus PDCoV/USA/Illinois121/2014 s...,1465644
1911231:ncbi:2319,MT138105,1911231:ncbi:2319,2319,ncbi,Deltacoronavirus sp,1911231
1911231:ncbi:2427,MT138104,1911231:ncbi:2427,2427,ncbi,Deltacoronavirus sp,1911231
1477412:ncbi:3041,KJ601778,1477412:ncbi:3041,3041,ncbi,Deltacoronavirus PDCoV/USA/Illinois134/2014 s...,1477412


```
     '229E-related bat coronavirus  scientific name',
     'Alpaca respiratory coronavirus  scientific name',
 'Alphacoronavirus ...',
     'Atlantic salmon bafinivirus  scientific name',
     'Avian coronavirus  scientific name',
     'Avian infectious bronchitis virus',
     'Avian infectious bronchitis virus partridge/GD/S14/2003  scientific name',
     'Ball python nidovirus 1  scientific name',
 'Bat CoV ...',
 'Bat Hp-betacoronavirus/Zhejiang2013  scientific name',
 'Bat SARS CoV ...',
 'Bat SARS coronavirus ...',
 'Bat SARS-like coronavirus ...',
 'Bat alphacoronavirus  scientific name',
 'Bat coronavirus ...',
     'Bellinger River virus  scientific name',
     'Beluga whale coronavirus SW1  scientific name',
     'Berne virus  scientific name',
     'Betacoronavirus ...',
     'Bottlenose dolphin coronavirus  scientific name',
     'Bottlenose dolphin coronavirus HKU22  scientific name',
 'Bovine ...',
 'Bt ...',
     'Bulbul coronavirus HKU11-796  scientific name',
     'Bulbul coronavirus HKU11-934  scientific name',
     'Calf-giraffe coronavirus US/OH3/2006  scientific name',
     'Camel alphacoronavirus  scientific name',
     'Camel alphacoronavirus Camel229E  scientific name',
     'Camel coronavirus HKU23  scientific name',
     'Canada goose coronavirus  scientific name',
     'Canine coronavirus  scientific name',
     'Canine respiratory coronavirus  scientific name',
     'Chinook salmon bafinivirus  scientific name',
     'Civet SARS CoV SZ16/2003  scientific name',
     'Civet SARS CoV SZ3/2003  scientific name',
     'Common moorhen coronavirus HKU21  scientific name',
 'Coronavirus ...',
 'Deltacoronavirus ...',
     'Dromedary camel coronavirus HKU23  scientific name',
     'Duck coronavirus  scientific name',
     'Equine coronavirus  scientific name',
     'Erinaceus hedgehog coronavirus HKU31  scientific name',
     'European turkey coronavirus 080385d  scientific name',
     'Fathead minnow nidovirus  scientific name',
 'Feline ...',
     'Ferret coronavirus  scientific name',
     'Ferret enteric coronavirus  scientific name',
     'Ferret systemic coronavirus  scientific name',
     'Giraffe coronavirus US/OH3-TC/2006  scientific name',
     'Giraffe coronavirus US/OH3/2003  scientific name',
     'Goat torovirus  scientific name',
     'Guangdong red-banded snake torovirus  scientific name',
     'Guinea fowl coronavirus  scientific name',
     'Guinea fowl coronavirus GfCoV/FR/2011  scientific name',
     'Hainan hebius popei torovirus  scientific name',
     'Hedgehog coronavirus 1  scientific name',
     'Hipposideros bat coronavirus HKU10  scientific name',
     'Hipposideros pomona bat coronavirus CHB25  scientific name',
     'Hipposideros pomona bat coronavirus HKU10-related  scientific name',
 'Human ...',
 'Infectious bronchitis ...',
     'Lucheng Rn rat coronavirus  scientific name',
     'Magpie-robin coronavirus HKU18  scientific name',
 'Middle East respiratory syndrome-related coronavirus  scientific name',
 'Miniopterus bat coronavirus 1  scientific name',
 'Miniopterus bat coronavirus HKU8  scientific name',
 'Miniopterus pusillus bat coronavirus HKU8-related  scientific name',
 'Miniopterus schreibersii bat coronavirus 1-related  scientific name',
 'Mink coronavirus 1  scientific name',
 'Mink coronavirus strain WD1127  scientific name',
 'Mink coronavirus strain WD1133  scientific name',
 'Munia coronavirus HKU13-3514  scientific name',
 'Murine coronavirus  scientific name',
 'Murine coronavirus MHV-1  scientific name',
 'Murine coronavirus MHV-3  scientific name',
 'Murine coronavirus MHV-JHM',
 'Murine coronavirus RA59/R13  scientific name',
 'Murine coronavirus RA59/SJHM  scientific name',
 'Murine coronavirus RJHM/A  scientific name',
 'Murine coronavirus SA59/RJHM  scientific name',
 'Murine coronavirus inf-MHV-A59  scientific name',
 'Murine coronavirus repA59/RJHM  scientific name',
 'Murine coronavirus repJHM/RA59  scientific name',
 'Murine hepatitis virus  scientific name',
 'Murine hepatitis virus strain 2  scientific name',
 'Murine hepatitis virus strain A59  scientific name',
 'Murine hepatitis virus strain JHM  scientific name',
 'Murine hepatitis virus strain ML-11  scientific name',
 'Murine hepatitis virus strain S/3239-17  scientific name',
 'Myotis lucifugus coronavirus  scientific name',
 'NL63-related bat coronavirus  scientific name',
 'Night heron coronavirus HKU19  scientific name',
 'PRCV ISU-1  scientific name',
 'Pangolin coronavirus  scientific name',
 'Pheasant coronavirus  scientific name',
 'Pipistrellus abramus bat coronavirus HKU5-related  scientific name',
 'Pipistrellus bat coronavirus HKU5  scientific name',
 'Porcine coronavirus HKU15  scientific name',
 'Porcine deltacoronavirus  scientific name',
 'Porcine deltacoronavirus 8734/USA-IA/2014  scientific name',
 'Porcine deltacoronavirus KNU14-04  scientific name',
 'Porcine enteric alphacoronavirus  scientific name',
 'Porcine enteric alphacoronavirus GDS04  scientific name',
 'Porcine epidemic diarrhea virus  scientific name',
 'Porcine epidemic diarrhea virus L00721/GER/2014  scientific name',
 'Porcine hemagglutinating encephalomyelitis virus  scientific name',
 'Porcine respiratory coronavirus  scientific name',
 'Porcine torovirus  scientific name',
 'Quail deltacoronavirus  scientific name',
 'Rabbit coronavirus HKU14  scientific name',
 'Rat coronavirus  scientific name',
 'Rat coronavirus Parker  scientific name',
 'Rhinolophus affinis bat coronavirus HKU2-related  scientific name',
 'Rhinolophus affinis coronavirus  scientific name',
 'Rhinolophus bat coronavirus HKU2  scientific name',
 'Rhinolophus bat coronavirus HKU32  scientific name',
 'Rousettus aegyptiacus bat coronavirus 229E-related  scientific name',
 'Rousettus bat coronavirus  scientific name',
 'Rousettus bat coronavirus GCCDC1  scientific name',
 'Rousettus bat coronavirus HKU10  scientific name',
 'Rousettus bat coronavirus HKU9  scientific name',
 'SARS coronavirus Rs_672/2006  scientific name',
 'SARS-like coronavirus WIV16  scientific name',
 'Sable antelope coronavirus US/OH1/2003  scientific name',
 'Sambar deer coronavirus US/OH-WD388-TC/1994  scientific name',
 'Sambar deer coronavirus US/OH-WD388/1994  scientific name',
 'Scotophilus bat coronavirus 512  scientific name',
 'Scotophilus kuhlii bat coronavirus 512-related  scientific name',
 'Severe acute respiratory syndrome coronavirus 2  scientific name',
 'Severe acute respiratory syndrome-related coronavirus  scientific name',
 'Sparrow coronavirus HKU17  scientific name',
 'Sparrow deltacoronavirus  scientific name',
 'Swine deltacoronavirus OhioCVM1/2014  scientific name',
 'Swine enteric alphacoronavirus  scientific name',
 'Swine enteric coronavirus  scientific name',
 'TGEV Miller M6  scientific name',
 'TGEV Miller M60  scientific name',
 'TGEV Purdue P115  scientific name',
 'TGEV virulent Purdue  scientific name',
 'Thrush coronavirus HKU12-600  scientific name',
 'Transmissible gastroenteritis virus  scientific name',
 'Turkey coronavirus  scientific name',
 'Tylonycteris bat coronavirus HKU33  scientific name',
 'Tylonycteris bat coronavirus HKU4  scientific name',
 'Tylonycteris pachypus bat coronavirus HKU4-related  scientific name',
 'Water deer coronavirus  scientific name',
 'Waterbuck coronavirus US/OH-WD358-GnC/1994  scientific name',
 'Waterbuck coronavirus US/OH-WD358-TC/1994  scientific name',
 'Waterbuck coronavirus US/OH-WD358/1994  scientific name',
 'White bream virus  scientific name',
 'White-eye coronavirus HKU16  scientific name',
 'White-tailed deer coronavirus US/OH-WD470/1994  scientific name',
 'Wigeon coronavirus HKU20  scientific name',
 'Yak coronavirus  scientific name'
```

# Others