# Make FASTA file of SARS-CoV-2 and related coronavirus N proteins

Note: This notebook is to be run from the `./inputs` folder using a Python kernel with PyMOL installed.

In [3]:
!conda install -y -c conda-forge -c schrodinger pymol-bundle pandas

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 22.9.0
  latest version: 23.5.0

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /Users/colby/SARS-CoV-2_N-Cytokine_Docking/.conda

  added / updated specs:
    - pandas
    - pymol-bundle


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    apbs-1.5                   |       h1de35cc_3         258 KB  schrodinger
    biopython-1.81             |   py39ha30fb19_0         2.6 MB  conda-forge
    brotli-1.0.9               |       hb7f2c08_8          19 KB  conda-forge
    brotli-bin-1.0.9           |       hb7f2c08_8          17 KB  conda-forge
    c-ares-1.19.1              |       h0dc2134_0         101 KB  conda-forge
    certifi-2023.5.7           |     pyhd8ed1ab_0         149 KB  conda-forge
    charset-norm

In [1]:
import os, shutil, sys
import pandas as pd
from itertools import product
from pymol import cmd

In [2]:
## List N protein structure files.
## os.listdir returns entries in arbitrary order, so sort them to keep the
## order of records in the combined FASTA reproducible across runs/machines.
n_files = sorted(
    entry for entry in os.listdir('./N-Proteins/')
    if entry.endswith('.pdb')  # skip stray non-structure entries such as .DS_Store
)

n_files

['BANAL-20-52-N.pdb',
 'MERS-CoV-N.pdb',
 'OC43-N.pdb',
 'RaTG13-N.pdb',
 'SARS-CoV-2-B.1.1-N.pdb',
 'SARS-CoV-2-B.1.1.529-N.pdb',
 'SARS-CoV-2-B.1.1.7-N.pdb',
 'SARS-CoV-2-B.1.351-N.pdb',
 'SARS-CoV-2-B.1.617.2-DeltaA-N.pdb',
 'SARS-CoV-2-BA.1.1-N.pdb',
 'SARS-CoV-2-BA.2-N.pdb',
 'SARS-CoV-2-BA.4-N.pdb',
 'SARS-CoV-2-BQ.1-N.pdb',
 'SARS-CoV-2-P.1-N.pdb',
 'SARS-CoV-2-WA1-N.pdb',
 'SARS-CoV-2-XBB-N.pdb',
 'SARS-CoV-N.pdb']

In [6]:
## Define Helper Functions

## Convert PDB to FASTA
def convert_file(file, output):
    """Load a structure file into PyMOL and save it back out as `output`.

    PyMOL chooses the output format from the extension of `output`
    (see the `cmd.save` documentation), so a `.fasta` path yields a FASTA file.

    Args:
        file: Path of the structure file to load (e.g. a `.pdb` file).
        output: Path of the file to write.

    Raises:
        Whatever `cmd.load` / `cmd.save` raise; the exception is propagated
        after the PyMOL session has been reset.
    """
    try:
        cmd.load(file)
        cmd.save(output)
    finally:
        ## Reinitialize everything, even when loading/saving failed, so a
        ## half-loaded object cannot leak into the next conversion.
        cmd.reinitialize(what='everything')


In [8]:
## Create the scratch directory for per-protein FASTA files.
## exist_ok=True lets this cell be re-run after an interrupted run.
os.makedirs('./N-fastas', exist_ok=True)

n_fasta_files = []

for pdb_name in n_files:
    ## Split on the real extension instead of str.replace, which would also
    ## rewrite a '.pdb' occurring in the middle of a file name.
    stem, ext = os.path.splitext(pdb_name)
    if ext != '.pdb':
        ## Not a structure file (e.g. .DS_Store); nothing to convert.
        continue

    pdb_path = f"./N-Proteins/{pdb_name}"
    fasta_path = f"./N-fastas/{stem}.fasta"
    print(f'Making: {fasta_path}')

    convert_file(file = pdb_path, output = fasta_path)
    n_fasta_files.append(fasta_path)


Making: ./N-fastas/BANAL-20-52-N.fasta
Making: ./N-fastas/MERS-CoV-N.fasta
Making: ./N-fastas/OC43-N.fasta
Making: ./N-fastas/RaTG13-N.fasta
Making: ./N-fastas/SARS-CoV-2-B.1.1-N.fasta
Making: ./N-fastas/SARS-CoV-2-B.1.1.529-N.fasta
Making: ./N-fastas/SARS-CoV-2-B.1.1.7-N.fasta
Making: ./N-fastas/SARS-CoV-2-B.1.351-N.fasta
Making: ./N-fastas/SARS-CoV-2-B.1.617.2-DeltaA-N.fasta
Making: ./N-fastas/SARS-CoV-2-BA.1.1-N.fasta
Making: ./N-fastas/SARS-CoV-2-BA.2-N.fasta
Making: ./N-fastas/SARS-CoV-2-BA.4-N.fasta
Making: ./N-fastas/SARS-CoV-2-BQ.1-N.fasta
Making: ./N-fastas/SARS-CoV-2-P.1-N.fasta
Making: ./N-fastas/SARS-CoV-2-WA1-N.fasta
Making: ./N-fastas/SARS-CoV-2-XBB-N.fasta
Making: ./N-fastas/SARS-CoV-N.fasta


In [10]:
## Make combined FASTA file by concatenating the per-protein FASTA files
## in the order they were generated.
with open("./N-Proteins.fasta", "w") as combined_fasta_file:
    for n_fasta_file in n_fasta_files:
        with open(n_fasta_file) as fasta_file:
            fasta_text = fasta_file.read()
        combined_fasta_file.write(fasta_text)
        ## If a record lacks a trailing newline, the next record's '>' header
        ## would be glued onto this sequence line; terminate it explicitly.
        if fasta_text and not fasta_text.endswith("\n"):
            combined_fasta_file.write("\n")
            

In [11]:
## Clean up ./N-fastas directory.
## Only remove it when present so re-running this cell is not an error.
if os.path.isdir('./N-fastas'):
    shutil.rmtree('./N-fastas')