In [None]:
#This notebook is designed to simplify the procedure for setting up and running a set of CoronaKMC simulations
#Because both the UnitedAtom and CoronaKMC steps can take a significant amount of time, these are not run directly.
#Instead, the notebook prints out the commands you should run for UnitedAtom, BuildCoronaParams and CoronaKMC.
#For help please ask Ian Rouse, ian.rouse@ucd.ie

In [20]:
import os
import shutil
import urllib.request


#Naming conventions shared by every later step of the setup.
#The working and results folders are derived from the project name.

ProjectName = "testproject"
ProteinStorageFolder = "all_proteins"
ProteinWorkingFolder = "proteins_" + ProjectName
UAResultsFolder = "results_" + ProjectName

#Create each required folder unless a directory of that name is already present.
for requiredFolder in (ProteinStorageFolder, ProteinWorkingFolder, UAResultsFolder, "cg_corona_data"):
    if os.path.isdir(requiredFolder):
        continue
    os.mkdir(requiredFolder)
    
#Defines the set of proteins that calculations should be run for.
#Every category must stay defined; a category without proteins is left as an empty list.
#E.g. if no protein takes its structure from the PDB, use
#  PDBProteinSet = []


#Each entry has the format [UniprotID, number concentration]; entries are separated by commas.
#Structures for these proteins are obtained from AlphaFold.
UniProtProteinSet = [
    ["Q99J83", 1e-3],
    ["Q09X10", 1e-3]
]


#Structures for this set are instead obtained from the RCSB PDB repository.
PDBProteinSet = [
    ["1AX8", 1e-3]
]


#Structures for these are taken directly from the storage folder; nothing is downloaded.
OtherProteinSet = [
]


#Merge the three sets into one list of [ID, concentration, source tag] entries,
#keeping the order AlphaFold, then PDB, then Other.
AllProteins = []
taggedProteinSets = (
    ("AF", UniProtProteinSet),
    ("PDB", PDBProteinSet),
    ("Other", OtherProteinSet),
)
for sourceTag, proteinSet in taggedProteinSets:
    for entry in proteinSet:
        AllProteins.append([entry[0], entry[1], sourceTag])
    
    
#For all the proteins specified in the list
#    Check to see if the working folder already has the structure
#    Check to see if the protein already has a structure available in the storage folder
#    If so, copy this to the working folder.
#    If not, download from AlphaFold/PDB according to the source tag of the protein
#        If this succeeds, copy to the working folder
#        If this fails, print an error and move on to the next protein
#
#Downloads and copies are done with urllib/shutil rather than shell commands so that a failure
#is actually detected: os.system only returns an exit status and never raises, so wrapping it
#in try/except cannot notice a failed wget or cp.

#Download address for each source tag; {} is replaced by the protein ID.
#NOTE(review): the AlphaFold address pins model version v2 - confirm the server still provides
#this version and update the template if it does not.
StructureURLTemplates = {
    "AF": "https://alphafold.ebi.ac.uk/files/AF-{}-F1-model_v2.pdb",
    "PDB": "https://files.rcsb.org/download/{}.pdb",
}
#Message shown when the download (or the follow-up copy) for a source tag does not succeed.
DownloadFailureMessages = {
    "AF": "AlphaFold download failed, please try manually",
    "PDB": "PDB download failed, please try manually",
}
#Upper limit, in seconds, on how long a single blocking network operation may take.
DownloadTimeoutSeconds = 60


def _hasStructure(path):
    """Return True if path is an existing file with a non-zero size.

    Empty files are rejected because an interrupted download can leave one behind.
    """
    return os.path.isfile(path) and os.path.getsize(path) > 0


def _copyStructure(sourcePath, destinationPath):
    """Copy sourcePath to destinationPath.

    Returns True on success. On failure the error is printed and False is returned,
    so one unreadable file does not stop the remaining proteins from being processed.
    """
    try:
        shutil.copyfile(sourcePath, destinationPath)
        return True
    except OSError as error:
        print("Could not copy "+sourcePath+" to "+destinationPath+": "+str(error))
        return False


def _downloadStructure(url, destinationPath):
    """Download url and save it as destinationPath.

    The data is written to destinationPath+".part" first and only renamed into place once the
    transfer has finished, so a failed download never leaves a file at destinationPath.
    Returns True if a non-empty file was saved. Any error is printed and reported as False.
    """
    partialPath = destinationPath+".part"
    try:
        with urllib.request.urlopen(url, timeout=DownloadTimeoutSeconds) as response:
            with open(partialPath, "wb") as partialFile:
                shutil.copyfileobj(response, partialFile)
        if os.path.getsize(partialPath) == 0:
            raise OSError("the server returned an empty file")
        os.replace(partialPath, destinationPath)
        return True
    except Exception as error:
        #Deliberately broad: whatever went wrong, report it and carry on with the next protein.
        #Unlike a bare except this still lets KeyboardInterrupt through.
        print("Download of "+url+" failed: "+str(error))
        if os.path.exists(partialPath):
            os.remove(partialPath)
        return False


successfulProteins = []
for proteinLine in AllProteins:
    foundProtein = False
    proteinID = proteinLine[0]
    proteinSource = proteinLine[2]
    workingPath = ProteinWorkingFolder+"/"+proteinID+".pdb"
    storagePath = ProteinStorageFolder+"/"+proteinID+".pdb"
    if _hasStructure(workingPath):
        successfulProteins.append(proteinLine)
        print("Found "+proteinID+" in working folder")
        continue
    print("Looking for ", storagePath)
    if _hasStructure(storagePath):
        foundProtein = _copyStructure(storagePath, workingPath)
        if foundProtein:
            print("Found "+proteinID+" in storage folder, copied to working")
    elif proteinSource in StructureURLTemplates:
        structureURL = StructureURLTemplates[proteinSource].format(proteinID)
        #Only count the protein as found if both the download and the copy succeeded.
        foundProtein = _downloadStructure(structureURL, storagePath) and _copyStructure(storagePath, workingPath)
        if not foundProtein:
            print(DownloadFailureMessages[proteinSource])
    else:
        print("Could not find a structure for protein "+proteinID)
    if foundProtein:
        successfulProteins.append(proteinLine)


#Write the serum file: a header line followed by one "ID,concentration" line for every
#protein whose structure was obtained.
#The with-statement closes the file even if one of the writes raises, so no handle is leaked.
with open(ProjectName+"_serum.csv","w") as serumOutputFile:
    serumOutputFile.write("#ProteinID, NumberConcentration\n")
    for protein in successfulProteins:
        serumOutputFile.write(protein[0]+","+str(protein[1])+"\n")

#Nothing below is executed from the notebook: the commands are only displayed so that
#they can be launched manually, with the bracketed placeholders filled in by the user.
unitedAtomCommand = "python3 RunUA.py -r [RADIUS] -z [ZETA] -p "+ProteinWorkingFolder+ " -o "+UAResultsFolder+ " -m [MATERIAL] --operation-type=pdb-folder"
buildParamsCommand = "python3 BuildCoronaParams-P3.py -r [RADIUS] -z [ZETA] -f "+UAResultsFolder+" -p "+ProjectName+"_serum.csv -c "+ProteinWorkingFolder
coronaKMCCommand = "python3 CoronaKMC-P3.py -r [RADIUS] -z [ZETA] -f 0 -p cg_corona_data/"+UAResultsFolder+"_[RADIUS]_[ZETA].csv -t [TIME] "

nextStepLines = (
    "Now run UnitedAtom with pdbs set to "+ProteinWorkingFolder,
    "Suggested autorun command: ",
    unitedAtomCommand,
    "Then: ",
    buildParamsCommand,
    coronaKMCCommand,
    "If python3 isn't installed, use python BuildCoronaParams.py, python CoronaKMC.py instead",
)
for stepLine in nextStepLines:
    print(stepLine)

Found Q99J83 in working folder
Found Q09X10 in working folder
Found 1AX8 in working folder
Now run UnitedAtom with pdbs set to proteins_testproject
Suggested autorun command: 
python3 RunUA.py -r [RADIUS] -z [ZETA] -p proteins_testproject -o results_testproject -m [MATERIAL] --operation-type=pdb-folder
Then: 
python3 BuildCoronaParams-P3.py -r [RADIUS] -z [ZETA] -f results_testproject -p testproject_serum.csv -c proteins_testproject
python3 CoronaKMC-P3.py -r [RADIUS] -z [ZETA] -f 0 -p cg_corona_data/results_testproject_[RADIUS]_[ZETA].csv -t [TIME] 
If python3 isn't installed, use python BuildCoronaParams.py, python CoronaKMC.py instead
