# Split existing ARC project

Currently, this notebook supports splitting a project by:
- multiplicity (ARC has trouble performing isomorphism checks for biradicals)
- convergence (deal with crashed jobs in a separate project)
- user-defined label list

In [None]:
import os
import shutil
from copy import deepcopy

import numpy as np

from rmgpy.species import Species
from easy_rmg_model.common import read_yaml_file, save_yaml_file
from easy_rmg_model.species.converter import xyz_to_mol

In [None]:
########################################################
# Root directory of the existing ARC project that will be split
org_ARC_project = '/Users/xiaorui/RMG server/arc_job/hyperboosting/dmbene/first_iter/part2'
# Root directory of the new ARC project that receives the split-off species
new_ARC_project = '/Users/xiaorui/RMG server/arc_job/hyperboosting/dmbene/first_iter/birad2'
########################################################

# Load the restart dictionary of the original project
org_restart_path = os.path.join(org_ARC_project, 'restart.yml')
org_restart_dict = read_yaml_file(org_restart_path)

## [OPTIONAL] Filter by multiplicity
Currently, the isomorphism check has problems with biradicals (and more complicated species), so we can split the job into two parts.

In [None]:
# Collect the labels of every species whose multiplicity is greater than 2
to_move_out = [
    spc_info["label"]
    for spc_info in org_restart_dict["species"]
    if spc_info['multiplicity'] > 2
]
print("Species to be split:")
print(to_move_out)

## [OPTIONAL] Filter by convergence
We can also split a project into converged and non-converged species

In [None]:
# Collect the labels of every species whose 'convergence' entry in the
# output section is explicitly False. Entries holding any other value
# (e.g., True or None) stay in the original project.
to_move_out = [
    label
    for label, info in org_restart_dict["output"].items()
    if info['convergence'] is False
]
print("Species to be split:")
print(to_move_out)

## [OPTIONAL] Filter by user-defined label list
Users can provide their own list of species labels.

In [None]:
# Fill in the labels of the species to move out of the original project;
# the cells below look these labels up in the restart dictionary.
to_move_out = []
print(to_move_out)

Create a new restart dictionary for the split-off species and save the trimmed original dictionary

In [None]:
# Sections of the restart dictionary that are keyed (or can be keyed) by species label
split_sections = ("output", "running_jobs", "species")

# The new project inherits every setting of the original one,
# but starts with empty per-species sections
new_restart_dict = deepcopy(org_restart_dict)
for section in split_sections:
    new_restart_dict[section] = {}

# Index the species list by label so entries can be popped by label
org_restart_dict["species"] = {
    spc_info["label"]: spc_info for spc_info in org_restart_dict["species"]
}

# Transfer every selected label from the original restart dict to the new one
for label in to_move_out:
    for section in split_sections:
        try:
            entry = org_restart_dict[section].pop(label)
        except KeyError:
            # Nothing stored for this label in this section
            continue
        new_restart_dict[section][label] = entry

# Turn the species section of the original dict back into a list
org_restart_dict["species"] = list(org_restart_dict["species"].values())
# Write to 'restart_new.yml' so the original restart file is not overwritten
trimmed_restart_path = os.path.join(org_ARC_project, 'restart_new.yml')
save_yaml_file(trimmed_restart_path, org_restart_dict)

## [ONLY FOR Filter by multiplicity]
Check the isomorphism of the biradicals using OBMOL perception, and remove the error message in `output`

In [None]:
# Species that cannot be verified (perception failed or the perceived
# molecule is not isomorphic); they are removed from the new project below.
not_known = {}
new_restart_dict["allow_nonisomorphic_2d"] = True
for label, spc in list(new_restart_dict["species"].items()):
    # Find the lowest-energy conformer; cast to a plain int so it can be
    # used as a list index and printed cleanly in the message below
    min_ind = int(np.argmin(spc["conformer_energies"]))
    # Get the mol from the lowest conformer
    xyz = spc['conformers'][min_ind]
    try:
        perceive_mol = xyz_to_mol(xyz)
    except Exception:
        # Perception is best-effort: report the species and set it aside
        print(f"Cannot perceive {label}.")
        not_known[label] = spc
        continue
    mol = Species().from_adjacency_list(spc["mol"])
    mol.generate_resonance_structures()
    if not mol.is_isomorphic(perceive_mol):
        not_known[label] = spc
        print(f"{label} is not isomorphic to {mol}")
        continue
    # Modify the output section: record that the isomorphism check passed
    info = new_restart_dict["output"][label]
    info['conformers'] = (f'most stable conformer ({min_ind}) passed '
                          f'isomorphism check according to OBMOL perception; ')
    info['job_types'] = deepcopy(new_restart_dict['job_types'])
    if 'composite_method' in new_restart_dict:
        info['job_types']['composite'] = True
    # Re-root every recorded job file path under the new project directory
    for job_type, path in info['paths'].items():
        if not path:
            # No file recorded for this job type; leave the entry untouched
            continue
        job_path, file_name = os.path.split(path)
        _, job_name = os.path.split(job_path)
        info['paths'][job_type] = os.path.join(
            new_ARC_project, 'calcs', 'Species', label, job_name, file_name)

# Drop the unverified species from the new project; a label is not
# necessarily present in every section, so missing entries are ignored
for label in not_known:
    for section in ["output", "running_jobs", "species"]:
        new_restart_dict[section].pop(label, None)


## Save the split part to the new directory

In [None]:
# Create the new project directory; an already existing directory is fine,
# while any other failure (e.g., missing permissions) is raised immediately
os.makedirs(new_ARC_project, exist_ok=True)

# Revert change to the species section (from a label-keyed dict back to a list)
new_restart_dict["species"] = list(new_restart_dict["species"].values())

# Save restart file
save_yaml_file(os.path.join(new_ARC_project, 'restart.yml'), new_restart_dict)

# Migrate species and outputs
for sub_dir in ['calcs', 'output']:
    old_path = os.path.join(org_ARC_project, sub_dir, 'Species')
    new_path = os.path.join(new_ARC_project, sub_dir, 'Species')
    # Make sure the destination parent directory exists before moving into it
    os.makedirs(new_path, exist_ok=True)
    for label in new_restart_dict['output']:
        try:
            shutil.move(os.path.join(old_path, label),
                        os.path.join(new_path, label))
        except OSError:
            # shutil.Error is a subclass of OSError, so this also covers it.
            # Report the failure and leave an empty placeholder directory.
            print(f'Cannot move {sub_dir}/{label}')
            os.makedirs(os.path.join(new_path, label), exist_ok=True)