In [1]:
import numpy as np
from tqdm import tqdm
from pymatgen.core import Structure, Lattice
from pathlib import Path
import nglview
import sys
sys.path.append('../scripts')


from utils import read_config, read_structures


# Enable tqdm's pandas integration.
tqdm.pandas()


def show(x):
    """Return the nglview view that ``nglview.show_pymatgen`` builds for ``x``."""
    view = nglview.show_pymatgen(x)
    return view






### Loading data

 1. Config with paths
 2. Private data

In [3]:
# Read the extractor settings; the data locations used in this notebook come from this file.
config = read_config('../configs/defects_extractor_config.yaml')
assert 'data' in config, 'Config is broken'

# The configured root is joined onto the parent directory ('..') before the structures are read.
private_path = Path('..') / config['data']['private']['root']
private = read_structures(private_path)


2967it [00:19, 154.95it/s]


### Ideal structure

We denote the ideal structure by its formula, $\text{Mo}_{64}\text{S}_{128}$.

In [6]:
def construct_ideal():
    """Assemble the ideal reference structure from three stacked atomic layers.

    Every layer is an 8 x 8 grid of fractional (a, b) positions at one fixed
    fractional height c. The layers are added in the order S ('high'),
    Mo ('mid'), S ('low'), which gives 64 Mo and 128 S sites in total.

    Returns:
        Structure: the 192-site structure on a lattice built with
        ``Lattice.from_parameters(25.5225256, 25.5225256, 14.879004, 90, 90, 120)``.
    """
    # The two in-plane fractional grids. The S layers use (grid_s, grid_mo) as
    # (a, b); the Mo layer uses them swapped.
    grid_s = np.linspace(0.08333333, 0.95833333, 8, endpoint=True)
    grid_mo = np.linspace(0.04166667, 0.91666667, 8, endpoint=True)

    # (element, a values, b values, fractional c), in the order high -> mid -> low.
    layers = (
        ('S', grid_s, grid_mo, 0.355174),
        ('Mo', grid_mo, grid_s, 0.25),
        ('S', grid_s, grid_mo, 0.144826),
    )

    elements, positions = [], []
    for element, a_values, b_values, c in layers:
        # a varies slowest, b fastest within a layer.
        layer_positions = [[a, b, c] for a in a_values for b in b_values]
        elements += [element] * 64
        positions += layer_positions

    lat = Lattice.from_parameters(25.5225256, 25.5225256, 14.879004, 90, 90, 120)
    return Structure(lat, elements, positions, coords_are_cartesian=False)

In [7]:
# Build the ideal reference structure once; the extraction cell further down turns it into a set of sites.
ideal = construct_ideal()

### Extraction of defects

We call an atom a defect if it does not match the atom at the same coordinates
in the ideal structure. In what follows, we build a new structure from these defects for each input structure.

In [8]:
def extract_defects(s: Structure, ideal_set: set) -> Structure:
    """Construct a new structure holding only the defect sites of ``s``.

    The result contains:

    * every site of ``s`` that has no equal site in ``ideal_set``, and
    * every site of ``ideal_set`` that has no equal site in ``s`` and whose
      position is not already taken by one of the sites above. Such an ideal
      site is added as-is (with its ideal species).

    Site equality is whatever the site objects implement, because the
    comparison is done with set differences. Positions are compared as whole
    (a, b, c) triples of fractional coordinates rounded to 5 decimals.

    Args:
        s: structure to analyse.
        ideal_set: set of the sites of the ideal structure.

    Returns:
        Structure built from the collected defect sites.
    """
    # NOTE(review): when `s` equals the ideal structure the site list is empty;
    # confirm how Structure.from_sites behaves for an empty list.

    def rounded_position(site):
        # Hashable (a, b, c) triple so that positions are matched as a whole.
        return tuple(np.around(site.frac_coords, 5).tolist())

    actual_set = set(s)
    missing_ideal_sites = tuple(ideal_set - actual_set)
    defects = list(actual_set - ideal_set)

    # Positions occupied by sites of `s` that differ from the ideal structure.
    # Taken before the loop so that appended ideal sites do not affect the check.
    occupied_positions = {rounded_position(site) for site in defects}

    for ideal_site in missing_ideal_sites:
        # The previous check used np.isin on the flattened coordinate array,
        # which tested each component separately: a missing ideal site was
        # skipped whenever its a, b and c values each appeared in *any* defect.
        # Comparing full triples keeps such sites.
        if rounded_position(ideal_site) not in occupied_positions:
            defects.append(ideal_site)

    return Structure.from_sites(defects)

Iterate over the structures and write each result to the output path given in the config file. Processing all structures takes 2-3 hours; the cell below is limited to the first two.

In [11]:
# Upper bound on the number of structures processed by this cell.
# 2 keeps the run short; set to None to process every structure.
MAX_STRUCTURES = 2

ideal_set = set(ideal)
result = {
    id_: extract_defects(struct, ideal_set)
    for id_, struct in tqdm(list(private.items())[:MAX_STRUCTURES])
}

path = '..' / Path(config['data']['private']['defects'])

# Create the output directory together with any missing parents;
# an already existing directory is not an error.
path.mkdir(parents=True, exist_ok=True)

# One JSON file per structure, named after its key in `private`.
for name, item in result.items():
    with open((path / name).with_suffix('.json'), 'w') as f:
        # to_json() yields a single string; writelines() would emit it one character at a time.
        f.write(item.to_json())

100%|██████████| 2/2 [00:05<00:00,  2.74s/it]
