In [1]:
import nglview
import numpy as np
from tqdm import tqdm
from pymatgen.core import Structure
from pymatgen.core import Lattice
from utils import structures_to_df

from pathlib import Path

import multiprocessing as mp

tqdm.pandas()


def show(x):
    """Display a pymatgen object in the notebook by delegating to ``nglview.show_pymatgen``."""
    return nglview.show_pymatgen(x)





### Создание идеального материала

In [2]:
# Load the structures into a DataFrame via the project helper in utils;
# later cells read its 'structure' and '_id' columns.
df_public = structures_to_df()

2966it [00:19, 151.76it/s]


In [3]:
# Two 8-point fractional grids; the layers below use them in alternating a/b order.
grid_inner = np.linspace(0.04166667, 0.91666667, 8, endpoint=True)
grid_outer = np.linspace(0.08333333, 0.95833333, 8, endpoint=True)

# One entry per atomic layer: in-plane fractional grids ('a', 'b'), the fixed
# fractional height 'c', the element of the layer and the (initially empty)
# list of generated fractional positions.
coords = {
    'high': dict(a=grid_outer, b=grid_inner, c=0.355174, element=['S'], position=[]),
    'mid': dict(a=grid_inner, b=grid_outer, c=0.25, element=['Mo'], position=[]),
    'low': dict(a=grid_outer, b=grid_inner, c=0.144826, element=['S'], position=[]),
}

# Fill every layer with the full a x b grid (a varies slowest) at the layer's height.
for layer in coords.values():
    layer['position'].extend(
        [frac_a, frac_b, layer['c']]
        for frac_a in layer['a']
        for frac_b in layer['b']
    )

lat = Lattice.from_parameters(25.5225256, 25.5225256, 14.879004, 90, 90, 120)

# Sites are stacked bottom-up: low S layer, Mo layer, high S layer.
stacking_order = ('low', 'mid', 'high')
elements = [
    symbol
    for layer_name in stacking_order
    for symbol in coords[layer_name]['element'] * len(coords[layer_name]['position'])
]
positions = [
    frac_xyz
    for layer_name in stacking_order
    for frac_xyz in coords[layer_name]['position']
]

ideal = Structure(
    lat,
    elements,
    positions,
    coords_are_cartesian=False,
)

# Set of the ideal sites, used for set differences against other structures.
ideal_set = set(ideal)


### Отличие структур от идеальной

In [6]:
from functools import partial

def diff_ideal(s, ideal_set):
    """Return the sites in which structure ``s`` deviates from the ideal one.

    Two kinds of sites are collected:

    * sites of ``s`` that are not present in ``ideal_set``;
    * sites of ``ideal_set`` that are not present in ``s`` and whose position
      is not taken by any site of the first kind (nothing sits there any more).

    Positions are compared as whole (a, b, c) rows of fractional coordinates
    rounded to 5 decimals.

    Parameters
    ----------
    s : iterable of sites
        The structure to compare (a pymatgen ``Structure`` in this notebook).
    ideal_set : set
        Sites of the ideal structure.

    Returns
    -------
    Structure
        Built with ``Structure.from_sites`` from the collected sites.

    NOTE(review): when ``s`` equals the ideal structure the collected list is
    empty; confirm how ``Structure.from_sites`` handles an empty list.
    """
    structure_sites = set(s)

    # Ideal sites that have no identical counterpart in the structure.
    ideal_defected_atoms = tuple(ideal_set - structure_sites)
    # Sites of the structure that do not belong to the ideal one.
    defects = list(structure_sites - ideal_set)

    # Rounded positions are stored as tuples so that a lookup matches a full
    # (a, b, c) row. Testing each coordinate separately (element-wise
    # ``np.isin``) would treat a site as occupied whenever its a, b and c
    # values merely appear somewhere among the defect coordinates.
    occupied_positions = {tuple(np.around(site.frac_coords, 5)) for site in defects}

    for ideal_site in ideal_defected_atoms:
        if tuple(np.around(ideal_site.frac_coords, 5)) not in occupied_positions:
            defects.append(ideal_site)

    return Structure.from_sites(defects)



# Number of leading structures whose diff is written to disk.
N_EXPORT = 16

structures = df_public['structure'].to_list()
ids = df_public['_id'].to_list()

# Iterate over `structures`: the previously used name `s` is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
r = [diff_ideal(structure, ideal_set) for structure in structures]

path = Path('eda_export/')
# Make the cell runnable on a fresh checkout where the folder does not exist yet.
path.mkdir(parents=True, exist_ok=True)

# One JSON file per structure, named after its id.
for name, item in zip(ids[:N_EXPORT], r):
    with open((path / name).with_suffix('.json'), 'w') as f:
        # to_json() returns a single string, so write() is the right call.
        f.write(item.to_json())

# if __name__ == "__main__":
        
    

    # with mp.Pool(8) as p:
    #     r = list(tqdm(p.imap(partial(diff_ideal, ideal_set=ideal_set), s), total=6))
    #     # result = p.map(partial(diff_ideal, ideal_set=ideal_set), s)
    #     print(len(r))


In [11]:
!ls

[1m[36m__pycache__[m[m           [1m[36meda_export[m[m            mt-cgcnn.ipynb
defects_eda.ipynb     eda_predictions.ipynb test.csv
eda.ipynb             invert_defects.ipynb  test.py
[1m[36meda_data[m[m              jsons2csv.ipynb       utils.py


In [19]:
import json

# Round-trip check: read back the JSON written by the export loop.
# `path` and `name` are the values left over from that loop, i.e. the last
# exported file.
with open((path / name).with_suffix('.json'), 'r') as f:
    exported = Structure.from_dict(json.load(f))

exported




Structure Summary
Lattice
    abc : 25.5225256 25.5225256 14.879004
 angles : 90.0 90.0 119.99999999999999
 volume : 8393.668021812642
      A : 25.5225256 0.0 1.5628039641098191e-15
      B : -12.761262799999994 22.10315553833868 1.5628039641098191e-15
      C : 0.0 0.0 14.879004
PeriodicSite: Se (4.7855, 6.4468, 5.2846) [0.3333, 0.2917, 0.3552]
PeriodicSite: Se (-3.1903, 9.2096, 5.2846) [0.0833, 0.4167, 0.3552]
PeriodicSite: W (14.3564, 15.6564, 3.7198) [0.9167, 0.7083, 0.2500]

In [None]:
def f(x):
    """Return the square of ``x``."""
    return x**2


# Guard the pool so that importing this code never starts worker processes.
if __name__ == "__main__":
    # NOTE: with the "spawn" start method the workers must be able to import
    # `f`; a function defined only in an interactive session cannot be found
    # by them ("Can't get attribute 'f' on <module '__main__'>"). If that
    # happens, move `f` into an importable module such as utils.py.
    with mp.Pool(processes=4) as pool:
        # `args` must be a tuple: apply_async(f, i) would make the worker call f(*i).
        pending = [pool.apply_async(f, (i,)) for i in range(10)]
        # Collect the results so worker errors surface here; the timeout turns
        # an endlessly respawning pool into an explicit TimeoutError.
        squares = [job.get(timeout=30) for job in pending]

    print(squares)

Process SpawnPoolWorker-29:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/queues.py", line 358, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'f' on <module '__main__' (built-in)>
Process SpawnPoolWorker-30:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/pyth

In [29]:
# NOTE(review): `result` is not defined in any visible cell; it is expected to
# be a DataFrame with 'structure' and 'diff' columns. Confirm where it is built.
# Pick one random row and keep both columns as one-element Series.
# Direct column access replaces DataFrame.iteritems(), which was removed in
# pandas 2.0, and avoids rebinding IPython's `_`.
sampled = result.sample(1)
example, diff = sampled['structure'], sampled['diff']

In [30]:
# Render the sampled structure with the nglview-based `show` helper.
show(example.iloc[0])

NGLWidget()

In [32]:
# Render the 'diff' entry of the same sampled row for visual comparison.
show(diff.iloc[0])

NGLWidget()