In [None]:
from sys import path
path.insert(0, '/home/azadoks/git/glosim2/')
path.insert(0, '/home/azadoks/git/structureREST/lib/')
from pymatgen import MPRester
from pymatgen.io.cif import CifParser
from libmatch.soap import get_Soaps
from multiprocessing import Pool, cpu_count
import numpy as np
import pandas as pd
import json
import zoap

In [None]:
def cifparser_from_string_wrap(cif_string):
    '''
    Parse a CIF string and return the first structure it contains.

    Plain single-argument function around pymatgen.io.cif.CifParser so it
        can be passed to multiprocessing.Pool.map() or
        pandas.DataFrame.apply().
    '''
    parser = CifParser.from_string(cif_string)
    parsed_structures = parser.get_structures()
    # Only the first parsed structure is kept; any others are ignored.
    return parsed_structures[0]

def structure2qp_wrap(structure):
    '''
    Single-argument front end for zoap.structure2qp, so that it can be
        used with multiprocessing.Pool.map().

    All conversion flags are switched on. Notes on two of them:
        anonymize   -- _greatly_ speeds up the SOAP calculation.
        primitivize -- can speed up the SOAP calculation when the
                       structures are not already primitive.
    '''
    conversion_flags = {
        'anonymize': True,
        'scale': True,
        'standardize': True,
        'primitivize': True,
    }
    return zoap.structure2qp(structure, **conversion_flags)

In [None]:
# Load the elasticity dataset into `df`.
#
# Supported values for `data_source`:
#   'matproj_query'       - query the Materials Project API directly
#   'matproj_query_json'  - read ../../data/elastic/ec_query.json
#                           (presumably a saved copy of the query result)
#   'matproj_nature_json' - read ../../data/elastic/ec.json, whose 'structure'
#                           field is parsed as CIF text
import os  # local to this cell: only needed to look up the API key

data_source = 'matproj_query_json'

if data_source == 'matproj_query':
    # The API key must not live in the notebook. It is taken from the
    # PMG_MAPI_KEY environment variable; if that is unset, None is passed and
    # MPRester is expected to fall back to pymatgen's own configuration
    # (TODO confirm for the installed pymatgen version).
    # NOTE(review): the key that used to be hard-coded here has been exposed
    # and should be regenerated.
    with MPRester(os.environ.get('PMG_MAPI_KEY')) as mpr:
        # Keep only entries that have elasticity data.
        criteria = {'elasticity': {'$ne': None}}
        properties = ['pretty_formula', 'structure', 'elasticity', 'material_id']
        results = mpr.query(criteria, properties, mp_decode=True)
elif data_source == 'matproj_query_json':
    with open('../../data/elastic/ec_query.json', 'r') as f:
        results = json.load(f)
elif data_source == 'matproj_nature_json':
    with open('../../data/elastic/ec.json', 'r') as f:
        results = json.load(f)
else:
    # Fail here rather than with a confusing NameError (or a stale `df` from
    # a previous run) in a later cell.
    raise ValueError('Unknown data_source: {!r}'.format(data_source))

df = pd.DataFrame(results)

if data_source == 'matproj_nature_json':
    # Keep the raw CIF text in its own column and replace 'structure' with
    # the parsed structure objects.
    df['cif'] = df['structure']
    df['structure'] = df['cif'].apply(cifparser_from_string_wrap)

In [None]:
# Convert every structure with structure2qp_wrap in parallel and store the
# results in the 'quippy' column.
pool = Pool(processes=cpu_count())  # lower processes if memory gets out of hand
try:
    df['quippy'] = pool.map(structure2qp_wrap, df['structure'])
finally:
    # Shut the workers down even when a conversion raises; otherwise every
    # failed run of this cell leaves a full set of worker processes alive.
    pool.close()
    pool.join()

In [None]:
# Compute SOAP descriptors for all quippy objects and store them in the
# 'soaps' column. All get_Soaps settings are collected in one place.
soap_settings = dict(
    nocenters=None,
    chem_channels=False,
    centerweight=1.0,
    gaussian_width=0.25,
    cutoff=2.5,
    cutoff_transition_width=0.5,
    nmax=10,
    lmax=8,
    spkitMax=None,
    nprocess=8,
    chemicalProjection=None,
    dispbar=False,
    is_fast_average=False,
)
df['soaps'] = get_Soaps(np.array(df['quippy']), **soap_settings)