In [None]:
# summary
# Query the summary endpoint for three materials, selected by their
# Materials Project IDs; the result is kept in the notebook-level `docs`.
with MPRester() as mpr:
    docs = mpr.materials.summary.search(
        material_ids=["mp-149", "mp-13", "mp-22526"],
    )
    # Presumably the names that can be passed as `fields=` to this endpoint
    # -- TODO confirm against the mp_api documentation.
    list_of_available_fields = mpr.materials.summary.available_fields

# Get data from `mp_api` directly

## get coordination_number

In [None]:
import numpy as np
import re

# Query chemical-environment documents for Fe-containing compounds made of
# 2 to 4 elements, requesting only the fields used by the analysis.
#
# No API key is passed here: a key committed with the notebook is a leaked
# credential. Like the other key-less `MPRester()` cells in this notebook,
# this relies on the client picking the key up from the user's configuration
# (e.g. the MP_API_KEY environment variable -- see the mp_api documentation).
# NOTE(review): the key that used to be hard-coded here should be revoked.
with MPRester() as mpr:
    docs = mpr.materials.chemenv.search(
        elements=["Fe"],
        num_chunks=1,
        num_elements=(2, 4),
        fields=['chemenv_symbol', 'formula_pretty', 'material_id', 'species', 'structure'],
    )

def check_same_coordination_number(a_doc, element, verbose=False):
    """Check whether all sites of ``element`` share one coordination number.

    Args:
        a_doc: Subscriptable document exposing two parallel sequences,
            ``a_doc['species']`` (species strings) and
            ``a_doc['chemenv_symbol']`` (strings whose coordination number
            follows a colon, e.g. ``'O:6'``).
        element: Element symbol to look for in the species strings. It is
            inserted into a regular expression, so a regex fragment also works.
        verbose: When True, print the matching species and environment symbols
            if their coordination numbers differ.

    Returns:
        tuple: ``(True, n)`` when every matching site has coordination number
        ``n``; ``(False, None)`` when the numbers differ, when a matching site
        has no parsable coordination number, or when no site matches
        ``element`` (in which case the document is printed for inspection).
    """
    species = a_doc['species']
    symbols = a_doc['chemenv_symbol']

    # The symbol must not be followed by a lowercase letter, otherwise "F"
    # would also select "Fe" sites and "C" would select "Cu"/"Ca"/"Co"/"Cl".
    element_pattern = re.compile(rf'(?:{element})(?![a-z])')

    ind = []
    coordination_numbers = []
    for i, specie in enumerate(species):
        # only check the coordination number of the interested element.
        if not element_pattern.search(specie):
            continue
        ind.append(i)
        # `\d+` keeps multi-digit coordination numbers intact ("C:12" -> 12).
        found = re.search(r'(?<=:)\d+', symbols[i] or '')
        if found is None:
            # Without a coordination number for this site the sites cannot be
            # shown to agree.
            return False, None
        coordination_numbers.append(int(found.group()))

    if not coordination_numbers:
        # No site of the requested element: show the document for debugging.
        print(np.array(a_doc))
        return False, None

    if len(set(coordination_numbers)) == 1:
        return True, coordination_numbers[0]

    if verbose:
        print(np.array(a_doc['species'])[ind])
        print(np.array(a_doc['chemenv_symbol'])[ind])
    return False, None

## get valences

In [None]:
# Oxidation-state documents for the Fe-containing chemical systems matched by
# the three wildcard patterns below, restricted to the fields we read.
with MPRester() as mpr:
    docs = mpr.materials.oxidation_states.search(
        chemsys=["Fe-*", "Fe-*-*", "Fe-*-*-*"],
        num_chunks=1,
        fields=['average_oxidation_states', 'formula_pretty', 'material_id'],
    )

# Look at the first hit: the average oxidation state stored for Fe, and its ID.
example_doc = docs[0]
print(example_doc.average_oxidation_states['Fe'])
print(example_doc.material_id)


# Compute features with `pymatgen`

In [None]:
# Fetch material documents for Fe-containing materials (`num_chunks=1`).
#
# No API key is passed here: a key committed with the notebook is a leaked
# credential. Like the other key-less `MPRester()` cells in this notebook,
# this relies on the client picking the key up from the user's configuration
# (e.g. the MP_API_KEY environment variable -- see the mp_api documentation).
# NOTE(review): the key that used to be hard-coded here should be revoked.
with MPRester() as mpr:
    docs = mpr.materials.search(
        elements=["Fe"],
        num_chunks=1,
    )

# Keep the first document as the worked example for the following cells.
example_doc = docs[0]

In [None]:

def check_same_values(list_of_values):
    """Tell whether every entry of ``list_of_values`` equals the first entry.

    Args:
        list_of_values: Sequence of numeric values.

    Returns:
        bool: True when all values are identical; False when they differ or
        when the sequence is empty (there is no common value to report, and
        indexing the first element would otherwise raise ``IndexError``).
    """
    if len(list_of_values) == 0:
        return False
    values = np.asarray(list_of_values)
    # Comparing differences against zero mirrors the numeric test used so far.
    return bool(((values - values[0]) == 0).all())


def get_features(element, structure):
    """Compute valence, coordination number and mean neighbor distance of an element.

    Features are only reported when they are unambiguous, i.e. when every site
    of ``element`` in ``structure`` has the same coordination number and the
    same valence.

    Args:
        element: Element symbol matched against the site labels. It is inserted
            into a regular expression, so a regex fragment also works.
        structure: pymatgen structure to analyse.

    Returns:
        tuple: ``(True, valence, coordination_number, mean_distance)`` on
        success, where ``mean_distance`` is the mean over the element's sites
        of each site's mean distance to its CrystalNN neighbors.
        ``(False, None, None, None)`` when the element is absent, when its
        sites differ in coordination number or valence, or when the bond
        valence analysis raises ``ValueError``.
    """
    failure = (False, None, None, None)

    # The symbol must not be followed by a lowercase letter, otherwise "F"
    # would also select "Fe" sites.
    element_pattern = re.compile(rf'(?:{element})(?![a-z])')

    # Public accessors instead of `_sites` / `_label`; `label` is used because
    # the private `_label` may be unset (see pymatgen's `Site.label`).
    sites = structure.sites
    inds = [i for i, site in enumerate(sites) if element_pattern.search(site.label)]
    if not inds:
        # Nothing to analyse; also avoids comparing an empty list of values.
        return failure

    # One analyzer instance is enough for all sites.
    crystal_nn = pymatgen.analysis.local_env.CrystalNN()

    # calculate coordination number first
    # since get_valences takes too much time
    coordination_numbers = [crystal_nn.get_cn(structure, i) for i in inds]
    if not check_same_values(coordination_numbers):
        return failure

    try:
        site_valences = pymatgen.analysis.bond_valence.BVAnalyzer().get_valences(structure)  # takes 2s for complex structures
    except ValueError:
        return failure

    # Check the valences before the neighbor search so that no distances are
    # computed for structures that are rejected anyway.
    valences = [site_valences[i] for i in inds]
    if not check_same_values(valences):
        return failure

    mean_distances = []
    for i in inds:
        neighbor_sites = crystal_nn.get_nn_info(structure, i)
        distances = [
            np.linalg.norm(neighbor['site'].coords - sites[i].coords)
            for neighbor in neighbor_sites
        ]
        mean_distances.append(np.mean(distances))

    return True, valences[0], coordination_numbers[0], np.mean(mean_distances)

# Try the feature extraction on the example document's structure; the call is
# the last expression of the cell, so its result tuple is the cell output.
structure = example_doc.structure
get_features("Fe",structure)

In [None]:
def get_element_features(an_element, max_materials=100):
    """Collect per-material features of ``an_element`` in oxygen-containing materials.

    Queries the Materials Project summary endpoint for materials containing
    both ``an_element`` and O, runs ``get_features`` on each structure and
    keeps the materials for which it succeeds.

    Args:
        an_element: Element symbol to analyse.
        max_materials: Maximum number of returned documents to process
            (default 100, the previously hard-coded cap). ``None`` processes
            every returned document.

    Returns:
        list[dict]: One record per accepted material with the keys
        ``'element'``, ``'material_id'``, ``'valence'``,
        ``'coordination_number'`` and ``'mean_mean_distance'``.
    """
    # No API key is passed: credentials must not be committed with the
    # notebook. The client is expected to read the key from the user's
    # configuration (e.g. the MP_API_KEY environment variable -- see the
    # mp_api documentation).
    with MPRester() as mpr:
        docs = mpr.materials.summary.search(
            elements=[an_element, "O"],
            num_chunks=1,
            # Only these two fields are read below.
            fields=['material_id', 'structure'],
        )

    outs = []
    for doc in docs[:max_materials]:
        check, valence, coordination_number, mean_mean_distance = get_features(an_element, doc.structure)
        if check:
            outs.append({
                'element': an_element,
                'material_id': doc.material_id,
                'valence': valence,
                'coordination_number': coordination_number,
                'mean_mean_distance': mean_mean_distance,
            })
    return outs


In [None]:
import json
from pathlib import Path

# Make sure the target directory exists before the slow query runs, so a
# missing `datasets/` folder cannot discard the computed results.
output_path = Path('datasets/Ti.json')
output_path.parent.mkdir(parents=True, exist_ok=True)

outs = get_element_features("Ti")
with open(output_path, 'w') as f:
    json.dump(outs, f, indent=4)