In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pymatgen as mg
import numpy as np
# from pymatgen.ext.matproj import MPRester
from tqdm import tqdm
import pickle
from pymatgen.symmetry.groups import SpaceGroup
from pymatgen.core.periodic_table import Species
from pymatgen.electronic_structure.plotter import BSPlotter
from pymatgen.io import ase
from pymatgen.transformations.standard_transformations import OxidationStateDecorationTransformation
from pymatgen.transformations.standard_transformations import AutoOxiStateDecorationTransformation
import os
from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.core.periodic_table import Element
from pymatgen.core.periodic_table import Species, DummySpecie
from pymatgen.core import Species

from tqdm import notebook as tqdm
from tqdm.auto import tqdm as tqdm_pandas
tqdm_pandas.pandas()
from oxi_states import *

  from .autonotebook import tqdm as notebook_tqdm


1a. Read in all Li-containing compounds from Materials Project

In [2]:
# Load the pickled structures table from ./mpr_test (relative to the current working directory).
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
save_path = os.path.join(os.getcwd(), 'mpr_test/testing_structures.pkl')
# The context manager closes the file even if unpickling raises.
with open(save_path, 'rb') as open_file:
    structures_df = pickle.load(open_file)

Define oxidation states and oxidation-state decorators

In [9]:
# Single fixed oxidation state assigned to each element symbol
# (handed to OxidationStateDecorationTransformation right after this table).
oxidation_dictionary = {
    "H": 1, "Li": 1, "Na": 1, "K": 1, "Rb": 1, "Cs": 1,
    "Be": 2, "Mg": 2, "Ca": 2, "Sr": 2, "Ba": 2, "Ra": 2,
    "B": 3, "Al": 3, "Ga": 3, "In": 3, "Tl": 3,
    "C": 4, "Si": 4, "Ge": 4, "Sn": 4, "Pb": 4,
    "N": -3, "P": 5, "As": 5, "Sb": 5, "Bi": 5,
    "O": -2, "S": -2, "Se": -2, "Te": -2, "Po": -2,
    "F": -1, "Cl": -1, "Br": -1, "I": -1,
    "Sc": 3, "Y": 3, "Lu": 3,
    "Ti": 4, "Zr": 4, "Hf": 4,
    "V": 5, "Nb": 5, "Ta": 5,
    "Cr": 6, "Mo": 4, "W": 6,
    "Mn": 7, "Tc": 7, "Re": 7,
    "Fe": 3, "Ru": 3, "Os": 3,
    "Co": 3, "Rh": 3, "Ir": 3,
    "Cu": 2, "Ag": 1, "Au": 3,
    "Zn": 2, "Ni": 2, "Cd": 2, "Hg": 2,
    "La": 3, "Ce": 3, "Pd": 2, "Pm": 3, "Ho": 3,
    "Eu": 3, "Np": 3, "Pu": 4, "Gd": 3, "Sm": 2,
    "Tb": 3, "Tm": 3, "Yb": 3, "Ac": 3,
    "Dy": 3, "Er": 3, "Pr": 3, "U": 6, "Pt": 2,
    "Nd": 3, "Th": 4, "Pa": 5,
}

# Two oxidation-state decoration transformations taken from pymatgen:
#   - oxidation_decorator is built from the fixed per-element table in oxidation_dictionary;
#   - oxidation_auto_decorator is the automatic variant, built with distance_scale_factor=1
#     (see the pymatgen documentation for how that factor is applied).
# NOTE(review): neither object is referenced again in the cells shown here -- confirm whether
# they are still needed.
oxidation_decorator = OxidationStateDecorationTransformation(oxidation_dictionary)
oxidation_auto_decorator = AutoOxiStateDecorationTransformation(distance_scale_factor=1)

In [12]:
# Eight hard-coded flag sets, one per simplification variant. Each maps the switches
# 'C', 'A', 'M', 'N' and '40' to a bool; the variant name spells out which switches are on.
# The flags are consumed by structure_simplifications (presumably provided by oxi_states).
simplification_dict_A = {
    'C': False, 'A': True, 'M': False, 'N': False, '40': False,
}
simplification_dict_AM = {
    'C': False, 'A': True, 'M': True, 'N': False, '40': False,
}
simplification_dict_CAN = {
    'C': True, 'A': True, 'M': False, 'N': True, '40': False,
}
simplification_dict_CAMN = {
    'C': True, 'A': True, 'M': True, 'N': True, '40': False,
}

# The "...40" variants are independent copies of the flag sets above with only '40' switched on.
simplification_dict_A40 = {**simplification_dict_A, '40': True}
simplification_dict_AM40 = {**simplification_dict_AM, '40': True}
simplification_dict_CAN40 = {**simplification_dict_CAN, '40': True}
simplification_dict_CAMN40 = {**simplification_dict_CAMN, '40': True}

In [15]:
# Add one simplified-structure column per variant: 'structure_<variant>' holds the result of
# structure_simplifications applied to every entry of the 'structure' column with that
# variant's flag dictionary. progress_apply prints one tqdm progress bar per pass.
# The mapping order fixes the order in which the new columns are appended.
simplification_variants = {
    'A': simplification_dict_A,
    'AM': simplification_dict_AM,
    'CAN': simplification_dict_CAN,
    'CAMN': simplification_dict_CAMN,
    'A40': simplification_dict_A40,
    'AM40': simplification_dict_AM40,
    'CAN40': simplification_dict_CAN40,
    'CAMN40': simplification_dict_CAMN40,
}
for variant, simplification_dict in simplification_variants.items():
    structures_df[f'structure_{variant}'] = structures_df['structure'].progress_apply(
        structure_simplifications, simplification_dict=simplification_dict
    )

100%|██████████| 70/70 [00:00<00:00, 557.35it/s]
100%|██████████| 70/70 [00:00<00:00, 539.87it/s]
100%|██████████| 70/70 [00:00<00:00, 328.93it/s]
100%|██████████| 70/70 [00:00<00:00, 550.11it/s]
100%|██████████| 70/70 [00:00<00:00, 501.05it/s]
100%|██████████| 70/70 [00:00<00:00, 484.22it/s]
100%|██████████| 70/70 [00:00<00:00, 489.56it/s]
100%|██████████| 70/70 [00:00<00:00, 440.23it/s]


In [21]:
# Keep only the columns listed below, in this order; every other column is dropped.
structures_df = structures_df[
    [
        'composition',
        'ICSD_ID',
        'MP_ID',
        'structure',
        'structure_A',
        'structure_AM',
        'structure_CAN',
        'structure_CAMN',
        'structure_A40',
        'structure_AM40',
        'structure_CAN40',
        'structure_CAMN40',
    ]
]

In [None]:
# Persist the table, including the simplified-structure columns, under ./mpr_test
# (relative to the current working directory).
save_path = os.path.join(os.getcwd(), 'mpr_test/testing_structures_oxi_state.pkl')
# The context manager flushes and closes the file even if pickling raises.
with open(save_path, 'wb') as save_file:
    pickle.dump(structures_df, save_file)