In [2]:
from multiprocessing import set_start_method
# set_start_method("spawn")

import os
import pickle
import numpy as np
from numpy import nan as Nan
import pandas as pd

from ase import atoms
from ase.io import read, write
from dscribe.descriptors import SOAP
import matminer.featurizers.composition as mm_composition
import matminer.featurizers.structure as mm_structure
import pymatgen as mg
from pymatgen.io import ase
from pymatgen.io.cif import CifParser
from pymatgen.io.cif import CifWriter

from tqdm import notebook as tqdm
from tqdm.auto import tqdm as tqdm_pandas
tqdm_pandas.pandas()

# Short alias for the AseAtomsAdaptor class. Note that `ase` at this point is the
# pymatgen.io.ase module imported just above, not the top-level `ase` package.
AAA = ase.AseAtomsAdaptor

# CAVD imports: the cavd library requires Python 3.7, so keep the cavd lines below commented out on other versions
from numpy import nan as NaN
from monty.io import zopen
# from cavd.channel import Channel
# from cavd.netstorage import AtomNetwork, connection_values_list
# from cavd.local_environment import CifParser_new, LocalEnvirCom
import re

## 3. Load the structures_df from the saved pickle

The structures_df already contains the eight simplifications plus the original structure. Thus there are nine unique representations. 

In [33]:
# Load the saved structures dataframe from the pickle under the current working directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only load
# pickles that were produced by this project.
save_path = os.path.join(os.getcwd(), 'groups_and_oxi_states_5_frames/df_step_3.pkl')
# The context manager closes the file even if unpickling raises.
with open(save_path, 'rb') as open_file:
    structures_df = pickle.load(open_file)

In [34]:
# Make the 'structure' column hold the 'stru_traj' data, overwriting any existing
# 'structure' column loaded from the pickle. Every featurizer call below that uses
# the 'structure' mode therefore operates on 'stru_traj'.
# NOTE(review): the other 'structure_*' columns are not modified here — confirm they
# correspond to the same 'stru_traj' structures.
structures_df['structure'] = structures_df['stru_traj']

## 3a. Initialize the Feature_Creator class

In [35]:
# for full model
# Feature_Creator is imported from the project-local `featurizer` module.
from featurizer import Feature_Creator
# Second constructor argument; presumably the directory where feature files are
# written — TODO confirm against Feature_Creator.
path = 'groups_and_oxi_states_5_frames/df_step_3/features'
# `fc` is the object every featurizer cell below is called on.
fc = Feature_Creator(structures_df, path)

## 3b. Make sure the 9 modes are working correctly
Check to see that the correct atoms are in each mode. 

In [36]:
# for mode in fc.mode_list:
#     fc.calculate_unique_atoms(mode)
#     print("{} contains {}".format(mode, fc.unique_atoms))

##  3c. Run featurizers as needed
***
All featurizers require the user to specify which 'mode' is used. The modes tell the class which column of the dataframe to apply the featurizer to. The valid modes are:

* structure
* structure_A
* structure_AM
* structure_CAN
* structure_CAMN
* structure_A40
* structure_AM40
* structure_CAN40
* structure_CAMN40
***
A few of the featurizers require additional parameters. The parameters are discussed in more detail below.

* __Global Instability Index__: rcut_list
* __Radial Distribution Function__: cutoff_list, bin_size_list
* __Smooth Overlap of Atomic Positions (SOAP)__: rcut_list, nmax_list, lmax_list, average
* __X-ray Diffraction__: pattern_length_list

### Featurizer: Atomic Packing Efficiency 

In [37]:
# import os
# for i in range(5):
#     directory = os.path.join('groups_and_oxi_states_5_frames', f'df_step_{i}', 'features')
#     if not os.path.exists(directory):
#         os.makedirs(directory)

In [38]:
# Run the atomic packing efficiency featurizer in 'structure_CAMN' mode, i.e. on
# the 'structure_CAMN' column of the dataframe.
fc.run_atomic_packing_efficiency_featurizer('structure_CAMN')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Band Center

In [39]:
# Run the band center featurizer on the 'structure_CAMN' column.
fc.run_band_center_featurizer('structure_CAMN')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Bond Fraction

In [40]:
# Run the bond fraction featurizer on the 'structure_CAMN' column.
fc.run_bond_fraction_featurizer('structure_CAMN')

BondFractions:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Chemical Ordering

In [41]:
# Run the chemical ordering featurizer on the 'structure_CAMN' column.
fc.run_chemical_ordering_featurizer('structure_CAMN')

ChemicalOrdering:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Density

In [42]:
# Run the density featurizer on the 'structure_CAMN' column.
fc.run_density_featurizer('structure_CAMN')

DensityFeatures:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Electronegativity Difference

In [43]:
# Run the electronegativity difference featurizer in 'structure' mode. The
# 'structure' column was set to the 'stru_traj' data in an earlier cell.
fc.run_electron_negativity_difference_featurizer('structure')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Ewald Energy

In [44]:
# Run the Ewald energy featurizer on the 'structure_CAMN' column.
fc.run_ewald_energy_featurizer('structure_CAMN')

EwaldEnergy:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Global Instability Index

Pass in a list of rcut values. The function will iterate over the list generating a feature file for each entry. 

In [45]:
# Run the global instability index featurizer in 'structure' mode. One feature file
# is generated per entry of rcut_list; only a single rcut value (20) is used here.
fc.run_global_instability_index_featurizer('structure', rcut_list=[20])

  self.params = pd.read_csv(
  new_params = self.params.append(new_data, sort=True, ignore_index=True)


GlobalInstabilityIndex:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Jarvis CFID

In [46]:
# fc.run_jarvis_cfid_featurizer('structure')

### Featurizer: Maximum Packing Efficiency

In [47]:
# Run the maximum packing efficiency featurizer on the 'structure_CAMN' column.
fc.run_maximum_packing_efficiency_featurizer('structure_CAMN')

MaximumPackingEfficiency:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Meredig

In [48]:
# Run the Meredig featurizer on the 'structure_CAMN' column.
fc.run_meredig_featurizer('structure_CAMN')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Orbital Field Matrix

In [49]:
# fc.run_orbital_field_matrix_featurizer('structure')

### Featurizer: Oxidation States

In [50]:
# Run the oxidation states featurizer on the 'structure_CAMN' column.
fc.run_oxidation_states_featurizer('structure_CAMN')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Radial Distribution Function

Pass in a list of cutoff values and a list of bin_size values. The function will iterate over the lists, generating a feature file for each combination. 

In [51]:
# Run the radial distribution function featurizer on the 'structure_CAMN' column.
# One feature file is generated per (cutoff, bin_size) combination; a single
# combination (cutoff=10, bin_size=0.1) is used here.
# NOTE(review): the recorded output for this cell reports 121 errors (the progress
# bar shows 121 rows in total) and says those rows were filled with zeroes — check
# whether this feature file contains any usable data before relying on it.
fc.run_rdf_featurizer('structure_CAMN', cutoff_list=[10], bin_size_list=[0.1])

RadialDistributionFunction:   0%|          | 0/121 [00:00<?, ?it/s]

There were 121 errors when using mode: structure_CAMN with cutoff=10 and bin_size-0.1. Filling those rows with zeroes.


### Featurizer: Sine Coulomb Matrix

In [52]:
# Run the sine Coulomb matrix featurizer in 'structure' mode (the column that an
# earlier cell set to the 'stru_traj' data).
fc.run_sine_coulomb_featurizer('structure')

SineCoulombMatrix:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Smooth Overlap of Atomic Positions (SOAP)

Pass in the following parameters:

* __rcut_list__:  a list of rcut values for the dscribe SOAP class
* __nmax_list__:  a list of nmax values for the dscribe SOAP class
* __lmax_list__:  a list of lmax values for the dscribe SOAP class
* __average__: the averaging strategy for SOAP. Either 'outer' or 'inner'

The function will create a feature file for every unique combination of the above parameters.

In [53]:
# fc.run_SOAP('structure_CAN', rcut_list=[3], nmax_list=[5], lmax_list=[3], average='outer')

### Featurizer: Structural Complexity

In [54]:
# Run the structural complexity featurizer in 'structure' mode (the column that an
# earlier cell set to the 'stru_traj' data).
fc.run_structural_complexity_featurizer('structure')

StructuralComplexity:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Structural Heterogeneity

In [55]:
# Run the structural heterogeneity featurizer in 'structure' mode (the column that
# an earlier cell set to the 'stru_traj' data).
fc.run_structural_heterogeneity_featurizer('structure')

StructuralHeterogeneity:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Valence Orbital

In [56]:
# Run the valence orbital featurizer in 'structure' mode (the column that an
# earlier cell set to the 'stru_traj' data).
fc.run_valence_orbital_featurizer('structure')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: X-ray Diffraction Pattern

Pass in a list of pattern lengths. The function will iterate over the list, saving a feature representation for each pattern length. 

In [57]:
# Run the X-ray diffraction pattern featurizer on the 'structure_CAMN' column. A
# feature representation is saved for each entry of pattern_length_list; only a
# single pattern length (451) is used here.
fc.run_XRD_featurizer('structure_CAMN', pattern_length_list=[451])

XRDPowderPattern:   0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: Yang Solid Solution

In [58]:
# Run the Yang solid solution featurizer in 'structure' mode (the column that an
# earlier cell set to the 'stru_traj' data).
fc.run_yang_solid_solution_featurizer('structure')

  0%|          | 0/121 [00:00<?, ?it/s]

### Featurizer: CAVD (requires Python 3.7 for the cavd library)

In [59]:
# mode = 'structure'

In [60]:
# for i in tqdm.tqdm(np.arange(0, len(structures_df), 1)):
#     for site in structures_df.loc[i, mode].sites:
#         try:
#             site._atom_site_label = site.species.alphabetical_formula
#             site.properties.update({'_atom_site_label': site.species.alphabetical_formula})
#         except Exception as e:
#             print(e)
    
#     try: 
#         w = CifWriter(structures_df.loc[i, mode], symprec=True)
#         w.write_file('groups_and_oxi_states/df_step_1/LiCifsCAVD/{}.cif'.format(str(i)))
#     except:
#         w = CifWriter(structures_df.loc[i, mode])
#         w.write_file('groups_and_oxi_states/df_step_1/LiCifsCAVD/{}.cif'.format(str(i)))
        

In [61]:
# def cavd_calc(filename, migrant, ntol=0.02, lower=0.0, upper=10.0):
#     with zopen(filename, "rt") as f:
#         input_string = f.read()
#     parser = CifParser_new.from_string(input_string)
#     stru = parser.get_structures(primitive=False)[0]
    
#     species = [str(sp).replace("Specie ","") for sp in stru.species]
#     elements = [re.sub('[^a-zA-Z]','',sp) for sp in species]
#     if migrant not in elements:
#         raise ValueError("The input migrant ion not in the input structure! Please check it.")
#     effec_radii,migrant_radius,migrant_alpha,nei_dises,coordination_list = LocalEnvirCom(stru,migrant)
    
#     atmnet = AtomNetwork.read_from_RemoveMigrantCif(filename, migrant, effec_radii, True)
#     vornet, edge_centers, fcs, faces = atmnet.perform_voronoi_decomposition(True, ntol)

#     # NOTE(review): the second assignment below calls replace() on `filename` again
#     # rather than on `prefixname`, so the result of the first assignment (stripping
#     # ".cif") is discarded — confirm the intended prefix before re-enabling this cell.
#     prefixname = filename.replace(".cif","")
#     prefixname = filename.replace("./Li Cifs/", "")
#     newpath = "./cavdoutputs/" + prefixname

#     # compute the R_T
#     conn_val = connection_values_list(newpath+".resex", vornet)
#     return conn_val

In [62]:
# cavd_features = []
# non_working = []
# for i in tqdm.tqdm(np.arange(0, len(structures_df), 1)):
#     try:
#         cavd_features.append(sorted(cavd_calc("./Li Cifs CAVD/{}.cif".format(str(i)), "Li")))
#     except:
#         cavd_features.append([NaN, NaN, NaN])
#         non_working.append(i)
# np.save('features/cavd2_{}'.format(mode), cavd_features)