## Script to Featurize the Materials Project Data

In [10]:
from pymatgen.core import Composition   
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StructureToComposition

######## Featurizer based on the Composition object
# Element
from matminer.featurizers.composition import ElementFraction, TMetalFraction, Stoichiometry, BandCenter
# Composition
from matminer.featurizers.composition import ElementProperty, Meredig 
# Ion 
from matminer.featurizers.composition import OxidationStates, IonProperty, ElectronAffinity, ElectronegativityDiff
# Orbital
from matminer.featurizers.composition.orbital import AtomicOrbitals, ValenceOrbital
# Packing 
from matminer.featurizers.composition.packing import AtomicPackingEfficiency

######## Featurizer based on the Structure object
# Order
from matminer.featurizers.structure.order import DensityFeatures, ChemicalOrdering, MaximumPackingEfficiency, StructuralComplexity
# RDF
from matminer.featurizers.structure.rdf import ElectronicRadialDistributionFunction
# Symmetry
from matminer.featurizers.structure.symmetry import GlobalSymmetryFeatures, Dimensionality
# Composition
from matminer.featurizers.structure.composite import JarvisCFID


import pandas as pd
import numpy as np

import pandas as pd
from mp_api.client import MPRester


In [11]:
"""
import pandas as pd
from mp_api.client import MPRester

api_key = "GnR0eFNqClPWDPWLOnCV4HYNvp70Nwar"

fields = [
    "material_id", "formula_pretty", "symmetry", "bulk_modulus", "shear_modulus", "composition", "structure",
    "formation_energy_per_atom", "band_gap", "energy_above_hull", "density", "volume", "nsites"
]

with MPRester(api_key) as mpr:
    docs = mpr.materials.summary.search(
        fields=fields,
        all_fields=False,
        chunk_size=1000,
        num_chunks=100  # Adjust or remove for more data
    )

df_mp = pd.DataFrame([doc.dict() for doc in docs])

# Save to CSV (or use .to_pickle for more complex objects)
df_mp.to_csv("materials_data_100k.csv", index=False)
# For objects like 'structure', use pickle:
# df_mp.to_pickle("materials_data.pkl")
"""

'\nimport pandas as pd\nfrom mp_api.client import MPRester\n\napi_key = "GnR0eFNqClPWDPWLOnCV4HYNvp70Nwar"\n\nfields = [\n    "material_id", "formula_pretty", "symmetry", "bulk_modulus", "shear_modulus", "composition", "structure",\n    "formation_energy_per_atom", "band_gap", "energy_above_hull", "density", "volume", "nsites"\n]\n\nwith MPRester(api_key) as mpr:\n    docs = mpr.materials.summary.search(\n        fields=fields,\n        all_fields=False,\n        chunk_size=1000,\n        num_chunks=100  # Adjust or remove for more data\n    )\n\ndf_mp = pd.DataFrame([doc.dict() for doc in docs])\n\n# Save to CSV (or use .to_pickle for more complex objects)\ndf_mp.to_csv("materials_data_100k.csv", index=False)\n# For objects like \'structure\', use pickle:\n# df_mp.to_pickle("materials_data.pkl")\n'

In [12]:
df_mp = pd.read_csv("/home/april-ai/Desktop/UQ/MP_Data_Corrected/mp_data_100k.csv")

In [13]:
unwanted_columns = ["builder_meta", "formula_anonymous", "property_name", "deprecated", 
                    "deprecated_reason", "last_updated", "last_updated_by", "origins", "warnings", "task_ids", "uncorrected_energy_per_atom", "is_stable", "is_metal", 
                    "equilibrum_reaction_energy_per_atom", "decomposes_to", "xas", "es_source_calc_id", "dos", "bandstructure", "dos_energy_up", "dos_energy_down",
                    "is_magnetic", "ordering", "total_magnetization", "total_magnetization_normalized_vol", "total_magnetization_normalized_formula_units", "num_magnetic_sites", 
                    "num_unique_magnetic_sites", "types_of_magnetic_species", "universal_anisotropy", "homogenous_poisson", "e_total", "e_ionic", "e_elctronic", "n", "e_ij_max", 
                    "weightded_sace_energy_EV_PER_ANG2", "weighted_surface_energy", "weighted_work_function","surface_anisotropy", "shape_factor", "has_reconstructed",
                     "possible_species", "has_props", "theoretical", "database_IDs", "fields_not_requested"]
# Remove unwanted columns
df_mp = df_mp.drop(columns=unwanted_columns, errors='ignore')

In [14]:
df_mp = df_mp.drop(columns=["deprecation_reasons","equilibrum_reaction_energy_per_atom", "deprecation_reasons", "equilibrum_reaction_energy_per_atom", "grain_boundaries", 
                            "cbm", "vbm", "efermi", "is_gap_direct", "homogenous_poisson", "e_electronic", "weighted_surface_energy_EV_PER_ANG2", "shear_modulus", "bulk_modulus"], errors='ignore')

In [15]:
#Drop columns with all NaN values
df_mp = df_mp.dropna(axis=1, how='all')

In [16]:
# Convert 'pretty_formula' to Composition objects
def safe_composition(x):
    try:
        return Composition(x)
    except Exception:
        return None

df_mp["composition"] = df_mp["formula_pretty"].apply(safe_composition)
small_df = df_mp.dropna(subset=["composition"])

#Featurize the composition




df_mp.head()

Unnamed: 0,nsites,composition,formula_pretty,volume,density,symmetry,material_id,structure,formation_energy_per_atom,energy_above_hull,band_gap
0,12,(Nb),Nb,226.839615,8.161256,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1094120,"{'@module': 'pymatgen.core.structure', '@class...",0.189748,0.189748,0.0
1,8,(Si),Si,163.065268,2.28802,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1120447,"{'@module': 'pymatgen.core.structure', '@class...",0.37251,0.37251,0.0
2,8,(Rb),Rb,744.494598,1.525036,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1179802,"{'@module': 'pymatgen.core.structure', '@class...",0.053649,0.053649,0.0
3,12,(O),O2,199.450062,1.598453,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180008,"{'@module': 'pymatgen.core.structure', '@class...",0.419854,0.419854,1.2773
4,10,(O),O2,159.269552,1.668092,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180064,"{'@module': 'pymatgen.core.structure', '@class...",0.387014,0.387014,0.0423


In [17]:
ef= ElementFraction()
df_mp = ef.featurize_dataframe(df_mp, "composition", ignore_errors=True)

ElementFraction:   0%|          | 0/100000 [00:00<?, ?it/s]

In [19]:
tmf = TMetalFraction()
df_mp = tmf.featurize_dataframe(df_mp, "composition", ignore_errors=True)
 

TMetalFraction:   0%|          | 0/100000 [00:00<?, ?it/s]

In [25]:
sc = Stoichiometry()
df_mp = sc.featurize_dataframe(df_mp, "composition", ignore_errors=True)


ValueError: "0-norm" exists in input dataframe

In [21]:
df_mp.head()

Unnamed: 0,nsites,composition,formula_pretty,volume,density,symmetry,material_id,structure,formation_energy_per_atom,energy_above_hull,...,Lv,Ts,Og,transition metal fraction,0-norm,2-norm,3-norm,5-norm,7-norm,10-norm
0,12,(Nb),Nb,226.839615,8.161256,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1094120,"{'@module': 'pymatgen.core.structure', '@class...",0.189748,0.189748,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,8,(Si),Si,163.065268,2.28802,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1120447,"{'@module': 'pymatgen.core.structure', '@class...",0.37251,0.37251,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
2,8,(Rb),Rb,744.494598,1.525036,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1179802,"{'@module': 'pymatgen.core.structure', '@class...",0.053649,0.053649,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
3,12,(O),O2,199.450062,1.598453,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180008,"{'@module': 'pymatgen.core.structure', '@class...",0.419854,0.419854,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
4,10,(O),O2,159.269552,1.668092,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180064,"{'@module': 'pymatgen.core.structure', '@class...",0.387014,0.387014,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0


In [22]:
bc = BandCenter()
df_mp = bc.featurize_dataframe(df_mp, "composition",  ignore_errors=True)

In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
           

BandCenter:   0%|          | 0/100000 [00:00<?, ?it/s]

In [23]:
df_mp.head()

Unnamed: 0,nsites,composition,formula_pretty,volume,density,symmetry,material_id,structure,formation_energy_per_atom,energy_above_hull,...,Ts,Og,transition metal fraction,0-norm,2-norm,3-norm,5-norm,7-norm,10-norm,band center
0,12,(Nb),Nb,226.839615,8.161256,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1094120,"{'@module': 'pymatgen.core.structure', '@class...",0.189748,0.189748,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.825663
1,8,(Si),Si,163.065268,2.28802,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1120447,"{'@module': 'pymatgen.core.structure', '@class...",0.37251,0.37251,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,4.768346
2,8,(Rb),Rb,744.494598,1.525036,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1179802,"{'@module': 'pymatgen.core.structure', '@class...",0.053649,0.053649,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.331571
3,12,(O),O2,199.450062,1.598453,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180008,"{'@module': 'pymatgen.core.structure', '@class...",0.419854,0.419854,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,7.539905
4,10,(O),O2,159.269552,1.668092,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180064,"{'@module': 'pymatgen.core.structure', '@class...",0.387014,0.387014,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,7.539905


In [26]:
ep = ElementProperty.from_preset('magpie') # Composition
#ep2 = ElementProperty.from_preset('matminer') # Composition
#ep3 = ElementProperty.from_preset('deml') # Composition
#ep4 = ElementProperty.from_preset('megnet_el') # Composition

df_mp = ep.featurize_dataframe(df_mp, "composition", ignore_errors=True)
#df_mp = ep2.featurize_dataframe(df_mp, "composition")
#df_mp = ep3.featurize_dataframe(df_mp, "composition")
#df_mp = ep4.featurize_dataframe(df_mp, "composition")


In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.


ElementProperty:   0%|          | 0/100000 [00:00<?, ?it/s]

In [None]:
md = Meredig()

df_mp = md.featurize_dataframe(df_mp, "composition", ignore_errors=True)


In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
           

Meredig:   0%|          | 0/100000 [00:00<?, ?it/s]

In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
           

In [None]:
ao = AtomicOrbitals()
df_mp = ao.featurize_dataframe(df_mp, "composition", ignore_errors=True)

AtomicOrbitals:   0%|          | 0/100000 [00:00<?, ?it/s]

  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self.elements}
  self.aos = {str(el): [[str(el), k, v] for k, v in Element(el).atomic_orbitals.items()] for el in self

In [None]:
df_mp.head()

Unnamed: 0,nsites,composition,formula_pretty,volume,density,symmetry,material_id,structure,formation_energy_per_atom,energy_above_hull,...,frac p valence electrons,frac d valence electrons,frac f valence electrons,HOMO_character,HOMO_element,HOMO_energy,LUMO_character,LUMO_element,LUMO_energy,gap_AO
0,10,(O),O2,159.269552,1.668092,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180064,"{'@module': 'pymatgen.core.structure', '@class...",0.387014,0.387014,...,0.666667,0.0,0.0,p,O,-0.338381,p,O,-0.338381,0.0
1,100,(C),C,1190.353192,1.675489,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1244913,"{'@module': 'pymatgen.core.structure', '@class...",0.994253,0.994253,...,0.5,0.0,0.0,p,C,-0.199186,p,C,-0.199186,0.0
2,100,(Ti),Ti,1744.318224,4.556796,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1245006,"{'@module': 'pymatgen.core.structure', '@class...",0.141384,0.141384,...,0.0,0.5,0.0,d,Ti,-0.17001,d,Ti,-0.17001,0.0
3,100,(Si),Si,1793.764938,2.599954,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1244933,"{'@module': 'pymatgen.core.structure', '@class...",0.349291,0.349291,...,0.5,0.0,0.0,p,Si,-0.153293,p,Si,-0.153293,0.0
4,12,(Nb),Nb,226.839615,8.161256,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1094120,"{'@module': 'pymatgen.core.structure', '@class...",0.189748,0.189748,...,0.0,0.8,0.0,d,Nb,-0.125252,d,Nb,-0.125252,0.0


In [None]:
#vo = ValenceOrbital()
#df_mp = vo.featurize_dataframe(df_mp, "composition")

In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.
In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.


ValueError: "avg s valence electrons" exists in input dataframe

In [None]:
ape = AtomicPackingEfficiency() 
df_mp = ape.featurize_dataframe(df_mp, "composition", ignore_errors =True)

In a future release, impute_nan will be set to True by default.
                    This means that features that are missing or are NaNs for elements
                    from the data source will be replaced by the average of that value
                    over the available elements.
                    This avoids NaNs after featurization that are often replaced by
                    dataset-dependent averages.


AtomicPackingEfficiency:   0%|          | 0/100000 [00:00<?, ?it/s]

  x1 = float("inf") if self.X != self.X else self.X
  if not hasattr(other, "X") or not hasattr(other, "symbol"):
  x2 = float("inf") if other.X != other.X else other.X
  x1 = float("inf") if self.X != self.X else self.X
  if not hasattr(other, "X") or not hasattr(other, "symbol"):
  x2 = float("inf") if other.X != other.X else other.X


In [None]:
columns_to_drop = [col for col in df_mp.columns if col in ["density", "vpa", "packing fraction"]]
df_mp = df_mp.drop(columns=columns_to_drop)

df = DensityFeatures()
df_mp = df.featurize_dataframe(df_mp, "structure", ignore_errors=True)

DensityFeatures:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
co = ChemicalOrdering()
df_mp = co.featurize_dataframe(df_mp, "structure", ignore_errors = True)

ChemicalOrdering:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
mpe = MaximumPackingEfficiency()
df_mp = mpe.featurize_dataframe(df_mp, "structure", ignore_errors = True)

MaximumPackingEfficiency:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
sc = StructuralComplexity()
df_mp = sc.featurize_dataframe(df_mp, "structure", ignore_errors = True)

StructuralComplexity:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
gsf = GlobalSymmetryFeatures()
df_mp = gsf.featurize_dataframe(df_mp, "structure", ignore_errors=True)

GlobalSymmetryFeatures:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
dim = Dimensionality()
df_mp = dim.featurize_dataframe(df_mp, "structure", ignore_errors = True)

Dimensionality:   0%|          | 0/10000 [00:00<?, ?it/s]

In [None]:
df_mp.head() #788 column

Unnamed: 0,nsites,composition,formula_pretty,volume,symmetry,material_id,structure,formation_energy_per_atom,energy_above_hull,band_gap,...,mean ordering parameter shell 3,max packing efficiency,structural complexity per atom,structural complexity per cell,spacegroup_num,crystal_system,crystal_system_int,is_centrosymmetric,n_symmetry_ops,dimensionality
0,12,(Nb),Nb,226.839615,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1094120,"{'@module': 'pymatgen.core.structure', '@class...",0.189748,0.189748,0.0,...,,,,,,,,,,
1,8,(Si),Si,163.065268,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1120447,"{'@module': 'pymatgen.core.structure', '@class...",0.37251,0.37251,0.0,...,,,,,,,,,,
2,8,(Rb),Rb,744.494598,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1179802,"{'@module': 'pymatgen.core.structure', '@class...",0.053649,0.053649,0.0,...,,,,,,,,,,
3,12,(O),O2,199.450062,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180008,"{'@module': 'pymatgen.core.structure', '@class...",0.419854,0.419854,1.2773,...,,,,,,,,,,
4,10,(O),O2,159.269552,"{'crystal_system': 'Triclinic', 'symbol': 'P1'...",mp-1180064,"{'@module': 'pymatgen.core.structure', '@class...",0.387014,0.387014,0.0423,...,,,,,,,,,,


In [None]:
print(df_mp)


      nsites              composition formula_pretty      volume  \
0         12             {'Nb': 12.0}             Nb  226.839615   
1          8              {'Si': 8.0}             Si  163.065268   
2          8              {'Rb': 8.0}             Rb  744.494598   
3         12              {'O': 12.0}             O2  199.450062   
4         10              {'O': 10.0}             O2  159.269552   
...      ...                      ...            ...         ...   
9995      22  {'Ta': 10.0, 'N': 12.0}          Ta5N6  243.607673   
9996      16  {'Ta': 10.0, 'Ge': 6.0}         Ta5Ge3  271.299900   
9997      16  {'Pr': 10.0, 'Ge': 6.0}         Pr5Ge3  468.304759   
9998      24   {'La': 6.0, 'F': 18.0}           LaF3  327.134737   
9999      16  {'Sc': 10.0, 'Ge': 6.0}         Sc5Ge3  323.046561   

                                               symmetry material_id  \
0     {'crystal_system': 'Triclinic', 'symbol': 'P1'...  mp-1094120   
1     {'crystal_system': 'Triclinic', 'sy

In [None]:
df_mp.to_csv("mp_data_100k_featurized")

In [None]:
len(df_mp)

10000

In [None]:
for column in df_mp.columns: 
    print(column)

nsites
composition
formula_pretty
volume
symmetry
material_id
structure
formation_energy_per_atom
energy_above_hull
band_gap
minimum oxidation state
maximum oxidation state
range oxidation state
std_dev oxidation state
compound possible
max ionic char
avg ionic char
avg anion electron affinity
minimum EN difference
maximum EN difference
range EN difference
mean EN difference
std_dev EN difference
H
He
Li
Be
B
C
N
O
F
Ne
Na
Mg
Al
Si
P
S
Cl
Ar
K
Ca
Sc
Ti
V
Cr
Mn
Fe
Co
Ni
Cu
Zn
Ga
Ge
As
Se
Br
Kr
Rb
Sr
Y
Zr
Nb
Mo
Tc
Ru
Rh
Pd
Ag
Cd
In
Sn
Sb
Te
I
Xe
Cs
Ba
La
Ce
Pr
Nd
Pm
Sm
Eu
Gd
Tb
Dy
Ho
Er
Tm
Yb
Lu
Hf
Ta
W
Re
Os
Ir
Pt
Au
Hg
Tl
Pb
Bi
Po
At
Rn
Fr
Ra
Ac
Th
Pa
U
Np
Pu
Am
Cm
Bk
Cf
Es
Fm
Md
No
Lr
Rf
Db
Sg
Bh
Hs
Mt
Ds
Rg
Cn
Nh
Fl
Mc
Lv
Ts
Og
transition metal fraction
0-norm
2-norm
3-norm
5-norm
7-norm
10-norm
band center
MagpieData minimum Number
MagpieData maximum Number
MagpieData range Number
MagpieData mean Number
MagpieData avg_dev Number
MagpieData mode Number
MagpieData minimum MendeleevNum

In [None]:
zero_counts = (df_mp == 0).sum()
for i in zero_counts: 
    print(i)

0
0
0
0
0
0
0
89
1996
7186
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
10000
1