## Parsing radial averages from the "Handbook of Atomic Data"

In [2]:
# %load_ext autoreload
# # provide cell timings
# %load_ext autotime
# %autoreload 2
import os, pickle
import re
from itertools import combinations
from itertools import product
import sympy as sp
import numpy as np
import pandas as pd
# from sympy import poly, lambdify
from IPython.display import display, Math
# %config InlineBackend.figure_format='retina'
%config Completer.use_jedi = False
from matplotlib import pyplot as plt
%matplotlib widget
from pyperclip import copy as pypercopy

In [8]:
# NOTE(review): `module_dir` is only assigned in a later cell of the visible
# notebook; confirm it is defined before this cell runs on a fresh kernel.
# pickle.load can execute arbitrary code, so only load files you trust.
# The context manager closes the file handle once the table is loaded.
with open(os.path.join(module_dir,'data','conversion_facts.pkl'),'rb') as pickle_file:
    conversion_factors = pickle.load(pickle_file)

In [743]:
# Read every sheet of the workbook: one sheet per radial average plus a
# 'Preamble' sheet whose 'preamble' column holds the notes for each table.
radialavgs = pd.read_excel("/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/HF radial averages.xlsx",None)
metadata = radialavgs['Preamble']
# .copy() gives each table its own data, so the column assignments below
# do not write into a slice of the sheet that was read (SettingWithCopyWarning).
radialavgs = {k:radialavgs[k][['Atom','II','III','IV']].copy() for k in ['r^2','r^4','r^6']}
# conversion from atomic units to A^2, A^4, and A^6
for k in ['r^2','r^4','r^6']:
    # the exponent in the sheet name is the power of the length conversion factor
    power = int(k.split('^')[-1])
    scaler = conversion_factors[('a_{0}','Å')]**power
    for ionstate in 'II III IV'.split(' '):
        # scale, then pass each value through sigrounder(x, 5)
        # NOTE(review): sigrounder is not defined in the visible notebook -
        # presumably it rounds to 5 significant figures; confirm where it comes from.
        radialavgs[k][ionstate] = (radialavgs[k][ionstate]*scaler).apply(lambda x: sigrounder(x,5))
    radialavgs[k].set_index('Atom',inplace=True)
# Final container, keyed by '<r^n>'; each table carries its notes in a
# plain `.metadata` attribute.
HFradavg = {}
HFradavg['<r^2>'] = radialavgs['r^2']
HFradavg['<r^2>'].metadata = str(metadata.iloc[0]['preamble']) + '\n Units were changed from atomic units to A^2.'
HFradavg['<r^4>'] = radialavgs['r^4']
HFradavg['<r^4>'].metadata = str(metadata.iloc[1]['preamble']) + '\n Units were changed from atomic units to A^4.'
HFradavg['<r^6>'] = radialavgs['r^6']
HFradavg['<r^6>'].metadata = str(metadata.iloc[2]['preamble']) + '\n Units were changed from atomic units to A^6.'

In [377]:
# pickle.dump(HFradavg, open('/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/HFravgs.pkl','wb'))

In [363]:
class HartreeFock():
    '''
    Lookup of Hartree-Fock radial averages <r^n> by element and charge state.

    Attributes
    ----------
    HFradavg : dict mapping '<r^n>' to a table indexed by element symbol
        with one column per ionization stage in Roman numerals.
    ArabicToRoman : dict mapping 1..10 to 'I'..'X'.
    num_to_symb : mapping from atomic number to element symbol.
    '''
    def __init__(self):
        # Uses the HFradavg tables built earlier in the notebook.
        self.HFradavg = HFradavg
        self.ArabicToRoman = dict(zip(range(1,11),'I II III IV V VI VII VIII IX X'.split(' ')))
        # pickle.load can execute arbitrary code; only load trusted files.
        # The context manager closes the file once the mapping is loaded.
        with open(os.path.join('/Users/juan/Google Drive/Zia Lab/Codebase/qdef','data','num_to_symb.pkl'),'rb') as pickle_file:
            self.num_to_symb  = pickle.load(pickle_file)
    def radial_average(self, element, charge_state, n):
        '''
        Returns the radial average <r^n> for the given element
        and charge state (charge_state=0 neutral, charge_state=1
        singly ionized, ...).
        The element can be given either as its atomic number
        or by its symbol.
        Data is taken from Fraga's et al Handbook of Atomic Data.
        If the information is not known, function returns None.
        The unit for the provided radial average is Angstrom^n.

        Raises KeyError if charge_state+1 is not a key of ArabicToRoman
        or if an integer element is not a key of num_to_symb.
        '''
        # columns are labelled by spectroscopic stage: neutral -> 'I', +1 -> 'II', ...
        charge_state = self.ArabicToRoman[charge_state+1]
        if isinstance(element, int):
            element = self.num_to_symb[element]
        try:
            # read from this instance's tables rather than the notebook global
            return float(self.HFradavg['<r^%d>' % n].loc[element, charge_state])
        except (KeyError, TypeError, ValueError):
            # unknown power / element / stage, or a cell that is not a single number
            return None

In [364]:
hf = HartreeFock()

In [347]:
%%timeit
# HartreeFock defines radial_average (singular); the plural name raises AttributeError.
hf.radial_average('UU',1,2)

138 µs ± 318 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [359]:
hf.radial_average('Eu',1,2)

0.32497

In [365]:
hf.radial_average(63,1,2)

0.32497

In [469]:
# Only 'Sheet1' is used, so read that sheet alone instead of the whole workbook.
atomsizes = pd.read_excel("/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/HFradii.xlsx", sheet_name='Sheet1')
# Index by element and normalise missing entries to NaN.
atomsizes = atomsizes.set_index(['Element']).fillna(np.nan)
# The context manager closes the file, so the pickle is flushed to disk.
with open('/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/HFsizes.pkl','wb') as pickle_file:
    pickle.dump(atomsizes, pickle_file)

In [None]:
# NOTE(review): this cell has no execution count and repeats the radial-average
# conversion cell earlier in the notebook almost line for line. The visible
# differences: it does not re-read the workbook, and it looks up
# conversion_factors[('a0','A')] instead of ('a_{0}','Å').
# NOTE(review): if `radialavgs` still holds the dict built by that earlier cell
# (keys 'r^2', 'r^4', 'r^6'), the 'Preamble' lookup below raises KeyError.
# Confirm whether this cell is still needed.
metadata = radialavgs['Preamble']
radialavgs = {k:radialavgs[k][['Atom','II','III','IV']] for k in ['r^2','r^4','r^6']}
# conversion from atomic units to A^2, A^4, and A^6
for k in ['r^2','r^4','r^6']:
    # exponent taken from the sheet name
    power = int(k.split('^')[-1])
    scaler = conversion_factors[('a0','A')]**power
    for ionstate in 'II III IV'.split(' '):
        radialavgs[k][ionstate] = radialavgs[k][ionstate]*scaler
        # NOTE(review): sigrounder is not defined in the visible notebook - confirm its source.
        radialavgs[k][ionstate] = radialavgs[k][ionstate].apply(lambda x: sigrounder(x,5))
    radialavgs[k].set_index('Atom',inplace=True)
# container keyed by '<r^n>'; notes are attached to each table as `.metadata`
HFradavg = {}
HFradavg['<r^2>'] = radialavgs['r^2']
HFradavg['<r^2>'].metadata = str(metadata.iloc[0]['preamble']) + '\n Units were changed from atomic units to A^2.'
HFradavg['<r^4>'] = radialavgs['r^4']
HFradavg['<r^4>'].metadata = str(metadata.iloc[1]['preamble']) + '\n Units were changed from atomic units to A^4.'
HFradavg['<r^6>'] = radialavgs['r^6']
HFradavg['<r^6>'].metadata = str(metadata.iloc[2]['preamble']) + '\n Units were changed from atomic units to A^6.'

In [472]:
from matplotlib import pyplot as plt

In [434]:
module_dir = '/Users/juan/Google Drive/Zia Lab/Codebase/qdef'

In [436]:
# Mapping from atomic number to element symbol.
# pickle.load can execute arbitrary code, so only load files you trust.
# The context manager closes the file handle once the mapping is loaded.
with open(os.path.join(module_dir,'data','num_to_symb.pkl'),'rb') as pickle_file:
    num_to_symb  = pickle.load(pickle_file)

In [488]:
def _load_pickle(path):
    '''Load one pickled object from path and close the file afterwards.'''
    # pickle.load can execute arbitrary code; only use on trusted files.
    with open(path,'rb') as pickle_file:
        return pickle.load(pickle_file)

class HartreeFockData():
    '''
    Repo of data from the land of Hartree-Fock.

    All data is loaded once, when the class is defined, and is exposed
    through classmethods, so no instance is needed.

    Class attributes
    ----------------
    HFradavg : dict mapping '<r^n>' to a table of radial averages.
    HFsizes : table of atom sizes.
    ArabicToRoman : dict mapping 1..35 to 'I'..'XXXV' (column labels).
    num_to_symb : mapping from atomic number to element symbol.
    '''
    HFradavg = _load_pickle(os.path.join(module_dir,'data','HFravgs.pkl'))
    ArabicToRoman = dict(zip(range(1,36),['I','II','III','IV','V','VI','VII','VIII','IX','X','XI','XII','XIII','XIV','XV','XVI','XVII','XVIII','XIX','XX','XXI','XXII','XXIII','XXIV','XXV','XXVI','XXVII','XXVIII','XXIX','XXX','XXXI','XXXII','XXXIII','XXXIV','XXXV']))
    num_to_symb  = num_to_symb
    HFsizes = _load_pickle(os.path.join(module_dir,'data','HFsizes.pkl'))
    @classmethod
    def radial_average(cls, element, charge_state, n):
        '''
        Returns the radial average <r^n> for a valence electron
        for the given element
        and charge state (charge_state=0 neutral, charge_state=1
        singly ionized, ...)
        within the limitations of Hartree-Fock.
        The element can be given either as its atomic number
        or by its symbol.
        Data is taken from Fraga's et al Handbook of Atomic Data.
        The unit for the provided radial average is Angstrom^n.
        Provided data has 5 significant figures.

        Raises ValueError if the requested value is not in the tables,
        AssertionError for a negative charge state, and KeyError if
        charge_state+1 is not in ArabicToRoman or an integer element
        is not in num_to_symb.
        '''
        charge_state = int(charge_state)
        assert charge_state >= 0, "What odd ion state you speak of?"
        # columns are labelled by spectroscopic stage: neutral -> 'I', +1 -> 'II', ...
        charge_state = cls.ArabicToRoman[charge_state+1]
        # accept numpy integers (e.g. from arrays) as atomic numbers too
        if isinstance(element, (int, np.integer)):
            element = cls.num_to_symb[element]
        try:
            return float(cls.HFradavg['<r^%d>' % n].loc[element, charge_state])
        except (KeyError, TypeError, ValueError) as err:
            # unknown power / element / stage, or a cell that is not a single number
            raise ValueError('This radial average is not here.') from err
    @classmethod
    def atom_size(cls, element, charge_state):
        '''
        Size of given element with given charge.
        Given in Angstrom.

        The element can be an atomic number or a symbol; charge_state
        is 0 for neutral, 1 for singly ionized, and so on. The value is
        returned as stored in the table, so a missing entry stored as
        NaN is returned as nan.
        '''
        if isinstance(element, (int, np.integer)):
            element = cls.num_to_symb[element]
        charge_state = cls.ArabicToRoman[charge_state+1]
        return float(cls.HFsizes.loc[element, charge_state])

In [492]:
# One curve per charge state (0..34): atom size against atomic number 1..101.
fig, ax = plt.subplots()
Zs = list(range(1,102))
for j in range(35):
    sizes = list(map(lambda i: HartreeFockData.atom_size(i,j), Zs))
    ax.plot(Zs,sizes,label=j)
ax.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Unit network (DEPRECATED - edit unitexpander.py instead)

In [635]:
import networkx as nx
import sympy as sp
from matplotlib import pyplot as plt

In [519]:
%%timeit -n 4
# Read every sheet of the workbook; each sheet lists Source, Destination and Factor.
conversions = pd.read_excel("/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/ConversionFactors.xlsx",None)
conversion_facts = {}
# Each sheet contributes (Source, Destination) -> Factor entries; later sheets
# overwrite earlier ones when the same pair appears twice.
for conversion_type, these_factors in conversions.items():
    unit_pairs = [tuple(pair) for pair in these_factors[['Source','Destination']].values.tolist()]
    factors = [row[0] for row in these_factors[['Factor']].values.tolist()]
    these_dict = dict(zip(unit_pairs, factors))
    conversion_facts.update(these_dict)

8.83 ms ± 1.99 ms per loop (mean ± std. dev. of 7 runs, 4 loops each)


In [715]:
def _link_units(graph, factors):
    '''
    For every (source, destination) -> factor entry add the forward edge,
    weighted by the factor, and the reverse edge, weighted by its reciprocal.
    Nodes are created implicitly by add_weighted_edges_from.
    '''
    for (source, destination), factor in factors.items():
        factor = sp.S(factor)
        graph.add_weighted_edges_from([(source, destination, factor)])
        graph.add_weighted_edges_from([(destination, source, 1/factor)])

# --- 1. read the directly tabulated factors: (Source, Destination) -> Factor
conversions = pd.read_excel(os.path.join(module_dir,'data','ConversionFactors.xlsx'),None)
conversion_facts = {}
for conversion_type in conversions:
    these_factors = conversions[conversion_type]
    these_dict = dict(zip(list(map(tuple,these_factors[['Source','Destination']].values.tolist())),
          list(map(lambda x: x[0],these_factors[['Factor']].values.tolist()))))
    conversion_facts.update(these_dict)
# cfacts is the same dict object as conversion_facts; it is extended in place below
cfacts = conversion_facts

# --- 2. graph of units, with an edge in both directions for every tabulated factor
unitgraph = nx.DiGraph()
_link_units(unitgraph, cfacts)
# node order follows insertion order, so the pair enumeration is reproducible
allunits = list(unitgraph.nodes)
allpairs = list(combinations(allunits,2))

# --- 3. derive the missing factors by multiplying weights along a shortest path
for pair in allpairs:
    try:
        path = nx.shortest_path(unitgraph,*pair)
    except nx.NetworkXNoPath:
        # the two units are not connected, so there is nothing to derive
        continue
    fromto = (path[0],path[-1])
    backto = (path[-1], path[0])
    total_factor = sp.S(1)
    for nodeA, nodeB in zip(path, path[1:]):
        total_factor *= unitgraph[nodeA][nodeB]['weight']
    # tabulated factors take precedence over derived ones
    if fromto not in cfacts:
        cfacts[fromto] = float(total_factor)
    if backto not in cfacts:
        cfacts[backto] = float(1/total_factor)

# --- 4. add the derived factors to the graph as direct edges
_link_units(unitgraph, cfacts)

In [725]:
# Persist the completed conversion table. The context manager closes the
# file, so the pickle is flushed to disk.
with open(os.path.join(module_dir,'data','conversion_facts.pkl'),'wb') as pickle_file:
    pickle.dump(cfacts,pickle_file)

In [721]:
# Script version
# from itertools import combinations
# import pandas as pd
# import networkx as nx
# import os
# import pickle

# def main():
#     if os.path.exists('./conversion_facts.pkl'):
#         print("conversion_facts.pkl already exists, delete if you want to generate it again.")
#         return None
#     conversions = pd.read_excel(os.path.join(module_dir,'data','ConversionFactors.xlsx'),None)
#     conversion_facts = {}
#     for conversion_type in conversions:
#         these_factors = conversions[conversion_type]
#         these_dict = dict(zip(list(map(tuple,these_factors[['Source','Destination']].values.tolist())),
#               list(map(lambda x: x[0],these_factors[['Factor']].values.tolist()))))
#         conversion_facts.update(these_dict)
#     cfacts = conversion_facts
#     unitgraph = nx.DiGraph()
#     allunits = set()
#     for k,v in cfacts.items():
#         unitgraph.add_node(k[0])
#         unitgraph.add_node(k[1])
#         v = sp.S(v)
#         unitgraph.add_weighted_edges_from([(k[0],k[1],v)])
#         unitgraph.add_weighted_edges_from([(k[1],k[0],1/v)])
#         allunits.add(k[0])
#         allunits.add(k[1])
#     allunits = list(allunits)
#     allpairs = list(combinations(allunits,2))
#     for pair in allpairs:
#         try:
#             path = nx.shortest_path(unitgraph,*pair)
#         except:
#             path = []
#         if len(path) == 0:
#             continue
#         fromto = (path[0],path[-1])
#         backto = (path[-1], path[0])
#         total_factor = sp.S(1)
#         for node_idx in range(len(path)-1):
#             nodeA = path[node_idx]
#             nodeB = path[node_idx+1]
#             total_factor *= unitgraph[nodeA][nodeB]['weight']
#         if fromto not in cfacts.keys():
#             cfacts[fromto] = float(total_factor)
#         if backto not in cfacts.keys():
#             cfacts[backto] = float(1/total_factor)
#     for k,v in cfacts.items():
#         unitgraph.add_node(k[0])
#         unitgraph.add_node(k[1])
#         v = sp.S(v)
#         unitgraph.add_weighted_edges_from([(k[0],k[1],v)])
#         unitgraph.add_weighted_edges_from([(k[1],k[0],1/v)])
#     pickle.dump(cfacts,open('./conversion_facts.pkl','wb'))
#     return None
# if __name__ == '__main__':
#     main()

In [739]:
# Draw `unitgraph` on a new matplotlib figure.
plt.figure()
nx.draw(unitgraph)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Others

In [18]:
# latestMorrison = sorted([s for s in os.listdir('/Users/juan/Downloads/') if 'Morrison' in s])[0]

In [116]:
morrison_sheets = pd.read_excel('/Users/juan/Google Drive/Zia Lab/Codebase/qdef/data/Morrison.xlsx',None)

In [117]:
def table_types(dframe):
    '''
    Receive a table and return a list with the types of thing it is.

    The types are read from the first cell of the 'Types' column, which
    is expected to hold a comma-separated string. An empty list is
    returned (and a message printed) when there is no 'Types' column;
    an empty list is also returned when the column has no rows or its
    first cell is not a string.
    '''
    if 'Types' not in list(dframe):
        print("No types, returning empty.")
        return []
    try:
        # positional access: the first row, whatever the index labels are
        return dframe['Types'].iloc[0].split(',')
    except (IndexError, AttributeError):
        # IndexError: no rows; AttributeError: first cell is not a string (e.g. NaN)
        return []

In [118]:
# t_types: sheet name -> list of type tags; present_types: every tag seen.
t_types = {}
present_types = set()
for df_key in (morrison_sheets).keys():
    # parse each sheet once; a second call would repeat the work and
    # print the "No types" message twice
    these_types = table_types(morrison_sheets[df_key])
    t_types[df_key] = these_types
    present_types.update(these_types)

In [119]:
rundown = {key: [k for k,v in t_types.items() if key in v] for key in present_types}

In [120]:
# For each entry of the 'Type' column of the 'Schema' sheet ('+'-separated
# tags), list the sheets whose set of tags matches it exactly.
egs = []
schema_combos = [entry.split('+') for entry in list(morrison_sheets['Schema']['Type'])]
egs.extend(
    ', '.join(sheet_name for sheet_name, tags in t_types.items() if set(tags) == set(wanted))
    for wanted in schema_combos
)

In [121]:
import pyperclip

In [122]:
pyperclip.copy('\n'.join(egs))

In [87]:
# For every sheet tagged with `special`: print its name, its column headers
# (all but the first) and, when available, the entries of its 'A_{n,m}' column.
special = 'CFCOMPA'
for sheet_key in rundown[special]:
    print(sheet_key)
    print(', '.join(list(morrison_sheets[sheet_key])[1:]))
    try:
        print(' || '.join((morrison_sheets[sheet_key]['A_{n,m}'])))
    except (KeyError, TypeError):
        # KeyError: the sheet has no 'A_{n,m}' column;
        # TypeError: the column holds non-string cells (e.g. NaN).
        # Either way this sheet has nothing to list, so skip it.
        pass

3.2.1
A_{n,m}, Point charge, Self-induced, Dipole, Total, Comments, Types
A_{2,0} || A_{3,2}^r || A_{3,2}^i || A_{4,0} || A_{4,4}^r || A_{4,4}^i || A_{5,2}^r || A_{5,2}^i || A_{4,4}^n
3.2.2
X, Charge, 3d^N, F^{(2)}, F^{(4)}, B_{2,0}, B_{4,0}, B_{4,3}, Comments, Types
3.3.1
A_{n,m}, Monopole, Self-induced, Dipole, Total, Comments, Types
A_{2,0} || Α_{4,0} || A_{4,3}^r || A_{4,3}^i || A_{4,3}^n
4.2.1
A_{n,m}, Monopole, Self-induced, Dipole, Total, Comments, Types
A_{2,0} || A_{4,0} || A_{4,3}^r || A_{4,3}^i || A_{4,3}^n
4.3.1
X, A_{n,m}, Monopole, Self-induced, Dipole, Total, Ref, Comments, Types
A_{4,0} || A_{4,0} || A_{4,0} || A_{4,4} || A_{4,4} || A_{4,4}
5.2.1
A_{n,m}, Monopole, Self-induced, Dipole, Total, Comments, Types
A_{4,0} || A_{4,4}
5.3.1
A_{n,m}, Monopole, Dipole, Self-induced, Total, Comments, Types
A_{2,0} || A_{4,0} || A_{4,3}
6.2
A_{n,m}, Monopole, Self-induced, Dipole, Total, Comments, Types
A_{2,0} || A_{3,3}^i || A_{4,0} || A_{4,3} || A_{5,3}^i
7.2.3
A_{n,m}, Monopol

In [55]:
for key in present_types:
    print(key)

CFDATA
111ZAXIS
SPECIAL
XVAR
ALTERNATIVE(2/2)
REFS
FREEION
CFCOMPA
THEOCFCOMP
HFOCK
ALTERNATIVE(1/2)
EXPARAMS
CDATA
CFCOMP
NUMPARAMS


In [54]:
for df_key in (morrison_sheets).keys():
    if 'HFOCK' in t_types[df_key]:
        print(df_key)

2


In [66]:
def parse_comments(dframe):
    '''
    Collect the entries of a table's 'Comments' column.

    Missing cells and empty strings are dropped; the remaining entries
    are returned as a list in row order. When the table has no
    'Comments' column a message is printed and an empty list returned.
    '''
    if 'Comments' not in dframe:
        print("No comments column, returning empty.")
        return []
    # missing cells become '' so that a single test removes both cases
    comments = dframe['Comments'].fillna('')
    return [comment for comment in comments if comment != '']

In [34]:
def parse_table(dframe):
    '''
    Report what kind of table dframe is (work in progress).

    Prints a message for crystallographic tables and parses the
    'name=value' strings of the first column into lattice parameters.
    Nothing is returned yet.
    '''
    # get the type of table it is
    dframe_types = table_types(dframe)
    # NOTE(review): `or True` makes this branch run for every table, whatever
    # its types; it looks like a debugging leftover - confirm before removing.
    if ('CDATA' in dframe_types) or True:
        print("This is a table with crystallographic data.")
        # lattice params are in the first column
        first_col = [s for s in dframe[list(dframe)[0]].fillna('') if s]
        # parsed for validation only for now; raises if a cell is not 'name=number'
        lattice_params = {s.split('=')[0]:float(s.split('=')[1]) for s in first_col}
        if ('GARNET') in dframe_types:
            print("This is a table with an array of garnets.")
        # the original ended in an `else:` with no body, which is a SyntaxError;
        # there is nothing to do for non-garnet tables yet

In [61]:
dframe = morrison_sheets['11.1']
# The header of the first column describes the crystal; its first word is
# taken as the crystal system. The cells below it hold 'name=value' strings.
crystallographic_data = dframe.columns[0]
first_col = [s for s in dframe[crystallographic_data].fillna('') if s]
lattice_params = dict(
    (pieces[0], float(pieces[1]))
    for pieces in (entry.split('=') for entry in first_col)
)
crystal_system = crystallographic_data.split(' ')[0].lower()

In [62]:
crystal_system

'hexagonal'

In [56]:
lattice_params

{'a': 9.206, 'c': 9.205}

In [50]:
first_col

['a=9.206', 'c=9.205']

In [35]:
parse_table(morrison_sheets['12.1'])

This is a table with crystallographic data.


#### Parsing tables with crystal field blurbs

In [501]:
# loader for the data in Morrison's book
# this sheet is an export of the data on 
# a Google Spreadsheet "Morrison Data"
morrison_sheets = pd.read_excel('/Users/juan/Downloads/Morrison.xlsx',None)

In [519]:
# Parse the sheets that list values for crystal-field components, i.e.
# those whose first column header contains 'Crystal-field component'.
# Result: morrison_cf_components, keyed by the first entry of the host
# looked up for the sheet's section number.
# NOTE(review): `morrison` is not defined in the visible notebook - confirm
# where morrison['section_to_host'] comes from.
morrison_cf_components = {}
sheets_with_cf_components = [l for l in morrison_sheets if 'Crystal-field component' in list(morrison_sheets[l])[0]]
for sheetkey_with_cf_components in sheets_with_cf_components:
    sheet_with_cf_components = morrison_sheets[sheetkey_with_cf_components]
    # the first column header doubles as the table caption
    metadata = list(sheet_with_cf_components)[0]
    # sheet names look like '<section>.<table>'; the section number selects the host
    host = morrison['section_to_host'][int(sheetkey_with_cf_components.split('.')[0])]
    # everything after the last 'for ' in the caption describes the site
    site = metadata.split('for ')[-1]
    cf_params = dict(zip(map(sp.Symbol,sheet_with_cf_components['A_{n,m}']),
                         sheet_with_cf_components['Total']))
    # first parenthesised group of the site description; None when there is
    # none (indexing the findall result used to raise IndexError in that case)
    cpg_match = re.search(r'\((.*?)\)',site)
    cpg = cpg_match.group(1) if cpg_match else None
    site_with_comments = str(site)
    site = site.split(' (')[0]
    total = {'metadata': metadata,
            'site': site,
            'site_with_comments': site_with_comments,
            'cf_params': cf_params,
            'cpg': cpg}
    morrison_cf_components[host[0]] = total

In [521]:
# these sheets provide information on experimentally
# measured values for F^2, F^4, zeta, and B_40
# the result should be something that is keyed
# by the formulas of the hosts
# and whose values are dataframes
# NOTE(review): work in progress - the loop only prints the sheet names;
# `metadata`, `host` and `ion` are assigned but not used or stored yet.
# NOTE(review): `morrison` is not defined in the visible notebook - confirm
# where morrison['section_to_host'] comes from.
# select the sheets whose first column header contains 'Experimental Values'
sheets_with_exp_B40 = [l for l in morrison_sheets if 'Experimental Values' in list(morrison_sheets[l])[0]]
for sheetkey in sheets_with_exp_B40:
    print(sheetkey)
    asheet = morrison_sheets[sheetkey]
    # the first column header is kept as the table's metadata
    metadata = list(asheet)[0]
    # drop the first column, keep the rest
    asheet = asheet[list(asheet)[1:]]
    # the integer before the '.' in the sheet name selects the host
    host = morrison['section_to_host'][int(sheetkey.split('.')[0])]
    ion = asheet['Ion']

3.4
20.3
