# Parsing Lanthanide Tables

## Parsing Chris' MD transition rates

In [28]:
import re, os
import sympy as sp
from sympy.parsing.latex import parse_latex
import pandas as pd

In [144]:
def parser(chunk, filler):
    chunk = chunk.replace('$','')
    chunk = chunk.replace(' \\\\','')
    if chunk == '\\text{}':
        return filler
    elif 'text' in chunk:
        return sp.Symbol(chunk)
    else:
        chunk = chunk.replace('\\times','*')
        chunk = chunk.replace('.*','.0 *')
        try:
            chunk = float(parse_latex(chunk))
        except:
            chunk = float(sp.S(chunk))
        return chunk
        

In [176]:
tables_folder = '/Users/juan/Downloads/MDEmisLines/'
fnames = [f for f in os.listdir(tables_folder) if '.tab' in f]
elements = list(set([f[:2] for f in fnames]))
all_elements = []
for element in elements:
    tablefiles = [f for f in fnames if f[:2]==element]
    element_data = []
    for tablefile in tablefiles:
        lines = open(os.path.join(tables_folder,tablefile),'r').read().split('\n')
        table_data = []
        for line in lines:
            if 'tabular' in line or line == '':
                continue
            chunks = [c.strip() for c in line.split('&')]
            row = []
            for chunk_idx, chunk in enumerate(chunks):
                parsed = parser(chunk,None)
                row.append(parsed)
            table_data.append(row)
        table_data = sp.Matrix(table_data).T
        for row_idx in range(table_data.rows):
            for col_idx in range(table_data.cols):
                entry = table_data[row_idx,col_idx]
                if entry == None:
                    table_data[row_idx, col_idx] = filler
                else:
                    filler = entry
        table_data = table_data.T
        table_data = [[element,3]+list(table_data[row_idx,:]) for row_idx in range(table_data.rows)]
        table_data = sp.Matrix(table_data)
        element_data.append([table_data])
    element_data = sp.Matrix(sp.BlockMatrix(element_data))
    element_data = [list(element_data[row_idx,:]) for row_idx in range(element_data.rows)]
    all_elements.extend(element_data)
    # if element == 'Yb':
    #     break

In [178]:
col1 = [row[:7] for row in all_elements]
col2 = [row[:2]+row[7:] for row in all_elements]
col1.extend(col2)

In [300]:
dfDodson = pd.DataFrame(col1, columns="Element Charge SLJ E_SLJ/cm^-1 S'L'J' λ/nm A'_{MD}/s^{-1}".split(' '))
dfDodson['Element'] = dfDodson['Element'].apply(str)
good_rows = []
for index, row in dfDodson.iterrows():
    if isinstance(row['SLJ'], sp.core.symbol.Symbol):
        good_rows.append(row)
dfDodson = pd.DataFrame(good_rows, columns="Element Charge SLJ E_SLJ/cm^-1 S'L'J' λ/nm A'_{MD}/s^{-1}".split(' '))
dfDodson.drop_duplicates(inplace=True)
def LSJparser(slj):
    slj = str(slj)
    if slj[:6] == '\\left(':
        sl, j = slj.split('_')
        sl0, sl1 = sl.split(',')
        s1 = (sp.S(sl1.split('^')[-1][0]) - 1)/2
        L1 = sl1.split('^')[-1][1]
        s0 = (sp.S(sl0.split('^')[-1][0]) - 1)/2
        L0 = sl0.split('^')[-1][1]
        j = sp.S(j.replace('\\text','').replace('{','').replace('}',''))
        return [(s0, L0, j), (s1, L1, j)]
    else:
        sl, j = slj.split('_')
        s = (sp.S(sl.split('^')[-1][0])-1)/2
        L = sl.split('^')[-1][1]
        j = sp.S(j.replace('\\text','').replace('{','').replace('}',''))
        return (s, L, j)
dfDodson['SLJ_symb'] = dfDodson['SLJ']
dfDodson['SLJ'] = dfDodson["SLJ"].apply(LSJparser)
dfDodson["S'L'J'_symb"] = dfDodson["S'L'J'"]
dfDodson["S'L'J'"] = dfDodson["S'L'J'"].apply(LSJparser)

In [307]:
dfDodson.to_pickle('./data/lanthanides_MD.pkl')
dfDodson.to_excel('./data/lanthanides_MD.xls')

  dfDodson.to_excel('./data/lanthanides_MD.xls')


In [321]:
dfDodson.to_hdf('./lanthanides_MD.h5',key='lanthanides')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Element', 'Charge', 'SLJ', 'E_SLJ/cm^-1', 'S'L'J'', 'λ/nm',
       'A'_{MD}/s^{-1}', 'SLJ_symb', 'S'L'J'_symb'],
      dtype='object')]

  pytables.to_hdf(


## Parsing All Tables

In [1]:
import re
from sympy.parsing.latex import parse_latex
import sympy as sp
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
import pickle
from random import random
from math import log10, floor
import time

# table_fname = './data/lanthanide_tables/HFEnergyMatrixTables'
# pickle_fname = './data/lanthanide_tables/HFEnergyMatrixTables.pkl'
# table_fname = './data/lanthanide_tables/tab1'
# pickle_fname = './data/lanthanide_tables/tab1.pkl'
save_to_pickle = True
pretty_vars = False # whether the vars are standard or nice

base_rep=[
('B02',sp.Symbol('B_{0,2}')),('B04',sp.Symbol('B_{0,4}')),
('B06',sp.Symbol('B_{0,6}')),('B0x',sp.Symbol('B_{0,x}')),
('B0y',sp.Symbol('B_{0,y}')),('B0z',sp.Symbol('B_{0,z}')),
('B12',sp.Symbol('B_{1,2}')),('B14',sp.Symbol('B_{1,4}')),
('B16',sp.Symbol('B_{1,6}')),('B22',sp.Symbol('B_{2,2}')),
('B24',sp.Symbol('B_{2,4}')),('B26',sp.Symbol('B_{2,6}')),
('B34',sp.Symbol('B_{3,4}')),('B36',sp.Symbol('B_{3,6}')),
('B44',sp.Symbol('B_{4,4}')),('B46',sp.Symbol('B_{4,6}')),
('B56',sp.Symbol('B_{5,6}')),('B66',sp.Symbol('B_{6,6}')),
('E0',sp.Symbol('E_{0}')),('E1',sp.Symbol('E_{1}')),
('E2',sp.Symbol('E_{2}')),('E3',sp.Symbol('E_{3}')),
('eOrbitalRad',sp.Symbol(r'\epsilon_r')),('gI',sp.Symbol('g_{I}')),
('gs',sp.Symbol('g_{s}')),('M0',sp.Symbol('M_{0}')),
('M2',sp.Symbol('M_{2}')),('M4',sp.Symbol('M_{4}')),
('P2',sp.Symbol('P_{2}')),('P4',sp.Symbol('P_{4}')),
('P6',sp.Symbol('P_{6}')),('S12',sp.Symbol('S_{1,2}')),
('S14',sp.Symbol('S_{1,4}')),('S16',sp.Symbol('S_{1,6}')),
('S22',sp.Symbol('S_{2,2}')),('S24',sp.Symbol('S_{2,4}')),
('S26',sp.Symbol('S_{2,6}')),('S34',sp.Symbol('S_{3,4}')),
('S36',sp.Symbol('S_{3,6}')),('S44',sp.Symbol('S_{4,4}')),
('S46',sp.Symbol('S_{4,6}')),('S56',sp.Symbol('S_{5,6}')),
('S66',sp.Symbol('S_{6,6}')),('\[Alpha]',sp.Symbol(r'\alpha')),
('\[Beta]',sp.Symbol(r'\beta')),('\[Beta]BohrMag',sp.Symbol(r'\mu_{B,e}')),
('\[Beta]n',sp.Symbol(r'\mu_{B,n}')),('\[Gamma]',sp.Symbol(r'\gamma')),
('\[Zeta]',sp.Symbol(r'\zeta')),
('T11',sp.Symbol('T_{1,1}')),
('T12',sp.Symbol('T_{1,2}')),
('T14',sp.Symbol('T_{1,4}')),
('T15',sp.Symbol('T_{1,5}')),
('T16',sp.Symbol('T_{1,6}')),
('T17',sp.Symbol('T_{1,7}')),
('T18',sp.Symbol('T_{1,8}')),
('T19',sp.Symbol('T_{1,9}')),
('T2',sp.Symbol('T_{2}')),
('T3',sp.Symbol('T_{3}')),
('T4',sp.Symbol('T_{4}')),
('T5',sp.Symbol('T_{5}')),
('T6',sp.Symbol('T_{6}')),
('T7',sp.Symbol('T_{7}')),
('T8',sp.Symbol('T_{8}'))]

master_rep = {}
for idx, it in enumerate(base_rep):
    key = sp.Symbol('x_{%d}' % (idx+1))
    master_rep[key] = it[1]
inverse_rep = {v:k for k,v in master_rep.items()}

session=WolframLanguageSession()

session.evaluate(r'''vars = {B02, B04, B06, B0x, B0y, B0z, B12, B14, B16, B22, B24, B26, 
   B34, B36, B44, B46, B56, B66, E0, E1, E2, E3, eOrbitalRad, gI, gs, 
   M0, M2, M4, P2, P4, P6, S12, S14, S16, S22, S24, S26, S34, S36, 
   S44, S46, S56, S66, \[Alpha], \[Beta], \[Beta]BohrMag, \[Beta]n, \[Gamma], \[Zeta],
   T11, T12, T14, T15, T16, T17, T18, T19, T2, T3, T4, T6, T7, T8, ee, Q};
svars = Table[
   ToExpression[SubscriptBox["x", ToString[i]]], {i, 1, Length[vars]}];
reps = (#[[1]] -> #[[2]]) & /@ Transpose[{vars, svars}];
ToSympy[expr0_] := (
  expr = Expand[Chop[expr0]];
  expr = expr /. reps;
  str = ToString[FullForm[expr, NumberMarks -> False]];
  str = StringReplace[
    str,
    {"Plus" -> "sp.core.add.Add",
     "Times" -> "sp.core.mul.Mul",
     "Power" -> "sp.core.power.Pow",
     "[" -> "(", "]" -> ")",
     "List" -> "slist",
     "\"" -> ""}];
  str = StringReplace[str,
    {"Subscript(x" -> "sp.SubscriptSymbol('x'",
     "Rational" -> "sp.Rational"}];
  Return[str]
  )
''')


In [77]:
# Abbreviations to simplify parsing
if pretty_vars:
    def SubscriptSymbol(a,b):
        return master_rep[sp.Symbol("%s_{%d}" % (a,int(b)))]
else:
    def SubscriptSymbol(a,b):
        return sp.Symbol("%s_{%d}" % (a,int(b)))
sp.Rational = lambda x,y: sp.S(x)/sp.S(y)
sp.SubscriptSymbol = SubscriptSymbol
Pi = sp.pi
Complex = lambda x,y: (sp.S(x) + sp.I * sp.S(y))
def slist(*args):
    return list([*args])

def parse_mathematica(mathematica_expression):
    seval = session.evaluate('ToSympy[%s]' % mathematica_expression)
    return str(seval)

def lanthanum_cleanup(fname):
    '''
    Data  file  might have large redundancies, this opens it and removes
    all    redundant   definitions;   it   assumes   that  there are  no
    inconsistencies between them, if there are then it fails.
    
    More  importantly  it puts together all the lines that relate to one
    definition in just one string with no newlines. 

    This is assuming that the file only contains definitions for:
       EnergyMatrixTable, AllowedM, and EnergyStatesTable

    Parameters
    ----------
    fname   (str): file name of file to be parsed

    Returns
    -------
    clean_output  (list): a list of strings each with a single definition.
    '''
    global parse
    lanthanum = [l.strip().split(' =')[0] for l in open(fname,'r').readlines() if l[0] != ' ']
    lanthanum = list(filter(lambda x: x != '', lanthanum))
    # this dictionary will have as keys the lhs of definitions
    # and as values will be lists of strings that all attempt
    # to define this symbol
    rhs = {}
    full_lanthanum = [l.strip() for l in open(fname,'r').readlines()]
    control_strings = 'EnergyMatrixTable AllowedM EnergyStatesTable HCFtable'.split(' ')
    for line_idx, line in enumerate(full_lanthanum):
        if line_idx < len(full_lanthanum)-1:
            next_line = full_lanthanum[line_idx+1]
        if 'Attributes[Null]' in line:
            continue
        # if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line) or ('HCFtable' in line):
        if any([s in line for s in control_strings]):
            key = line.split('=')[0].strip()
            chunks = []
            try:
                first_chunk = line.split('=')[1]
            except:
                first_chunk = ''
            chunks.append(first_chunk)
            if key not in rhs:
                rhs[key] = []
            continue
        # if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line) :
        if line == '' or any([s in next_line for s in control_strings]):
            chunks.append(line.strip())
            whole_chunk = ''.join(chunks).strip()
            if whole_chunk != '':
                rhs[key].append(whole_chunk)
            chunks = []
        else:
            chunks.append(line.strip())
    # now to see if there are any redundancies
    clean_output = []
    for k,v in rhs.items():
        unique = list(set(v))
        if len(unique) != 1:
            print(k)
            for uval in unique:
                print(uval)
        assert len(unique) == 1, "There should be only one, this is a loopy loopy file."
        unique = unique[0]
        out = '%s = %s' % (k, unique)
        clean_output.append(out)
    return clean_output
def parse_table(fname, verbose=False):
    global parse
    clear_lanthanum = lanthanum_cleanup(fname)
    EnergyMatrixTables = {}
    for cl in clear_lanthanum:
        if 'EnergyMatrixTable' in cl:
            pre = cl.split(' =')[0]
            if verbose:
                print("Parsing:",pre)
            parse = parse_mathematica(cl)
            args = tuple(eval(re.findall(r'\[.*\]',pre)[0]))
            try:
                EnergyMatrixTables[args] = sp.Matrix(eval(parse))
            except:
                print("failed, trying alternative...")
                parse = parse.replace('*^','*10**')
                EnergyMatrixTables[args] = sp.Matrix(eval(parse))
    EnergyStatesTable = {}
    for cl in clear_lanthanum:
        if 'EnergyStatesTable' in cl:
            lhs = cl.split(' =')[0]
            lhs = tuple(eval(re.findall(r'\[.*\]',lhs)[0]))
            rhs = cl.split('= ')[1]
            parse = parse_mathematica(cl)
            replacements = '2D1 2D2 2F1 2F2 2G1 2G2 2H1 2H2 1S 1D 1G 1I 2F 2P 2I 2L 2K 3P 3F 3H 4D 4S 4F 4G 4I'.split(' ')
            for replacement in replacements:
                parse = re.sub(r'([^"])(%s)([^"])' % replacement, r'\1"\2"\3', parse)
            EnergyStatesTable[lhs] = eval(parse)
    AllowedM = {}
    for cl in clear_lanthanum:
        if 'AllowedM' in cl:
            parse = parse_mathematica(cl)
            AllowedM[parse[0]] = eval(parse)
    return {'EnergyMatrixTables': EnergyMatrixTables,
            'EnergyStatesTable': EnergyStatesTable,
            'AllowedM': AllowedM}

In [78]:
parse = 'fun(2F,1)+gun(2F1,3)'
replacements = '2D1 2D2 2F1 2F2 1S 1D 1G 1I 2F 2P 2L 3P 3F 3H 4D 4S 4F 4G 4I'.split(' ')
for replacement in replacements:
    # parse = parse.replace(replacement,'"%s"' % replacement)
    parse = re.sub(r'([^"])(%s)([^"])' % replacement, r'\1"\2"\3', parse)

In [79]:
# !head -n 20 ./data/lanthanide_tables/tab2

In [80]:
# tab1 NOK
# tab2 OK
# tab3 OK
# tab4 NOK HCFtable confusion
# tab5 OK
# tab6 OK but takes a loong time of about 9 minutes.

In [81]:
# tab6 was originally 3.1 GB
# after importing to Mathematica and reexporting
# the size was reduced to 82 MB

In [82]:
session.evaluate('1+1')

2

In [83]:
table_fname = './data/lanthanide_tables/tab6'
parsed_table = parse_table(table_fname)

failed, trying alternative...


In [84]:
!beep

zsh:1: command not found: beep


In [76]:
for i in [1,2,3,4,5,6]:
    search_term = 'HCFtable[1, "2F", 5/2, -5/2, "2F", 7/2, -7/2]'
    search_term = 'EnergyMatrixTable[1, 5/2, 7/2, 0, 0]'
    # search_term = 'HCFtable[2, "3P", 0, 0, "3P", 0, 0]'
    table_fname = './data/lanthanide_tables/tab%d' % i
    print("Searching in %s" % table_fname)
    table_file = open(table_fname,'r').read()
    if search_term in table_file:
        if (search_term+' =') in table_file:
            but = 'and is defined.'
        else:
            but = 'but not defined.'
        print(" >>> Used in %s" % table_fname, but)


Searching in ./data/lanthanide_tables/tab1
 >>> Used in ./data/lanthanide_tables/tab1 and is defined.
Searching in ./data/lanthanide_tables/tab2
Searching in ./data/lanthanide_tables/tab3
Searching in ./data/lanthanide_tables/tab4
Searching in ./data/lanthanide_tables/tab5
Searching in ./data/lanthanide_tables/tab6


In [None]:
    # search_term = 'HCFtable[2, "3P", 0, 0, "3P", 0, 0]'


In [None]:
    table_fname = './data/lanthanide_tables/tab%d' % i
    print("Searching in %s" % table_fname)
    table_file = open(table_fname,'r').read()
    if search_term in table_file:
        if (search_term+' =') in table_file:
            but = 'and is defined.'
        else:
            but = 'but not defined.'
        print(" >>> Used in %s" % table_fname, but)


In [54]:
parsed_table.keys()

dict_keys(['EnergyMatrixTables', 'EnergyStatesTable', 'AllowedM'])

In [58]:
parsed_table['EnergyMatrixTables'].keys()

dict_keys([(3, 0.5, 0.5, 3.5, 3.5), (3, 1.5, 0.5, 3.5, 3.5), (3, 2.5, 0.5, 3.5, 3.5), (3, 3.5, 0.5, 3.5, 3.5), (3, 4.5, 0.5, 3.5, 3.5), (3, 5.5, 0.5, 3.5, 3.5), (3, 6.5, 0.5, 3.5, 3.5), (3, 7.5, 0.5, 3.5, 3.5), (3, 8.5, 0.5, 3.5, 3.5), (3, 0.5, 1.5, 3.5, 3.5), (3, 1.5, 1.5, 3.5, 3.5), (3, 2.5, 1.5, 3.5, 3.5), (3, 3.5, 1.5, 3.5, 3.5), (3, 4.5, 1.5, 3.5, 3.5), (3, 5.5, 1.5, 3.5, 3.5), (3, 6.5, 1.5, 3.5, 3.5), (3, 7.5, 1.5, 3.5, 3.5), (3, 8.5, 1.5, 3.5, 3.5), (3, 0.5, 2.5, 3.5, 3.5), (3, 1.5, 2.5, 3.5, 3.5), (3, 2.5, 2.5, 3.5, 3.5), (3, 3.5, 2.5, 3.5, 3.5), (3, 4.5, 2.5, 3.5, 3.5), (3, 5.5, 2.5, 3.5, 3.5), (3, 6.5, 2.5, 3.5, 3.5), (3, 7.5, 2.5, 3.5, 3.5), (3, 8.5, 2.5, 3.5, 3.5), (3, 0.5, 3.5, 3.5, 3.5), (3, 1.5, 3.5, 3.5, 3.5), (3, 2.5, 3.5, 3.5, 3.5), (3, 3.5, 3.5, 3.5, 3.5), (3, 4.5, 3.5, 3.5, 3.5), (3, 5.5, 3.5, 3.5, 3.5), (3, 6.5, 3.5, 3.5, 3.5), (3, 7.5, 3.5, 3.5, 3.5), (3, 8.5, 3.5, 3.5, 3.5), (3, 0.5, 4.5, 3.5, 3.5), (3, 1.5, 4.5, 3.5, 3.5), (3, 2.5, 4.5, 3.5, 3.5), (3, 3.5, 4.5, 

In [None]:
for mat_key, mat_val in parsed_table['EnergyMatrixTables'].items():
    print(mat_key)
    print("%d X %d" % (mat_val.rows, mat_val.cols))

In [59]:
parsed_table['EnergyMatrixTables'][(3, 2.5, 0.5, 3.5, 3.5)].cols

336

In [None]:
!beep

In [9]:
parse = open('/Users/juan/Data/very_long','r').read()

In [10]:
chunks = parse.split('slist')

In [23]:
# chunk = chunks[2]
# hargs = chunk[1:-1]
# subexprs = []
# enterparens = 0
# exitsparens = 0
# group = ''
# for char in hargs:
#     if char == '(':
#         enterparens += 1
#     if char == ')':
#         exitsparens += 1
#     if enterparens == exitsparens and enterparens != 0:
#         subexprs.append(group)
#         group = ''
#         continue
#     group += char
# subexprs = [s for s in subexprs if s != '']
# failed = []
# for i in range(len(subexprs)):
#     try:
#         eval(subexprs[i][1:])
#     except:
#         failed.append(subexprs[i][1:])
#         pass

In [None]:
good_chunks = []
for chunk_idx in range(len(chunks)):
    try:
        huh = 'slist'+chunks[chunk_idx][:-2]
        row = (eval(huh))
        good_chunks.append(row)
    except:
        print("ERROR")
        print(chunk_idx)
        1/0
        pass

In [39]:
for line in huh.split(','):
    if '^' in line:
        print(line)

 sp.core.mul.Mul(4.44000444000444*^-6


# Preprocessing Mathematica's FullForm & Function Definition

Best way of doing this was by making replacements on the FullForm representation that mathematica makes of an expression.

Naive attempts that take lots of time try using parse_lates from the CForm or TeXForm representations that Mathematica can make of an expression.

Once the expression have been parsed it also takes a long time to make substitutions for parameters in a sp.Matrix. To avoid this one can define regular python functions out of the string representation of the sp.Matrix.

## Parsing tables

In [1]:
import re
from sympy.parsing.latex import parse_latex
import sympy as sp
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
import pickle
from random import random
from math import log10, floor

# This list of symbols/variables was determined by loading
# the definitions into a fresh Mathematica kernel and querying
# Names["Global`*"]

base_rep=[
('B02',sp.Symbol('B_{0,2}')),('B04',sp.Symbol('B_{0,4}')),
('B06',sp.Symbol('B_{0,6}')),('B0x',sp.Symbol('B_{0,x}')),
('B0y',sp.Symbol('B_{0,y}')),('B0z',sp.Symbol('B_{0,z}')),
('B12',sp.Symbol('B_{1,2}')),('B14',sp.Symbol('B_{1,4}')),
('B16',sp.Symbol('B_{1,6}')),('B22',sp.Symbol('B_{2,2}')),
('B24',sp.Symbol('B_{2,4}')),('B26',sp.Symbol('B_{2,6}')),
('B34',sp.Symbol('B_{3,4}')),('B36',sp.Symbol('B_{3,6}')),
('B44',sp.Symbol('B_{4,4}')),('B46',sp.Symbol('B_{4,6}')),
('B56',sp.Symbol('B_{5,6}')),('B66',sp.Symbol('B_{6,6}')),
('E0',sp.Symbol('E_{0}')),('E1',sp.Symbol('E_{1}')),
('E2',sp.Symbol('E_{2}')),('E3',sp.Symbol('E_{3}')),
('eOrbitalRad',sp.Symbol(r'\epsilon')),('gI',sp.Symbol('g_{I}')),
('gs',sp.Symbol('g_{s}')),('M0',sp.Symbol('M_{0}')),
('M2',sp.Symbol('M_{2}')),('M4',sp.Symbol('M_{4}')),
('P2',sp.Symbol('P_{2}')),('P4',sp.Symbol('P_{4}')),
('P6',sp.Symbol('P_{6}')),('S12',sp.Symbol('S_{1,2}')),
('S14',sp.Symbol('S_{1,4}')),('S16',sp.Symbol('S_{1,6}')),
('S22',sp.Symbol('S_{2,2}')),('S24',sp.Symbol('S_{2,4}')),
('S26',sp.Symbol('S_{2,6}')),('S34',sp.Symbol('S_{3,4}')),
('S36',sp.Symbol('S_{3,6}')),('S44',sp.Symbol('S_{4,4}')),
('S46',sp.Symbol('S_{4,6}')),('S56',sp.Symbol('S_{5,6}')),
('S66',sp.Symbol('S_{6,6}')),('\[Alpha]',sp.Symbol(r'\alpha')),
('\[Beta]',sp.Symbol(r'\beta')),('\[Beta]BohrMag',sp.Symbol(r'\mu_{B,e}')),
('\[Beta]n',sp.Symbol(r'\mu_{B,n}')),('\[Gamma]',sp.Symbol(r'\gamma')),
('\[Zeta]',sp.Symbol(r'\zeta'))]

master_rep = {}
for idx, it in enumerate(base_rep):
    key = sp.Symbol('x_{%d}' % (idx+1))
    master_rep[key] = it[1]
inverse_rep = {v:k for k,v in master_rep.items()}

session=WolframLanguageSession()

session.evaluate(r'''vars = {B02, B04, B06, B0x, B0y, B0z, B12, B14, B16, B22, B24, B26, 
   B34, B36, B44, B46, B56, B66, E0, E1, E2, E3, eOrbitalRad, gI, gs, 
   M0, M2, M4, P2, P4, P6, S12, S14, S16, S22, S24, S26, S34, S36, 
   S44, S46, S56, 
   S66, \[Alpha], \[Beta], \[Beta]BohrMag, \[Beta]n, \[Gamma], \
\[Zeta]};
svars = Table[
   ToExpression[SubscriptBox["x", ToString[i]]], {i, 1, Length[vars]}];
reps = (#[[1]] -> #[[2]]) & /@ Transpose[{vars, svars}];
ToSympy[expr0_] := (
  expr = Expand[Chop[expr0]];
  expr = expr /. reps;
  str = ToString[FullForm[expr, NumberMarks -> False]];
  str = StringReplace[
    str,
    {"Plus" -> "sp.core.add.Add",
     "Times" -> "sp.core.mul.Mul",
     "Power" -> "sp.core.power.Pow",
     "[" -> "(", "]" -> ")",
     "List" -> "slist",
     "\"" -> ""}];
  str = StringReplace[str,
    {"Subscript(x" -> "sp.SubscriptSymbol('x'",
     "Rational" -> "sp.Rational"}];
  Return[str]
  )
''')


In [2]:
# Abbreviations to simplify parsing
pretty_vars = False
if pretty_vars:
    def SubscriptSymbol(a,b):
        return master_rep[sp.Symbol("%s_{%d}" % (a,int(b)))]
else:
    def SubscriptSymbol(a,b):
        return sp.Symbol("%s_{%d}" % (a,int(b)))
sp.Rational = lambda x,y: sp.S(x)/sp.S(y)
sp.SubscriptSymbol = SubscriptSymbol
Pi = sp.pi
Complex = lambda x,y: (sp.S(x) + sp.I * sp.S(y))
def slist(*args):
    return list([*args])

def parse_mathematica(mathematica_expression):
    seval = session.evaluate('ToSympy[%s]' % mathematica_expression)
    return str(seval)

def lanthanum_cleanup(fname):
    '''
    Data  file  might have large redundancies, this opens it and removes
    all    redundant   definitions;   it   assumes   that  there are  no
    inconsistencies between them, if there are then it fails.
    
    More  importantly  it puts together all the lines that relate to one
    definition in just one string with no newlines. 

    This is assuming that the file only contains definitions for:
       EnergyMatrixTable, AllowedM, and EnergyStatesTable

    Parameters
    ----------
    fname   (str): file name of file to be parsed

    Returns
    -------
    clean_output  (list): a list of strings each with a single definition.
    '''
    lanthanum = [l.strip().split(' =')[0] for l in open(fname,'r').readlines() if l[0] != ' ']
    lanthanum = list(filter(lambda x: x != '', lanthanum))
    # this dictionary will have as keys the lhs of definitions
    # and as values will be lists of strings that all attempt
    # to define this symbol
    rhs = {}
    full_lanthanum = [l.strip() for l in open(fname,'r').readlines()]
    for line_idx, line in enumerate(full_lanthanum):
        if line_idx < len(full_lanthanum)-1:
            next_line = full_lanthanum[line_idx+1]
        if 'Attributes[Null]' in line:
            continue
        if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line):
            key = line.split('=')[0].strip()
            chunks = []
            try:
                first_chunk = line.split('=')[1]
            except:
                first_chunk = ''
            chunks.append(first_chunk)
            if key not in rhs:
                rhs[key] = []
            continue
        if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line):
            chunks.append(line.strip())
            whole_chunk = ''.join(chunks).strip()
            if whole_chunk != '':
                rhs[key].append(whole_chunk)
            chunks = []
        else:
            chunks.append(line.strip())
    # now to see if there are any redundancies
    clean_output = []
    for k,v in rhs.items():
        unique = list(set(v))
        assert len(unique) == 1, "There should be only one, this is a loopy file."
        unique = unique[0]
        out = '%s = %s' % (k, unique)
        clean_output.append(out)
    return clean_output
def parse_table(fname, verbose=False):
    clear_lanthanum = lanthanum_cleanup(fname)
    EnergyMatrixTables = {}
    counter = 0
    for cl in clear_lanthanum:
        if 'EnergyMatrixTable' in cl:
            pre = cl.split(' =')[0]
            if verbose:
                print("Parsing:",pre)
            parse = parse_mathematica(cl)
            args = tuple(eval(re.findall(r'\[.*\]',pre)[0]))
            EnergyMatrixTables[args] = sp.Matrix(eval(parse))
            counter += 1
    EnergyStatesTable = {}
    for cl in clear_lanthanum:
        if 'EnergyStatesTable' in cl:
            lhs = cl.split(' =')[0]
            lhs = tuple(eval(re.findall(r'\[.*\]',lhs)[0]))
            rhs = cl.split('= ')[1]
            parse = parse_mathematica(cl)
            parse = parse.replace('3P','"3P"').replace('1S','"1S"').replace('3F','"3F"')\
                    .replace('1D','"1D"').replace('1G','"1G"').replace('3H','"3H"')\
                    .replace('1I','"1I"')
            EnergyStatesTable[lhs] = eval(parse)
    AllowedM = {}
    for cl in clear_lanthanum:
        if 'AllowedM' in cl:
            parse = parse_mathematica(cl)
            AllowedM[parse[0]] = eval(parse)
    return {'EnergyMatrixTables': EnergyMatrixTables,
            'EnergyStatesTable': EnergyStatesTable,
            'AllowedM': AllowedM}

In [3]:
parsed = parse_table('./data/lanthanide_tables/HFEnergyMatrixTables')

## Defining Functions from symbolic sp.Matrix

In [78]:
import numpy as np
def matrix_to_fun(mat, mat_args):
    replacements = [('sqrt','np.sqrt'),('I','1j')]
    free_symbs = mat.free_symbols
    funs = {}
    rows = []
    for row in range(mat.rows):
        astr = str(list(mat[row,:]))
        astr = re.sub(r'x_\{(.*?)\}',r'x_\1',astr)
        for replacement in replacements:
            astr = astr.replace(*replacement)
        rows.append(astr)
    represented_vars = sorted(list(map(lambda x: int(re.findall(r'\{(.*)\}',str(x))[0]), mat.free_symbols)))
    fun_args = ','.join(['x_%d' % i for i in represented_vars])
    nice_fun_args = '\n'.join([r'    %s -> %s' %(s, str(master_rep[s])) for s in free_symbs])
    nice_fun_args = '%s\n%s' % ('EnergyMatrix[%s]' % str(mat_args),nice_fun_args)
    trick = '''
import numpy as np
def fun(%s):
    """
%s
    """
    list_def = [%s]
    return(np.array(list_def))
    ''' % (fun_args, nice_fun_args, ','.join(rows))
    exec(trick, funs)
    return funs['fun']
def EnergyMatrix(n, J, Jp, Ii, If):
    '''
    This function returns a function for the provided arguments.

    The returned functions take as argument a dictionary for values of parameters.
    '''
    return matrix_to_fun(parsed['EnergyMatrixTables'][(n, J, Jp, Ii, If)], (n, J, Jp, Ii, If))

In [84]:
from inspect import signature
randMats = []
energyFunctions = {}
for k in parsed['EnergyMatrixTables'].keys():
    faun = EnergyMatrix(*k)
    energyFunctions[k] = faun
    randMats.append(faun(*[random() for i in range(len(signature(faun).parameters))]))

# Custom Mathematica Parser

In [None]:
import re
from sympy.parsing.latex import parse_latex
import sympy as sp
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
import pickle
from random import random
from math import log10, floor

session=WolframLanguageSession()

session.evaluate(r'''vars = {B02, B04, B06, B0x, B0y, B0z, B12, B14, B16, B22, B24, B26, 
   B34, B36, B44, B46, B56, B66, E0, E1, E2, E3, eOrbitalRad, gI, gs, 
   M0, M2, M4, P2, P4, P6, S12, S14, S16, S22, S24, S26, S34, S36, 
   S44, S46, S56, 
   S66, \[Alpha], \[Beta], \[Beta]BohrMag, \[Beta]n, \[Gamma], \
\[Zeta]};
svars = Table[
   ToExpression[SubscriptBox["x", ToString[i]]], {i, 1, Length[vars]}];
reps = (#[[1]] -> #[[2]]) & /@ Transpose[{vars, svars}];
ParseSymbol[thing_] :=
 (str = ToString[Chop[thing] /. reps, CForm];
  Return[str]
  )''')

# NB: The order here needs to match the order
# of variable assignment in Mathematica

base_rep=[
('B02',sp.Symbol('B_{0,2}')),('B04',sp.Symbol('B_{0,4}')),
('B06',sp.Symbol('B_{0,6}')),('B0x',sp.Symbol('B_{0,x}')),
('B0y',sp.Symbol('B_{0,y}')),('B0z',sp.Symbol('B_{0,z}')),
('B12',sp.Symbol('B_{1,2}')),('B14',sp.Symbol('B_{1,4}')),
('B16',sp.Symbol('B_{1,6}')),('B22',sp.Symbol('B_{2,2}')),
('B24',sp.Symbol('B_{2,4}')),('B26',sp.Symbol('B_{2,6}')),
('B34',sp.Symbol('B_{3,4}')),('B36',sp.Symbol('B_{3,6}')),
('B44',sp.Symbol('B_{4,4}')),('B46',sp.Symbol('B_{4,6}')),
('B56',sp.Symbol('B_{5,6}')),('B66',sp.Symbol('B_{6,6}')),
('E0',sp.Symbol('E_{0}')),('E1',sp.Symbol('E_{1}')),
('E2',sp.Symbol('E_{2}')),('E3',sp.Symbol('E_{3}')),
('eOrbitalRad',sp.Symbol(r'\epsilon')),('gI',sp.Symbol('g_{I}')),
('gs',sp.Symbol('g_{s}')),('M0',sp.Symbol('M_{0}')),
('M2',sp.Symbol('M_{2}')),('M4',sp.Symbol('M_{4}')),
('P2',sp.Symbol('P_{2}')),('P4',sp.Symbol('P_{4}')),
('P6',sp.Symbol('P_{6}')),('S12',sp.Symbol('S_{1,2}')),
('S14',sp.Symbol('S_{1,4}')),('S16',sp.Symbol('S_{1,6}')),
('S22',sp.Symbol('S_{2,2}')),('S24',sp.Symbol('S_{2,4}')),
('S26',sp.Symbol('S_{2,6}')),('S34',sp.Symbol('S_{3,4}')),
('S36',sp.Symbol('S_{3,6}')),('S44',sp.Symbol('S_{4,4}')),
('S46',sp.Symbol('S_{4,6}')),('S56',sp.Symbol('S_{5,6}')),
('S66',sp.Symbol('S_{6,6}')),('\[Alpha]',sp.Symbol(r'\alpha')),
('\[Beta]',sp.Symbol(r'\beta')),('\[Beta]BohrMag',sp.Symbol(r'\mu_{B,e}')),
('\[Beta]n',sp.Symbol(r'\mu_{B,n}')),('\[Gamma]',sp.Symbol(r'\gamma')),
('\[Zeta]',sp.Symbol(r'\zeta'))]

master_rep = {}
for idx, it in enumerate(base_rep):
    key = sp.Symbol('x_{%d}' % (idx+1))
    master_rep[key] = it[1]
inverse_rep = {v:k for k,v in master_rep.items()}

In [670]:
def lanthanum_cleanup(fname):
    '''
    Data  file  might have large redundancies, this opens it and removes
    all    redundant   definitions;   it   assumes   that   there's   no
    inconsistencies between them, if there is then it fails.
    
    More  importantly  it puts together all the lines that relate to one
    definition in just one string with no newlines. 

    This is assuming that the file only contains definitions for:
       EnergyMatrixTable, AllowedM, and EnergyStatesTable

    Parameters
    ----------
    fname   (str): file name of file to be parsed

    Returns
    -------
    clean_output  (list): a list of strings each with a single definition.
    '''
    lanthanum = [l.strip().split(' =')[0] for l in open(fname,'r').readlines() if l[0] != ' ']
    lanthanum = list(filter(lambda x: x != '', lanthanum))
    # this dictionary will have as keys the lhs of definitions
    # and as values will be lists of strings that all attempt
    # to define this symbol
    rhs = {}
    full_lanthanum = [l.strip() for l in open(fname,'r').readlines()]
    for line_idx, line in enumerate(full_lanthanum):
        if line_idx < len(full_lanthanum)-1:
            next_line = full_lanthanum[line_idx+1]
        if 'Attributes[Null]' in line:
            continue
        if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line):
            key = line.split('=')[0].strip()
            chunks = []
            try:
                first_chunk = line.split('=')[1]
            except:
                first_chunk = ''
            chunks.append(first_chunk)
            if key not in rhs:
                rhs[key] = []
            continue
        if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line):
            chunks.append(line.strip())
            whole_chunk = ''.join(chunks).strip()
            if whole_chunk != '':
                rhs[key].append(whole_chunk)
            chunks = []
        else:
            chunks.append(line.strip())
    # now to see if there are any redundancies
    clean_output = []
    for k,v in rhs.items():
        unique = list(set(v))
        assert len(unique) == 1, "There should be only one, this is a loopy file."
        unique = unique[0]
        out = '%s = %s' % (k, unique)
        clean_output.append(out)
    return clean_output

def parse_it(astr):
    astr = cleanup(astr)
    return sp.expand((parse_latex(astr)))

def cleanup(parse):
    '''
    Some basic cleanup to avoid some errors
    when using parse_latex.
    '''
    parse = re.sub(r'Subscript\(x,(\d{1,2})\)', r'x_{\1}', parse)
    parse = re.sub(r'Complex\((.*?),(.*?)\)',r'(\1+i*(\2))',parse)
    parse = re.sub(r'Sqrt\((.*?)\)', r'sqrt{\1}', parse).replace('sqrt','\sqrt').replace('.*','*').replace('.)',')')
    parse = re.sub(r'(\d\.[\d]+)e([-]{0,1}\d)',r'(\1*10^{\2})', parse)
    parse = parse + ' '
    reps = [('. ','')]
    for rep in reps:
        parse = parse.replace(*rep)
    return parse

def parse_energy_matrix_table(astr):
    '''
    Parse a string that contains the definition for a symbolic matrix.

    Parameters
    ----------
    astr    (str): A definition of the sort EnergyMatrixTable[_,_,_,_,_] = {{...},{...},...}

    Returns
    -------
    lhs, parsed_matrix, rhs
    lhs                 (str): The LHS of the matrix definition
    parsed_matrix (sp.Matrix): The parsed symbolic matrix.
    rhs                 (str): The RHS of the matrix definition
    '''
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows and columns
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    # Iterate through each element and parse it
    for num_row in range(1,num_rows+1):
        row = []
        for num_col in range(1, num_cols+1):
            parse = session.evaluate("ParseSymbol[%s[[%d,%d]]]" % (lhs, num_row, num_col))
            # Clean up the string before parsing with Sympy
            parse = cleanup(parse)
            parsed = sp.expand(parse_latex(parse)).subs(sp.Symbol('i'),sp.I)
            # The imaginary unit needs to be dealt with separately
            parsed = parsed.subs(sp.Symbol('i'),sp.I)
            row.append(parsed)
        parsed_matrix.append(row)
    parsed_matrix = sp.Matrix(parsed_matrix)
    return lhs, parsed_matrix, rhs

def parse_allowed_m(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_lists = tuple(session.evaluate("Dimensions[%s]" % lhs))[0]
    M_value = int(lhs.split('[')[-1].split(']')[0])
    rows = []
    for list_index in range(1,num_lists+1):
        the_list = session.evaluate("%s[[%d]]" % (lhs, list_index))
        try:
            the_row = [sp.S(x[0])/sp.S(x[1]) for x in the_list]
        except:
            the_row = [sp.S(x) for x in the_list]
        rows.append(the_row)
    return M_value, rows  

def pa(atom):
    special_cases = [('Pi','sp.pi')]
    atom = atom.strip()
    if 'Global' in atom:
        return 'sp.Symbol("%s")' % (atom.replace('Global`',''))
    if '`' in atom:
        return 'sp.Symbol("%s")' % (atom.replace('`',''))
    for special_case in special_cases:
        if atom == special_case[0]:
            return special_case[1]
    else:
        return atom
def parse_multi(match):
    args = list(map(pa,match.split(',')))
    return ';'.join(args)
def parse_mathematica_expr(mathematica_expression):
    session.evaluate('''
    expr = Expand[(%s)/.reps];
    Export["/Users/juan/Temp/mathexp.m",ToString[FullForm[expr,NumberMarks->False]]]''' % mathematica_expression)
    expr = ''.join([re.sub(r'\\$',r'',l.strip()) for l in open("/Users/juan/Temp/mathexp.m",'r').readlines()[1:]])
    expr = expr.replace('"','')
    expr0 = expr
    # replace symbolic subscripts vars
    expr1 = re.sub(r'Subscript\[(.*?), (.*?)\]',r'sp.Symbol("\1_{\2}")', expr0)
    # parse Rational and complex
    expr2 = re.sub(r'Rational\[(.*?),(.*?)\]',
        lambda x: 'sp.S(%s) / sp.S(%s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr1)
    expr2 = re.sub(r'Complex\[(.*?),(.*?)\]',
        lambda x: '(sp.S(%s) + sp.I*sp.S(%s))' % (pa(x.groups()[0]), pa(x.groups()[1])), expr2)
    # parse Power
    expr3 = re.sub(r'Power\[(.*?),(.*?)\]',
        lambda x: 'sp.core.power.Pow(%s; %s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr2)
    # Parse Times
    expr4 = re.sub(r'Times\[(.*?)\]',
        lambda x: 'sp.core.mul.Mul(%s)' % parse_multi(x.group(1)), expr3)
    # Parse Plus
    expr5 = re.sub(r'Plus\[(.*?)\]',
        lambda x: 'sp.core.add.Add(%s)' % parse_multi(x.group(1)), expr4)
    exprfinal = expr5.replace(';',',')
    sympyexpr = eval(exprfinal)
    return sympyexpr

def parse_mathematica_matrix(mathematica_matrix_def):
    lhs = mathematica_matrix_def.split('= ')[0].strip()
    rhs = mathematica_matrix_def.split('= ')[-1]
    session.evaluate(mathematica_matrix_def)
    # get num rows and columns
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    # Iterate through each element and parse it
    for num_row in range(1,num_rows+1):
        row = []
        for num_col in range(1, num_cols+1):
            parsed = parse_mathematica_expr('Chop[%s[[%d,%d]]]' % (lhs, num_row, num_col))
            row.append(parsed)
        parsed_matrix.append(row)
    parsed_matrix = sp.Matrix(parsed_matrix)
    return lhs, parsed_matrix, rhs

def parse_energy_states_table(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    rows = []
    for num_row in range(1,num_rows+1):
        head = session.evaluate("%s[[%d]][[-1]]" % (lhs, num_row))
        try:
            head = sp.S(head[0])/sp.S(head[1])
        except:
            head = sp.S(head)
        thorax = session.evaluate("%s[[%d]][[1]][[-1]]" % (lhs, num_row))
        try:
            thorax = sp.S(thorax[0])/sp.S(thorax[1])
        except:
            thorax = sp.S(thorax)
        knees = session.evaluate("%s[[%d]][[1]][[1]]" % (lhs, num_row))
        knees = (str(knees[0]), sp.S(knees[1]))
        row = (((knees),thorax),head)
        rows.append(row)
    args = '(%s)' % lhs.split('[')[-1].split(']')[0]
    return sp.S(args), rows

def parse_table(fname):
    '''
    Put everything together to parse
    EnergyMatrixTable, EnergyStatesTable, and AllowedM.

    Parameters
    ----------
    fname   (str): Filename of the file with the Mathematica defs.

    Returns
    -------
    {'EnergyMatrixTables': EnergyMatrixTables,
     'EnergyStatesTable': EnergyStatesTable,
     'AllowedM': AllowedM,
     'EnergyMatrixStrings': EnergyMatrixStrings}
    with
    EnergyMatrixTables    (dict): Keys are tuples () values are symbolic sp.Matrix
    EnergyStatesTable     (dict): Keys are tuples () values are lists
    AlloweM               (dict): Key is an integer corresponding to __, values are
    EnergyMatrixStrings   (dict): Keys are tuples () values are the original strings.

    '''
    clear_lanthanum = lanthanum_cleanup(fname)
    EnergyMatrixTables = {}
    EnergyMatrixStrings = {}
    counter = 0
    for cl in clear_lanthanum:
        if 'EnergyMatrixTable' in cl:
            print('.',end='|')
            pre = cl.split(' =')[0]
            # print(pre)
            # if pre != "EnergyMatrixTable[2, 4, 2, 1/2, 1/2]":
            #     continue
            parse = parse_mathematica_matrix(cl)
            args = sp.S(parse[0].split('[')[-1].split(']')[0])
            EnergyMatrixTables[args] = parse[1].subs(master_rep)
            EnergyMatrixStrings[args] = parse[2]
            counter += 1
            # if counter == 5:
            #     break
    EnergyStatesTable = {}
    for cl in clear_lanthanum:
        if 'EnergyStatesTable' in cl:
            print('.',end='|')
            parse = parse_energy_states_table(cl)
            args = parse[0]
            EnergyStatesTable[args] = parse[1]
    AllowedM = {}
    for cl in clear_lanthanum:
        if 'AllowedM' in cl:
            print('.',end='|')
            parse = parse_allowed_m(cl)
            AllowedM[parse[0]] = parse[1]
    return {'EnergyMatrixTables': EnergyMatrixTables,
            'EnergyStatesTable': EnergyStatesTable,
            'AllowedM': AllowedM,
            'EnergyMatrixStrings': EnergyMatrixStrings}

def rational_simplify(sympy_expr, N=10000):
    '''
    Given a sympy expression this function takes it and
    finds rational  approximations (perhaps including a
    square root).

    Example
    -------

    >> rational_simplify(2.31099*sp.Symbol('x') - 1.14)
    >>> 9 * sqrt(546) * x / 91 - sqrt(130)/10
    '''
    sympy_dict = sympy_expr.as_coefficients_dict()
    for k,v in sympy_dict.items():
        if isinstance(v, sp.core.numbers.Float):
            n = N
            simpler = square_rational_approx(v, n)
            # If the thing was approximated to zero
            # escalate the precision.
            while simpler == 0:
                n = 10*n
                simpler = square_rational_approx(v, n)
            sympy_dict[k] = simpler
    total = sum([k*v for k,v in sympy_dict.items()])
    return total

def rational_approx(x, N):
    '''
    Given  a number x this function returns a fraction
    that approximates it with a denominator that could
    be as large as N.
    '''
    if (int(x) == x):
        return sp.S(int(x))
    sign = 1
    if x < 0:
        sign = -1
        x = -x
    if x > 1:
        ix, dx = int(x), x - int(x)
    else:
        ix = 0
        dx = x
    exponent = -floor(log10(float(dx)))
    tens_multiplier = int(exponent-1)
    dx = dx*(10**tens_multiplier)
    divider = 1/(sp.S(10)**(sp.S(tens_multiplier)))
    sign = sign
    a, b = 0, 1
    c, d = 1, 1
    while (b <= N and d <= N):
        mediant = float(a+c)/(b+d)
        if dx == mediant:
            if b + d <= N:
                return sign*(sp.S(ix)+divider*sp.S(a+c)/sp.S(b+d))
            elif d > b:
                return sign*(sp.S(ix)+divider*sp.S(c)/sp.S(d))
            else:
                return sign*(sp.S(ix)+divider*sp.S(a)/sp.S(b))
        elif dx > mediant:
            a, b = a+c, b+d
        else:
            c, d = a+c,b+d
    if (b > N):
        return sign*(divider*sp.S(c)/sp.S(d) + sp.S(ix))
    else:
        return sign*(divider*sp.S(a)/sp.S(b) + sp.S(ix))

def square_rational_approx(x, N):
    '''
    Given a number x this algorithm finds the best  rational
    approximation to its square, and then returns the signed
    square root of that.
    '''
    if x < 0:
        sign = -1
        x = -x
    else:
        sign = 1
    y = x*x
    return sign*sp.sqrt(rational_approx(y,N))

In [671]:
parsed = parse_table('./data/lanthanide_tables/HFEnergyMatrixTables copy 2')

.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|.|

In [674]:
eval("sp.core.add.Add(sp.Symbol('x'), sp.Symbol('y'), sp.core.mul.Mul(2, sp.Symbol('z')))")

x + y + 2*z

In [698]:
list(1,2,3)

TypeError: list expected at most 1 argument, got 3

In [702]:
def SubscriptSymbol(a,b):
    return sp.Symbol("%s_{%d}" % (a,int(b)))
sp.Rational = lambda x,y: sp.S(x)/sp.S(y)
sp.SubscriptSymbol = SubscriptSymbol
Pi = sp.pi
Complex = lambda x,y: (sp.S(x) + sp.I * sp.S(y))
def slist(*args):
    return list([*args])

In [706]:
mah = eval("slist(slist(sp.core.add.Add(sp.core.mul.Mul(sp.Rational(14, \
13), sp.SubscriptSymbol('x', 19)), \
sp.core.mul.Mul(0.6923076923076925, sp.SubscriptSymbol('x', 20)), \
sp.core.mul.Mul(33., sp.SubscriptSymbol('x', 22)), \
sp.core.mul.Mul(sp.Rational(37, 6), sp.SubscriptSymbol('x', 26)), \
sp.core.mul.Mul(sp.Rational(97, 6), sp.SubscriptSymbol('x', 27)), \
sp.core.mul.Mul(sp.Rational(1205, 66), sp.SubscriptSymbol('x', 28)), \
sp.core.mul.Mul(sp.Rational(43, 540), sp.SubscriptSymbol('x', 29)), \
sp.core.mul.Mul(sp.Rational(19, 1188), sp.SubscriptSymbol('x', 30)), \
sp.core.mul.Mul(sp.Rational(-875, 15444), sp.SubscriptSymbol('x', \
31)), sp.core.mul.Mul(2, sp.SubscriptSymbol('x', 44)), \
sp.SubscriptSymbol('x', 45), sp.core.mul.Mul(sp.Rational(1, 2), \
sp.SubscriptSymbol('x', 6), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)), sp.SubscriptSymbol('x', 48), \
sp.core.mul.Mul(-1, sp.SubscriptSymbol('x', 49))), \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(-1, 2), \
sp.SubscriptSymbol('x', 4), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)), sp.core.mul.Mul(Complex(0, \
sp.Rational(1, 2)), sp.SubscriptSymbol('x', 5), \
sp.SubscriptSymbol('x', 24), sp.SubscriptSymbol('x', 47))), \
sp.core.add.Add(sp.core.mul.Mul(-17, sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 26)), \
sp.core.mul.Mul(-1, sp.core.power.Pow(3, sp.Rational(-1, 2)), \
sp.SubscriptSymbol('x', 27)), sp.core.mul.Mul(sp.Rational(-5, 11), \
sp.core.power.Pow(3, sp.Rational(-1, 2)), sp.SubscriptSymbol('x', \
28)), sp.core.mul.Mul(sp.Rational(49, 90), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 29)), \
sp.core.mul.Mul(sp.Rational(49, 198), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 30)), \
sp.core.mul.Mul(sp.Rational(175, 2574), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 31)), \
sp.core.mul.Mul(-2, sp.core.power.Pow(3, sp.Rational(1, 2)), \
sp.SubscriptSymbol('x', 49))), 0), \
slist(sp.core.add.Add(sp.core.mul.Mul(sp.Rational(-1, 2), \
sp.SubscriptSymbol('x', 4), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)), sp.core.mul.Mul(Complex(0, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 5), \
sp.SubscriptSymbol('x', 24), sp.SubscriptSymbol('x', 47))), \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(14, 13), \
sp.SubscriptSymbol('x', 19)), sp.core.mul.Mul(0.6923076923076925, \
sp.SubscriptSymbol('x', 20)), sp.core.mul.Mul(33., \
sp.SubscriptSymbol('x', 22)), sp.core.mul.Mul(sp.Rational(37, 6), \
sp.SubscriptSymbol('x', 26)), sp.core.mul.Mul(sp.Rational(97, 6), \
sp.SubscriptSymbol('x', 27)), sp.core.mul.Mul(sp.Rational(1205, 66), \
sp.SubscriptSymbol('x', 28)), sp.core.mul.Mul(sp.Rational(43, 540), \
sp.SubscriptSymbol('x', 29)), sp.core.mul.Mul(sp.Rational(19, 1188), \
sp.SubscriptSymbol('x', 30)), sp.core.mul.Mul(sp.Rational(-875, \
15444), sp.SubscriptSymbol('x', 31)), sp.core.mul.Mul(2, \
sp.SubscriptSymbol('x', 44)), sp.SubscriptSymbol('x', 45), \
sp.core.mul.Mul(sp.Rational(-1, 2), sp.SubscriptSymbol('x', 6), \
sp.SubscriptSymbol('x', 24), sp.SubscriptSymbol('x', 47)), \
sp.SubscriptSymbol('x', 48), sp.core.mul.Mul(-1, \
sp.SubscriptSymbol('x', 49))), 0, \
sp.core.add.Add(sp.core.mul.Mul(-17, sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 26)), \
sp.core.mul.Mul(-1, sp.core.power.Pow(3, sp.Rational(-1, 2)), \
sp.SubscriptSymbol('x', 27)), sp.core.mul.Mul(sp.Rational(-5, 11), \
sp.core.power.Pow(3, sp.Rational(-1, 2)), sp.SubscriptSymbol('x', \
28)), sp.core.mul.Mul(sp.Rational(49, 90), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 29)), \
sp.core.mul.Mul(sp.Rational(49, 198), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 30)), \
sp.core.mul.Mul(sp.Rational(175, 2574), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 31)), \
sp.core.mul.Mul(-2, sp.core.power.Pow(3, sp.Rational(1, 2)), \
sp.SubscriptSymbol('x', 49)))), \
slist(sp.core.add.Add(sp.core.mul.Mul(-17, sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 26)), \
sp.core.mul.Mul(-1, sp.core.power.Pow(3, sp.Rational(-1, 2)), \
sp.SubscriptSymbol('x', 27)), sp.core.mul.Mul(sp.Rational(-5, 11), \
sp.core.power.Pow(3, sp.Rational(-1, 2)), sp.SubscriptSymbol('x', \
28)), sp.core.mul.Mul(sp.Rational(49, 90), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 29)), \
sp.core.mul.Mul(sp.Rational(49, 198), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 30)), \
sp.core.mul.Mul(sp.Rational(175, 2574), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 31)), \
sp.core.mul.Mul(-2, sp.core.power.Pow(3, sp.Rational(1, 2)), \
sp.SubscriptSymbol('x', 49))), 0, \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(14, 13), \
sp.SubscriptSymbol('x', 19)), sp.core.mul.Mul(9.692307692307692, \
sp.SubscriptSymbol('x', 20)), sp.core.mul.Mul(sp.Rational(1, 2), \
sp.SubscriptSymbol('x', 6), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47))), \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(-1, 2), \
sp.SubscriptSymbol('x', 4), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)), sp.core.mul.Mul(Complex(0, \
sp.Rational(1, 2)), sp.SubscriptSymbol('x', 5), \
sp.SubscriptSymbol('x', 24), sp.SubscriptSymbol('x', 47)))), slist(0, \
sp.core.add.Add(sp.core.mul.Mul(-17, sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 26)), \
sp.core.mul.Mul(-1, sp.core.power.Pow(3, sp.Rational(-1, 2)), \
sp.SubscriptSymbol('x', 27)), sp.core.mul.Mul(sp.Rational(-5, 11), \
sp.core.power.Pow(3, sp.Rational(-1, 2)), sp.SubscriptSymbol('x', \
28)), sp.core.mul.Mul(sp.Rational(49, 90), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 29)), \
sp.core.mul.Mul(sp.Rational(49, 198), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 30)), \
sp.core.mul.Mul(sp.Rational(175, 2574), sp.core.power.Pow(3, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 31)), \
sp.core.mul.Mul(-2, sp.core.power.Pow(3, sp.Rational(1, 2)), \
sp.SubscriptSymbol('x', 49))), \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(-1, 2), \
sp.SubscriptSymbol('x', 4), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)), sp.core.mul.Mul(Complex(0, \
sp.Rational(-1, 2)), sp.SubscriptSymbol('x', 5), \
sp.SubscriptSymbol('x', 24), sp.SubscriptSymbol('x', 47))), \
sp.core.add.Add(sp.core.mul.Mul(sp.Rational(14, 13), \
sp.SubscriptSymbol('x', 19)), sp.core.mul.Mul(9.692307692307692, \
sp.SubscriptSymbol('x', 20)), sp.core.mul.Mul(sp.Rational(-1, 2), \
sp.SubscriptSymbol('x', 6), sp.SubscriptSymbol('x', 24), \
sp.SubscriptSymbol('x', 47)))))")

In [707]:
sp.Matrix(mah)

Matrix([
[14*x_{19}/13 + 0.692307692307693*x_{20} + 33.0*x_{22} + x_{24}*x_{47}*x_{6}/2 + 37*x_{26}/6 + 97*x_{27}/6 + 1205*x_{28}/66 + 43*x_{29}/540 + 19*x_{30}/1188 - 875*x_{31}/15444 + 2*x_{44} + x_{45} + x_{48} - x_{49},                                                                                                                                                                     -x_{24}*x_{47}*x_{4}/2 + I*x_{24}*x_{47}*x_{5}/2, -17*sqrt(3)*x_{26}/3 - sqrt(3)*x_{27}/3 - 5*sqrt(3)*x_{28}/33 + 49*sqrt(3)*x_{29}/270 + 49*sqrt(3)*x_{30}/594 + 175*sqrt(3)*x_{31}/7722 - 2*sqrt(3)*x_{49},                                                                                                                                                          0],
[                                                                                                                                                                    -x_{24}*x_{47}*x_{4}/2 - I*x_{24}*x_{47}*x_{5}/2, 14*x_{19}/13 + 0.692307692307693*x

In [676]:
eval("sp.core.add.Add(sp.core.mul.Mul(Rational(14, 13), Subscript(x, 19)), \
sp.core.mul.Mul(0.6923076923076925`, Subscript(x, 20)), \
sp.core.mul.Mul(33.`, Subscript(x, 22)), sp.core.mul.Mul(Rational(37, \
6), Subscript(x, 26)), sp.core.mul.Mul(Rational(97, 6), Subscript(x, \
27)), sp.core.mul.Mul(Rational(1205, 66), Subscript(x, 28)), \
sp.core.mul.Mul(Rational(43, 540), Subscript(x, 29)), \
sp.core.mul.Mul(Rational(19, 1188), Subscript(x, 30)), \
sp.core.mul.Mul(Rational(-875, 15444), Subscript(x, 31)), \
sp.core.mul.Mul(2, Subscript(x, 44)), Subscript(x, 45), \
sp.core.mul.Mul(Rational(1, 2), Subscript(x, 6), Subscript(x, 24), \
Subscript(x, 47)), Subscript(x, 48), sp.core.mul.Mul(-1, Subscript(x, \
49)))")

SyntaxError: invalid syntax (<string>, line 1)

In [668]:
!ls ./data/lanthanide_tables/

[31mHFEnergyMatrixTables[m[m        [31mHFEnergyMatrixTables copy 2[m[m
[31mHFEnergyMatrixTables copy[m[m   [31mfirst_table.pkl[m[m


# Parsing the CForm

In [662]:
import re
from sympy.parsing.latex import parse_latex
import sympy as sp
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
import pickle
from random import random
from math import log10, floor

session=WolframLanguageSession()

session.evaluate(r'''vars = {B02, B04, B06, B0x, B0y, B0z, B12, B14, B16, B22, B24, B26, 
   B34, B36, B44, B46, B56, B66, E0, E1, E2, E3, eOrbitalRad, gI, gs, 
   M0, M2, M4, P2, P4, P6, S12, S14, S16, S22, S24, S26, S34, S36, 
   S44, S46, S56, 
   S66, \[Alpha], \[Beta], \[Beta]BohrMag, \[Beta]n, \[Gamma], \
\[Zeta]};
svars = Table[
   ToExpression[SubscriptBox["x", ToString[i]]], {i, 1, Length[vars]}];
reps = (#[[1]] -> #[[2]]) & /@ Transpose[{vars, svars}];
ParseSymbol[thing_] :=
 (str = ToString[Chop[thing] /. reps, CForm];
  Return[str]
  )''')

# NB: The order here needs to match the order
# of variable assignment in Mathematica

base_rep=[
('B02',sp.Symbol('B_{0,2}')),('B04',sp.Symbol('B_{0,4}')),
('B06',sp.Symbol('B_{0,6}')),('B0x',sp.Symbol('B_{0,x}')),
('B0y',sp.Symbol('B_{0,y}')),('B0z',sp.Symbol('B_{0,z}')),
('B12',sp.Symbol('B_{1,2}')),('B14',sp.Symbol('B_{1,4}')),
('B16',sp.Symbol('B_{1,6}')),('B22',sp.Symbol('B_{2,2}')),
('B24',sp.Symbol('B_{2,4}')),('B26',sp.Symbol('B_{2,6}')),
('B34',sp.Symbol('B_{3,4}')),('B36',sp.Symbol('B_{3,6}')),
('B44',sp.Symbol('B_{4,4}')),('B46',sp.Symbol('B_{4,6}')),
('B56',sp.Symbol('B_{5,6}')),('B66',sp.Symbol('B_{6,6}')),
('E0',sp.Symbol('E_{0}')),('E1',sp.Symbol('E_{1}')),
('E2',sp.Symbol('E_{2}')),('E3',sp.Symbol('E_{3}')),
('eOrbitalRad',sp.Symbol(r'\epsilon')),('gI',sp.Symbol('g_{I}')),
('gs',sp.Symbol('g_{s}')),('M0',sp.Symbol('M_{0}')),
('M2',sp.Symbol('M_{2}')),('M4',sp.Symbol('M_{4}')),
('P2',sp.Symbol('P_{2}')),('P4',sp.Symbol('P_{4}')),
('P6',sp.Symbol('P_{6}')),('S12',sp.Symbol('S_{1,2}')),
('S14',sp.Symbol('S_{1,4}')),('S16',sp.Symbol('S_{1,6}')),
('S22',sp.Symbol('S_{2,2}')),('S24',sp.Symbol('S_{2,4}')),
('S26',sp.Symbol('S_{2,6}')),('S34',sp.Symbol('S_{3,4}')),
('S36',sp.Symbol('S_{3,6}')),('S44',sp.Symbol('S_{4,4}')),
('S46',sp.Symbol('S_{4,6}')),('S56',sp.Symbol('S_{5,6}')),
('S66',sp.Symbol('S_{6,6}')),('\[Alpha]',sp.Symbol(r'\alpha')),
('\[Beta]',sp.Symbol(r'\beta')),('\[Beta]BohrMag',sp.Symbol(r'\mu_{B,e}')),
('\[Beta]n',sp.Symbol(r'\mu_{B,n}')),('\[Gamma]',sp.Symbol(r'\gamma')),
('\[Zeta]',sp.Symbol(r'\zeta'))]

master_rep = {}
for idx, it in enumerate(base_rep):
    key = sp.Symbol('x_{%d}' % (idx+1))
    master_rep[key] = it[1]
inverse_rep = {v:k for k,v in master_rep.items()}

In [663]:
def lanthanum_cleanup(fname):
    '''
    Data  file  might have large redundancies, this opens it and removes
    all    redundant   definitions;   it   assumes   that   there's   no
    inconsistencies between them, if there is then it fails.
    
    More  importantly  it puts together all the lines that relate to one
    definition in just one string with no newlines. 

    This is assuming that the file only contains definitions for:
       EnergyMatrixTable, AllowedM, and EnergyStatesTable

    Parameters
    ----------
    fname   (str): file name of file to be parsed

    Returns
    -------
    clean_output  (list): a list of strings each with a single definition.
    '''
    lanthanum = [l.strip().split(' =')[0] for l in open(fname,'r').readlines() if l[0] != ' ']
    lanthanum = list(filter(lambda x: x != '', lanthanum))
    # this dictionary will have as keys the lhs of definitions
    # and as values will be lists of strings that all attempt
    # to define this symbol
    rhs = {}
    full_lanthanum = [l.strip() for l in open(fname,'r').readlines()]
    for line_idx, line in enumerate(full_lanthanum):
        if line_idx < len(full_lanthanum)-1:
            next_line = full_lanthanum[line_idx+1]
        if 'Attributes[Null]' in line:
            continue
        if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line):
            key = line.split('=')[0].strip()
            chunks = []
            try:
                first_chunk = line.split('=')[1]
            except:
                first_chunk = ''
            chunks.append(first_chunk)
            if key not in rhs:
                rhs[key] = []
            continue
        if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line):
            chunks.append(line.strip())
            whole_chunk = ''.join(chunks).strip()
            if whole_chunk != '':
                rhs[key].append(whole_chunk)
            chunks = []
        else:
            chunks.append(line.strip())
    # now to see if there are any redundancies
    clean_output = []
    for k,v in rhs.items():
        unique = list(set(v))
        assert len(unique) == 1, "There should be only one, this is a loopy file."
        unique = unique[0]
        out = '%s = %s' % (k, unique)
        clean_output.append(out)
    return clean_output

def parse_it(astr):
    astr = cleanup(astr)
    return sp.expand((parse_latex(astr)))

def cleanup(parse):
    '''
    Some basic cleanup to avoid some errors
    when using parse_latex.
    '''
    parse = re.sub(r'Subscript\(x,(\d{1,2})\)', r'x_{\1}', parse)
    parse = re.sub(r'Complex\((.*?),(.*?)\)',r'(\1+i*(\2))',parse)
    parse = re.sub(r'Sqrt\((.*?)\)', r'sqrt{\1}', parse).replace('sqrt','\sqrt').replace('.*','*').replace('.)',')')
    parse = re.sub(r'(\d\.[\d]+)e([-]{0,1}\d)',r'(\1*10^{\2})', parse)
    parse = parse + ' '
    reps = [('. ','')]
    for rep in reps:
        parse = parse.replace(*rep)
    return parse

def parse_energy_matrix_table(astr):
    '''
    Parse a string that contains the definition for a symbolic matrix.

    Parameters
    ----------
    astr    (str): A definition of the sort EnergyMatrixTable[_,_,_,_,_] = {{...},{...},...}

    Returns
    -------
    lhs, parsed_matrix, rhs
    lhs                 (str): The LHS of the matrix definition
    parsed_matrix (sp.Matrix): The parsed symbolic matrix.
    rhs                 (str): The RHS of the matrix definition
    '''
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows and columns
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    # Iterate through each element and parse it
    for num_row in range(1,num_rows+1):
        row = []
        for num_col in range(1, num_cols+1):
            parse = session.evaluate("ParseSymbol[%s[[%d,%d]]]" % (lhs, num_row, num_col))
            # Clean up the string before parsing with Sympy
            parse = cleanup(parse)
            parsed = sp.expand(parse_latex(parse)).subs(sp.Symbol('i'),sp.I)
            # The imaginary unit needs to be dealt with separately
            parsed = parsed.subs(sp.Symbol('i'),sp.I)
            row.append(parsed)
        parsed_matrix.append(row)
    parsed_matrix = sp.Matrix(parsed_matrix)
    return lhs, parsed_matrix, rhs

def parse_allowed_m(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_lists = tuple(session.evaluate("Dimensions[%s]" % lhs))[0]
    M_value = int(lhs.split('[')[-1].split(']')[0])
    rows = []
    for list_index in range(1,num_lists+1):
        the_list = session.evaluate("%s[[%d]]" % (lhs, list_index))
        try:
            the_row = [sp.S(x[0])/sp.S(x[1]) for x in the_list]
        except:
            the_row = [sp.S(x) for x in the_list]
        rows.append(the_row)
    return M_value, rows  

def parse_energy_states_table(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    rows = []
    for num_row in range(1,num_rows+1):
        head = session.evaluate("%s[[%d]][[-1]]" % (lhs, num_row))
        try:
            head = sp.S(head[0])/sp.S(head[1])
        except:
            head = sp.S(head)
        thorax = session.evaluate("%s[[%d]][[1]][[-1]]" % (lhs, num_row))
        try:
            thorax = sp.S(thorax[0])/sp.S(thorax[1])
        except:
            thorax = sp.S(thorax)
        knees = session.evaluate("%s[[%d]][[1]][[1]]" % (lhs, num_row))
        knees = (str(knees[0]), sp.S(knees[1]))
        row = (((knees),thorax),head)
        rows.append(row)
    args = '(%s)' % lhs.split('[')[-1].split(']')[0]
    return sp.S(args), rows

def parse_table(fname):
    '''
    Put everything together to parse
    EnergyMatrixTable, EnergyStatesTable, and AllowedM.

    Parameters
    ----------
    fname   (str): Filename of the file with the Mathematica defs.

    Returns
    -------
    {'EnergyMatrixTables': EnergyMatrixTables,
     'EnergyStatesTable': EnergyStatesTable,
     'AllowedM': AllowedM,
     'EnergyMatrixStrings': EnergyMatrixStrings}
    with
    EnergyMatrixTables    (dict): Keys are tuples () values are symbolic sp.Matrix
    EnergyStatesTable     (dict): Keys are tuples () values are lists
    AlloweM               (dict): Key is an integer corresponding to __, values are
    EnergyMatrixStrings   (dict): Keys are tuples () values are the original strings.

    '''
    clear_lanthanum = lanthanum_cleanup(fname)
    EnergyMatrixTables = {}
    EnergyMatrixStrings = {}
    counter = 0
    for cl in clear_lanthanum:
        if 'EnergyMatrixTable' in cl:
            print('.',end='|')
            pre = cl.split(' =')[0]
            # print(pre)
            # if pre != "EnergyMatrixTable[2, 4, 2, 1/2, 1/2]":
            #     continue
            parse = parse_energy_matrix_table(cl)
            args = sp.S(parse[0].split('[')[-1].split(']')[0])
            EnergyMatrixTables[args] = parse[1].subs(master_rep)
            EnergyMatrixStrings[args] = parse[2]
            counter += 1
            # if counter == 5:
            #     break
    EnergyStatesTable = {}
    for cl in clear_lanthanum:
        if 'EnergyStatesTable' in cl:
            print('.',end='|')
            parse = parse_energy_states_table(cl)
            args = parse[0]
            EnergyStatesTable[args] = parse[1]
    AllowedM = {}
    for cl in clear_lanthanum:
        if 'AllowedM' in cl:
            print('.',end='|')
            parse = parse_allowed_m(cl)
            AllowedM[parse[0]] = parse[1]
    return {'EnergyMatrixTables': EnergyMatrixTables,
            'EnergyStatesTable': EnergyStatesTable,
            'AllowedM': AllowedM,
            'EnergyMatrixStrings': EnergyMatrixStrings}

def rational_simplify(sympy_expr, N=10000):
    '''
    Given a sympy expression this function takes it and
    finds rational  approximations (perhaps including a
    square root).

    Example
    -------

    >> rational_simplify(2.31099*sp.Symbol('x') - 1.14)
    >>> 9 * sqrt(546) * x / 91 - sqrt(130)/10
    '''
    sympy_dict = sympy_expr.as_coefficients_dict()
    for k,v in sympy_dict.items():
        if isinstance(v, sp.core.numbers.Float):
            n = N
            simpler = square_rational_approx(v, n)
            # If the thing was approximated to zero
            # escalate the precision.
            while simpler == 0:
                n = 10*n
                simpler = square_rational_approx(v, n)
            sympy_dict[k] = simpler
    total = sum([k*v for k,v in sympy_dict.items()])
    return total

def rational_approx(x, N):
    '''
    Given  a number x this function returns a fraction
    that approximates it with a denominator that could
    be as large as N.
    '''
    if (int(x) == x):
        return sp.S(int(x))
    sign = 1
    if x < 0:
        sign = -1
        x = -x
    if x > 1:
        ix, dx = int(x), x - int(x)
    else:
        ix = 0
        dx = x
    exponent = -floor(log10(float(dx)))
    tens_multiplier = int(exponent-1)
    dx = dx*(10**tens_multiplier)
    divider = 1/(sp.S(10)**(sp.S(tens_multiplier)))
    sign = sign
    a, b = 0, 1
    c, d = 1, 1
    while (b <= N and d <= N):
        mediant = float(a+c)/(b+d)
        if dx == mediant:
            if b + d <= N:
                return sign*(sp.S(ix)+divider*sp.S(a+c)/sp.S(b+d))
            elif d > b:
                return sign*(sp.S(ix)+divider*sp.S(c)/sp.S(d))
            else:
                return sign*(sp.S(ix)+divider*sp.S(a)/sp.S(b))
        elif dx > mediant:
            a, b = a+c, b+d
        else:
            c, d = a+c,b+d
    if (b > N):
        return sign*(divider*sp.S(c)/sp.S(d) + sp.S(ix))
    else:
        return sign*(divider*sp.S(a)/sp.S(b) + sp.S(ix))

def square_rational_approx(x, N):
    '''
    Given a number x this algorithm finds the best  rational
    approximation to its square, and then returns the signed
    square root of that.
    '''
    if x < 0:
        sign = -1
        x = -x
    else:
        sign = 1
    y = x*x
    return sign*sp.sqrt(rational_approx(y,N))

In [None]:
clear_lanthanum = lanthanum_cleanup('./data/lanthanide_tables/HFEnergyMatrixTables copy 2')
EnergyMatrixTables = {}
for cl in clear_lanthanum:
    if 'EnergyMatrixTable' in cl:
        print('.',end='|')
        pre = cl.split(' =')[0]
        # print(pre)
        # if pre != "EnergyMatrixTable[2, 4, 2, 1/2, 1/2]":
        #     continue
        parse = parse_mathematica_matrix(cl)
        args = sp.S(parse[0].split('[')[-1].split(']')[0])
        EnergyMatrixTables[args] = parse[1].subs(master_rep)
        EnergyMatrixStrings[args] = parse[2]
        counter += 1
        # if counter == 5:
        #     break

In [650]:
'%s[[%d,%d]]' % (lhs, num_row, num_col)

NameError: name 'lhs' is not defined

In [None]:

expr0 = str(expr)
# Parse rational first
expr1 = re.sub(r'Rational\[(.*?),(.*?)\]',
    lambda x: 'sp.S(%s) / sp.S(%s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr0)
# Then parse power
expr2 = re.sub(r'Power\[(.*?),(.*?)\]',
    lambda x: 'sp.core.power.Pow(%s; %s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr1)
# Parse mul second
expr3 = re.sub(r'Times\[(.*?)\]',
    lambda x: 'sp.core.mul.Mul(%s)' % parse_multi(x.group(1)), expr2)
# Parse add final
expr4 = re.sub(r'Plus\[(.*?)\]',
    lambda x: 'sp.core.add.Add(%s)' % parse_multi(x.group(1)), expr3)
exprfinal = expr4.replace(';',',')

In [503]:
expr0 = str(expr)
# Parse power first
expr1 = re.sub(r'Rational\[(.*?),(.*?)\]',
    lambda x: 'sp.S(%s) / sp.S(%s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr0)
# expr2 = 
expr2 = re.sub(r'Power\[(.*?),(.*?)\]',
    lambda x: 'sp.core.power.Pow(%s; %s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr1)
# Parse mul second
expr3 = re.sub(r'Times\[(.*?)\]',
    lambda x: 'sp.core.mul.Mul(%s)' % parse_multi(x.group(1)), expr2)
# Parse add final
expr4 = re.sub(r'Plus\[(.*?)\]',
    lambda x: 'sp.core.add.Add(%s)' % parse_multi(x.group(1)), expr3)
exprfinal = expr4.replace(';',',')

In [504]:
expr

'Plus[0., Times[Rational[14, 13], E0], Times[0.6923076923076925, E1], Times[33., E3], Times[Rational[37, 6], M0], Times[Rational[97, 6], M2], Times[Rational[1205, 66], M4], Times[Rational[43, 540], P2], Times[Rational[19, 1188], P4], Times[Rational[-875, 15444], P6], Times[2, \\\\[Alpha]], \\\\[Beta], Times[Rational[1, 2], B0z, gI, \\\\[Beta]n], \\\\[Gamma], Times[-1, \\\\[Zeta]]]'

In [505]:
exprfinal

'sp.core.add.Add(0.,sp.core.mul.Mul(sp.S(14) / sp.S(13),E0),sp.core.mul.Mul(0.6923076923076925,E1),sp.core.mul.Mul(33.,E3),sp.core.mul.Mul(sp.S(37) / sp.S(6),M0),sp.core.mul.Mul(sp.S(97) / sp.S(6),M2),sp.core.mul.Mul(sp.S(1205) / sp.S(66),M4),sp.core.mul.Mul(sp.S(43) / sp.S(540),P2),sp.core.mul.Mul(sp.S(19) / sp.S(1188),P4),sp.core.mul.Mul(sp.S(-875) / sp.S(15444),P6),sp.core.mul.Mul(2,\\\\[Alpha)), \\\\[Beta], sp.core.mul.Mul(sp.S(1) / sp.S(2),B0z,gI,\\\\[Beta)n], \\\\[Gamma], sp.core.mul.Mul(-1,\\\\[Zeta)]]'

In [388]:
# expr0 = str(expr)
# # Parse power first
# expr1 = re.sub(r'Rational\[(.*?),(.*?)\]',
#     lambda x: 'sp.S(%s) / sp.S(%s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr0)
# # expr2 = 
# expr2 = re.sub(r'Power\[(.*?),(.*?)\]',
#     lambda x: 'sp.core.power.Pow(%s; %s)' % (pa(x.groups()[0]), pa(x.groups()[1])), expr1)
# # Parse mul second
# expr3 = re.sub(r'Times\[(.*?)\]',
#     lambda x: 'sp.core.mul.Mul(%s)' % parse_multi(x.group(1)), expr2)
# # Parse add final
# expr4 = re.sub(r'Plus\[(.*?)\]',
#     lambda x: 'sp.core.add.Add(%s)' % parse_multi(x.group(1)), expr3)
# exprfinal = expr4.replace(';',',')

In [389]:
parse_Plus('Plus[1,2,3]')

'Plus[1;2;3]'

In [393]:
re.sub(r'Plus\[(.*?)]',
    lambda x: 'sp.core.add.Add(%s)' % parse_Plus(x.group(1)), 'Plus[1,2,3,5,6,7]')

'sp.core.add.Add(1;2;3;5;6;7)'

In [379]:
expr3

'sp.core.mul.Mul(sp.S(1) / sp.S(22); Pi, sp.core.power.Pow(sp.Symbol("t"); -1))'

In [380]:
expr4

'sp.core.mul.Mul(sp.S(1) / sp.S(22); Pi, sp.core.power.Pow(sp.Symbol("t"); -1))'

In [164]:

expr1 = (re.sub(r'Power\[(.*?),(.*?)\]',
    r'sp.core.power.Pow(sp.Symbol("\1"),\2)',
    expr0))
expr2 = (re.sub(r'Times\[(.*?),(.*?)\]',
    r'sp.core.mul.Mul(\1,\2)',
    expr1))

In [None]:
parsed_table = parse_table('/Volumes/GoogleDrive/My Drive/Zia Lab/Codebase/qdef/data/lanthanide_tables/HFEnergyMatrixTables copy 2')

In [None]:
print("Simplifying numeric coefficients ...")
for k,v in parsed_table['EnergyMatrixTables'].items():
    num_rows = v.rows
    num_cols = v.cols
    for num_row in range(num_rows):
        for num_col in range(num_cols):
            v[num_row,num_col] = sp.expand(rational_simplify(v[num_row,num_col]))
print("Validating parsing by stochastic evaluation ...")
diffs = {}
for k,v in parsed_table['EnergyMatrixTables'].items():
    free_symbs = v.free_symbols
    free_symbs_values = {v: random() for v in free_symbs}
    mathematica_values = {inverse_rep[k]:v for k,v in free_symbs_values.items()}
    num_try = sp.N(v.subs(free_symbs_values))
    energyMatrixString = parsed_table['EnergyMatrixStrings'][k]
    mathematica_subs = ', '.join(['Subscript[x,%s] -> %s' % (str(str(k).split('{')[-1].split('}')[0]), str(v)) for k,v in mathematica_values.items()])
    mathematica_subs = '{%s}' % mathematica_subs
    mathematica_try = sp.Matrix(session.evaluate('Re[Chop[(%s /. reps)] /. %s]' % (energyMatrixString, mathematica_subs))) +\
                 sp.I*sp.Matrix(session.evaluate('Im[Chop[(%s /. reps)] /. %s]' % (energyMatrixString, mathematica_subs))) 
    diff_mat = (mathematica_try - num_try)
    mathematica_norm = mathematica_try.norm()
    if mathematica_norm == 0:
        diffs[k] = diff_mat.norm()
    else:
        diffs[k] = diff_mat.norm()/mathematica_norm
max_diff = max(diffs.values())
print("Max difference = {:e}".format(max_diff))
assert(max_diff < 1e-6)

In [26]:
!beep silent

silence


In [37]:
pickle.dump(parsed_table,open('./data/lanthanide_tables/first_table.pkl','wb'))

In [None]:
# print("Simplifying numeric coefficients ...")
# for k,v in parsed_table['EnergyMatrixTables'].items():
#     num_rows = v.rows
#     num_cols = v.cols
#     for num_row in range(num_rows):
#         for num_col in range(num_cols):
#             v[num_row,num_col] = sp.simplify(rational_simplify(v[num_row,num_col]))
# print("Validating parsing by stochastic evaluation ...")
# diffs = {}
# for k,v in parsed_table['EnergyMatrixTables'].items():
#     free_symbs = v.free_symbols
#     free_symbs_values = {v: random() for v in free_symbs}
#     mathematica_values = {inverse_rep[k]:v for k,v in free_symbs_values.items()}
#     num_try = sp.N(v.subs(free_symbs_values))
#     energyMatrixString = parsed_table['EnergyMatrixStrings'][k]
#     mathematica_subs = ', '.join(['Subscript[x,%s] -> %s' % (str(str(k).split('{')[-1].split('}')[0]), str(v)) for k,v in mathematica_values.items()])
#     mathematica_subs = '{%s}' % mathematica_subs
#     mathematica_try = sp.Matrix(session.evaluate('Re[(%s /. reps) /. %s]' % (energyMatrixString, mathematica_subs))) +\
#                  sp.I*sp.Matrix(session.evaluate('Im[(%s /. reps) /. %s]' % (energyMatrixString, mathematica_subs))) 
#     diff_mat = (mathematica_try - num_try)
#     mathematica_norm = mathematica_try.norm()
#     if mathematica_norm == 0:
#         diffs[k] = diff_mat.norm()
#     else:
#         diffs[k] = diff_mat.norm()/mathematica_norm
# max_diff = max(diffs.values())
# print("Max difference = {:e}".format(max_diff))
# assert(max_diff < 1e-6)


In [77]:
# def pairwise(iterable):
#     "s -> (s0, s1), (s2, s3), (s4, s5), ..."
#     a = iter(iterable)
#     return zip(a, a)
# def FromSympForm(astring):
#     lines = astring.split('\n')
#     numVars = int(lines[0])
#     intermediate_vars = [sp.Symbol('x_{%d}' % i) for i in range(1,numVars+2)]
#     varStrings = lines[1].split(';;')
#     # original_vars = [parse_latex(x) for x in varStrings]
#     original_vars = [sp.Symbol(x) for x in varStrings]
#     translator = dict(zip(intermediate_vars, original_vars))
#     expr = sp.S(0)
#     for line in (lines[2:]):
#         parts = line.split(';;')
#         chunk = sp.S(1)
#         for a, b in pairwise(parts):
#             a = parse_latex(a)
#             b = parse_latex(b)
#             chunk *= a**b
#         expr += chunk
#     return expr, translator
    


# session.evaluate('''
# SympyFormer[expr_] := (
#    head = Head[expr];
#    Which[
#     head === Plus,
#     Return[SympyFormer /@ List @@ expr],
#     head === Real,
#     Return[{expr, 1}],
#     head === Power,
#     Return[List @@ expr],
#     head === Times,
#     Return[List @@ expr],
#     head === Subscript,
#     Return[{1, expr}]
#     ]
#    );
# Addendum[list_] := (
#    If[NumericQ[list[[1]]],
#     Return[list],
#     Return[Prepend[list, 1]]]
#    );
# ToTeX[list_] := (
#    numeric = {ToString[list[[1]][[1]], TeXForm], 
#      ToString[list[[1]][[2]]]};
#    symbolic = list[[2 ;;]];
#    symbolic = {ToString[#[[1]], TeXForm], 
#        ToString[#[[2]], TeXForm]} & /@ symbolic;
#    Return[{numeric, symbolic}]
#    );
# Inverter[expr_] := (
#   head = Head[expr];
#   If[head === Power,
#    Return[List @@ expr],
#    Return[{expr, 1}]
#    ]
#   )

# SympyForm[expr0_] := (
#   If[expr0 === 0,
#    Return["0\n\n0;;1"]];
#   expr = expr0;
#   expr = Expand[expr];
#   exprVars = Variables[expr];
#   newVars = Table[Subscript[x, i], {i, 1, Length[exprVars]}];
#   translator = (#[[1]] -> #[[2]]) & /@ 
#     Transpose[{exprVars, newVars}];
#   expr = Chop[expr /. translator];
#   sympy = SympyFormer[expr];
#   sympy = Addendum /@ sympy;
#   sympy = Table[Inverter /@ sp, {sp, sympy}];
#   sympy = Flatten[ToTeX[#]] & /@ sympy;
#   sympy = Prepend[sympy, ToString[#, TeXForm] & /@ exprVars];
#   sympy = Prepend[sympy, {ToString[Length[exprVars]]}];
#   sympy = Map[StringJoin[Riffle[#, ";;"]] &, sympy];
#   sympy = StringJoin[Riffle[sympy, "\n"]];
#   Return[sympy])
# ''')

In [None]:
# def parse_energy_matrix_table(astr):
#     global mercy
#     lhs = astr.split('= ')[0].strip()
#     rhs = astr.split('= ')[-1]
#     # define it in the Mathematica session
#     session.evaluate(astr)
#     # get num rows
#     num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
#     parsed_matrix = []
#     for num_row in range(1,num_rows+1):
#         row = []
#         for num_col in range(1, num_cols+1):
#             parse = session.evaluate("ParseSymbol[%s[[%d,%d]]]" % (lhs, num_row, num_col))
#             parse = re.sub(r'Subscript\(x,(\d{1,2})\)', r'x_{\1}', parse)
#             parse = re.sub(r'Complex\((.*?),(.*?)\)',r'(\1+i*(\2))',parse)
#             parse = re.sub(r'Sqrt\((.*?)\)', r'sqrt{\1}', parse).replace('sqrt','\sqrt').replace('.*','*').replace('.)',')')
#             parse = re.sub(r'(\d\.[\d]+)e([-]{0,1}\d)',r'(\1*10^{\2})', parse)
#             parse = cleanup(parse)
#             parsed = sp.expand(parse_latex(parse)).subs(sp.Symbol('i'),sp.I)
#             row.append(parsed)
#         parsed_matrix.append(row)
#     return lhs, sp.Matrix(parsed_matrix), rhs

In [97]:
# for cl in clear_lanthanum:
#     if 'EnergyMatrixTable' in cl:
#         print('.',end='|')
#         lhs = cl.split(' =')[0]
#         rhs = cl.split(' =')[1].strip()
#         # define the symbol in mathematica
#         session.evaluate('%s' % cl)
#         num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
#         parsed_matrix = []
#         for num_row in range(1,num_rows+1):
#             row = []
#             for num_col in range(1, num_cols+1):
#                 parse = str(session.evaluate("SympyForm[%s[[%d,%d]]]" % (lhs, num_row, num_col)))
#                 parse = parse.replace('.;',';')
#                 parsed = sp.simplify(FromSympForm(parse)).subs(sp.Symbol('i'),sp.I)
#                 row.append(parsed[0])
#             parsed_matrix.append(row)
#         parsed_matrix = sp.Matrix(parsed_matrix)
#         break

.|

In [56]:
astring = r'''5
\beta;;\cos (\xi );;x_1;;x_2;;x_5
1;;1;;x_1;;-1;;x_2;;1
1.0100100000000000000\times 10^{19};;1;;x_3;;1
1;;1;;x_4;;1
0.1;;1;;x_1;;-1;;x_5;;1'''
FromSympForm(astring)

10100100000000000000.0*x_{1} + x_{2} + 0.1*x_{5}/beta + cos(xi)/beta

x_{2} + x_{3} + 0.1*x_{4}/x_{1} + 5/x_{1}

In [34]:
intermediate_vars

[x_{1}, x_{2}, x_{3}, x_{4}, x_{5}]

In [21]:
parts

['1', '1', 'x_3', '-1', 'x_4', '1']

In [18]:
varStrings

['x_1', 'x_2', '\\beta', 'x_5']

In [11]:
lines

['4',
 'x_1;;x_2;;\x08eta;;x_5',
 '1;;1;;x_1;;1',
 '1;;1;;x_2;;1',
 '1;;1;;x_3;;-1;;x_4;;1']

In [4]:
import numpy as np

In [5]:
from sympy.parsing.sym_expr import SymPyExpression

In [39]:
for num_row in range(diff_mat.rows):
    for num_col in range(diff_mat.cols):
        if abs(diff_mat[num_row,num_col]) == maxy:
            print(num_row,num_col)

12 22
13 23


In [None]:
!beep silent

(2, 4, 2, 1/2, 1/2)

In [130]:
for k,v in parsed_table['EnergyMatrixTables'].items():
    pickle.dump(v,open('./data/dummy.pkl','wb'))

In [133]:
free_symbs = v.free_symbols
free_symbs_values = {v: random() for v in free_symbs}
mathematica_values = {inverse_rep[k]:v for k,v in free_symbs_values.items()}
num_try = sp.N(v.subs(free_symbs_values))
energyMatrixString = parsed_table['EnergyMatrixStrings'][k]
mathematica_subs = ', '.join(['Subscript[x,%s] -> %s' % (str(str(k).split('{')[-1].split('}')[0]), str(v)) for k,v in mathematica_values.items()])
mathematica_subs = '{%s}' % mathematica_subs
mathematica_try = sp.Matrix(session.evaluate('Re[(%s /. reps) /. %s]' % (energyMatrixString, mathematica_subs))) +\
sp.I*sp.Matrix(session.evaluate('Im[(%s /. reps) /. %s]' % (energyMatrixString, mathematica_subs))) 

In [None]:
counter = 0
for k, v in parsed_table['EnergyMatrixTables'].items():
    print(k)
    display(v)
    if counter == 5:
        break
    counter += 1

## Functional TeXForm

In [17]:
import re
from sympy.parsing.latex import parse_latex
import sympy as sp
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
session=WolframLanguageSession()

session.evaluate(r'''vars = {B02, B04, B06, B0x, B0y, B0z, B12, B14, B16, B22, B24, B26, 
   B34, B36, B44, B46, B56, B66, E0, E1, E2, E3, eOrbitalRad, gI, gs, 
   M0, M2, M4, P2, P4, P6, S12, S14, S16, S22, S24, S26, S34, S36, 
   S44, S46, S56, 
   S66, \[Alpha], \[Beta], \[Beta]BohrMag, \[Beta]n, \[Gamma], \
\[Zeta]};
svars = Table[
   ToExpression[SubscriptBox["x", ToString[i]]], {i, 1, Length[vars]}];
reps = (#[[1]] -> #[[2]]) & /@ Transpose[{vars, svars}];
ParseSymbol[thing_] :=
 (str = ToString[Chop[thing] /. reps, TeXForm];
  Return[str]
  )''')

base_rep=[
('B02',sp.Symbol('B_{0,2}')),
('B04',sp.Symbol('B_{0,4}')),
('B06',sp.Symbol('B_{0,6}')),
('B0x',sp.Symbol('B_{0,x}')),
('B0y',sp.Symbol('B_{0,y}')),
('B0z',sp.Symbol('B_{0,z}')),
('B12',sp.Symbol('B_{1,2}')),
('B14',sp.Symbol('B_{1,4}')),
('B16',sp.Symbol('B_{1,6}')),
('B22',sp.Symbol('B_{2,2}')),
('B24',sp.Symbol('B_{2,4}')),
('B26',sp.Symbol('B_{2,6}')),
('B34',sp.Symbol('B_{3,4}')),
('B36',sp.Symbol('B_{3,6}')),
('B44',sp.Symbol('B_{4,4}')),
('B46',sp.Symbol('B_{4,6}')),
('B56',sp.Symbol('B_{5,6}')),
('B66',sp.Symbol('B_{6,6}')),
('E0',sp.Symbol('E_{0}')),
('E1',sp.Symbol('E_{1}')),
('E2',sp.Symbol('E_{2}')),
('E3',sp.Symbol('E_{3}')),
('eOrbitalRad',sp.Symbol(r'\epsilon')),
('gI',sp.Symbol('g_{I}')),
('gs',sp.Symbol('g_{s}')),
('M0',sp.Symbol('M_{0}')),
('M2',sp.Symbol('M_{2}')),
('M4',sp.Symbol('M_{4}')),
('P2',sp.Symbol('P_{2}')),
('P4',sp.Symbol('P_{4}')),
('P6',sp.Symbol('P_{6}')),
('S12',sp.Symbol('S_{1,2}')),
('S14',sp.Symbol('S_{1,4}')),
('S16',sp.Symbol('S_{1,6}')),
('S22',sp.Symbol('S_{2,2}')),
('S24',sp.Symbol('S_{2,4}')),
('S26',sp.Symbol('S_{2,6}')),
('S34',sp.Symbol('S_{3,4}')),
('S36',sp.Symbol('S_{3,6}')),
('S44',sp.Symbol('S_{4,4}')),
('S46',sp.Symbol('S_{4,6}')),
('S56',sp.Symbol('S_{5,6}')),
('S66',sp.Symbol('S_{6,6}')),
('\[Alpha]',sp.Symbol(r'\alpha')),
('\[Beta]',sp.Symbol(r'\beta')),
('\[Beta]BohrMag',sp.Symbol(r'\mu_{B,e}')),
('\[Beta]n',sp.Symbol(r'\mu_{B,e}')),
('\[Gamma]',sp.Symbol(r'\gamma')),
('\[Zeta]',sp.Symbol(r'\zeta'))];
master_rep = {}
for idx, it in enumerate(base_rep):
    key = sp.Symbol('x_{%d}' % (idx+1))
    master_rep[key] = it[1]

In [None]:
re.sub()

In [47]:
re.sub(r'x_(\d)',r'(x_{\1})','x_9')

'(x_{9})'

In [179]:
def lanthanum_cleanup(fname):
    lanthanum = [l.strip().split(' =')[0] for l in open(fname,'r').readlines() if l[0] != ' ']
    lanthanum = list(filter(lambda x: x != '', lanthanum))
    rhs = {}
    full_lanthanum = [l.strip() for l in open('/Volumes/GoogleDrive/My Drive/Zia Lab/Codebase/qdef/data/lanthanide_tables/HFEnergyMatrixTables copy 2','r').readlines()]
    for line_idx, line in enumerate(full_lanthanum):
        if line_idx < len(full_lanthanum)-1:
            next_line = full_lanthanum[line_idx+1]
        if 'Attributes[Null]' in line:
            continue
        if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line):
            key = line.split('=')[0].strip()
            chunks = []
            try:
                first_chunk = line.split('=')[1]
            except:
                first_chunk = ''
            chunks.append(first_chunk)
            if key not in rhs:
                rhs[key] = []
            continue
        if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line):
            chunks.append(line.strip())
            whole_chunk = ''.join(chunks).strip()
            if whole_chunk != '':
                rhs[key].append(whole_chunk)
            chunks = []
        else:
            chunks.append(line.strip())
    clean_output = []
    for k,v in rhs.items():
        unique = list(set(v))
        assert len(unique) == 1
        unique = unique[0]
        out = '%s = %s' % (k, unique)
        clean_output.append(out)
    return clean_output

def parse_it(astr):
    astr = cleanup(astr)
    return sp.expand((parse_latex(astr)))

def cleanup(astr):
    reps = [('. ',''),
            (r'\right)',')'),
            (r'\left(','('),
            ]
    for rep in reps:
        astr = astr.replace(*rep)
    return astr

def parse_energy_matrix_table(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    for num_row in range(1,num_rows+1):
        row = []
        for num_col in range(1, num_cols+1):
            parse = cleanup(session.evaluate("ParseSymbol[%s[[%d,%d]]]" % (lhs, num_row, num_col)))
            parse = re.sub(r'x_(\d)',r'(x_{\1})', parse)
            parse = re.sub(r'i',r'(i)',parse)
            parsed = sp.expand(parse_latex(parse)).subs(sp.Symbol('i'),sp.I)
            row.append(parsed)
        parsed_matrix.append(row)
    return lhs, sp.Matrix(parsed_matrix)

def parse_allowed_m(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_lists = tuple(session.evaluate("Dimensions[%s]" % lhs))[0]
    M_value = int(lhs.split('[')[-1].split(']')[0])
    rows = []
    for list_index in range(1,num_lists+1):
        the_list = session.evaluate("%s[[%d]]" % (lhs, list_index))
        try:
            the_row = [sp.S(x[0])/sp.S(x[1]) for x in the_list]
        except:
            the_row = [sp.S(x) for x in the_list]
        rows.append(the_row)
    return M_value, rows  

def parse_energy_states_table(astr):
    lhs = astr.split('= ')[0].strip()
    rhs = astr.split('= ')[-1]
    # define it in the Mathematica session
    session.evaluate(astr)
    # get num rows
    num_rows, num_cols = tuple(session.evaluate("Dimensions[%s]" % lhs))
    parsed_matrix = []
    rows = []
    for num_row in range(1,num_rows+1):
        head = session.evaluate("%s[[%d]][[-1]]" % (lhs, num_row))
        try:
            head = sp.S(head[0])/sp.S(head[1])
        except:
            head = sp.S(head)
        thorax = session.evaluate("%s[[%d]][[1]][[-1]]" % (lhs, num_row))
        try:
            thorax = sp.S(thorax[0])/sp.S(thorax[1])
        except:
            thorax = sp.S(thorax)
        knees = session.evaluate("%s[[%d]][[1]][[1]]" % (lhs, num_row))
        knees = (str(knees[0]), sp.S(knees[1]))
        row = (((knees),thorax),head)
        rows.append(row)
    args = '(%s)' % lhs.split('[')[-1].split(']')[0]
    return sp.S(args), rows

def parse_table(fname):
    clear_lanthanum = lanthanum_cleanup(fname)
    EnergyMatrixTables = {}
    counter = 0
    for cl in clear_lanthanum:
        if 'EnergyMatrixTable' in cl:
            print('.',end='|')
            parse = parse_energy_matrix_table(cl)
            args = sp.S(parse[0].split('[')[-1].split(']')[0])
            EnergyMatrixTables[args] = parse[1].subs(master_rep)
            counter += 1
            # if counter == 5:
            #     break
    EnergyStatesTable = {}
    for cl in clear_lanthanum:
        if 'EnergyStatesTable' in cl:
            print('.',end='|')
            parse = parse_energy_states_table(cl)
            args = parse[0]
            EnergyStatesTable[args] = parse[1]
    AllowedM = {}
    for cl in clear_lanthanum:
        if 'AllowedM' in cl:
            print('.',end='|')
            parse = parse_allowed_m(cl)
            AllowedM[parse[0]] = parse[1]
    return {'EnergyMatrixTables': EnergyMatrixTables,
            'EnergyStatesTable': EnergyStatesTable,
            'AllowedM': AllowedM}
def rational_approx(x, N):
    sign = 1
    if x < 0:
        sign = -1
        x = -x
    if x > 1:
        ix, dx = int(x), x - int(x)
    else:
        ix = 0
        dx = x
    a, b = 0, 1
    c, d = 1, 1
    while (b <= N and d <= N):
        mediant = float(a+c)/(b+d)
        if dx == mediant:
            if b + d <= N:
                return sign*(sp.S(ix)+sp.S(a+c)/sp.S(b+d))
            elif d > b:
                return sign*(sp.S(ix)+sp.S(c)/sp.S(d))
            else:
                return sign*(sp.S(ix)+sp.S(a)/sp.S(b))
        elif dx > mediant:
            a, b = a+c, b+d
        else:
            c, d = a+c,b+d

    if (b > N):
        return sign*(sp.S(c)/sp.S(d) + sp.S(ix))
    else:
        return sign*(sp.S(a)/sp.S(b) + sp.S(ix))
def rational_simplify(sympy_expr):
    sympy_dict = sympy_expr.as_coefficients_dict()
    for k,v in sympy_dict.items():
        if isinstance(v, sp.core.numbers.Float):
            sympy_dict[k] = rational_approx(v,1000)
    total = sum([k*v for k,v in sympy_dict.items()])
    return total

In [180]:
# teststring = '(Subscript(x,46)*(2*Sqrt(2)*Subscript(x,6)*(-1 + Subscript(x,25)) + ((Sqrt(2) + 1/(2.*Sqrt(5)))*Subscript(x,24)*Subscript(x,25)*Subscript(x,47))/Subscript(x,23)))/(2.*Sqrt(3))'
# teststring = re.sub(r'Subscript\(x,(\d{1,2})\)', r'x_{\1}', teststring)
# teststring = re.sub(r'Sqrt\((.*?)\)', r'sqrt{\1}', teststring).replace('sqrt','\sqrt').replace('.*','*')
# sp.expand(parse_latex(teststring))

In [181]:
parsed_table = parse_table('/Volumes/GoogleDrive/My Drive/Zia Lab/Codebase/qdef/data/lanthanide_tables/HFEnergyMatrixTables copy 2')
for k,v in parsed_table['EnergyMatrixTables'].items():
    num_rows = v.rows
    num_cols = v.cols
    for num_row in range(num_rows):
        for num_col in range(num_cols):
            v[num_row,num_col] = rational_simplify(v[num_row,num_col])

.|

LaTeXParsingError: I don't understand this
(14*Subscr(i)pt(x,19))/13+ 0.6923076923076925*Subscr(i)pt(x,20) + 33.*Subscr(i)pt(x,22) + (37*Subscr(i)pt(x,26))/6+ (97*Subscr(i)pt(x,27))/6+ (1205*Subscr(i)pt(x,28))/66+ (43*Subscr(i)pt(x,29))/540+ (19*Subscr(i)pt(x,30))/1188- (875*Subscr(i)pt(x,31))/15444+ 2*Subscr(i)pt(x,44) + Subscr(i)pt(x,45) + (Subscr(i)pt(x,6)*Subscr(i)pt(x,24)*Subscr(i)pt(x,47))/2+ Subscr(i)pt(x,48) - Subscr(i)pt(x,49)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^

In [66]:
import pickle

In [170]:
for k,v in parsed_table['EnergyMatrixTables'].items():
    pickle.dump(v,open('./data/dummy.pkl','wb'))

In [178]:
sp.expand(v[0,0]*sp.I)

-sqrt(2)*I*B_{4,4}/9 + sqrt(2)*S_{4,4}/9

In [177]:
v[0,0]

-sqrt(2)*B_{4,4}/9 - sqrt(2)*I*S_{4,4}/9

In [7]:
for k,v in parsed_table['EnergyMatrixTables'].items():
    num_rows = v.rows
    num_cols = v.cols
    for num_row in range(num_rows):
        for num_col in range(num_cols):
            v[num_row,num_col] = rational_simplify(v[num_row,num_col])

In [225]:
for k,v in parsed_table['EnergyMatrixTables'].items():
    parsed_table['EnergyMatrixTables'][k] = v.subs(master_rep)

In [283]:
rational_simplify(v[2,2])

B_{0,z}*\beta_{n}*g_{I}/2 + 14*E_{0}/13 + 126*E_{1}/13

In [281]:
for k,v in expr.as_coefficients_dict().items():
    if isinstance(v, sp.core.numbers.Float):
                print(v)

0.692308000000000


In [289]:
for k,v in parsed_table['EnergyMatrixTables'].items():
    num_rows = v.rows
    num_cols = v.cols
    for num_row in range(num_rows):
        for num_col in range(num_cols):
            v[num_row,num_col] = rational_simplify(v[num_row,num_col])

## ScratchPad

In [None]:
import re

In [None]:
lanthanum = [l.strip().split(' =')[0] for l in open('/Volumes/GoogleDrive/My Drive/Zia Lab/Codebase/qdef/data/lanthanide_tables/HFEnergyMatrixTables copy 2','r').readlines() if l[0] != ' ']
lanthanum = list(filter(lambda x: x != '', lanthanum))

In [None]:
parEnergyMatrixTable = False
parAllowedM = False
parEnergyStatesTable = False
rhs = {}
counter = 0
full_lanthanum = [l.strip() for l in open('/Volumes/GoogleDrive/My Drive/Zia Lab/Codebase/qdef/data/lanthanide_tables/HFEnergyMatrixTables copy 2','r').readlines()]
for line_idx, line in enumerate(full_lanthanum):
    # if counter == 27:
    #     break
    counter = counter+1
    if line_idx < len(full_lanthanum)-1:
        next_line = full_lanthanum[line_idx+1]
    if 'Attributes[Null]' in line:
        continue
    if ('EnergyMatrixTable' in line) or ('AllowedM' in line) or ('EnergyStatesTable' in line):
        key = line.split('=')[0].strip()
        chunks = []
        try:
            first_chunk = line.split('=')[1]
        except:
            first_chunk = ''
        chunks.append(first_chunk)
        if key not in rhs:
            rhs[key] = []
        continue
    if line == '' or ('EnergyMatrixTable' in next_line) or ('AllowedM' in next_line) or ('EnergyStatesTable' in next_line):
        chunks.append(line.strip())
        whole_chunk = ''.join(chunks).strip()
        if whole_chunk != '':
            rhs[key].append(whole_chunk)
        chunks = []
    else:
        chunks.append(line.strip())

In [None]:
clean_output = []
for k,v in rhs.items():
    unique = list(set(v))
    assert len(unique) == 1
    unique = unique[0]
    out = '%s = %s' % (k, unique)
    clean_output.append(out)

In [None]:
differences = set()
for k,v in rhs.items():
    # print(k)
    # print(len(v),len(list(set(v))))
    differences.add(len(list(set(v))))