In [1]:
import os

import numpy as np
import pandas
import matplotlib.pyplot as plt
import h5py
import astropy.units as u
import ChiantiPy.tools.util as ch_util
import ChiantiPy.tools.io as ch_io
import ChiantiPy.core as ch

%matplotlib inline

 found PyQt4 widgets
 using CLI for selections
 reading chiantirc file


# Parse Raw CHIANTI Data
In this notebook, we'll develop some tools for parsing the raw ASCII data in the CHIANTI atomic database and transforming it into pandas dataframes. It is important to include a way to very easily parse the actual raw data in the ChiantiPy package.

Then, the next step is to transform these dataframes into one large HDF5 file. ChiantiPy will then use this file like a database to stream the needed atomic data from. This provides a sleeker interface to the data itself and can be much more efficient than the current system.

## Notes on Database Structure
Other directories/files that are **not** ions:
* __abundance__
* ancillary_data
* continuum
* dem
* __ioneq__
* __ip__
* masterlist
* VERSION (file)

The items in bold can be parsed appropriately and attached to the ion data objects.

The filetypes for each ion are:
* ~~**.elvlc**~~
* ~~**.wgfa**~~
* ~~.scups~~
* .psplups
* .rrparams
* .trparams
* .diparams
* .drparams
* ~~**.easplom**~~
* ~~**.easplups**~~
* ~~**.fblvl**~~
* .cilvl
* .reclvl

The items in bold have a basic row-column structure and can be easily read using just a list of column names and datatypes.

Those not bolded will be a bit more complicated.

One possible idea would be to force everything into the basic row-column format. This could be done by providing preprocessors for specific filetypes and then a general file reader. This would help the code be less verbose and more maintainable.

For those entries which are arrays, the preprocessor could take those rows, turn them into comma-separated lists, make each list a single item in the row (i.e. a single entry), and then give it a custom datatype that can later be used to convert it into a NumPy array.

The goal of the preprocessor should be to force every distinct record into a single space-delimited row in which each field occupies exactly one entry.

## Read and Parse Raw ASCII Data

In [36]:
# Root directory of the CHIANTI database, read from the XUVTOP environment
# variable; raises KeyError if the variable is not set.
ch_dbase = os.environ['XUVTOP']

In [None]:
# preprocessor for scups example
# Groups every three consecutive lines into one row: the first line of each
# group is split into separate fields, the two lines that follow are each
# collapsed into a single comma-separated string appended to that row.
# NOTE(review): `lines` is not defined anywhere in this cell -- it is expected
# to already hold the raw lines of a .scups file; confirm before running.
foo_lines = []
j = 0
for i, line in enumerate(lines):
    fields = line.strip().split()
    if i % 3 != 0:
        # continuation line: attach to the most recently started row
        foo_lines[j-1].append(','.join(fields))
        continue
    foo_lines.append(fields)
    j += 1

In [39]:
def generic_preprocessor(table,line,index):
    """
    Default preprocessor: store the already-split line as one table row.

    Parameters
    ----------
    table : list
        Rows accumulated so far; modified in place.
    line : list
        Fields of the current line.
    index : int
        Position of the line in the file (unused here).
    """
    table.append(line)

In [40]:
def parse_table(ion_name,filetype,names_and_types,preprocessor=generic_preprocessor):
    """
    Read one raw CHIANTI ASCII file for an ion into a pandas DataFrame.

    The file is looked up at ``<ch_dbase>/<element>/<ion_name>/<ion_name>.<filetype>``.
    Data lines are read until a line whose first field is ``-1``; everything
    after that line is kept as a free-form comment.

    Parameters
    ----------
    ion_name : str
        Ion identifier such as ``'fe_9'``; the text before the first
        underscore is used as the element directory name.
    filetype : str
        File extension without the leading dot, e.g. ``'scups'``.
    names_and_types : list of (str, type) tuples
        Column name and dtype for every column of the resulting frame.
    preprocessor : callable, optional
        Called as ``preprocessor(table, fields, index)`` for every data line,
        where ``index`` is the line's position in the file. It is responsible
        for adding the fields to ``table``.

    Returns
    -------
    pandas.DataFrame
        One row per record, columns cast to the requested dtypes. The trailing
        comment block (empty string if the file has none) is stored in
        ``df.attrs['comment']``.
    """
    # parse element and ion name
    # allow for multiple input formats???
    element = ion_name.split('_')[0]
    # read table
    filename = os.path.join(ch_dbase,element,ion_name,'.'.join([ion_name,filetype]))
    with open(filename,'r') as f:
        lines = f.readlines()
    # parse table
    table = []
    # default so that a file without a '-1' terminator does not leave
    # `comment` unbound
    comment = ''
    for i,raw_line in enumerate(lines):
        # columns are separated by at least two spaces; single spaces may
        # occur inside a field, so split on the double space only
        fields = list(filter(None,raw_line.strip().split('  ')))
        if not fields:
            # blank line: nothing to parse (and fields[0] would raise)
            continue
        if fields[0] == '-1':
            comment = ''.join(lines[i+1:])
            break
        # preprocess table depending on filetype
        preprocessor(table,fields,i)
    # construct dataframe
    df = pandas.DataFrame(table,columns=[h[0] for h in names_and_types])
    for h,d in names_and_types:
        df[h] = df[h].astype(d)
    # add metadata -- use the per-frame attrs dict; DataFrame._metadata is a
    # class-level list of attribute names shared by every DataFrame and must
    # not be appended to
    df.attrs['comment'] = comment
    return df

### `.scups` Files

In [45]:
# Column names for a .scups record, paired position-by-position with the
# dtype each column is cast to. The last two columns hold arrays and are
# therefore stored as generic objects.
headings = [
    'lower level index',
    'upper level index',
    'delta energy',
    'oscillator strength',
    'high-temperature limit',
    'number of scaled temperatures',
    'Burgess-Tully scaling type',
    'Burgess-Tully scaling parameter',
    'Burgess-Tully scaled temperatures',
    'Burgess-Tully scaled effective collisions strengths',
]
dtypes = [int, int, float, float, float, int, int, float, 'object', 'object']
scups_header_info = list(zip(headings, dtypes))

In [46]:
def scups_preprocessor(table,line,index):
    """
    Assemble three-line .scups records into single table rows.

    Every line whose index is a multiple of three starts a new row. Any other
    line is converted to a float array and appended, as one entry, to the row
    that was started most recently.

    Parameters
    ----------
    table : list
        Rows accumulated so far; modified in place.
    line : list of str
        Fields of the current line.
    index : int
        Position of the line in the file.
    """
    starts_new_record = (index % 3 == 0)
    if not starts_new_record:
        # scaled temperatures or collision strengths
        table[-1].append(np.array(line, dtype=float))
        return
    # main data
    table.append(line)

In [49]:
# Parse fe_9.scups, using the scups preprocessor to merge each three-line record into one row
scups_df = parse_table('fe_9','scups',scups_header_info,preprocessor=scups_preprocessor)

In [50]:
scups_df

Unnamed: 0,lower level index,upper level index,delta energy,oscillator strength,high-temperature limit,number of scaled temperatures,Burgess-Tully scaling type,Burgess-Tully scaling parameter,Burgess-Tully scaled temperatures,Burgess-Tully scaled effective collisions strengths
0,1,2,3.698,-1.000000,1.000000e-06,12,2,0.5629,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.09773, 0.0823, 0.0617, 0.0496, 0.0397, 0.02..."
1,1,3,3.721,0.000318,3.420000e-04,12,4,1.1000,"[0.0, 0.2062, 0.3894, 0.5534, 0.7012, 0.8362, ...","[0.2426, 1.757, 1.057, 0.642, 0.3542, 0.1447, ..."
2,1,4,3.770,-1.000000,1.000000e-06,12,2,0.5521,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.3122, 0.283, 0.244, 0.211, 0.178, 0.134, 0...."
3,1,5,3.880,-1.000000,1.000000e-06,12,2,0.5365,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.25, 0.235, 0.215, 0.189, 0.163, 0.124, 0.09..."
4,1,6,3.912,-1.000000,5.530000e-03,12,2,0.5321,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.3092, 0.268, 0.213, 0.175, 0.145, 0.106, 0...."
5,1,7,3.953,-1.000000,1.000000e-06,12,2,0.5266,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.173, 0.155, 0.131, 0.112, 0.0954, 0.0713, 0..."
6,1,8,4.148,-1.000000,4.940000e-02,12,2,0.5018,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.2222, 0.208, 0.189, 0.17, 0.145, 0.105, 0.0..."
7,1,9,4.160,-1.000000,1.000000e-06,12,2,0.5004,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.1465, 0.148, 0.15, 0.142, 0.124, 0.087, 0.0..."
8,1,10,4.188,0.005120,4.890000e-03,12,1,1.1000,"[0.0, 0.1877, 0.3625, 0.5256, 0.6784, 0.8228, ...","[0.05525, 0.04817, 0.04157, 0.03898, 0.03489, ..."
9,1,11,4.208,-1.000000,1.000000e-06,12,2,0.4947,"[0.0, 0.04698, 0.1097, 0.1977, 0.3302, 0.552, ...","[0.08901, 0.095, 0.103, 0.101, 0.0902, 0.0656,..."


In [51]:
%%bash
head -n 10 $XUVTOP/fe/fe_9/fe_9.scups

      1      2   3.698e+00  -1.000e+00   1.000e-06   12    2   5.629e-01
   0.000e+00   4.698e-02   1.097e-01   1.977e-01   3.302e-01   5.520e-01   7.114e-01   8.313e-01   9.249e-01   9.610e-01   9.801e-01   1.000e+00
   9.773e-02   8.230e-02   6.170e-02   4.960e-02   3.970e-02   2.870e-02   2.160e-02   1.550e-02   9.190e-03   5.770e-03   3.430e-03   1.000e-06
      1      3   3.721e+00   3.181e-04   3.420e-04   12    4   1.100e+00
   0.000e+00   2.062e-01   3.894e-01   5.534e-01   7.012e-01   8.362e-01   8.950e-01   9.294e-01   9.541e-01   9.647e-01   9.716e-01   1.000e+00
   2.426e-01   1.757e+00   1.057e+00   6.420e-01   3.542e-01   1.447e-01   7.062e-02   3.437e-02   1.352e-02   6.851e-03   3.695e-03   3.420e-04
      1      4   3.770e+00  -1.000e+00   1.000e-06   12    2   5.521e-01
   0.000e+00   4.698e-02   1.097e-01   1.977e-01   3.302e-01   5.520e-01   7.114e-01   8.313e-01   9.249e-01   9.610e-01   9.801e-01   1.000e+00
   3.122e-01   2.830e-01   2.440e-01   2.110e-01   1.780

### `.psplups` Files

In [53]:
%%bash
head -n 10 $XUVTOP/ca/ca_17/ca_17.psplups

  2  3  2 0.000e+00 1.021e-01 4.320e+02-3.160e-14 1.020e-14 4.365e-13 1.931e-12 3.927e-12 5.391e-12 5.420e-12 3.411e-12-2.418e-13
  2  4  2 0.000e+00 3.520e-01 2.145e+02-2.704e-12 1.242e-11 7.822e-11 1.433e-10 1.818e-10 1.913e-10 1.760e-10 1.349e-10 5.749e-11
  3  4  2 0.000e+00 2.499e-01 2.915e+02-6.370e-12 1.294e-11 6.796e-11 1.184e-10 1.470e-10 1.534e-10 1.401e-10 1.069e-10 4.349e-11
 -1
%filename: ca_17.psplups
%rates: Ryans R.S.I., Foster-Woods V.J, Copeland F., Keenan F.P., Matthews A., Reid R.H.G., 1998, ADNDT 70, 179-229
%energies: Bhatia A.K., Feldman U., Seely J.F., 1986, ADNDT 35, 449-472
%comment: Fits valid for temperatures 1e6 to 2e8 K.
%produced as part of the Arcetri/Cambridge/NRL 'CHIANTI' atomic data base collaboration
%


### `.elvlc` Files

In [5]:
# Column names for a .elvlc record, paired position-by-position with the
# dtype each column is cast to.
# The builtin int/float are used instead of np.int/np.float: those were plain
# aliases of the builtins and have been removed from NumPy (AttributeError
# on current releases).
dtypes = [int,str,int,str,float,float,float]
headings = ['level index','configuration','multiplicity',
            'orbital angular momentum','total angular momentum',
            'observed energy','theoretical energy']
elvlc_header_info = list(zip(headings,dtypes))

In [6]:
# Parse h_1.elvlc with the default preprocessor (one file line per row)
elvlc_df = parse_table('h_1','elvlc',elvlc_header_info)    

In [7]:
elvlc_df

Unnamed: 0,level index,configuration,multiplicity,orbital angular momentum,total angular momentum,observed energy,theoretical energy
0,1,1s,2,S,0.5,0.0,0.0
1,2,2s,2,S,0.5,82258.956,82303.0
2,3,2p,2,P,0.5,82258.921,82303.0
3,4,2p,2,P,1.5,82259.287,82303.0
4,5,3s,2,S,0.5,97492.224,97544.0
5,6,3p,2,P,0.5,97492.213,97544.0
6,7,3p,2,P,1.5,97492.321,97544.0
7,8,3d,2,D,1.5,97492.321,97544.0
8,9,3d,2,D,2.5,97492.357,97544.0
9,10,4s,2,S,0.5,102823.855,102879.0


### `.easplom` Files

In [8]:
# Column names for a .easplom record, paired position-by-position with the
# dtype each column is cast to. The final column holds an array and is
# therefore stored as a generic object.
headings = [
    'lower level index',
    'upper level index',
    'Burgess-Tully scaling type',
    'Gaunt factor',
    'delta energy',
    'Burgess-Tully scaling parameter',
    'Burgess-Tully scaled cross-section',
]
dtypes = [int, int, int, float, float, float, 'object']
easplom_header_info = list(zip(headings, dtypes))

In [9]:
def easplom_splups_preprocessor(table,line,index):
    """
    Build one table row from a .easplom / .easplups line.

    The incoming fields are re-split on any whitespace, because the values in
    these files may be separated by a single space only. The first two tokens
    are dropped, tokens 2-7 are kept as individual entries, and all remaining
    tokens are packed into one float array that becomes the last entry.

    Parameters
    ----------
    table : list
        Rows accumulated so far; modified in place.
    line : list of str
        Fields of the current line (each may still contain several
        space-separated values).
    index : int
        Position of the line in the file (unused here).
    """
    tokens = [tok for chunk in line for tok in chunk.split()]
    scalar_part = tokens[2:8]
    array_part = np.array(tokens[8:], dtype=float)
    table.append(scalar_part + [array_part])

In [11]:
# Parse fe_6.easplom; the preprocessor re-splits each line and packs the trailing values into an array
easplom_df = parse_table('fe_6','easplom',easplom_header_info,preprocessor=easplom_splups_preprocessor)

In [12]:
easplom_df

Unnamed: 0,lower level,upper level,Burgess-Tully scaling type,Gaunt factor,delta energy,Bethe coefficient,Burgess-Tully scaled cross-section
0,1,2,2,0.0,7.633,1.2,"[0.1138, 0.1354, 0.157, 0.1812, 0.2265]"
1,1,3,4,0.3533,7.86,1.3,"[1.225, 0.9187, 0.6202, 0.3265, 0.04335]"
2,1,4,4,0.03593,9.028,1.6,"[0.1132, 0.09183, 0.07065, 0.04493, 0.004365]"
3,1,5,4,8e-06,10.41,1.8,"[0.01005, 0.009518, 0.008805, 0.006223, 3.055e..."
4,1,6,4,0.2973,9.017,1.8,"[0.2545, 0.1938, 0.1444, 0.0948, 0.03355]"
5,1,7,4,0.1625,10.49,1.6,"[0.08102, 0.0619, 0.04555, 0.03028, 0.01549]"
6,1,8,4,0.2093,9.777,1.6,"[0.1767, 0.1302, 0.09287, 0.06075, 0.02225]"
7,1,9,1,0.1796,11.41,1.6,"[0.01348, 0.01133, 0.01055, 0.01103, 0.01571]"
8,1,10,4,0.2022,10.2,1.6,"[0.1198, 0.08732, 0.06112, 0.03855, 0.01977]"


In [13]:
%%bash
head -n 10 $XUVTOP/fe/fe_6/fe_6.easplom

 26  6  1  2  2 0.000e+00 7.633e+00 1.200e+00 1.138e-01 1.354e-01 1.570e-01 1.812e-01 2.265e-01
 26  6  1  3  4 3.533e-01 7.860e+00 1.300e+00 1.225e+00 9.187e-01 6.202e-01 3.265e-01 4.335e-02
 26  6  1  4  4 3.593e-02 9.028e+00 1.600e+00 1.132e-01 9.183e-02 7.065e-02 4.493e-02 4.365e-03
 26  6  1  5  4 7.989e-06 1.041e+01 1.800e+00 1.005e-02 9.518e-03 8.805e-03 6.223e-03 3.055e-05
 26  6  1  6  4 2.973e-01 9.017e+00 1.800e+00 2.545e-01 1.938e-01 1.444e-01 9.480e-02 3.355e-02
 26  6  1  7  4 1.625e-01 1.049e+01 1.600e+00 8.102e-02 6.190e-02 4.555e-02 3.028e-02 1.549e-02
 26  6  1  8  4 2.093e-01 9.777e+00 1.600e+00 1.767e-01 1.302e-01 9.287e-02 6.075e-02 2.225e-02
 26  6  1  9  1 1.796e-01 1.141e+01 1.600e+00 1.348e-02 1.133e-02 1.055e-02 1.103e-02 1.571e-02
 26  6  1 10  4 2.022e-01 1.020e+01 1.600e+00 1.198e-01 8.732e-02 6.112e-02 3.855e-02 1.977e-02
-1


### `.easplups` Files

In [14]:
# Column names for a .easplups record, paired position-by-position with the
# dtype each column is cast to. The final column holds an array and is
# therefore stored as a generic object.
headings = [
    'lower level index',
    'upper level index',
    'Burgess-Tully scaling type',
    'Gaunt factor',
    'delta energy',
    'upsilon coefficient',
    'excitation-autoionization rate coefficients',
]
dtypes = [int, int, int, float, float, float, 'object']
easplups_header_info = list(zip(headings, dtypes))

In [15]:
# Parse fe_6.easplups; it shares the line layout of .easplom, so the same preprocessor is reused
easplups_df = parse_table('fe_6','easplups',easplups_header_info,preprocessor=easplom_splups_preprocessor)

In [16]:
easplups_df

Unnamed: 0,lower level,upper level,Burgess-Tully scaling type,Gaunt factor,delta energy,upsilon coefficient,excitation-autoionization rate coefficients
0,1,2,2,0.0,7.633,1.8,"[0.1138, 0.1354, 0.157, 0.1812, 0.2265]"
1,1,3,4,0.3533,7.86,1.3,"[1.225, 0.9187, 0.6202, 0.3265, 0.04335]"
2,1,4,4,0.03593,9.028,2.0,"[0.1132, 0.09183, 0.07065, 0.04493, 0.004365]"
3,1,5,4,8e-06,10.41,2.4,"[0.01005, 0.009518, 0.008805, 0.006223, 3.055e..."
4,1,6,4,0.2973,9.017,2.0,"[0.2545, 0.1938, 0.1444, 0.0948, 0.03355]"
5,1,7,4,0.1625,10.49,1.6,"[0.08102, 0.0619, 0.04555, 0.03028, 0.01549]"
6,1,8,4,0.2093,9.777,1.7,"[0.1767, 0.1302, 0.09287, 0.06075, 0.02225]"
7,1,9,1,0.1796,11.41,2.2,"[0.01348, 0.01133, 0.01055, 0.01103, 0.01571]"
8,1,10,4,0.2022,10.2,1.6,"[0.1198, 0.08732, 0.06112, 0.03855, 0.01977]"


In [17]:
%%bash
head -n 10 $XUVTOP/fe/fe_6/fe_6.easplups

 26  6  1  2  2 0.000e+00 7.633e+00 1.800e+00 1.138e-01 1.354e-01 1.570e-01 1.812e-01 2.265e-01
 26  6  1  3  4 3.533e-01 7.860e+00 1.300e+00 1.225e+00 9.187e-01 6.202e-01 3.265e-01 4.335e-02
 26  6  1  4  4 3.593e-02 9.028e+00 2.000e+00 1.132e-01 9.183e-02 7.065e-02 4.493e-02 4.365e-03
 26  6  1  5  4 7.989e-06 1.041e+01 2.400e+00 1.005e-02 9.518e-03 8.805e-03 6.223e-03 3.055e-05
 26  6  1  6  4 2.973e-01 9.017e+00 2.000e+00 2.545e-01 1.938e-01 1.444e-01 9.480e-02 3.355e-02
 26  6  1  7  4 1.625e-01 1.049e+01 1.600e+00 8.102e-02 6.190e-02 4.555e-02 3.028e-02 1.549e-02
 26  6  1  8  4 2.093e-01 9.777e+00 1.700e+00 1.767e-01 1.302e-01 9.287e-02 6.075e-02 2.225e-02
 26  6  1  9  1 1.796e-01 1.141e+01 2.200e+00 1.348e-02 1.133e-02 1.055e-02 1.103e-02 1.571e-02
 26  6  1 10  4 2.022e-01 1.020e+01 1.600e+00 1.198e-01 8.732e-02 6.112e-02 3.855e-02 1.977e-02
-1


### `.fblvl` Files

In [18]:
# Column names for a .fblvl record, paired position-by-position with the
# dtype each column is cast to.
headings = [
    'level index',
    'configuration',
    'principal quantum number',
    'azimuthal quantum number',
    'orbital angular momentum',
    'multiplicity',
    'observed energy',
    'theoretical energy',
]
dtypes = [int, str, int, int, str, int, float, float]
fblvl_header_info = list(zip(headings, dtypes))

In [19]:
# Parse ar_16.fblvl with the default preprocessor (one file line per row)
fblvl_df = parse_table('ar_16','fblvl',fblvl_header_info)

In [20]:
fblvl_df

Unnamed: 0,level index,configuration,principal quantum number,azimuthal quantum number,orbital angular momentum,multiplicity,observed energy,theoretical energy
0,1,1s22s,2,0,s,2,0.0,0.0
1,2,1s22p,2,1,p,6,274026.667,274039.521
2,3,1s23s,3,0,s,2,4176024.0,4177011.75
3,4,1s23p,3,1,p,6,4244333.333,4253472.333
4,5,1s23d,3,2,d,10,4282560.0,4284798.4
5,6,1s24s,4,0,s,2,5605760.0,5606934.0
6,7,1s24p,4,1,p,6,5636873.333,5638415.833
7,8,1s24d,4,2,d,10,5649608.0,5651632.8
8,9,1s24f,4,3,f,14,0.0,5653632.857
9,10,1s25s,5,0,s,2,6259500.0,6261525.5


In [21]:
%%bash
head -n 30 $XUVTOP/h/h_1/h_1.fblvl

    1                  1s    1    0  s    2               0.000               0.000
    2                  2s    2    0  s    2           82258.956           82303.000
    3                  2p    2    1  p    6           82259.165           82303.000
    4                  3s    3    0  s    2           97492.224           97544.000
    5                  3p    3    1  p    6           97492.285           97544.000
    6                  3d    3    2  d   10           97492.343           97544.000
    7                  4s    4    0  s    2          102823.855          102879.000
    8                  4p    4    1  p    6          102823.881          102879.000
    9                  4d    4    2  d   10          102823.905          102879.000
   10                  4f    4    3  f   14          102823.916          102879.000
   11                  5s    5    0  s    2          105291.633          105348.000
   12                  5p    5    1  p    6          105291.646          105

### `.wgfa` Files

In [22]:
# Column names for a .wgfa record, paired position-by-position with the dtype
# each column is cast to. The first five columns come straight from the file;
# the remaining eight describe the lower and then the upper level.
# The builtin int/float are used instead of np.int/np.float: those were plain
# aliases of the builtins and have been removed from NumPy (AttributeError
# on current releases).
dtypes = [int,int,float,float,float,
          str,int,str,float,
          str,int,str,float]
headings = ['lower level index','upper level index',
            'transition wavelength','oscillator strength','radiative decay rate',
            'lower level configuration','lower level multiplicity','lower level orbital angular momentum',
            'lower level total angular momentum',
            'upper level configuration','upper level multiplicity','upper level orbital angular momentum',
            'upper level total angular momentum']
wgfa_header_info = list(zip(headings,dtypes))

In [23]:
def _parse_wgfa_level(tokens):
    """Split level tokens (configuration words followed by a term token) into [config, mult, orb, J]."""
    term = tokens[-1]
    config = ' '.join(tokens[:-1])
    # term token layout: first character is the multiplicity, second the
    # orbital angular momentum letter, the remainder is J
    mult = term[0]
    orb = term[1]
    j_text = term[2:]
    if '/' in j_text:
        # half-integer J written as a fraction, e.g. '3/2' -> 1.5
        parts = j_text.split('/')
        j_value = float(parts[0])/float(parts[-1])
    else:
        # integer J of any number of digits is passed through as text and
        # cast later together with the rest of the column
        j_value = j_text
    return [config,mult,orb,j_value]


def wgfa_preprocessor(table,line,index):
    """
    Build one table row from a .wgfa line.

    The last two fields of the line describe the lower and the upper level.
    Each is broken up into configuration, multiplicity, orbital angular
    momentum and total angular momentum, and the resulting eight entries
    replace the two original fields.

    The total angular momentum is evaluated to a float when it is written as
    a fraction; otherwise it is kept as the original string. An integer J with
    more than one digit (e.g. '10') used to be treated as a fraction and came
    out as 1.0; it is now kept intact.

    Parameters
    ----------
    table : list
        Rows accumulated so far; modified in place.
    line : list of str
        Fields of the current line; the second-to-last field is expected to
        end with a separate '-' token, which is discarded.
    index : int
        Position of the line in the file (unused here).
    """
    ### lower ###
    lower_tokens = line[-2].strip().split()
    del lower_tokens[-1] # delete rogue dash
    lower = _parse_wgfa_level(lower_tokens)
    ### upper ###
    upper = _parse_wgfa_level(line[-1].strip().split())
    ### recombine and assemble ###
    table.append(line[:-2] + lower + upper)

In [24]:
# Parse he_2.wgfa; the preprocessor expands the two level descriptions into separate columns
wgfa_df = parse_table('he_2','wgfa',wgfa_header_info,preprocessor=wgfa_preprocessor)

In [25]:
wgfa_df

Unnamed: 0,lower level index,upper level index,transition wavelength,oscillator strength,radiative decay rate,lower level configuration,lower level multiplicity,lower level orbital angular momentum,lower level total angular momentum,upper level configuration,upper level multiplicity,upper level orbital angular momentum,upper level total angular momentum
0,1,2,0.000,0.000000,5.266000e+02,1s,2,S,0.5,2s,2,S,0.5
1,1,3,303.786,0.277200,1.002000e+10,1s,2,S,0.5,2p,2,P,0.5
2,1,4,303.781,0.555200,1.003000e+10,1s,2,S,0.5,2p,2,P,1.5
3,1,6,256.318,0.052680,2.675000e+09,1s,2,S,0.5,3p,2,P,0.5
4,1,7,256.317,0.105500,2.679000e+09,1s,2,S,0.5,3p,2,P,1.5
5,1,11,243.027,0.019310,1.090000e+09,1s,2,S,0.5,4p,2,P,0.5
6,1,12,243.027,0.038670,1.092000e+09,1s,2,S,0.5,4p,2,P,1.5
7,1,18,237.331,0.009284,5.498000e+08,1s,2,S,0.5,5p,2,P,0.5
8,1,19,237.331,0.018600,5.506000e+08,1s,2,S,0.5,5p,2,P,1.5
9,2,6,1640.394,0.289600,3.590000e+08,2s,2,S,0.5,3p,2,P,0.5


In [26]:
%%bash
head -n 100 $XUVTOP/fe/fe_3/fe_3.wgfa

    1    2        229253.      0.000e+00      2.880e-03       3s2 3p6 3d6 5D4 -        3s2 3p6 3d6 5D3
    2    3        330360.      0.000e+00      1.790e-03       3s2 3p6 3d6 5D3 -        3s2 3p6 3d6 5D2
    3    4        516796.      0.000e+00      6.830e-04       3s2 3p6 3d6 5D2 -        3s2 3p6 3d6 5D1
    4    5       1053740.      0.000e+00      1.380e-04       3s2 3p6 3d6 5D1 -        3s2 3p6 3d6 5D0
    4    6       5413.482      0.000e+00      3.900e-02       3s2 3p6 3d6 5D1 -        3s2 3p6 3d6 3P2
    2    6       5271.870      0.000e+00      4.270e-01       3s2 3p6 3d6 5D3 -        3s2 3p6 3d6 3P2
    5    6       5441.437      0.000e+00      2.600e-05       3s2 3p6 3d6 5D0 -        3s2 3p6 3d6 3P2
    3    6       5357.363      0.000e+00      3.350e-05       3s2 3p6 3d6 5D2 -        3s2 3p6 3d6 3P2
    6    7         77906.      0.000e+00      4.770e-02       3s2 3p6 3d6 3P2 -        3s2 3p6 3d6 3P1
    4    7       5061.753      0.000e+00      1.210e-04       3s2 3p6 3d6