# Prototyping a Data Storage Model for ChiantiPy

In [2]:
import os

import numpy as np
import pandas
import matplotlib.pyplot as plt
import h5py
import astropy.units as u
#import ChiantiPy.tools.util as ch_util
#import ChiantiPy.tools.io as ch_io
#import ChiantiPy.core as ch
import fiasco

%matplotlib inline

## Data Access Model

CHIANTI has several file formats that it stores for each ion. The most notable are,

* `.elvlc`: energy levels (in cm$^{-1}$) with additional level configuration
* `.wgfa`: wavelengths, oscillator strengths, and Einstein A coefficients for the transitions
* `.scups`: temperatures and effective collision strengths for each transition. Replaces the old `.splups` files. There are also still `.psplups` files
* Additional files:
  * `.fblvl`: information for calculating free-bound continuum
  * `.cilvl`, `.reclvl`: ionization and recombination rates

Essentially, we want to have a property for each of these files. Each of these properties returns an object with a `__getitem__` method that takes in the keys associated with each of these files. These objects return the relevant data streamed out of the HDF5 file.

Ideally, this file would be built once the first time you download ChiantiPy and then only rebuilt when your installed CHIANTI database gets updated. The filename is then stored at the package level. We'll use our CHIANTI database HDF5 file that we've been using in `synthesizAR`.

In [3]:
# Location of the CHIANTI HDF5 database: ~/.fiasco/chianti_dbase.h5.
# Built from the current user's home directory rather than a hard-coded
# absolute path so the notebook also runs on other machines/accounts.
chianti_hdf5_filename = os.path.join(os.path.expanduser('~'),'.fiasco','chianti_dbase.h5')

In [56]:
class DataIndexer(object):
    """
    Dictionary-style view onto one group of the CHIANTI HDF5 database.

    Nothing is read when the object is created. Every lookup (and every call
    to ``repr``) opens the file named by the module-level
    ``chianti_hdf5_filename`` read-only, reads what it needs and closes the
    file again.

    Parameters
    ----------
    top_level_path : str
        Path of the HDF5 group to expose, e.g. ``'h/h_1/elvlc'``.
    """

    def __init__(self,top_level_path):
        self.top_level_path = top_level_path

    @staticmethod
    def _unit_of(dataset):
        """Return the ``unit`` attribute of ``dataset`` as a ``str``."""
        unit = dataset.attrs['unit']
        # h5py can hand string attributes back as bytes (e.g. fixed-length
        # strings); normalise so the comparisons against 'SKIP' below work.
        if isinstance(unit,bytes):
            unit = unit.decode('utf-8')
        return unit

    def __getitem__(self,key):
        """
        Read the dataset ``key`` from this group.

        Returns
        -------
        numpy.ndarray
            If the dataset's ``unit`` attribute is ``'SKIP'``.
        astropy.units.Quantity
            Otherwise, with the stored unit attached.

        Raises
        ------
        IndexError
            If ``key`` is not a member of the group.
        """
        with h5py.File(chianti_hdf5_filename,'r') as hf:
            grp = hf[self.top_level_path]
            if key not in grp:
                raise IndexError('{} not a valid dataset for {}'.format(key,self.top_level_path))
            ds = grp[key]
            unit = self._unit_of(ds)
            # Copy the values into memory while the file is still open
            values = np.array(ds)
        # Compare by value: ``is 'SKIP'`` tests object identity and is not a
        # reliable way to recognise the placeholder unit.
        if unit == 'SKIP':
            return values
        return u.Quantity(values,unit)

    def __repr__(self):
        """Summarise the group: its path, each field with dtype and unit, and the footer."""
        with h5py.File(chianti_hdf5_filename,'r') as hf:
            grp = hf[self.top_level_path]
            var_names = []
            for key in grp:
                unit = self._unit_of(grp[key])
                # Always a 3-tuple (name, dtype, unit) so the formatting below
                # never indexes past the end; 'SKIP' is shown as an empty unit.
                var_names.append((key,grp[key].dtype.str,'' if unit == 'SKIP' else unit))
            footer = grp.attrs['footer']

        name_strs = '\n'.join(['{} ({}) -- {}'.format(v[0],v[1],v[2]) for v in var_names])
        return '''{top_level_path}
        
Fields
------
{vars_and_units}

Footer
------
{footer}
        '''.format(top_level_path=self.top_level_path,vars_and_units=name_strs,footer=footer)
    

In [57]:
class GenericChiantiData(object):
    """
    Access point for the CHIANTI datasets that belong to one ion.

    Parameters
    ----------
    ion_name : str
        Underscore-separated ion identifier, e.g. ``'h_1'``. The text before
        the first underscore is kept as ``element`` and the text after the
        last underscore as ``stage`` (both as strings).
    """

    def __init__(self,ion_name):
        leading, _, _ = ion_name.partition('_')
        _, _, trailing = ion_name.rpartition('_')
        self.ion_name = ion_name
        self.element = leading
        #self.Z = ch_util.el2z(self.element)
        self.stage = trailing

    @property
    def elvlc(self):
        """``DataIndexer`` for the ``<element>/<ion_name>/elvlc`` group."""
        return DataIndexer('{}/{}/{}'.format(self.element,self.ion_name,'elvlc'))

    @property
    def wgfa(self):
        """``DataIndexer`` for the ``<element>/<ion_name>/wgfa`` group."""
        return DataIndexer('{}/{}/{}'.format(self.element,self.ion_name,'wgfa'))
    

In [58]:
# Accessor for the 'h_1' entry; used by the example cells that follow
test = GenericChiantiData(ion_name='h_1')

In [59]:
# Pull a single dataset out of the elvlc group by its field name
test.elvlc['observed energy']

<Quantity [      0.   ,  82258.956,  82258.921,  82259.287,  97492.224,
             97492.213,  97492.321,  97492.321,  97492.357, 102823.855,
            102823.851, 102823.896, 102823.896, 102823.911, 102823.911,
            102823.919, 105291.633, 105291.631, 105291.654, 105291.654,
            105291.662, 105291.662, 105291.666, 105291.666, 105291.668] 1 / cm>

In [60]:
# Displaying the indexer itself goes through DataIndexer.__repr__,
# which lists the group's fields and its footer
test.elvlc

h/h_1/elvlc
        
Fields
------
configuration (|S2) -- 
level index (<i8) -- 
level label (|S1) -- 
multiplicity (<i8) -- 
observed energy (<f8) -- 1 / cm
orbital angular momentum (|S1) -- 
theoretical energy (<f8) -- 1 / cm
total angular momentum (<f8) -- 

Footer
------
%filename: h_1.elvlc
%observed energy levels: Fuhr et al, 1999, NIST Atomic Spectra Database Version 2.0
%produced as part of the Arcetri/Cambridge/NRL 'CHIANTI' atomic data base collaboration
%
%  Ken Dere  May 3 2001

        

In [55]:
# Scratch check: collect (field name, unit) pairs for the h_1 elvlc group
# and look at how h5py reports an integer dtype.
with h5py.File(chianti_hdf5_filename,'r') as hf:
    grp = hf['/h/h_1/elvlc']
    # The 'SKIP' placeholder unit is reported as an empty string
    var_names = [
        (name,'' if unit == 'SKIP' else unit)
        for name,unit in ((k,grp[k].attrs['unit']) for k in grp)
    ]
    print(grp['level index'].dtype.str)

<i8


In [37]:
# Show the (field name, unit) pairs; fields without a unit carry an empty string
var_names

[('configuration', ''),
 ('level index', ''),
 ('level label', ''),
 ('multiplicity', ''),
 ('observed energy', '1 / cm'),
 ('orbital angular momentum', ''),
 ('theoretical energy', '1 / cm'),
 ('total angular momentum', '')]

This is specifically for an ion. We could also implement an even more generic class for the other non-ion-specific datasets, e.g. abundance, ionization potential, miscellaneous continuum data. 

Alternatively, when the CHIANTI HDF5 database is created, these could just be broken up by ion appropriately. This would work except for the continuum data which is maybe a special case anyway. 

Basically, we just want to avoid having to index things over and over again. Better to just refer to it by the ion name.

Since this kind of data is used in quite a few places, we could provide it as a generic object. This also makes the CHIANTI data easily accessible without the baggage of the ion object if users want to extend it in any way.

In [29]:
# Scratch check of the newline join used to build multi-line field listings.
# The previous format string had two '{}' placeholders but only one argument,
# which raises IndexError; print the joined text directly instead.
demo_lines = '\n'.join(['1','2','4'])
print(demo_lines)

1
2
4
