In [1]:
import pandas as pd
import numpy as np
from io import StringIO
import matplotlib.pyplot as plt
%matplotlib inline

from ipywidgets import FloatProgress
from IPython.display import display

Open the covariance data file and set the group-count constant to match the data (252 groups)

In [2]:
# Text handle on the covariance listing; it is consumed sequentially by the
# parsing loop further down and closed in the last cell.
f = open('scale.rev08.252groupcov7.1.new.toc', mode='r')

In [3]:
# Number of groups in the covariance data (matches the "252group" in the input
# file name). It fixes both the size of each correlation matrix and how many
# data lines are read per table.
_COV_GROUPS = 252

Some helper functions

In [4]:
def parse_material_line(line):
    """
    Pull the material ids and reaction numbers out of a table title line.

    The expected layout is
     Material 1= 1013027 reaction 1= 2, Material 2= 1013027 reaction 2= 2
    which, once split on whitespace, puts the values of interest at fixed
    token positions.

    Returns the tuple (mat_id_1, rx_1, mat_id_2, rx_2), all as ints.
    """
    tokens = line.strip().split()

    # Token positions: 2 -> material 1 id, 5 -> reaction 1 (carries a trailing
    # comma that must be removed), 8 -> material 2 id, 11 -> reaction 2.
    return (
        int(tokens[2]),
        int(tokens[5].rstrip(',')),
        int(tokens[8]),
        int(tokens[11]),
    )


In [5]:
def read_corr_mat(f, n_groups=None):
    """
    Reads lines in f generating a corr matrix by parsing the found corr lines

    The matrix is laid out in the file as a sequence of chunks. Each chunk
    starts with a line containing the word 'column', followed by a line of
    integer column numbers, a separator line, and then one data line per
    group whose first field is the row label (it is discarded).

    Parameters
    ----------
    f : text file-like object
        Open handle positioned before the first chunk. It is consumed up to
        and including the chunk whose last column number equals n_groups.
    n_groups : int, optional
        Number of rows/columns of the matrix. Defaults to the module-level
        _COV_GROUPS.

    Returns
    -------
    pandas.DataFrame
        Frame with index and columns labelled 1..n_groups. Columns for which
        no chunk was found are left as NaN.
    """
    if n_groups is None:
        n_groups = _COV_GROUPS

    df = pd.DataFrame(index=range(1, 1 + n_groups), columns=range(1, 1 + n_groups))

    for line in f:
        if 'column' not in line:
            # Blank or filler line between chunks (or before the first one):
            # nothing to parse, and no header exists yet to test against.
            continue

        # The line after the 'column' marker lists the column numbers that
        # this chunk provides.
        header = [int(i) for i in f.readline().split()]
        # skip the next line that is ---- ---- (separators)
        f.readline()

        # read one data line per group for this chunk of columns
        cov_lines = [f.readline() for _ in range(n_groups)]
        str_line = StringIO(' '.join(cov_lines))
        # sep=r'\s+' is the supported spelling of the deprecated
        # delim_whitespace=True
        _temp_df = pd.read_csv(str_line, sep=r'\s+', header=None)
        # first parsed column is the row label; the rest are the values
        df.loc[:, header] = _temp_df.iloc[:, 1:].values

        # The chunk that ends on the last group completes the matrix.
        if header[-1] == n_groups:
            break

    return df
        
    

Header for std dev tables

In [6]:
# Column labels applied to each standard-deviation table after it is parsed;
# 'groups' is later used as the index column.
std_dev_header = [
    'groups',
    'e low',
    'e high',
    'x-sec(1)',
    'x-sec(2)',
    'rel.s.d.(1)',
    'rel.s.d(2)',
    's.d.(1)',
    's.d(2)',
]

Set up the HDF5 store that the parsed tables will be written to

In [7]:
# Output store, opened in write mode. Each parsed table pair is stored under
# keys of the form
#   <mat_1>/<rx_1>/<mat_2>/<rx_2>/std_dev   -> standard-deviation table
#   <mat_1>/<rx_1>/<mat_2>/<rx_2>/corr      -> correlation matrix
h = pd.HDFStore('scale_cov_252.h5', mode='w')

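Optionally turn off warnings: the HDF5 keys used below are built from material and reaction numbers, which are not valid Python identifiers, so a naming warning would otherwise be raised for every table stored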

In [8]:
import warnings
warnings.filterwarnings('ignore')

Parse the tables

In [9]:
# Progress indicator: one tick per covariance table written to the store.
prog_bar = FloatProgress(min=0, max=4563 - 1) # number of cov matrix in scale data - 1
display(prog_bar)

for line in f:
    # this outer loop keeps things going until the EOF
    if 'Material' not in line:
        # only a 'Material ...' title line starts a new table pair
        continue

    mat_id_1, rx_1, mat_id_2, rx_2 = parse_material_line(line)

    # Standard-deviation table: one header line followed by _COV_GROUPS data
    # lines. Read them all, then drop the header before parsing.
    cov_lines = [f.readline() for _ in range(_COV_GROUPS + 1)]
    str_line = StringIO(' '.join(cov_lines[1:]))
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True
    std_dev_df = pd.read_csv(str_line, sep=r'\s+', header=None)
    std_dev_df.columns = std_dev_header
    std_dev_df = std_dev_df.set_index('groups')

    f.readline() #   *** correlation matrix ***
    f.readline() #  column=material 1     row=material 2
    corr_mat_df = read_corr_mat(f)

    # Both tables of a pair share the same key prefix in the store.
    key_prefix = '{0}/{1}/{2}/{3}'.format(mat_id_1, rx_1, mat_id_2, rx_2)
    h.put(key_prefix + '/std_dev', std_dev_df)
    h.put(key_prefix + '/corr', corr_mat_df)
    prog_bar.value += 1


In [10]:
# Release the HDF5 store first, then the input text file.
for handle in (h, f):
    handle.close()