In [None]:
fields = ['ITERATION', 'ENERGY', '1e-ENERGY', '2e-ENERGY', 'NORM[dD(SAO)]', 'TOL',
          'Exc', 'N', 'Norm[diis error]']

from fortranformat import FortranRecordReader

FortranRecordReader('(I4,F18.11,F17.7,F17.7,E13.3,E10.2)').read(lines[4])

In [None]:
raw = '''
 
                                              current damping :  0.650
 ITERATION  ENERGY          1e-ENERGY        2e-ENERGY     NORM[dD(SAO)]  TOL
   5  -800.90831057908    -3344.4859460     1407.4390040    0.316D+00 0.167D-10
                            Exc =  -109.654430018853     N = 124.00120409    
          Norm of current diis error: 0.35473    
          max. resid. norm for Fia-block=  1.632D-02 for orbital     56a         
          max. resid. fock norm         =  5.640D-02 for orbital    722a         
          irrep a   : virtual orbitals shifted by    0.09398
 mo-orthogonalization: Cholesky decomposition
          Delta Eig. =    13.9367250550 eV 
 
'''

In [2]:
from io import StringIO

# Read the whole output file and keep it in an in-memory text buffer.
# NOTE: this rebinds `raw` (a plain string in an earlier cell) to a StringIO;
# the cell that later calls raw.seek() / raw.getvalue() relies on this binding.
with open('data/aoforce.out') as f:
    raw = StringIO(f.read())

In [None]:
# Banner that introduces the normal-mode table. Its title text is the same
# text that VibrSpectrum._anchors['MAIN'] searches for.
# NOTE(review): anchor_txt is not referenced by any other cell visible here.
anchor_txt = '''

          ---------------------------------------------------
          NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))
          ---------------------------------------------------

'''

In [19]:
from io import StringIO
from pyparsing import Word, nums, Literal, LineStart, LineEnd, OneOrMore
from fortranformat import FortranRecordReader
import pandas as pd

In [33]:
class FortranLineParser(object):
    """Callable that decodes one line of text with a Fortran format string.

    Parameters
    ----------
    pattern : str
        Fortran format specification passed to ``FortranRecordReader``.
    name : str, optional
        Label stored on the instance as ``name``; the parser itself does
        not use it.
    after_read_hook : callable, optional
        Called with the values decoded by the reader; whatever it returns
        becomes the result of the call. When omitted, the decoded values
        are returned unchanged.
    """

    def __init__(self, pattern, name=None, after_read_hook=None):
        self._reader = FortranRecordReader(pattern)
        self.name = name
        self._after_read_hook = after_read_hook

    def __call__(self, line):
        """Decode ``line`` and return the (optionally post-processed) values."""
        data = self._reader.read(line)
        # The hook is optional: calling the default ``None`` would raise
        # TypeError, so fall back to the raw decoded values.
        if self._after_read_hook is None:
            return data
        return self._after_read_hook(data)

In [34]:
class BaseParser(object):
    """Sequential reader over a text held in a ``StringIO`` buffer.

    The buffer position acts as a cursor: ``_scan_forward`` moves it to the
    next occurrence of an anchor and ``_next_content_line`` consumes lines
    from it.

    Parameters
    ----------
    raw : str
        Text to parse; it is wrapped in a ``StringIO`` stored as ``self.raw``.
    """

    def __init__(self, raw):
        self.raw = StringIO(raw)

    def _scan_forward(self, anchor, before_match=False):
        """Move the cursor to the first match of ``anchor`` after the cursor.

        Parameters
        ----------
        anchor : object with ``scanString(text)``
            Must yield ``(tokens, start, end)`` triples whose offsets are
            relative to the text handed in (e.g. a pyparsing expression).
        before_match : bool, optional
            If true the cursor is placed at the start of the match,
            otherwise just behind its end.

        Raises
        ------
        RuntimeError
            If the anchor does not occur in the remaining text. The cursor
            is restored to where it was before the call.
        """
        loc = self.raw.tell()
        # NOTE(review): pyparsing's scanString expands tabs unless
        # parseWithTabs() was called on the expression, which would shift
        # the offsets for text containing tabs -- confirm inputs are tab-free.
        scanner = anchor.scanString(self.raw.read())
        try:
            match, start, end = next(scanner)
        except StopIteration:
            # Do not leak StopIteration (it silently ends an enclosing
            # generator or loop) and do not leave the cursor at EOF.
            self.raw.seek(loc)
            raise RuntimeError('anchor not found')
        finally:
            scanner.close()
        # Offsets are relative to the text read from ``loc`` onwards.
        self.raw.seek(loc + (start if before_match else end))

    def _next_content_line(self, skip=0):
        """Return the next line that is not blank, line terminator included.

        Parameters
        ----------
        skip : int, optional
            Number of non-blank lines to discard before returning one.

        Raises
        ------
        RuntimeError
            If the end of the buffer is reached first.
        """
        # readline() returns '' only at end of file; compare by value, not
        # by identity.
        for line in iter(self.raw.readline, ''):
            if not line.strip():
                continue
            if skip > 0:
                skip -= 1
                continue
            return line
        raise RuntimeError('EOF reached')

In [35]:
class VibrSpectrum(BaseParser):
    """Parser for the "NORMAL MODES and VIBRATIONAL FREQUENCIES" table.

    The table is read in blocks of up to six mode columns. For every mode
    the mode number, the frequency and the IR flag are collected into a
    ``pandas.DataFrame`` with the columns ``MODE``, ``FREQUENCY`` and ``IR``.

    Parameters
    ----------
    raw : str
        Full text of the output to parse.
    natoms : int
        Number of atoms; ``3 * natoms`` modes are read.
    """

    # pyparsing expressions that locate the table banner and the header row
    # ("mode  1  2 ...") of each block.
    _anchors = {
        'MAIN': LineStart() + Word('-') + Literal('NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))') + Word('-') + LineEnd(),
        'MODE': LineStart() + Literal('mode') + OneOrMore(Word(nums)) + LineEnd(),
    }

    # Fixed-width readers: a 20-character row label followed by six
    # 9-character columns.
    _parser = {
        'MODE': FortranRecordReader('(A20,6I9)'),
        'FREQUENCY': FortranRecordReader('(A20,6F9.2)'),
        'IR': FortranRecordReader('(A20,6A9)'),
    }

    def __init__(self, raw, natoms):
        # Reuse the base-class setup instead of duplicating it.
        BaseParser.__init__(self, raw)
        self.natoms = natoms
        self.nmodes = natoms * 3
        self._data = pd.DataFrame(columns=['MODE', 'FREQUENCY', 'IR'])
        self._parse(self._data)

    @property
    def data(self):
        """The parsed table as a ``pandas.DataFrame`` (one row per mode)."""
        return self._data

    def _chunks(self, sequence, n):
        """Yield successive n-sized chunks from sequence."""
        for i in range(0, len(sequence), n):
            yield sequence[i:i+n]

    def _parse(self, df):
        """Read all mode blocks and store the values in ``df``.

        ``df`` is filled in place, column by column, and also returned.
        """
        NCOLS = 6  # mode columns per block, matching the 6-column readers
        self._scan_forward(VibrSpectrum._anchors['MAIN'])
        data = {
            'MODE': [],
            'FREQUENCY': [],
            'IR': []
        }
        for chunk in self._chunks(range(self.nmodes), NCOLS):
            modes, frequencies, ir = self._parse_block(chunk)
            data['MODE'].extend(modes)
            data['FREQUENCY'].extend(frequencies)
            data['IR'].extend(ir)
        # Diagnostic output, kept so the recorded cell output stays valid.
        print(data['MODE'])
        print(data['FREQUENCY'])
        # Store the values so they are not lost once the diagnostics above
        # have been printed.
        for column in ('MODE', 'FREQUENCY', 'IR'):
            df[column] = data[column]
        return df

    def _parse_block(self, mode_indices):
        """Parse the next block and return ``(modes, frequencies, ir)``.

        ``mode_indices`` holds the indices of the modes expected in this
        block; only that many columns are returned, so a final block with
        fewer than six modes does not contribute padding values.
        """
        ncols = len(mode_indices)
        self._scan_forward(VibrSpectrum._anchors['MODE'], before_match=True)
        modes = self._parser['MODE'].read(self._next_content_line())
        frequencies = self._parser['FREQUENCY'].read(self._next_content_line())
        # skip=1 jumps over one non-blank row between "frequency" and "IR"
        # (the "symmetry" row in the sample block shown in this notebook).
        ir_fields = self._parser['IR'].read(self._next_content_line(skip=1))
        ir = [field.strip() for field in ir_fields]
        # Element 0 of every record is the 20-character row label.
        last = ncols + 1
        return modes[1:last], frequencies[1:last], ir[1:last]
        

In [36]:
# Parse the loaded output; 24 is passed as `natoms`, from which VibrSpectrum
# derives 3 * 24 = 72 modes. `raw` must be the StringIO created when the
# file was loaded (getvalue() returns the whole buffer text).
raw.seek(0)
VibrSpectrum(raw.getvalue(), 24)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.16, 75.53, 126.0, 171.29, 211.05, 242.74, 295.71, 333.81, 370.87, 398.44, 447.83, 460.96, 523.95, 546.84, 563.81, 619.54, 684.43, 710.89, 714.52, 746.39, 757.28, 805.3, 818.05, 835.24, 862.9, 866.23, 885.6, 897.48, 974.85, 1011.22, 1032.92, 1076.42, 1081.26, 1112.5, 1130.46, 1149.67, 1197.74, 1214.75, 1239.26, 1282.01, 1305.85, 1329.32, 1345.01, 1352.74, 1363.87, 1412.14, 1426.91, 1438.7, 1443.36, 1481.8, 1544.63, 1570.58, 1615.63, 1616.79, 2944.5, 2965.81, 2987.14, 3011.15, 3098.67, 3103.94, 3114.37, 3126.34, 3175.83, 3388.75, 3472.13, 3588.63]


<__main__.VibrSpectrum at 0x7f41b3e7c668>

In [None]:
# Fixture for the experiments below: one junk line followed by a single
# six-column block of the normal-mode table (modes 61-66).
sample = '''
adfasdfasdfasdfasdfasdf fasdf sdf dfasdf dfasd sdfasdf fasdf f asdfsadfdsf


       mode              61       62       63       64       65       66

     frequency        2944.50  2965.81  2987.14  3011.15  3098.67  3103.94

     symmetry            a        a        a        a        a        a   

        IR               YES      YES      YES      -        YES      YES
|dDIP/dQ|   (a.u.)     0.0044   0.0044   0.0040   0.0051   0.0013   0.0012
intensity (km/mol)      34.90    34.99    28.78    47.10     2.92     2.56
intensity (  %   )      17.49    17.53    14.42    23.60     1.46     1.28
 
       RAMAN             YES      YES      YES      YES      YES      YES

'''

# readlines() keeps the line terminators. Because the literal starts with a
# newline, lines[4] is the "mode" row and lines[6] the "frequency" row.
raw_block = StringIO(sample)
lines = raw_block.readlines()

In [None]:
# Anchor for a "mode" header row that carries exactly six integer labels.
mode_anchor = LineStart() + Literal('mode') + Word(nums) * 6 + LineEnd()

In [None]:
# Look for the mode header anywhere in the sample; the cell displays the hits.
mode_anchor.searchString(sample)

In [None]:
# Fixed-width readers for single table rows: a 20-character label followed
# by six 9-character columns (floats for frequencies, text otherwise).
# NOTE(review): `frequncy_line` is a misspelling of "frequency"; renaming it
# also requires updating the cell that calls it.
frequncy_line = FortranRecordReader('(A20,6F9.2)')
symmetry_line = FortranRecordReader('(A20,6A9)')
IR_line = FortranRecordReader('(A20,6A9)')

In [None]:
# lines[6] is the "frequency" row of `sample` (see the readlines() cell).
frequncy_line.read(lines[6])

In [None]:
import re

In [None]:
# Description of one table row: its label, the regex for a single value,
# the number of value columns and the converter applied to each value.
# (Tuple unpacking; a comma-separated chain of assignments is a SyntaxError.)
label, pattern, n, converter = 'mode', r'\d+', 6, int

# Expands to r'^\s+mode((\s+\d+){6})\s+$' for the values above.
row_regex = r'^\s+%s((\s+%s){%d})\s+$' % (label, pattern, n)
match = re.search(row_regex, sample, re.MULTILINE)
# Group 1 holds all value columns of the row as one whitespace-separated string.
[converter(mode_id) for mode_id in match.group(1).split()]

In [None]:
# Find the "frequency" row (six numeric columns) and turn every column
# into a float; group 1 is the run of all six columns.
match = re.search(r'^\s+frequency((\s+[\d.]+){6})\s+$', sample, re.MULTILINE)
list(map(float, match.group(1).split()))

In [None]:
# Every column is "whitespace, then YES or -". The (?:...) group is needed:
# without it the alternation binds as (\s+YES) | (-), so a "-" column that
# is preceded by spaces can never match and the search yields None.
match = re.search(r'^\s+IR((\s+(?:YES|-)){6})\s*$', sample, re.MULTILINE)
# Group 1 holds the six flags as one whitespace-separated string.
match.group(1).split()

In [None]:
from pyparsing import Word, nums, Literal, Or

In [None]:
# Run of digits, converted to int while parsing.
Integer = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
# Run of digits and dots; no conversion is attached, so it stays a string.
Float = Word(nums + '.')
# A single activity flag: either "-" or "YES".
YesNo = Literal('-') ^ Literal('YES')

In [None]:
# "IR" row: the label followed by six YES/- flags.
ir_activity = Literal('IR') + (YesNo * 6)
# "mode" row: the label followed by six integers; the first three are given
# the results name 'is', the last three 'iis'.
mode_label = Literal('mode') + (Integer('is') * 3) + (Integer('iis') * 3)

In [None]:
# scanString returns a generator; nothing is scanned until it is advanced.
res = ir_activity.scanString(sample)

In [None]:
# Advance to the first hit; raises StopIteration once `res` is exhausted,
# so re-running this cell alone eventually fails.
next(res)