In [None]:
# Labels that appear in the iteration record of the sample output text: the
# six header-row columns plus the quantities printed on the follow-up lines.
fields = ['ITERATION', 'ENERGY', '1e-ENERGY', '2e-ENERGY', 'NORM[dD(SAO)]', 'TOL',
          'Exc', 'N', 'Norm[diis error]']

from fortranformat import FortranRecordReader

# Decode one fixed-width record with a Fortran edit descriptor: one integer
# field, three F-format reals and two E-format reals.
# NOTE(review): `lines` is not defined in or before this cell -- presumably a
# line-split copy of an output sample; verify on a fresh kernel.
FortranRecordReader('(I4,F18.11,F17.7,F17.7,E13.3,E10.2)').read(lines[4])

In [None]:
# Inline fixture: one iteration record copied from an output file (header row,
# value row and the follow-up lines printed underneath it).
raw = '''
 
                                              current damping :  0.650
 ITERATION  ENERGY          1e-ENERGY        2e-ENERGY     NORM[dD(SAO)]  TOL
   5  -800.90831057908    -3344.4859460     1407.4390040    0.316D+00 0.167D-10
                            Exc =  -109.654430018853     N = 124.00120409    
          Norm of current diis error: 0.35473    
          max. resid. norm for Fia-block=  1.632D-02 for orbital     56a         
          max. resid. fock norm         =  5.640D-02 for orbital    722a         
          irrep a   : virtual orbitals shifted by    0.09398
 mo-orthogonalization: Cholesky decomposition
          Delta Eig. =    13.9367250550 eV 
 
'''

In [1]:
from io import StringIO

# Read the whole aoforce output into an in-memory text stream; the file handle
# is closed when the `with` block exits.
# NOTE(review): this rebinds `raw`, which an earlier cell assigns a plain str.
with open('data/aoforce.out') as f:
    raw = StringIO(f.read())

In [5]:
# Literal copy of the three-line banner (dashes, title, dashes) that the
# VibrSpectrum.anchor expression is written to match.  Not referenced by any
# other cell visible in this notebook.
anchor_txt = '''

          ---------------------------------------------------
          NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))
          ---------------------------------------------------

'''

In [100]:
from io import StringIO
from pyparsing import Word, nums, Literal, LineStart, LineEnd, OneOrMore
from fortranformat import FortranRecordReader

In [98]:
class VibrSpectrum(object):
    """Walk through the normal-mode section of a vibrational-frequency output.

    The text is wrapped in a ``StringIO`` stream.  Construction moves the
    stream past the "NORMAL MODES and VIBRATIONAL FREQUENCIES" banner and then
    looks up one ``mode ...`` header row per block of 6 modes, assuming
    ``3 * natoms`` modes in total.  Reading the data rows of each block is not
    implemented yet.

    raw     -- complete output text (str) to scan.
    natoms  -- number of atoms; the number of modes is ``natoms * 3``.
    verbose -- when true (the default) every matched anchor is printed.

    Raises ValueError if the banner or an expected header row is missing.
    """

    # Banner: a run of dashes, the section title, and another run of dashes.
    anchor = LineStart() + Word('-') + Literal('NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))') + Word('-') + LineEnd()
    # Header row of one table block: the word "mode" followed by mode numbers.
    anchor_mode = LineStart() + Literal('mode') + OneOrMore(Word(nums)) + LineEnd()

    # Class-level default so that `_scan_forward` also works on an instance
    # that did not go through `__init__`.
    verbose = True

    def __init__(self, raw, natoms, verbose=True):
        self.raw = StringIO(raw)
        self.natoms = natoms
        self.nmodes = natoms * 3
        self.verbose = verbose
        self._parse()

    def _scan_forward(self, anchor, before_match=False):
        """Move the stream to the next occurrence of ``anchor``.

        anchor       -- pyparsing expression to search for, starting at the
                        current stream position.
        before_match -- if true, stop at the start of the match; otherwise
                        stop right after it.

        Raises ValueError (with the stream position restored) when the anchor
        does not occur in the remaining text.
        """
        loc = self.raw.tell()
        # NOTE(review): the offsets reported by scanString refer to the scanned
        # string, and pyparsing expands tabs before scanning by default, so
        # the seek arithmetic below presumably requires tab-free text --
        # confirm against real output files.
        scanner = anchor.scanString(self.raw.read())
        try:
            # Use a default instead of a bare next(): a missing anchor would
            # otherwise surface as an unexplained StopIteration.
            found = next(scanner, None)
        finally:
            scanner.close()
        if found is None:
            # read() left the stream at EOF; put it back where it was.
            self.raw.seek(loc)
            raise ValueError(
                'anchor not found after offset {}: {}'.format(loc, anchor))
        match, start, end = found
        if self.verbose:
            print(match)
        # start/end are relative to the text read from `loc` onwards.
        self.raw.seek(loc + start if before_match else loc + end)

    def _chunks(self, sequence, n):
        """Yield successive n-sized chunks from sequence."""
        for i in range(0, len(sequence), n):
            yield sequence[i:i+n]

    def _parse(self):
        """Skip past the banner, then visit one header row per block of modes."""
        NCOLS = 6  # number of modes listed side by side in one table block
        self._scan_forward(VibrSpectrum.anchor)
        for chunk in self._chunks(range(self.nmodes), NCOLS):
            self._parse_block(chunk)

    def _parse_block(self, mode_indices):
        """Position the stream at the next ``mode`` header row.

        ``mode_indices`` (the zero-based mode indices expected in this block)
        is accepted but not used yet; the rows of the block are not read.
        """
        self._scan_forward(VibrSpectrum.anchor_mode, before_match=True)
        

In [99]:
# Rewind the stream (getvalue() returns the full buffer regardless of the
# current position, so this only resets `raw` for later reads).
raw.seek(0)
# Scan the whole output text for 24 atoms, i.e. 72 modes in 12 blocks of 6.
VibrSpectrum(raw.getvalue(), 24)

['---------------------------------------------------', 'NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))', '---------------------------------------------------', '\n']
['mode', '1', '2', '3', '4', '5', '6', '\n']
['mode', '7', '8', '9', '10', '11', '12', '\n']
['mode', '13', '14', '15', '16', '17', '18', '\n']
['mode', '19', '20', '21', '22', '23', '24', '\n']
['mode', '25', '26', '27', '28', '29', '30', '\n']
['mode', '31', '32', '33', '34', '35', '36', '\n']
['mode', '37', '38', '39', '40', '41', '42', '\n']
['mode', '43', '44', '45', '46', '47', '48', '\n']
['mode', '49', '50', '51', '52', '53', '54', '\n']
['mode', '55', '56', '57', '58', '59', '60', '\n']
['mode', '61', '62', '63', '64', '65', '66', '\n']
['mode', '67', '68', '69', '70', '71', '72', '\n']


<__main__.VibrSpectrum at 0x7fcdca68eef0>

In [68]:
# Inline fixture: a line of filler text followed by one block of the mode
# table (mode header, frequency, symmetry, IR and RAMAN rows).
sample = '''
adfasdfasdfasdfasdfasdf fasdf sdf dfasdf dfasd sdfasdf fasdf f asdfsadfdsf


       mode              61       62       63       64       65       66

     frequency        2944.50  2965.81  2987.14  3011.15  3098.67  3103.94

     symmetry            a        a        a        a        a        a   

        IR               YES      YES      YES      -        YES      YES
|dDIP/dQ|   (a.u.)     0.0044   0.0044   0.0040   0.0051   0.0013   0.0012
intensity (km/mol)      34.90    34.99    28.78    47.10     2.92     2.56
intensity (  %   )      17.49    17.53    14.42    23.60     1.46     1.28
 
       RAMAN             YES      YES      YES      YES      YES      YES

'''

# Keep the fixture as a list of lines (newlines included) for index-based
# access; index 0 is the empty line right after the opening quotes.
raw_block = StringIO(sample)
lines = raw_block.readlines()

In [76]:
# Header-row pattern with exactly six mode numbers on the line.
mode_anchor = LineStart() + Literal('mode') + Word(nums) * 6 + LineEnd()

In [77]:
# Collect every occurrence of the header-row pattern in the fixture.
mode_anchor.searchString(sample)

([(['mode', '61', '62', '63', '64', '65', '66', '\n'], {})], {})

In [41]:
# Fixed-width readers for the table rows: a 20-character label column followed
# by six 9-character fields (reals with two decimals, or text).
# NOTE(review): `frequncy_line` is misspelled ("frequency"); the name is used
# by a later cell, so both places have to be renamed together.
frequncy_line = FortranRecordReader('(A20,6F9.2)')
symmetry_line = FortranRecordReader('(A20,6A9)')
# Same field layout as symmetry_line.
IR_line = FortranRecordReader('(A20,6A9)')

In [42]:
# lines[6] is the "frequency" row of the sample fixture.
frequncy_line.read(lines[6])

['     frequency      ', 2944.5, 2965.81, 2987.14, 3011.15, 3098.67, 3103.94]

In [5]:
import re

In [15]:
# Description of one table row: label text, regex for a single field, number
# of fields, and the converter applied to each field.
label, pattern, n, converter = 'mode', r'\d+', 6, int

# Build the row regex from the description above; for the values given it is
# r'^\s+mode((\s+\d+){6})\s+$'.  Group 1 holds all fields of the row.
row_regex = r'^\s+{}((\s+{}){{{}}})\s+$'.format(label, pattern, n)
match = re.search(row_regex, sample, re.MULTILINE)
[converter(mode_id) for mode_id in match.group(1).split()]

In [23]:
# Locate the "frequency" row; group 1 captures its six numeric fields.
match = re.search(r'^\s+frequency((\s+[\d.]+){6})\s+$', sample, flags=re.MULTILINE)
# Split the captured fields on whitespace and convert each one to float.
list(map(float, match.group(1).split()))

[2944.5, 2965.81, 2987.14, 3011.15, 3098.67, 3103.94]

In [40]:
# Locate the "IR" row; group 1 captures its six activity flags.  The
# alternation is wrapped in its own group so that the leading whitespace
# applies to both 'YES' and '-' (as `\s+(YES)|(-)` it only applied to 'YES',
# and the row with a '-' entry could not match).
match = re.search(r'^\s+IR((?:\s+(?:YES|-)){6})\s*$', sample, re.MULTILINE)
# Flags as strings, in column order: 'YES' or '-'.
match.group(1).split()

In [44]:
from pyparsing import Word, nums, Literal, Or

In [62]:
# Run of digits; the parse action converts the matched text to int.
Integer = Word(nums).setParseAction(lambda x: int(x[0]))
# Run of digits and dots; no parse action, so the matched text stays a string.
Float = Word(nums+'.')
# Either '-' or 'YES', as used in the IR / RAMAN rows of the sample.
YesNo = Or(['-', 'YES'])

In [68]:
# "IR" label followed by six YES / '-' flags.
ir_activity = Literal('IR') + (YesNo * 6)
# "mode" label followed by six integers; the first three are given the
# results name 'is' and the last three 'iis'.
mode_label = Literal('mode') + (Integer('is') * 3) + (Integer('iis') * 3)

In [77]:
# scanString returns a generator; nothing is scanned until it is advanced.
res = ir_activity.scanString(sample)

In [79]:
# Pull the first match: a (tokens, start, end) triple.
next(res)

((['IR', 'YES', 'YES', 'YES', '-', 'YES', 'YES'], {}), 236, 301)