diff --git a/mt940/models.py b/mt940/models.py index 3f158d4..0bc3944 100644 --- a/mt940/models.py +++ b/mt940/models.py @@ -315,6 +315,49 @@ def strip(cls, lines): if line: yield line + @classmethod + def normalize_tag_id(cls, tag_id): + # Since non-digit tags exist, make the conversion optional + if tag_id.isdigit(): + tag_id = int(tag_id) + + return tag_id + + @classmethod + def sanatize_tag_id_matches(cls, matches): + i_next = 0 + for i, match in enumerate(matches): + # match was rejected + if i < i_next: + continue + + # next match would be + i_next = i + 1 + + # normalize tag id + tag_id = cls.normalize_tag_id(match.group('tag')) + + # tag should be known + assert tag_id in mt940.tags.TAG_BY_ID, 'Unknown tag %r ' \ + 'in line: %r' % (tag_id, match.group(0)) + + # special treatment for long tag content with possible + # bad line wrap which produces tag_id like line beginnings + # seen with :86: tag + if tag_id == mt940.tags.Tags.TRANSACTION_DETAILS.value.id: + # search subsequent tags for unknown tag ids + # these lines likely belong to the previous tag + for j in range(i_next, len(matches)): + next_tag_id = cls.normalize_tag_id(matches[j].group('tag')) + if next_tag_id in mt940.tags.TAG_BY_ID: + # this one is the next valid match + i_next = j + break + # else reject match + + # a valid match + yield match + def parse(self, data): '''Parses mt940 data, expects a string with data @@ -334,16 +377,13 @@ def parse(self, data): re.MULTILINE) matches = list(tag_re.finditer(data)) - for i, match in enumerate(matches): - tag_id = match.group('tag') - # Since non-digit tags exist, make the conversion optional - - if tag_id.isdigit(): - tag_id = int(tag_id) + # identify valid matches + valid_matches = list(self.sanatize_tag_id_matches(matches)) - assert tag_id in mt940.tags.TAG_BY_ID, 'Unknown tag %r ' \ - 'in line: %r' % (tag_id, match.group(0)) + for i, match in enumerate(valid_matches): + tag_id = self.normalize_tag_id(match.group('tag')) + # get tag instance corresponding to tag id tag = mt940.tags.TAG_BY_ID.get(match.group('full_tag')) \ or mt940.tags.TAG_BY_ID[tag_id] @@ -351,8 +391,9 @@ def parse(self, data): # regex matches have a `end()` and `start()` to indicate the start # and end index of the match. - if matches[i + 1:]: - tag_data = data[match.end():matches[i + 1].start()].strip() + if valid_matches[i + 1:]: + tag_data = \ + data[match.end():valid_matches[i + 1].start()].strip() else: tag_data = data[match.end():].strip() diff --git a/tests/self-provided/transaction_details_wrapped.sta b/tests/self-provided/transaction_details_wrapped.sta new file mode 100644 index 0000000..b450135 --- /dev/null +++ b/tests/self-provided/transaction_details_wrapped.sta @@ -0,0 +1,11 @@ + +:20:STARTUMSE +:25:12345678/1020304050 +:28C:00000/001 +:60F:C160229EUR1200,00 +:61:1602300301DR6,00N024NONREF +:86:805?00ENTGELTABSCHLUSS?106666?20Pauschalen?3012345678?1122334 +45566?602017-01-01T13 +:12:11 +:62F:C160301EUR1194,00 +- diff --git a/tests/self-provided/transaction_details_wrapped.yml b/tests/self-provided/transaction_details_wrapped.yml new file mode 100644 index 0000000..7564caf --- /dev/null +++ b/tests/self-provided/transaction_details_wrapped.yml @@ -0,0 +1,91 @@ + &id001 !!python/object:mt940.models.Transactions +data: + account_identification: 12345678/1020304050 + final_closing_balance: !!python/object:mt940.models.Balance + amount: !!python/object:mt940.models.Amount + amount: !!python/object/apply:decimal.Decimal ['1194.00'] + currency: EUR + date: !!python/object/apply:mt940.models.Date + - !!binary | + B+ADAQ== + status: C + final_opening_balance: !!python/object:mt940.models.Balance + amount: !!python/object:mt940.models.Amount + amount: !!python/object/apply:decimal.Decimal ['1200.00'] + currency: EUR + date: !!python/object/apply:mt940.models.Date + - !!binary | + B+ACHQ== + status: C + sequence_number: '001' + statement_number: '00000' + transaction_reference: STARTUMSE +processors: + post_account_identification: [] + post_available_balance: [] + post_closing_balance: [] + post_date_time_indication: [] + post_final_closing_balance: [] + post_final_opening_balance: [] + post_floor_limit_indicator: [] + post_forward_available_balance: [] + post_intermediate_closing_balance: [] + post_intermediate_opening_balance: [] + post_non_swift: [] + post_opening_balance: [] + post_related_reference: [] + post_statement: [!!python/name:mt940.processors.date_cleanup_post_processor ''] + post_statement_number: [] + post_sum_credit_entries: [] + post_sum_debit_entries: [] + post_transaction_details: [!!python/name:mt940.processors.transaction_details_post_processor ''] + post_transaction_reference_number: [] + pre_account_identification: [] + pre_available_balance: [] + pre_closing_balance: [] + pre_date_time_indication: [] + pre_final_closing_balance: [] + pre_final_opening_balance: [] + pre_floor_limit_indicator: [] + pre_forward_available_balance: [] + pre_intermediate_closing_balance: [] + pre_intermediate_opening_balance: [] + pre_non_swift: [] + pre_opening_balance: [] + pre_related_reference: [] + pre_statement: [!!python/name:mt940.processors.date_fixup_pre_processor ''] + pre_statement_number: [] + pre_sum_credit_entries: [] + pre_sum_debit_entries: [] + pre_transaction_details: [] + pre_transaction_reference_number: [] +transactions: +- !!python/object:mt940.models.Transaction + data: + additional_purpose: '2017-01-01T13:12:11' + amount: !!python/object:mt940.models.Amount + amount: !!python/object/apply:decimal.Decimal ['-6.00'] + currency: EUR + applicant_bin: '12345678' + applicant_iban: null + applicant_name: null + bank_reference: null + currency: EUR + customer_reference: NONREF + date: !!python/object/apply:mt940.models.Date + - !!binary | + B+ACHQ== + entry_date: !!python/object/apply:mt940.models.Date + - !!binary | + B+ADAQ== + extra_details: '' + funds_code: R + id: N024 + posting_text: ENTGELTABSCHLUSS + prima_nota: '6666' + purpose: Pauschalen + recipient_name: null + return_debit_notes: null + status: D + transaction_code: '805' + transactions: *id001