Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue #15 by changing the regexp in _MULTI_QUOTE_HDR_REGEX #48

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions email_reply_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ class EmailMessage(object):
"""

SIG_REGEX = re.compile(r'(--|__|-\w)|(^Sent from my (\w+\s*){1,3})')
QUOTE_HDR_REGEX = re.compile('On.*wrote:$')
# Raw string so the regex escapes (\s) reach `re` untouched instead of being
# parsed as invalid string escapes. The negative lookahead keeps the header
# body from running across a second " On " before "wrote:".
QUOTE_HDR_REGEX = re.compile(r'On\s((?!\sOn\s).)+wrote:\s*$')
QUOTED_REGEX = re.compile(r'(>+)')
HEADER_REGEX = re.compile(r'^\*?(From|Sent|To|Subject):\*? .+')
_MULTI_QUOTE_HDR_REGEX = r'(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)'
_MULTI_QUOTE_HDR_REGEX =r'(\bOn\s((?!\bOn\b).){2,100}wrote:)'
MULTI_QUOTE_HDR_REGEX = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL | re.MULTILINE)
MULTI_QUOTE_HDR_REGEX_MULTILINE = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL)

Expand Down
15 changes: 15 additions & 0 deletions test/emails/email_issue_15.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
And this is a response to the test response.
On function On wrote:

On Fri, Jan 5, 2018 at 12:34 PM, Adam Taylor <sampleaddress@example.com>
wrote:

> And this is a test response.
>
> On Fri, Jan 5, 2018 at 12:34 PM, Adam Taylor <sampleaddress@example.com>
> wrote:
>
>> This is just a test.
>>
>
>
38 changes: 37 additions & 1 deletion test/test_email_reply_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from email_reply_parser import EmailReplyParser
from email_reply_parser import EmailMessage



class EmailMessageTest(unittest.TestCase):
Expand Down Expand Up @@ -196,6 +198,40 @@ def get_email(self, name):
text = f.read()
return EmailReplyParser.read(text)


def test_issue_15(self):
    """Check the reply parsed from the ``email_issue_15`` fixture.

    The reply must be exactly the two-line text below, which includes the
    ``On function On wrote:`` line.
    """
    expected_reply = (
        'And this is a response to the test response.\n'
        'On function On wrote:'
    )
    parsed = self.get_email("email_issue_15")
    self.assertEqual(expected_reply, parsed.reply)

def test_multi_quater_reg(self):
    """Compare the legacy multi-line quote-header pattern with the current one.

    The sample text holds two "On ... wrote:" headers: an outer one
    (12:39 PM) and a quoted, nested one (12:34 PM). The test asserts that
    the legacy pattern captures the nested header while
    ``EmailMessage._MULTI_QUOTE_HDR_REGEX`` captures the outer one, both on
    the bare sample and when it is preceded by text containing extra "On"
    words.
    """
    flags = re.DOTALL | re.MULTILINE

    outer_header = (
        'On Fri, Jan 5, 2018 at 12:39 PM, Adam Taylor <sampleaddress@example.com>\n'
        'wrote:'
    )
    nested_header = (
        'On Fri, Jan 5, 2018 at 12:34 PM, Adam Taylor <sampleaddress@example.com>\n'
        '> wrote:'
    )
    sample = outer_header + ' > And this is a test response.\n>\n> ' + nested_header

    # Legacy pattern, kept verbatim: it is expected to pick the nested
    # (12:34 PM) header rather than the outer one.
    legacy_regex = re.compile(
        r'(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)', flags
    )
    self.assertEqual(nested_header, legacy_regex.search(sample).group(1))

    # Current pattern taken from the class under test: it is expected to
    # pick the outer (12:39 PM) header.
    current_regex = re.compile(EmailMessage._MULTI_QUOTE_HDR_REGEX, flags)
    self.assertEqual(outer_header, current_regex.search(sample).group(1))

    # Prefixes with stray "On" words ahead of the real header: first the
    # greedy case, then the "On, On" case.
    noisy_prefixes = (
        'On functiOn On wrote:\n',
        ' On functiOn with an other On, On wrote: ',
    )
    for prefix in noisy_prefixes:
        haystack = ' '.join([prefix, sample])
        self.assertEqual(nested_header, legacy_regex.search(haystack).group(1))
        self.assertEqual(outer_header, current_regex.search(haystack).group(1))

if __name__ == '__main__':
unittest.main()