diff --git a/email_reply_parser/__init__.py b/email_reply_parser/__init__.py index 063f65b..fbb24cb 100644 --- a/email_reply_parser/__init__.py +++ b/email_reply_parser/__init__.py @@ -36,18 +36,20 @@ class EmailMessage(object): """ An email message represents a parsed email body. """ - SIG_REGEX = re.compile(r'(--|__|-\w)|(^Sent from my (\w+\s*){1,3})') - QUOTE_HDR_REGEX = re.compile('On.*wrote:$') - QUOTED_REGEX = re.compile(r'(>+)') - HEADER_REGEX = re.compile(r'^\*?(From|Sent|To|Subject):\*? .+') - _MULTI_QUOTE_HDR_REGEX = r'(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)' + SIG_REGEX = re.compile(r"(--|__|-\w)|(^Sent from my (\w+\s*){1,3})") + BLACKBERRY_REGEX = re.compile(r"(--|__|-\w)|(^Sent with Black(\w+\s*){1,3})") + QUOTE_HDR_REGEX = re.compile("On.*wrote:$") + QUOTED_REGEX = re.compile(r"(>+)") + HEADER_REGEX = re.compile(r"^\*?(From|Sent|To|Subject):\*? .+") + ALT_HEADER_REGEX = re.compile(r"^\*{2}?(From|Sent|To|Subject):\*{2}? .+") + _MULTI_QUOTE_HDR_REGEX = r"(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)" MULTI_QUOTE_HDR_REGEX = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL | re.MULTILINE) MULTI_QUOTE_HDR_REGEX_MULTILINE = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL) def __init__(self, text): self.fragments = [] self.fragment = None - self.text = text.replace('\r\n', '\n') + self.text = text.replace("\r\n", "\n") self.found_visible = False def read(self): @@ -61,13 +63,16 @@ def read(self): is_multi_quote_header = self.MULTI_QUOTE_HDR_REGEX_MULTILINE.search(self.text) if is_multi_quote_header: - self.text = self.MULTI_QUOTE_HDR_REGEX.sub(is_multi_quote_header.groups()[0].replace('\n', ''), self.text) + self.text = self.MULTI_QUOTE_HDR_REGEX.sub( + is_multi_quote_header.groups()[0].replace("\n", ""), self.text + ) - # Fix any outlook style replies, with the reply immediately above the signature boundary line + # Fix any outlook style replies, with the reply + # immediately above the signature boundary line # See email_2_2.txt for an example - self.text = re.sub('([^\n])(?=\n ?[_-]{7,})', '\\1\n', self.text, re.MULTILINE) + self.text = re.sub("([^\n])(?=\n ?[_-]{7,})", "\\1\n", self.text, re.MULTILINE) - self.lines = self.text.split('\n') + self.lines = self.text.split("\n") self.lines.reverse() for line in self.lines: @@ -87,7 +92,7 @@ def reply(self): for f in self.fragments: if not (f.hidden or f.quoted): reply.append(f.content) - return '\n'.join(reply) + return "\n".join(reply) def _scan_line(self, line): """ Reviews each line in email message and determines fragment type @@ -96,16 +101,24 @@ def _scan_line(self, line): """ is_quote_header = self.QUOTE_HDR_REGEX.match(line) is not None is_quoted = self.QUOTED_REGEX.match(line) is not None - is_header = is_quote_header or self.HEADER_REGEX.match(line) is not None + is_alt_header = self.ALT_HEADER_REGEX.match(line) is not None + is_header = ( + is_quote_header + or self.HEADER_REGEX.match(line) is not None + or is_alt_header + ) if self.fragment and len(line.strip()) == 0: - if self.SIG_REGEX.match(self.fragment.lines[-1].strip()): + if self.SIG_REGEX.match( + self.fragment.lines[-1].strip() + ) or self.BLACKBERRY_REGEX.match(self.fragment.lines[-1].strip()): self.fragment.signature = True self._finish_fragment() - if self.fragment \ - and ((self.fragment.headers == is_header and self.fragment.quoted == is_quoted) or - (self.fragment.quoted and (is_quote_header or len(line.strip()) == 0))): + if self.fragment and ( + (self.fragment.headers == is_header and self.fragment.quoted == is_quoted) + or (self.fragment.quoted and (is_quote_header or len(line.strip()) == 0)) + ): self.fragment.lines.append(line) else: @@ -128,16 +141,20 @@ def _finish_fragment(self): if self.fragment: self.fragment.finish() if self.fragment.headers: - # Regardless of what's been seen to this point, if we encounter a headers fragment, - # all the previous fragments should be marked hidden and found_visible set to False. + # Regardless of what's been seen to this point, + # if we encounter a headers fragment, + # all the previous fragments should be marked hidden + # and found_visible set to False. self.found_visible = False for f in self.fragments: f.hidden = True if not self.found_visible: - if self.fragment.quoted \ - or self.fragment.headers \ - or self.fragment.signature \ - or (len(self.fragment.content.strip()) == 0): + if ( + self.fragment.quoted + or self.fragment.headers + or self.fragment.signature + or (len(self.fragment.content.strip()) == 0) + ): self.fragment.hidden = True else: @@ -164,7 +181,7 @@ def finish(self): belonging to fragment. """ self.lines.reverse() - self._content = '\n'.join(self.lines) + self._content = "\n".join(self.lines) self.lines = None @property diff --git a/email_reply_parser/version.py b/email_reply_parser/version.py index eaf6e6a..ed8f140 100644 --- a/email_reply_parser/version.py +++ b/email_reply_parser/version.py @@ -1 +1 @@ -VERSION = '0.5.11' +VERSION = "0.5.11" diff --git a/setup.py b/setup.py index 5d3078e..8ca6524 100644 --- a/setup.py +++ b/setup.py @@ -8,22 +8,25 @@ except ImportError: from distutils.core import setup -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'email_reply_parser')) -import version +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "email_reply_parser")) +try: + import version +except Exception as e: + raise setup( - name='email_reply_parser', + name="email_reply_parser", version=version.VERSION, - description='Email reply parser', - packages=['email_reply_parser'], - package_data={'email_reply_parser': ['../VERSION']}, - author='Royce Haynes', - author_email='royce.haynes@gmail.com', - url='https://github.com/zapier/email-reply-parser', - license='MIT', - test_suite='test', + description="Email reply parser", + packages=["email_reply_parser"], + package_data={"email_reply_parser": ["../VERSION"]}, + author="Royce Haynes", + author_email="royce.haynes@gmail.com", + url="https://github.com/zapier/email-reply-parser", + license="MIT", + test_suite="test", classifiers=[ - 'Topic :: Software Development', + "Topic :: Software Development", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", @@ -31,5 +34,5 @@ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", - ] -) \ No newline at end of file + ], +) diff --git a/test/test_email_reply_parser.py b/test/test_email_reply_parser.py index 8d2849b..a3e8434 100644 --- a/test/test_email_reply_parser.py +++ b/test/test_email_reply_parser.py @@ -5,43 +5,40 @@ import time -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) -from email_reply_parser import EmailReplyParser +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +try: + from email_reply_parser import EmailReplyParser +except Exception as e: + raise class EmailMessageTest(unittest.TestCase): def test_simple_body(self): - message = self.get_email('email_1_1') + message = self.get_email("email_1_1") self.assertEqual(3, len(message.fragments)) - self.assertEqual( - [False, True, True], - [f.signature for f in message.fragments] - ) - self.assertEqual( - [False, True, True], - [f.hidden for f in message.fragments] - ) + self.assertEqual([False, True, True], [f.signature for f in message.fragments]) + self.assertEqual([False, True, True], [f.hidden for f in message.fragments]) self.assertTrue("folks" in message.fragments[0].content) self.assertTrue("riak-users" in message.fragments[2].content) def test_reads_bottom_message(self): - message = self.get_email('email_1_2') + message = self.get_email("email_1_2") self.assertEqual(6, len(message.fragments)) self.assertEqual( [False, True, False, True, False, False], - [f.quoted for f in message.fragments] + [f.quoted for f in message.fragments], ) self.assertEqual( [False, False, False, False, False, True], - [f.signature for f in message.fragments] + [f.signature for f in message.fragments], ) self.assertEqual( [False, False, False, True, True, True], - [f.hidden for f in message.fragments] + [f.hidden for f in message.fragments], ) self.assertTrue("Hi," in message.fragments[0].content) @@ -50,152 +47,153 @@ def test_reads_bottom_message(self): self.assertTrue("riak-users" in message.fragments[5].content) def test_reads_inline_replies(self): - message = self.get_email('email_1_8') + message = self.get_email("email_1_8") self.assertEqual(7, len(message.fragments)) self.assertEqual( [True, False, True, False, True, False, False], - [f.quoted for f in message.fragments] + [f.quoted for f in message.fragments], ) self.assertEqual( [False, False, False, False, False, False, True], - [f.signature for f in message.fragments] + [f.signature for f in message.fragments], ) self.assertEqual( [False, False, False, False, True, True, True], - [f.hidden for f in message.fragments] + [f.hidden for f in message.fragments], ) def test_reads_top_post(self): - message = self.get_email('email_1_3') + message = self.get_email("email_1_3") self.assertEqual(5, len(message.fragments)) def test_multiline_reply_headers(self): - message = self.get_email('email_1_6') - self.assertTrue('I get' in message.fragments[0].content) - self.assertTrue('On' in message.fragments[1].content) + message = self.get_email("email_1_6") + self.assertTrue("I get" in message.fragments[0].content) + self.assertTrue("On" in message.fragments[1].content) def test_captures_date_string(self): - message = self.get_email('email_1_4') + message = self.get_email("email_1_4") - self.assertTrue('Awesome' in message.fragments[0].content) - self.assertTrue('On' in message.fragments[1].content) - self.assertTrue('Loader' in message.fragments[1].content) + self.assertTrue("Awesome" in message.fragments[0].content) + self.assertTrue("On" in message.fragments[1].content) + self.assertTrue("Loader" in message.fragments[1].content) def test_complex_body_with_one_fragment(self): - message = self.get_email('email_1_5') + message = self.get_email("email_1_5") self.assertEqual(1, len(message.fragments)) def test_verify_reads_signature_correct(self): - message = self.get_email('correct_sig') + message = self.get_email("correct_sig") self.assertEqual(2, len(message.fragments)) - self.assertEqual( - [False, False], - [f.quoted for f in message.fragments] - ) + self.assertEqual([False, False], [f.quoted for f in message.fragments]) - self.assertEqual( - [False, True], - [f.signature for f in message.fragments] - ) + self.assertEqual([False, True], [f.signature for f in message.fragments]) - self.assertEqual( - [False, True], - [f.hidden for f in message.fragments] - ) + self.assertEqual([False, True], [f.hidden for f in message.fragments]) - self.assertTrue('--' in message.fragments[1].content) + self.assertTrue("--" in message.fragments[1].content) def test_deals_with_windows_line_endings(self): - msg = self.get_email('email_1_7') + msg = self.get_email("email_1_7") - self.assertTrue(':+1:' in msg.fragments[0].content) - self.assertTrue('On' in msg.fragments[1].content) - self.assertTrue('Steps 0-2' in msg.fragments[1].content) + self.assertTrue(":+1:" in msg.fragments[0].content) + self.assertTrue("On" in msg.fragments[1].content) + self.assertTrue("Steps 0-2" in msg.fragments[1].content) def test_reply_is_parsed(self): - message = self.get_email('email_1_2') + message = self.get_email("email_1_2") self.assertTrue("You can list the keys for the bucket" in message.reply) def test_reply_from_gmail(self): - with open('test/emails/email_gmail.txt') as f: - self.assertEqual('This is a test for inbox replying to a github message.', - EmailReplyParser.parse_reply(f.read())) + with open("test/emails/email_gmail.txt") as f: + self.assertEqual( + "This is a test for inbox replying to a github message.", + EmailReplyParser.parse_reply(f.read()), + ) def test_parse_out_just_top_for_outlook_reply(self): - with open('test/emails/email_2_1.txt') as f: - self.assertEqual("Outlook with a reply", EmailReplyParser.parse_reply(f.read())) + with open("test/emails/email_2_1.txt") as f: + self.assertEqual( + "Outlook with a reply", EmailReplyParser.parse_reply(f.read()) + ) def test_parse_out_just_top_for_outlook_with_reply_directly_above_line(self): - with open('test/emails/email_2_2.txt') as f: - self.assertEqual("Outlook with a reply directly above line", EmailReplyParser.parse_reply(f.read())) + with open("test/emails/email_2_2.txt") as f: + self.assertEqual( + "Outlook with a reply directly above line", + EmailReplyParser.parse_reply(f.read()), + ) def test_parse_out_just_top_for_outlook_with_unusual_headers_format(self): - with open('test/emails/email_2_3.txt') as f: + with open("test/emails/email_2_3.txt") as f: self.assertEqual( "Outlook with a reply above headers using unusual format", - EmailReplyParser.parse_reply(f.read())) + EmailReplyParser.parse_reply(f.read()), + ) def test_sent_from_iphone(self): - with open('test/emails/email_iPhone.txt') as email: - self.assertTrue("Sent from my iPhone" not in EmailReplyParser.parse_reply(email.read())) + with open("test/emails/email_iPhone.txt") as email: + self.assertTrue( + "Sent from my iPhone" not in EmailReplyParser.parse_reply(email.read()) + ) def test_email_one_is_not_on(self): - with open('test/emails/email_one_is_not_on.txt') as email: + with open("test/emails/email_one_is_not_on.txt") as email: self.assertTrue( - "On Oct 1, 2012, at 11:55 PM, Dave Tapley wrote:" not in EmailReplyParser.parse_reply(email.read())) + "On Oct 1, 2012, at 11:55 PM, Dave Tapley wrote:" + not in EmailReplyParser.parse_reply(email.read()) + ) def test_partial_quote_header(self): - message = self.get_email('email_partial_quote_header') + message = self.get_email("email_partial_quote_header") self.assertTrue("On your remote host you can run:" in message.reply) self.assertTrue("telnet 127.0.0.1 52698" in message.reply) self.assertTrue("This should connect to TextMate" in message.reply) def test_email_headers_no_delimiter(self): - message = self.get_email('email_headers_no_delimiter') - self.assertEqual(message.reply.strip(), 'And another reply!') + message = self.get_email("email_headers_no_delimiter") + self.assertEqual(message.reply.strip(), "And another reply!") def test_multiple_on(self): message = self.get_email("greedy_on") - self.assertTrue(re.match('^On your remote host', message.fragments[0].content)) - self.assertTrue(re.match('^On 9 Jan 2014', message.fragments[1].content)) + self.assertTrue(re.match("^On your remote host", message.fragments[0].content)) + self.assertTrue(re.match("^On 9 Jan 2014", message.fragments[1].content)) self.assertEqual( - [False, True, False], - [fragment.quoted for fragment in message.fragments] + [False, True, False], [fragment.quoted for fragment in message.fragments] ) self.assertEqual( [False, False, False], - [fragment.signature for fragment in message.fragments] + [fragment.signature for fragment in message.fragments], ) self.assertEqual( - [False, True, True], - [fragment.hidden for fragment in message.fragments] + [False, True, True], [fragment.hidden for fragment in message.fragments] ) def test_pathological_emails(self): t0 = time.time() - message = self.get_email("pathological") + self.get_email("pathological") self.assertTrue(time.time() - t0 < 1, "Took too long") def test_doesnt_remove_signature_delimiter_in_mid_line(self): - message = self.get_email('email_sig_delimiter_in_middle_of_line') + message = self.get_email("email_sig_delimiter_in_middle_of_line") self.assertEqual(1, len(message.fragments)) def get_email(self, name): """ Return EmailMessage instance """ - with open('test/emails/%s.txt' % name) as f: + with open("test/emails/%s.txt" % name) as f: text = f.read() return EmailReplyParser.read(text) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()