Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

initial commit

  • Loading branch information...
commit 9bd70c8ad0a85972ae7a98f2b943e54877442696 0 parents
@roycehaynes roycehaynes authored
Showing with 433 additions and 0 deletions.
  1. BIN  .DS_Store
  2. +2 −0  .gitignore
  3. +10 −0 .pydevproject
  4. 0  .travis.yml
  5. 0  LICENSE
  6. +19 −0 README.md
  7. 0  __init__.py
  8. BIN  __init__.pyc
  9. +123 −0 email_reply_parser.py
  10. BIN  email_reply_parser.pyc
  11. 0  requirments.txt
  12. 0  setup.py
  13. BIN  tests/.DS_Store
  14. 0  tests/__init__.py
  15. BIN  tests/__init__.pyc
  16. +5 −0 tests/context.py
  17. BIN  tests/context.pyc
  18. +4 −0 tests/emails/correct_sig.txt
  19. +13 −0 tests/emails/email_1_1.txt
  20. +51 −0 tests/emails/email_1_2.txt
  21. +55 −0 tests/emails/email_1_3.txt
  22. +5 −0 tests/emails/email_1_4.txt
  23. +15 −0 tests/emails/email_1_5.txt
  24. +15 −0 tests/emails/email_1_6.txt
  25. +12 −0 tests/emails/email_1_7.txt
  26. +25 −0 tests/emails/email_2_1.txt
  27. +3 −0  tests/emails/email_BlackBerry.txt
  28. +22 −0 tests/emails/email_bullets.txt
  29. +3 −0  tests/emails/email_iPhone.txt
  30. +3 −0  tests/emails/email_multi_word_sent_from_my_mobile_device.txt
  31. +10 −0 tests/emails/email_one_is_not_on.txt
  32. +3 −0  tests/emails/email_sent_from_my_not_signature.txt
  33. +35 −0 tests/tests.py
  34. BIN  tests/tests.pyc
BIN  .DS_Store
Binary file not shown
2  .gitignore
@@ -0,0 +1,2 @@
+.project
+
10 .pydevproject
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?>
+
+<pydev_project>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/email_reply_parser</path>
+</pydev_pathproperty>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Python 2.7.3</pydev_property>
+</pydev_project>
0  .travis.yml
No changes.
0  LICENSE
No changes.
19 README.md
@@ -0,0 +1,19 @@
+# Email Reply Parser for Python
+
+Email Reply Parser is a Python library that breaks an email into subject, body, and signature fragments.
+
+## Example Use Cases
+
+## Installation
+
+## Known Issues
+
+## Author
+
+
+
+## TODO
+
+- Post to Travis CI
+- Create Test Cases
+- Fix the issue
0  __init__.py
No changes.
BIN  __init__.pyc
Binary file not shown
123 email_reply_parser.py
@@ -0,0 +1,123 @@
+import re
+
+"""
+ Email Reply Parser description
+"""
+
+
class EmailReplyParser():
    """Static entry points for parsing the body of an email."""

    @staticmethod
    def read(text):
        """Parse an email body into fragments.

        text - A string email body

        Returns the EmailMessage built from `text`, after its read()
        method has been run.
        """
        message = EmailMessage(text)
        return message.read()

    @staticmethod
    def parse_reply(text):
        """Placeholder that is not implemented yet; always returns None."""
        return None
+
+
class EmailMessage(object):
    """An email body split into reply, quoted and signature fragments.

    The body is scanned line by line from the bottom up, because
    signatures and quoted text are easiest to recognise from the end of
    the message.  Consecutive lines of the same kind are grouped into
    Fragment objects.

    Attributes:
        text: the email body; after read() it holds the normalised text
            (LF line endings, wrapped "On ... wrote:" headers joined).
        fragments: list of Fragment objects, top to bottom once read()
            has returned.
        fragment: the Fragment currently being built, or None.
    """

    # Applied with re.match() to a whitespace-stripped line: a line that
    # starts with "--", "__" or "-<word char>", or a short
    # "Sent from my <one to three words>" footer.
    SIG_REGEX = r'(--|__|-\w)|(^Sent from my (\w+\s*){1,3}$)'

    # A quoted line starts with one or more ">".
    _QUOTED_REGEX = re.compile(r'(>+)')

    # Single-line reply header such as "On <date>, <person> wrote:".
    _QUOTE_HEADER_REGEX = re.compile(r'On.*wrote:$')

    # A reply header that mail clients wrapped over up to three lines.
    # Continuation lines may not be blank, quoted, or start another
    # "On " sentence, which keeps the match from swallowing reply text.
    _MULTILINE_HEADER_REGEX = re.compile(
        r'^On\s[^\n]*(?:\n(?!\n|>|On\s)[^\n]*){0,2}?wrote:[ \t]*$',
        re.MULTILINE)

    def __init__(self, text):
        self.fragments = []
        self.fragment = None
        self.text = text

    def read(self):
        """Split the text into fragments and label each one.

        Every fragment ends up flagged as quoted, signature and/or
        hidden.  Calling read() again re-parses from scratch instead of
        appending duplicate fragments.

        Returns this EmailMessage so calls can be chained.
        """
        self.found_visible = False
        self.fragments = []
        self.fragment = None

        text = self.text.replace('\r\n', '\n')
        # Join wrapped "On ... wrote:" headers onto one line so that
        # quote_header() can recognise them.
        text = self._MULTILINE_HEADER_REGEX.sub(
            lambda found: found.group(0).replace('\n', ' '), text)
        self.text = text

        # Scan bottom-up.
        self.lines = text.split('\n')
        self.lines.reverse()

        for line in self.lines:
            self._scan_line(line)

        self._finish_fragment()

        # Fragments were collected bottom-up; restore reading order.
        self.fragments.reverse()

        return self

    def _scan_line(self, line):
        """Add one line (scanning bottom-up) to the right fragment.

        line - A single line of the email body
        """
        line = line.strip('\n')

        is_quoted = self._QUOTED_REGEX.match(line) is not None
        is_blank = not line.strip()

        # A blank line ends a signature when the line just below it
        # (the last one appended) looks like a signature marker.
        if self.fragment and is_blank:
            if re.match(self.SIG_REGEX, self.fragment.lines[-1].strip()):
                self.fragment.signature = True
                self._finish_fragment()

        # Stay in the current fragment when the line is of the same
        # kind, or when a quoted fragment meets its "On ... wrote:"
        # header or a blank line.
        if self.fragment and (
                self.fragment.quoted == is_quoted
                or (self.fragment.quoted
                    and (self.quote_header(line) or is_blank))):
            self.fragment.lines.append(line)
        else:
            self._finish_fragment()
            self.fragment = Fragment(is_quoted, line)

    def quote_header(self, line):
        """Return True if `line` is an "On ... wrote:" reply header."""
        return self._QUOTE_HEADER_REGEX.match(line.strip()) is not None

    def _finish_fragment(self):
        """Close the current fragment, if any, and store it.

        Scanning from the bottom, every quoted, signature or empty
        fragment met before the first piece of real text is marked
        hidden.  Every finished fragment is appended to self.fragments.
        """
        if self.fragment:
            self.fragment.finish()
            if not self.found_visible:
                if (self.fragment.quoted
                        or self.fragment.signature
                        or not self.fragment.content.strip()):
                    self.fragment.hidden = True
                else:
                    self.found_visible = True
            self.fragments.append(self.fragment)
        self.fragment = None


class Fragment(object):
    """A run of consecutive email lines of the same kind.

    Attributes:
        signature: True when the fragment is a signature.
        hidden: True when the fragment is not part of the visible reply.
        quoted: True when the fragment consists of quoted lines.
        lines: lines collected so far, in bottom-up order; set to None
            by finish().
        content: the joined text, available after finish().
    """

    def __init__(self, quoted, first_line):
        self.signature = False
        self.hidden = False
        self.quoted = quoted
        self._content = None
        self.lines = [first_line]

    def finish(self):
        """Build `content` from the collected lines and release them."""
        # Lines were appended bottom-up; put them back in reading order.
        self.lines.reverse()
        self._content = '\n'.join(self.lines)
        self.lines = None

    @property
    def content(self):
        """The fragment text, or None until finish() has been called."""
        return self._content

    @content.setter
    def content(self, value):
        # Kept so that code assigning to `content` keeps working.
        self._content = value

    def inspect(self):
        """Placeholder that is not implemented yet; returns None."""
        pass
BIN  email_reply_parser.pyc
Binary file not shown
0  requirments.txt
No changes.
0  setup.py
No changes.
BIN  tests/.DS_Store
Binary file not shown
0  tests/__init__.py
No changes.
BIN  tests/__init__.pyc
Binary file not shown
5 tests/context.py
@@ -0,0 +1,5 @@
+import os
+import sys
+sys.path.insert(0, os.path.abspath('..'))
+
+import email_reply_parser
BIN  tests/context.pyc
Binary file not shown
4 tests/emails/correct_sig.txt
@@ -0,0 +1,4 @@
+this is an email with a correct -- signature.
+
+--
+rick
13 tests/emails/email_1_1.txt
@@ -0,0 +1,13 @@
+Hi folks
+
+What is the best way to clear a Riak bucket of all key, values after
+running a test?
+I am currently using the Java HTTP API.
+
+-Abhishek Kona
+
+
+_______________________________________________
+riak-users mailing list
+riak-users@lists.basho.com
+http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
51 tests/emails/email_1_2.txt
@@ -0,0 +1,51 @@
+Hi,
+On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
+> Hi folks
+>
+> What is the best way to clear a Riak bucket of all key, values after
+> running a test?
+> I am currently using the Java HTTP API.
+
+You can list the keys for the bucket and call delete for each. Or if you
+put the keys (and kept track of them in your test) you can delete them
+one at a time (without incurring the cost of calling list first.)
+
+Something like:
+
+ String bucket = "my_bucket";
+ BucketResponse bucketResponse = riakClient.listBucket(bucket);
+ RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
+
+ for(String key : bucketInfo.getKeys()) {
+ riakClient.delete(bucket, key);
+ }
+
+
+would do it.
+
+See also
+
+http://wiki.basho.com/REST-API.html#Bucket-operations
+
+which says
+
+"At the moment there is no straightforward way to delete an entire
+Bucket. There is, however, an open ticket for the feature. To delete all
+the keys in a bucket, you’ll need to delete them all individually."
+
+>
+> -Abhishek Kona
+>
+>
+> _______________________________________________
+> riak-users mailing list
+> riak-users@lists.basho.com
+> http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
+
+
+
+
+_______________________________________________
+riak-users mailing list
+riak-users@lists.basho.com
+http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
55 tests/emails/email_1_3.txt
@@ -0,0 +1,55 @@
+Oh thanks.
+
+Having the function would be great.
+
+-Abhishek Kona
+
+On 01/03/11 7:07 PM, Russell Brown wrote:
+> Hi,
+> On Tue, 2011-03-01 at 18:02 +0530, Abhishek Kona wrote:
+>> Hi folks
+>>
+>> What is the best way to clear a Riak bucket of all key, values after
+>> running a test?
+>> I am currently using the Java HTTP API.
+> You can list the keys for the bucket and call delete for each. Or if you
+> put the keys (and kept track of them in your test) you can delete them
+> one at a time (without incurring the cost of calling list first.)
+>
+> Something like:
+>
+> String bucket = "my_bucket";
+> BucketResponse bucketResponse = riakClient.listBucket(bucket);
+> RiakBucketInfo bucketInfo = bucketResponse.getBucketInfo();
+>
+> for(String key : bucketInfo.getKeys()) {
+> riakClient.delete(bucket, key);
+> }
+>
+>
+> would do it.
+>
+> See also
+>
+> http://wiki.basho.com/REST-API.html#Bucket-operations
+>
+> which says
+>
+> "At the moment there is no straightforward way to delete an entire
+> Bucket. There is, however, an open ticket for the feature. To delete all
+> the keys in a bucket, you’ll need to delete them all individually."
+>
+>> -Abhishek Kona
+>>
+>>
+>> _______________________________________________
+>> riak-users mailing list
+>> riak-users@lists.basho.com
+>> http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
+>
+
+
+_______________________________________________
+riak-users mailing list
+riak-users@lists.basho.com
+http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com
5 tests/emails/email_1_4.txt
@@ -0,0 +1,5 @@
+Awesome! I haven't had another problem with it.
+
+On Aug 22, 2011, at 7:37 PM, defunkt<reply@reply.github.com> wrote:
+
+> Loader seems to be working well.
15 tests/emails/email_1_5.txt
@@ -0,0 +1,15 @@
+One: Here's what I've got.
+
+- This would be the first bullet point that wraps to the second line
+to the next
+- This is the second bullet point and it doesn't wrap
+- This is the third bullet point and I'm having trouble coming up with enough
+to say
+- This is the fourth bullet point
+
+Two:
+- Here is another bullet point
+- And another one
+
+This is a paragraph that talks about a bunch of stuff. It goes on and on
+for a while.
15 tests/emails/email_1_6.txt
@@ -0,0 +1,15 @@
+I get proper rendering as well.
+
+Sent from a magnificent torch of pixels
+
+On Dec 16, 2011, at 12:47 PM, Corey Donohoe
+<reply@reply.github.com>
+wrote:
+
+> Was this caching related or fixed already? I get proper rendering here.
+>
+> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)
+>
+> ---
+> Reply to this email directly or view it on GitHub:
+> https://github.com/github/github/issues/2278#issuecomment-3182418
12 tests/emails/email_1_7.txt
@@ -0,0 +1,12 @@
+:+1:
+
+On Tue, Sep 25, 2012 at 8:59 AM, Chris Wanstrath
+<notifications@github.com>wrote:
+
+> Steps 0-2 are in prod. Gonna let them sit for a bit then start cleaning up
+> the old code with 3 & 4.
+>
+>
+> Reply to this email directly or view it on GitHub.
+>
+>
25 tests/emails/email_2_1.txt
@@ -0,0 +1,25 @@
+Outlook with a reply
+
+
+ ------------------------------
+
+*From:* Google Apps Sync Team [mailto:mail-noreply@google.com]
+*Sent:* Thursday, February 09, 2012 1:36 PM
+*To:* jow@xxxx.com
+*Subject:* Google Apps Sync was updated!
+
+
+
+Dear Google Apps Sync user,
+
+Google Apps Sync for Microsoft Outlook® was recently updated. Your computer
+now has the latest version (version 2.5). This release includes bug fixes
+to improve product reliability. For more information about these and other
+changes, please see the help article here:
+
+http://www.google.com/support/a/bin/answer.py?answer=153463
+
+Sincerely,
+
+The Google Apps Sync Team.
+
3  tests/emails/email_BlackBerry.txt
@@ -0,0 +1,3 @@
+Here is another email
+
+Sent from my BlackBerry
22 tests/emails/email_bullets.txt
@@ -0,0 +1,22 @@
+test 2 this should list second
+
+and have spaces
+
+and retain this formatting
+
+
+ - how about bullets
+ - and another
+
+
+On Fri, Feb 24, 2012 at 10:19 AM, <examples@email.goalengine.com> wrote:
+
+> Give us an example of how you applied what they learned to achieve
+> something in your organization
+
+
+
+
+--
+
+*Joe Smith | Director, Product Management*
3  tests/emails/email_iPhone.txt
@@ -0,0 +1,3 @@
+Here is another email
+
+Sent from my iPhone
3  tests/emails/email_multi_word_sent_from_my_mobile_device.txt
@@ -0,0 +1,3 @@
+Here is another email
+
+Sent from my Verizon Wireless BlackBerry
10 tests/emails/email_one_is_not_on.txt
@@ -0,0 +1,10 @@
+Thank, this is really helpful.
+
+One outstanding question I had:
+
+Locally (on development), when I run...
+
+On Oct 1, 2012, at 11:55 PM, Dave Tapley wrote:
+
+> The good news is that I've found a much better query for lastLocation.
+>
3  tests/emails/email_sent_from_my_not_signature.txt
@@ -0,0 +1,3 @@
+Here is another email
+
+Sent from my desk, is much easier then my mobile phone.
35 tests/tests.py
@@ -0,0 +1,35 @@
+import unittest
+from context import email_reply_parser
+from email_reply_parser import EmailReplyParser
+
+
class EmailMessageTest(unittest.TestCase):
    """Parses the sample emails under emails/ and checks the fragments."""

    def test_simple_body(self):
        message = self.get_email('email_1_1')

        self.assertEqual(3, len(message.fragments))

    @unittest.skip("")
    def test_multiline_reply_headers(self):
        # get_email() returns an already parsed message, so it is not
        # read a second time here.
        message = self.get_email('email_1_6')

        self.assertIn('I get', message.text)
        # The text comes first and the pattern second; (?m) lets "^"
        # match at the start of any line of the body.
        self.assertRegexpMatches(message.text, '(?m)^On')

    def get_email(self, name):
        """Return the parsed EmailMessage for emails/<name>.txt.

        The path is relative to the current working directory.
        """
        # "with" closes the file even if reading fails.
        with open('emails/%s.txt' % name) as email_file:
            text = email_file.read()
        return EmailReplyParser.read(text)
+
+
+if __name__ == '__main__':
+ unittest.main()
BIN  tests/tests.pyc
Binary file not shown
Please sign in to comment.
Something went wrong with that request. Please try again.