# Comprehend Email Demo

#### Set up with some Dummy Emails
In real life these would be ingested from a service or a storage location

In [1]:
import json
# Seven hard-coded sample messages standing in for a real inbox.
# Each entry is a triple-quoted string, so its leading newline and four-space
# indentation are part of the text that later cells pass to Comprehend.
emails = [
    """
    Hi Gillian, I just wanted to drop you a note and say great job yesterday!
    """,
    """
    Hey, 
    Can you come in early?
    Thanks,
    Dave
    """,
    """
    Hey Gillian,
    I will need the documents you promised this morning. They are already late.
    Best Regards,
    Kevin
    """,
    """
    Hey! I'm go out to get coffee - do you want me to bring you back one?
    """,
    """
    I'm going to be late this morning - can you cover for me until I get there?
    """,
    """
    Hi Gillian,
    We have a meeting with Jon and Kirsten later - can you see if Jenny wants to come too?
    Thanks
    Tom
    """,
    """
    Gillian - Looks like there was a major outage last night. It's been escalated, and we need to get on top of this really quickly.
    """
]

Python SDK for Comprehend at https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/comprehend.html

Note that the AWS SDKs are available in multiple programming languages: https://aws.amazon.com/tools/

In [2]:
import boto3
# Comprehend client shared by all of the detect_* calls in the cells below.
# NOTE(review): no region or credentials are passed here - presumably boto3
# resolves them from the environment / AWS config; confirm before running elsewhere.
comprehend = boto3.client('comprehend')

#### Detect Entity Example

In [3]:
# Echo the raw email first so the offsets in the response can be matched to the text.
first_email = emails[0]
print(first_email)

# Entity detection on a single English-language document.
comprehend_entity_response = comprehend.detect_entities(Text=first_email, LanguageCode='en')

# Pretty-print the whole response dict (entities plus the HTTP metadata).
print(json.dumps(comprehend_entity_response, indent=2))


    Hi Gillian, I just wanted to drop you a note and say great job yesterday!
    
{
  "Entities": [
    {
      "Score": 0.9999855756759644,
      "Type": "PERSON",
      "Text": "Gillian",
      "BeginOffset": 8,
      "EndOffset": 15
    },
    {
      "Score": 0.9998825788497925,
      "Type": "DATE",
      "Text": "yesterday",
      "BeginOffset": 68,
      "EndOffset": 77
    }
  ],
  "ResponseMetadata": {
    "RequestId": "d2677731-f006-4800-b919-d1df33000de7",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "x-amzn-requestid": "d2677731-f006-4800-b919-d1df33000de7",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "201",
      "date": "Thu, 23 Jul 2020 23:45:05 GMT"
    },
    "RetryAttempts": 0
  }
}


#### Detect Sentiment Example

In [4]:
# Sentiment of the first email: one overall label plus a score per label.
comprehend_sentiment_response = comprehend.detect_sentiment(Text=emails[0], LanguageCode='en')

sentiment_json = json.dumps(comprehend_sentiment_response, indent=2)
print(sentiment_json)

{
  "Sentiment": "POSITIVE",
  "SentimentScore": {
    "Positive": 0.6899372935295105,
    "Negative": 0.052835334092378616,
    "Neutral": 0.257215678691864,
    "Mixed": 1.1653065485006664e-05
  },
  "ResponseMetadata": {
    "RequestId": "0f6baba8-e496-4a3d-937f-096e0bea8093",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "x-amzn-requestid": "0f6baba8-e496-4a3d-937f-096e0bea8093",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "163",
      "date": "Thu, 23 Jul 2020 23:47:05 GMT"
    },
    "RetryAttempts": 0
  }
}


#### Detect Key Phrases Example

In [5]:
# Key phrases found in the first email, each with a score and character offsets.
comprehend_key_phrase_response = comprehend.detect_key_phrases(Text=emails[0], LanguageCode='en')

key_phrase_json = json.dumps(comprehend_key_phrase_response, indent=2)
print(key_phrase_json)

{
  "KeyPhrases": [
    {
      "Score": 1.0,
      "Text": "Hi Gillian",
      "BeginOffset": 5,
      "EndOffset": 15
    },
    {
      "Score": 1.0,
      "Text": "a note",
      "BeginOffset": 43,
      "EndOffset": 49
    },
    {
      "Score": 1.0,
      "Text": "great job",
      "BeginOffset": 58,
      "EndOffset": 67
    },
    {
      "Score": 0.9999998807907104,
      "Text": "yesterday",
      "BeginOffset": 68,
      "EndOffset": 77
    }
  ],
  "ResponseMetadata": {
    "RequestId": "33e33761-e9b9-4ef1-896b-0c0b27bf1df8",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "x-amzn-requestid": "33e33761-e9b9-4ef1-896b-0c0b27bf1df8",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "288",
      "date": "Thu, 23 Jul 2020 23:47:14 GMT"
    },
    "RetryAttempts": 0
  }
}


#### Loop over emails to compare them.
Note that a batch option is available, but here we mimic multiple real-time calls, because we always want to be up to date on our emails and would trigger the calls in real time.

In [6]:
# One insight dict per email, in the same order as `emails`:
#   {'email': str, 'sentiment': str, 'entities': {type: [text, ...]}, 'keyPhrases': [text, ...]}
email_insights = []

for email in emails:
    print(email)

    # Three separate real-time calls per email (rather than one batch call),
    # each analysing the same English text from a different angle.
    comprehend_entity_response = comprehend.detect_entities(Text=email, LanguageCode='en')
    comprehend_sentiment_response = comprehend.detect_sentiment(Text=email, LanguageCode='en')
    comprehend_key_phrase_response = comprehend.detect_key_phrases(Text=email, LanguageCode='en')

    # Keys are inserted in the order they should appear when the insight is dumped as JSON.
    insight = {
        'email': email,
        'sentiment': comprehend_sentiment_response['Sentiment'],
        'entities': {},
        'keyPhrases': [],
    }

    print('Sentiment -', insight['sentiment'])
    print('')

    # Group entity texts by their entity type, creating each type's list on first use.
    for entity in comprehend_entity_response['Entities']:
        insight['entities'].setdefault(entity['Type'], []).append(entity['Text'])
        print(entity['Type'], '-', entity['Text'])
    print('')

    print('Key Phrases:')
    for phrase in comprehend_key_phrase_response['KeyPhrases']:
        insight['keyPhrases'].append(phrase['Text'])
        print(phrase['Text'])
    print('')

    email_insights.append(insight)
    print('------')


    Hi Gillian, I just wanted to drop you a note and say great job yesterday!
    
Sentiment - POSITIVE

PERSON - Gillian
DATE - yesterday

Key Phrases:
Hi Gillian
a note
great job
yesterday

------

    Hey, 
    Can you come in early?
    Thanks,
    Dave
    
Sentiment - NEUTRAL

PERSON - Dave

Key Phrases:

------

    Hey Gillian,
    I will need the documents you promised this morning. They are already late.
    Best Regards,
    Kevin
    
Sentiment - NEGATIVE

PERSON - Gillian
DATE - this morning
PERSON - Kevin

Key Phrases:
the documents
this morning
Best Regards

------

    Hey! I'm go out to get coffee - do you want me to bring you back one?
    
Sentiment - NEUTRAL


Key Phrases:
coffee

------

    I'm going to be late this morning - can you cover for me until I get there?
    
Sentiment - NEUTRAL

DATE - late this morning

Key Phrases:
late this morning

------

    Hi Gillian,
    We have a meeting with Jon and Kirsten later - can you see if Jenny wants to come too?
  

In [7]:
# Dump each collected insight as indented JSON, one document after another.
for record in email_insights:
    pretty = json.dumps(record, indent=2)
    print(pretty)

{
  "email": "\n    Hi Gillian, I just wanted to drop you a note and say great job yesterday!\n    ",
  "sentiment": "POSITIVE",
  "entities": {
    "PERSON": [
      "Gillian"
    ],
    "DATE": [
      "yesterday"
    ]
  },
  "keyPhrases": [
    "Hi Gillian",
    "a note",
    "great job",
    "yesterday"
  ]
}
{
  "email": "\n    Hey, \n    Can you come in early?\n    Thanks,\n    Dave\n    ",
  "sentiment": "NEUTRAL",
  "entities": {
    "PERSON": [
      "Dave"
    ]
  },
  "keyPhrases": []
}
{
  "email": "\n    Hey Gillian,\n    I will need the documents you promised this morning. They are already late.\n    Best Regards,\n    Kevin\n    ",
  "sentiment": "NEGATIVE",
  "entities": {
    "PERSON": [
      "Gillian",
      "Kevin"
    ],
    "DATE": [
      "this morning"
    ]
  },
  "keyPhrases": [
    "the documents",
    "this morning",
    "Best Regards"
  ]
}
{
  "email": "\n    Hey! I'm go out to get coffee - do you want me to bring you back one?\n    ",
  "sentiment": "NEU

#### Check out the overall Sentiment

In [8]:
# Bucket the insights by the sentiment label Comprehend assigned to each email.
positive = [insight for insight in email_insights if insight['sentiment'] == 'POSITIVE']
negative = [insight for insight in email_insights if insight['sentiment'] == 'NEGATIVE']
neutral = [insight for insight in email_insights if insight['sentiment'] == 'NEUTRAL']
# detect_sentiment can also label a document MIXED (the response carries a
# "Mixed" score); without this bucket such emails would vanish from the tally.
mixed = [insight for insight in email_insights if insight['sentiment'] == 'MIXED']

print('POSITIVE:', len(positive))
print('NEGATIVE:', len(negative))
print('NEUTRAL:', len(neutral))
# Reported only when present, so the counts always add up to len(email_insights)
# without adding a line to the output when no email is MIXED.
if mixed:
    print('MIXED:', len(mixed))

POSITIVE: 2
NEGATIVE: 2
NEUTRAL: 3


#### Check out who is getting mentioned

In [9]:
# Distinct PERSON entity texts across every analysed email.
people_mentions = set()

for insights in email_insights:
    # Emails with no PERSON entry contribute nothing.
    people_mentions.update(insights['entities'].get('PERSON', []))

print('People mentioned:', people_mentions)

People mentioned: {'Gillian', 'Jon', 'Dave', 'Kevin', 'Jenny', 'Tom', 'Kirsten'}


#### Search for a specific person being mentioned

In [10]:
# Name to look for; matched exactly against the PERSON entities of each email.
person = 'Kevin'
print('Emails that mention', person)

for insights in email_insights:
    # Skip emails where this name was not recorded as a PERSON entity.
    if person not in insights['entities'].get('PERSON', []):
        continue
    print(insights['email'])

Emails that mention Kevin

    Hey Gillian,
    I will need the documents you promised this morning. They are already late.
    Best Regards,
    Kevin
    
