# Google Cloud Natural Language API

- [Natural Language API Client Libraries](https://cloud.google.com/natural-language/docs/reference/libraries#client-libraries-usage-python)
- [Google Cloud Natural Language API Python Samples](https://github.com/GoogleCloudPlatform/python-docs-samples/tree/master/language/cloud-client/v1)

First, authenticate by running the following command in an interactive terminal:
```
gcloud auth application-default login
```

In [12]:
from google.cloud import language
from google.cloud import storage as gcs
import six
import os
from tempfile import NamedTemporaryFile

In [13]:
# Sample inputs, one per line (English and Japanese). A later cell splits this
# string on '\n' and analyzes each line separately.
texts = """Hello, world!
President Obama is speaking at the White House.
Ladies and gentlemen!
世界、こんにちは！
今日は天気いいですね！
悲しいニュースですね
素晴らしい
Google Cloud Natural Language API は、使いやすい REST API を介して強力な機械学習モデルを提供することで、テキストの構造と意味を解析できるようにします。この API を使用すれば、ドキュメント、ニュース記事、ブログ記事に含まれる人、場所、イベントなどに関する情報を抽出できるようになります。ソーシャル メディア上のコメントから商品に対するセンチメント（感情）を把握したり、コールセンターやメッセージ アプリに寄せられた消費者の意見から顧客満足度を分析したりすることができます。リクエストでアップロードしたテキストを分析することも、Google Cloud Storage のドキュメント ストレージ上のデータを分析することもできます。"""

# Replace this with the gs:// URI of an object you have read access to.
gcs_uri = 'gs://qa-nlp-1.appspot.com/messages/-KoD6Ht5pST_rdxZTXkZ'

In [16]:
# https://cloud.google.com/storage/docs/reference/libraries
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/85d4b1c52de3b8749c139c63bd9f37fdd85da4c8/storage/cloud-client/snippets.py
def read_gcs(bucket_name, source_blob_name):
    """Download a blob from Google Cloud Storage and return it as text.

    The blob is written to a temporary file, a confirmation line naming that
    file is printed, and the file's content is decoded as UTF-8.

    Args:
        bucket_name: Name of the bucket that holds the blob.
        source_blob_name: Name (path) of the blob inside the bucket.

    Returns:
        The blob content decoded from UTF-8.

    Raises:
        UnicodeDecodeError: If the blob content is not valid UTF-8.
    """
    # The storage module is imported under the alias `gcs`; the bare name
    # `storage` is never bound in this notebook. The client gets its own
    # local name so it does not shadow that module alias.
    client = gcs.Client()
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    # The context manager closes (and thereby removes) the temporary file
    # even when the download or the read raises.
    with NamedTemporaryFile() as tmp:
        blob.download_to_filename(tmp.name)
        print('Blob {} downloaded to {}.'.format(
            source_blob_name,
            tmp.name))
        # The download wrote through the file *name*; this handle has not
        # moved, but rewind explicitly so the read always starts at byte 0.
        tmp.seek(0)
        raw = tmp.read()

    # NamedTemporaryFile opens in binary mode, so `raw` is always bytes.
    # Decoding here keeps this cell runnable before the later helper cells
    # have been executed.
    return raw.decode('utf-8')
# Smoke test: fetch one sample message and show its decoded text.
read_gcs(bucket_name='qa-nlp-1.appspot.com',
         source_blob_name='messages/-KoD6Ht5pST_rdxZTXkZ')

Blob messages/-KoD6Ht5pST_rdxZTXkZ downloaded to /tmp/tmpur9praxv.


'ＡＩの経済効果、2030年までに1780兆円規模'

In [14]:
def decode_utf(text):
    """Return ``text`` as a text string, decoding UTF-8 byte strings.

    Args:
        text: A byte string, or any other object.

    Returns:
        The UTF-8 decoded string when ``text`` is a byte string; otherwise
        ``text`` itself, unchanged.

    Raises:
        UnicodeDecodeError: If ``text`` is a byte string that is not valid
            UTF-8.
    """
    # The builtin `bytes` is the same type as `six.binary_type` on both
    # Python 2 (where it aliases `str`) and Python 3, so this helper does
    # not need the third-party `six` package.
    if isinstance(text, bytes):
        return text.decode('utf-8')
    return text

  
def doc_from_text(text):
    """Build a language-client document from an in-memory text.

    Args:
        text: The text to wrap; it is passed through ``decode_utf`` first.

    Returns:
        Whatever ``language_client.document_from_text`` returns for the text.
    """
    return language_client.document_from_text(decode_utf(text))

  
def doc_from_gcs(gcs_uri):
    """Build a language-client document that refers to a stored object.

    Args:
        gcs_uri: URI of the object, handed unchanged to the client.

    Returns:
        Whatever ``language_client.document_from_url`` returns for the URI.
    """
    remote_document = language_client.document_from_url(gcs_uri)
    return remote_document

In [17]:
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/language/cloud-client/v1/snippets.py
# Module-level client used by doc_from_text and doc_from_gcs, and through
# them by every analysis function defined in this notebook.
language_client = language.Client()

def sentiment_text(text):
    """Print the sentiment score and magnitude for a piece of text.

    Args:
        text: The text to analyze; wrapped into a document by
            ``doc_from_text``.
    """
    # The upstream sample notes that HTML can be analyzed as well, via the
    # document's doc_type (language.Document.HTML).
    result = doc_from_text(text).analyze_sentiment().sentiment

    # Each attribute is read right before its line is printed.
    for template, attribute in (('Score: {}', 'score'),
                                ('Magnitude: {}', 'magnitude')):
        print(template.format(getattr(result, attribute)))


def sentiment_gcs(gcs_uri):
    """Print the sentiment score and magnitude for a stored object.

    Args:
        gcs_uri: URI of the object to analyze; wrapped into a document by
            ``doc_from_gcs``.
    """
    remote_document = doc_from_gcs(gcs_uri)

    # The upstream sample notes that HTML can be analyzed as well, via the
    # document's doc_type (language.Document.HTML).
    response = remote_document.analyze_sentiment()
    result = response.sentiment

    print('Score: {}'.format(result.score))
    print('Magnitude: {}'.format(result.magnitude))


def entities_text(text):
    """Print the entities found in a piece of text.

    For every entity this prints a separator line followed by its name,
    type, metadata, salience and Wikipedia URL ('-' when the metadata has no
    'wikipedia_url' key).

    Args:
        text: The text to analyze; wrapped into a document by
            ``doc_from_text``.
    """
    # The upstream sample notes that HTML can be analyzed as well, via the
    # document's doc_type (language.Document.HTML).
    response = doc_from_text(text).analyze_entities()

    # One shared "label: value" row template with a 16-column label.
    row = u'{:<16}: {}'
    for found in response.entities:
        print('=' * 20)
        print(row.format('name', found.name))
        print(row.format('type', found.entity_type))
        print(row.format('metadata', found.metadata))
        print(row.format('salience', found.salience))
        print(row.format('wikipedia_url',
                         found.metadata.get('wikipedia_url', '-')))


def entities_gcs(gcs_uri):
    """Print the entities found in a stored object.

    For every entity this prints a separator line followed by its name,
    type, metadata, salience and Wikipedia URL ('-' when the metadata has no
    'wikipedia_url' key).

    Args:
        gcs_uri: URI of the object to analyze; wrapped into a document by
            ``doc_from_gcs``.
    """
    remote_document = doc_from_gcs(gcs_uri)

    # Entity detection. The upstream sample notes that HTML can be analyzed
    # as well, via the document's doc_type (language.Document.HTML).
    found_entities = remote_document.analyze_entities().entities

    separator = '=' * 20
    for item in found_entities:
        print(separator)
        print(u'{:<16}: {}'.format('name', item.name))
        print(u'{:<16}: {}'.format('type', item.entity_type))
        print(u'{:<16}: {}'.format('metadata', item.metadata))
        print(u'{:<16}: {}'.format('salience', item.salience))
        wikipedia_url = item.metadata.get('wikipedia_url', '-')
        print(u'{:<16}: {}'.format('wikipedia_url', wikipedia_url))


def syntax_text(text):
    """Print one "tag: token" line per syntax token of a piece of text.

    Args:
        text: The text to analyze; wrapped into a document by
            ``doc_from_text``.
    """
    # The upstream sample notes that HTML can be analyzed as well, via the
    # document's doc_type (language.Document.HTML).
    analysis = doc_from_text(text).analyze_syntax()

    for tok in analysis.tokens:
        tag = tok.part_of_speech.tag
        print(u'{}: {}'.format(tag, tok.text_content))

def syntax_gcs(gcs_uri):
    """Print one "tag: token" line per syntax token of a stored object.

    Args:
        gcs_uri: URI of the object to analyze; wrapped into a document by
            ``doc_from_gcs``.

    Returns:
        The tokens taken from the syntax analysis result.
    """
    remote_document = doc_from_gcs(gcs_uri)

    # The upstream sample notes that HTML can be analyzed as well, via the
    # document's doc_type (language.Document.HTML).
    found_tokens = remote_document.analyze_syntax().tokens

    line = u'{}: {}'
    for tok in found_tokens:
        print(line.format(tok.part_of_speech.tag, tok.text_content))

    return found_tokens


def nlp_text(text):
    """Run sentiment, entity and syntax analysis on a piece of text.

    Prints a banner and the text itself, then the sentiment and entity
    results, a shorter divider, and finally the syntax tokens.

    Args:
        text: The text to analyze.
    """
    print('================================================================================')
    print('Text: {}'.format(text))

    for analyze in (sentiment_text, entities_text):
        analyze(text)

    print('=========================================================')
    syntax_text(text)

    
def nlp_gcs(bucket_name, source_blob_name):
    """Run sentiment, entity and syntax analysis on a stored object.

    Prints a banner and the object's text (fetched with ``read_gcs``), then
    the sentiment and entity results, a shorter divider, and finally the
    syntax tokens.

    Args:
        bucket_name: Name of the bucket that holds the object.
        source_blob_name: Name (path) of the object inside the bucket.
    """
    # Built before anything is printed, as the gs://<bucket>/<object> URI.
    uri = ''.join(('gs://', bucket_name, '/', source_blob_name))

    print('================================================================================')
    content = read_gcs(bucket_name, source_blob_name)
    print('Text: {}'.format(content))

    for analyze in (sentiment_gcs, entities_gcs):
        analyze(uri)

    print('=========================================================')
    syntax_gcs(uri)

In [18]:
# Analyze every sample line on its own.
for text in texts.split('\n'):
    nlp_text(text)

# The syntax helper for stored objects defined in this notebook is
# `syntax_gcs` (no `syntax_file` exists here); it prints the tokens and
# returns them.
tokens = syntax_gcs(gcs_uri)

Text: Hello, world!
Score: 0.4
Magnitude: 0.4
name            : world
type            : LOCATION
metadata        : {}
salience        : 1
wikipedia_url   : -
X: Hello
PUNCT: ,
NOUN: world
PUNCT: !
Text: President Obama is speaking at the White House.
Score: 0.2
Magnitude: 0.2
name            : Obama
type            : PERSON
metadata        : {'wikipedia_url': 'http://en.wikipedia.org/wiki/Barack_Obama', 'mid': '/m/02mjmr'}
salience        : 0.9077594
wikipedia_url   : http://en.wikipedia.org/wiki/Barack_Obama
name            : White House
type            : LOCATION
metadata        : {'wikipedia_url': 'http://en.wikipedia.org/wiki/White_House', 'mid': '/m/081sq'}
salience        : 0.092240565
wikipedia_url   : http://en.wikipedia.org/wiki/White_House
NOUN: President
NOUN: Obama
VERB: is
VERB: speaking
ADP: at
DET: the
NOUN: White
NOUN: House
PUNCT: .
Text: Ladies and gentlemen!
Score: 0.3
Magnitude: 0.3
name            : Ladies
type            : PERSON
metadata        : {}
salience     

In [19]:
# Full analysis of the sample message stored in Cloud Storage.
nlp_gcs(bucket_name='qa-nlp-1.appspot.com',
        source_blob_name='messages/-KoD6Ht5pST_rdxZTXkZ')

Blob messages/-KoD6Ht5pST_rdxZTXkZ downloaded to /tmp/tmpp2rk7u01.
Text: ＡＩの経済効果、2030年までに1780兆円規模
Score: 0.3
Magnitude: 0.3
name            : AI
type            : OTHER
metadata        : {}
salience        : 0.5231133
wikipedia_url   : -
name            : 経済効果
type            : OTHER
metadata        : {}
salience        : 0.47688666
wikipedia_url   : -
NOUN: ＡＩ
PRT: の
NOUN: 経済
NOUN: 効果
PUNCT: 、
NUM: 2030
AFFIX: 年
PRT: まで
PRT: に
NUM: 1780
AFFIX: 兆
AFFIX: 円
ADJ: 規模
