## Importing Data from JSON files

In [0]:
import os
import json
import numpy as np
from random import randrange
import zipfile

In [0]:
# Unpack the dataset archive into the current working directory.
with zipfile.ZipFile("updated-json-files.zip", mode="r") as zip_ref:
    zip_ref.extractall()

In [0]:
def load_data(link):
  """Collect texts and sentiment labels from every JSON file in a directory.

  Each file is parsed as JSON and must contain a 'text' mapping and a
  'sentiment' mapping; the values of both are gathered into flat lists.

  Args:
    link: Path of the directory that holds the JSON files.

  Returns:
    A ``(text, sentiment)`` tuple of lists. Files are visited in sorted
    filename order so that repeated runs yield the same ordering.
    NOTE(review): ``text[i]`` matches ``sentiment[i]`` only if both
    mappings in a file list their keys in the same order -- confirm
    against the files' producer.
  """
  text = []
  sentiment = []

  # os.listdir() returns entries in arbitrary order; sort for reproducibility.
  for filename in sorted(os.listdir(link)):
    if filename == '.ipynb_checkpoints':
      continue
    file_path = os.path.join(link, filename)
    # Skip sub-directories and other non-file entries that open() cannot read.
    if not os.path.isfile(file_path):
      continue

    # NOTE(review): 'unicode_escape' is kept from the original code; it
    # interprets backslash escapes before the JSON parser sees the text.
    # Confirm this is intended for these files (UTF-8 is the usual choice).
    with open(file_path, encoding = 'unicode_escape') as f:
      dic = json.load(f)

    text.extend(dic['text'].values())
    sentiment.extend(dic['sentiment'].values())
  return text, sentiment

In [0]:
# Directory that holds the extracted JSON files.
link = "updated-json-files"

# Flat lists of texts and their sentiment labels, gathered across all files.
text, sentiment = load_data(link)

## Setting up the Google Cloud Natural Language API

In [0]:
# Copyright 2016, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START language_sentiment_tutorial]
"""Demonstrates how to make a simple call to the Natural Language API."""

# [START language_sentiment_tutorial_imports]
import argparse
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="google_nlp_key.json"
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
# [END language_sentiment_tutorial_imports]


In [0]:
# [START language_sentiment_tutorial_print_result]
def print_result(annotations):
    """Print every sentence score plus the overall sentiment; return the overall score.

    Args:
        annotations: Sentiment response exposing ``document_sentiment``
            (with ``score`` and ``magnitude``) and an iterable ``sentences``
            whose items carry ``sentiment.score``.

    Returns:
        The document-level sentiment score.
    """
    doc_score = annotations.document_sentiment.score
    doc_magnitude = annotations.document_sentiment.magnitude

    # One line per sentence, numbered from zero.
    sentence_template = 'Sentence {} has a sentiment score of {}'
    for position, item in enumerate(annotations.sentences):
        print(sentence_template.format(position, item.sentiment.score))

    # Summary line for the whole document.
    overall_template = 'Overall Sentiment: score of {} with magnitude of {}'
    print(overall_template.format(doc_score, doc_magnitude))
    return doc_score
# [END language_sentiment_tutorial_print_result]

In [0]:


# [START language_sentiment_tutorial_analyze_sentiment]
def analyze(content, client=None):
    """Run a sentiment analysis request on a piece of text.

    Args:
        content: The plain text to analyse (the text itself, not a file name).
        client: Optional object exposing ``analyze_sentiment(document=...)``.
            When omitted, a ``language.LanguageServiceClient`` is created on
            the first call and reused by later calls, so that analysing many
            texts in a loop does not build a new client for every request.

    Returns:
        Whatever ``print_result`` returns for the response, i.e. the
        document-level sentiment score. The per-sentence and overall scores
        are printed as a side effect.
    """
    if client is None:
        # Cache the default client on the function object. Re-defining the
        # function (re-running the cell) drops the cache and builds a new one.
        client = getattr(analyze, "_default_client", None)
        if client is None:
            client = language.LanguageServiceClient()
            analyze._default_client = client

    document = types.Document(
        content=content,
        type=enums.Document.Type.PLAIN_TEXT)
    annotations = client.analyze_sentiment(document=document)

    # print_result prints the scores and hands back the overall one.
    return print_result(annotations)
# [END language_sentiment_tutorial_analyze_sentiment]


In [0]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Pretty-print a confusion matrix as a fixed-width text table.

    Args:
        cm: Square matrix indexable as ``cm[i, j]`` (e.g. a 2-D NumPy array)
            with one row and one column per entry of ``labels``.
        labels: Sequence of label strings, used for both the header row and
            the row captions.
        hide_zeroes: If true, cells whose value equals 0 are printed blank.
        hide_diagonal: If true, cells on the main diagonal are printed blank.
        hide_threshold: If not ``None``, cells whose value is not strictly
            greater than this threshold are printed blank. A threshold of 0
            is honoured (it is not treated as "no threshold").

    Returns:
        None. The table is written to standard output.
    """
    columnwidth = max([len(x) for x in labels] + [5])  # 5 is value length
    empty_cell = " " * columnwidth

    # Corner cell of the header: "t/p" (presumably true/predicted) padded on
    # both sides, then left-padded when the centring came up one short.
    side_padding = (columnwidth - 3) // 2 * " "
    fst_empty_cell = (side_padding + "t/p" + side_padding).rjust(columnwidth)

    # Print header
    print("    " + fst_empty_cell, end=" ")
    for label in labels:
        print("%{0}s".format(columnwidth) % label, end=" ")
    print()

    # Print rows
    for i, label1 in enumerate(labels):
        print("    %{0}s".format(columnwidth) % label1, end=" ")
        for j in range(len(labels)):
            cell = "%{0}.1f".format(columnwidth) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            # Compare against None explicitly so that a threshold of 0 works.
            if hide_threshold is not None:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            print(cell, end=" ")
        print()

## Now that the API functions are defined, let us use them to analyze our data

In [0]:
# Score every text in order; each call prints its own per-sentence report.
# Kept as an explicit loop so the scores gathered so far remain available
# if a request fails part-way through.
all_scores = []
for passage in text:
  all_scores.append(analyze(passage))
  

Sentence 0 has a sentiment score of 0.0
Sentence 1 has a sentiment score of 0.699999988079071
Sentence 2 has a sentiment score of 0.10000000149011612
Sentence 3 has a sentiment score of 0.0
Sentence 4 has a sentiment score of 0.10000000149011612
Overall Sentiment: score of 0.20000000298023224 with magnitude of 1.100000023841858
Sentence 0 has a sentiment score of 0.0
Sentence 1 has a sentiment score of 0.0
Sentence 2 has a sentiment score of -0.20000000298023224
Overall Sentiment: score of 0.0 with magnitude of 0.4000000059604645
Sentence 0 has a sentiment score of 0.0
Sentence 1 has a sentiment score of 0.0
Sentence 2 has a sentiment score of 0.10000000149011612
Overall Sentiment: score of 0.0 with magnitude of 0.20000000298023224
Sentence 0 has a sentiment score of 0.0
Sentence 1 has a sentiment score of 0.10000000149011612
Sentence 2 has a sentiment score of 0.10000000149011612
Sentence 3 has a sentiment score of 0.6000000238418579
Overall Sentiment: score of 0.20000000298023224 wit

In [0]:
# Turn each numeric score into a class name: above zero is "positive",
# exactly zero is "neutral", everything else is "negative".
score_text_form = [
  "positive" if score > 0 else "neutral" if score == 0 else "negative"
  for score in all_scores
]
      

In [0]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Fixed class order shared by the matrix computation and its printout.
class_labels = ["positive", "negative", "neutral"]

cm = confusion_matrix(sentiment, score_text_form, labels=class_labels)
accuracy_pct = 100 * accuracy_score(sentiment, score_text_form)

print("The Google API is " + str(accuracy_pct) + "% accurate.")
print_cm(cm, class_labels)

The Google API is 43.613138686131386% accurate.
       t/p   positive negative  neutral 
    positive    409.0     21.0     91.0 
    negative     59.0     18.0     41.0 
     neutral    212.0     37.0    290.0 


## Export as CSV for Experiment 4

In [0]:
import pandas as pd

# Integer encoding of the gold labels:
# "positive" -> 2, "neutral" -> 1, anything else -> 0.
label = [
  2 if eachsent == "positive" else 1 if eachsent == "neutral" else 0
  for eachsent in sentiment
]

# One column per field; all lists are expected to have the same length.
google = dict(
  text=text,
  predict=score_text_form,
  score=all_scores,
  label=label,
)

df_ibm = pd.DataFrame.from_dict(google)
df_ibm.to_csv('google_result.csv')