# Off-the-shelf API

In [5]:
import pandas as pd

#create input and output directories
import os
inputpath = 'input'
outputpath = 'outputs'
# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
os.makedirs(inputpath, exist_ok=True)
os.makedirs(outputpath, exist_ok=True)
    
# input file path, built from `inputpath` so the directory name is defined once
sentiment140_file = os.path.join(inputpath, 'training.1600000.processed.noemoticon.csv')

# read csv file; column names are supplied explicitly through `names`
# NOTE(review): encoding_errors='ignore' silently drops any bytes that are not
# valid UTF-8 -- confirm the file's real encoding (possibly latin-1) before
# relying on the exact tweet text.
colnames = ['polarity', 'id', 'post_datetime', 'query', 'user', 'tweet']
df_tweets = pd.read_csv(sentiment140_file,
                encoding='UTF', names=colnames, encoding_errors='ignore')

# get a random sample of tweets; the fixed random_state makes the draw repeatable
N_TWEETS = 1600
df = df_tweets[['polarity','tweet']].sample(n=N_TWEETS, random_state=0)
df.to_csv(os.path.join(outputpath, "selected_tweets1600.csv"), index=False)

In [6]:
# How many rows of the full dataset carry each polarity label.
df_tweets["polarity"].value_counts()

0    800000
4    800000
Name: polarity, dtype: int64

In [7]:
# Same label count for the sampled subset.
df["polarity"].value_counts()

0    811
4    789
Name: polarity, dtype: int64

## Setting up

In [10]:
from configparser import ConfigParser

CONFIG_PATH = 'input/keys_config.cfg'

config = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the key file is missing.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Azure key file not found: {CONFIG_PATH}")
API_KEY = config.get('azure', 'api_key')
ENDPOINT = config.get('azure', 'endpoint')


def create_client():
    """Build the Azure Text Analytics client from ENDPOINT and API_KEY.

    Returns
    -------
    TextAnalyticsClient
        Client constructed with ``ENDPOINT`` and an ``AzureKeyCredential``
        wrapping ``API_KEY``.

    Any exception raised during construction propagates to the caller, so a
    broken setup fails here instead of leaving ``client`` set to ``None``.
    """
    # Imported here because the notebook's import cell sits below this cell;
    # this keeps the cell runnable on a fresh kernel.
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient

    return TextAnalyticsClient(
        endpoint=ENDPOINT,
        credential=AzureKeyCredential(API_KEY),
    )


# The factory has its own name so that binding `client` does not shadow it.
client = create_client()

In [52]:
import os
import numpy as np
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

 ## Request functions

In [38]:
def azure_sentiment(input_text,target_lang):
    """Run sentiment analysis on one text or a collection of texts.

    Parameters
    ----------
    input_text : str or iterable
        A single text, or an iterable of documents. ``None``, an empty string
        and an empty iterable all return ``[]`` without calling the service.
    target_lang : str
        Language hint passed to the service as ``language`` (e.g. ``'en'``).

    Returns
    -------
    list
        The response documents whose ``is_error`` flag is false. Documents the
        service rejected are dropped, so the list can be shorter than the input.

    Uses the module-level ``client`` and requests opinion mining
    (``show_opinion_mining=True``).
    """
    if input_text is None:
        return []

    # The emptiness check must happen before a lone string is wrapped in a
    # list; a one-element list never compares equal to ''.
    if isinstance(input_text, str):
        documents = [input_text] if input_text else []
    else:
        documents = list(input_text)

    if not documents:
        return []

    response = client.analyze_sentiment(
        documents=documents,
        language=target_lang,
        show_opinion_mining=True,
    )
    return [doc for doc in response if not doc.is_error]

In [48]:
 df["tweet"].values

array(['wants to compete! i want hard competition! i want to rally. i want to feel the power coming out of the engine! i want to compete ',
       'It seems we are stuck on the ground in Amarillo. They have put a ground stop for all flights leaving for Denver. Said updates in an hour ',
       'where the f are my pinking shears? rarararrrarararr...babyproofing while cutting stuff makes me stick shears random places &amp; forget them ',
       ...,
       'Laptop got a virus, kept crashing, had to restore factory settings.  I lost everything.',
       '@oOoshecutee hey hey now!! ',
       '@MagnusApollo maybe not as frustrating as your pyramid maze, hate '],
      dtype=object)

In [42]:
def detailed_result(result):
    """Print a readable sentiment report for every document in ``result``.

    For each document this prints the overall sentiment label and its
    confidence scores, then the text and scores of each sentence, followed by
    that sentence's mined opinions (the target and each of its assessments).
    A rule of 100 dashes closes each document's report.

    Parameters
    ----------
    result : iterable
        Documents exposing ``sentiment``, ``confidence_scores`` and
        ``sentences``.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    def show_scores(scores):
        # One layout shared by document-level and sentence-level scores.
        print(' positive_score', scores.positive,
              ' neutral_score', scores.neutral,
              ' negative_score', scores.negative)

    divider = '-' * 100
    for document in result:
        print("overall sentiment:", document.sentiment)
        show_scores(document.confidence_scores)
        print("\n***break down the analysis by each sentence***")

        for number, sent in enumerate(document.sentences, start=1):
            print(' sentence #', number, ":", sent.text)
            show_scores(sent.confidence_scores)

            # opinion mining result: one target line, then its assessments
            for opinion in sent.mined_opinions:
                print("--", opinion.target.sentiment, "target text:", opinion.target.text)
                for item in opinion.assessments:
                    print(item.sentiment, "assessment text:", item.text)

        print(divider)

In [43]:
test = ['Tonight is the night.', 'I love it']
# Query the service once and print that response; a separate bare call whose
# result is discarded would only add a second, redundant request.
detailed_result(azure_sentiment(test,'en'))

overall sentiment: neutral
 positive_score 0.23  neutral_score 0.69  negative_score 0.08

***break down the analysis by each sentence***
 sentence # 1 : tonight is the night
 positive_score 0.23  neutral_score 0.69  negative_score 0.08
----------------------------------------------------------------------------------------------------
overall sentiment: positive
 positive_score 0.99  neutral_score 0.01  negative_score 0.0

***break down the analysis by each sentence***
 sentence # 1 : i love it
 positive_score 0.99  neutral_score 0.01  negative_score 0.0
----------------------------------------------------------------------------------------------------


## Getting results for 1600 tweets

In [55]:
col_name = ["overall sentiment", "positive_score","neutral_score","negative_score"]

# Collect one row per tweet and build the frame once at the end. The row count
# then follows the data instead of a hard-coded size, and no float-typed frame
# has to be overwritten with mixed-type rows.
rows = []
for tweet in df.tweet.values:
    # One request per tweet keeps each result aligned with its source tweet.
    result = azure_sentiment(tweet,'en')
    if result:
        doc = result[-1]
        scores = doc.confidence_scores
        rows.append([doc.sentiment, scores.positive, scores.neutral, scores.negative])
    else:
        # No successful document came back: mark the row as missing rather
        # than leaving zeros that look like real scores.
        rows.append([None, np.nan, np.nan, np.nan])

df_result = pd.DataFrame(rows, columns=col_name)
df_result

Unnamed: 0,overall sentiment,positive_score,neutral_score,negative_score
0,negative,0.11,0.10,0.79
1,negative,0.01,0.30,0.69
2,negative,0.04,0.20,0.76
3,positive,0.48,0.31,0.21
4,neutral,0.24,0.70,0.06
...,...,...,...,...
1595,positive,0.90,0.03,0.07
1596,positive,0.98,0.01,0.00
1597,negative,0.00,0.02,0.97
1598,neutral,0.28,0.56,0.16


In [56]:
# Distribution of the overall sentiment labels returned by the service.
df_result.loc[:, "overall sentiment"].value_counts()

positive    563
negative    553
neutral     326
mixed       158
Name: overall sentiment, dtype: int64

In [None]:
# Persist the per-tweet results without the index column.
df_result.to_csv(path_or_buf="outputs/azure_sentiment.csv", index=False)