# Sentiment Analysis
Analyze the emotion of a document by counting the number of words that are positive, negative, or neutral.

#### setup:
Read from a dictionary that assigns an emotion to each word

#### map:
Read from input, for each word in input, return (word_feeling, 1)

#### reduce:
Count number of positive, negative, and neutral words.

In [1]:
from pyspark import SparkContext, SparkConf
# Reuse the running context when this cell is executed more than once;
# constructing a second SparkContext in the same process raises an error.
sc = SparkContext.getOrCreate()

In [2]:
# Build the emotion dictionary from the dictionary file.
# File format: word\tfeeling\n
# Keys are lower-cased so that lookups can be case-insensitive.

emotion_dict = {}
with open('sentimentAnalysis/emotionCategory.txt', 'r') as dict_file:
    for line in dict_file:
        entry = line.strip()
        if not entry:
            # Tolerate blank lines (e.g. an empty line at the end of the
            # file); unpacking them below would raise ValueError.
            continue
        word, feeling = entry.split('\t')
        emotion_dict[word.lower()] = feeling

In [3]:
# read all articles in input folder

from os import listdir
from os.path import isfile, join

# Folder that holds the articles to analyse.
input_path = 'sentimentAnalysis/input'

# Keep regular files only; anything else in the folder is ignored.
input_files = []
for f in listdir(input_path):
    if isfile(join(input_path, f)):
        input_files.append(f)

# One summary dict per processed article is appended here.
results = []

# map to emotions
def split_line(line):
    """Break one line of text into its whitespace-separated tokens.

    Returns a list of strings; an empty or whitespace-only line gives [].
    """
    trimmed = line.strip()
    tokens = trimmed.split()
    return tokens

def create_feelings_pairs(word, lexicon=None):
    """Map a word to a ``(feeling, 1)`` pair suitable for summing by key.

    word: a single token from the input text; it is lower-cased before
        the lookup, so matching is case-insensitive.
    lexicon: optional dict of lower-cased word -> feeling. When omitted,
        the module-level ``emotion_dict`` is used.

    Returns ``(feeling, 1)`` for a known word and ``('neutral', 1)`` for a
    word that is not in the lexicon.
    """
    if lexicon is None:
        lexicon = emotion_dict
    # Every word must contribute 1: the pairs are summed per key, so
    # emitting 0 for unknown words would leave the neutral total at 0.
    return (lexicon.get(word.lower(), 'neutral'), 1)

# For each article: tag every word with a feeling, sum the tags per feeling,
# and record the totals under the article's file name.
for input_file in input_files:
    article_path = join(input_path, input_file)
    # The parenthesised single-argument form is valid both as a Python 2
    # print statement and as a Python 3 print() call.
    print(article_path)
    text_RDD = sc.textFile(article_path)
    feelings_RDD = text_RDD.flatMap(split_line).map(create_feelings_pairs)

    # Sum the per-word values for each feeling key.
    file_emotion_RDD = feelings_RDD.reduceByKey(lambda a, b: a + b)
    emotion_result = file_emotion_RDD.collect()

    # prepare output: collect() yields (feeling, total) pairs, which dict()
    # turns directly into the feeling -> total mapping.
    result = {
        'title': input_file,
        'data': dict(emotion_result),
    }
    results.append(result)

sentimentAnalysis/input/ALL'S WELL THAT ENDS WELL
sentimentAnalysis/input/PerfectPositiveThinking
sentimentAnalysis/input/SONNETS


In [4]:
# save as a json file
import json

# Write the collected per-article summaries out as a single JSON document.
with open('sentimentAnalysis/results.json', 'w') as out_file:
    out_file.write(json.dumps(results))