# <div class="alert alert-block alert-info"> Sentimental Analysis of Donald Trump's tweets </div>

In [1]:
import re

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
tweets_file = open('Trump_Raw_Tweets_Fall22.txt','r')

dictionary={}
lines=[]
for line in tweets_file:
    lower_line=line.lower()
    no_url_line = re.sub(r'http\S+|\'b\'|"b\'|\'b"',r'', lower_line)   #removes URLs and irrelevant text
    no_unicode_line=re.sub(r'\\xe2\\x80\\x99',"'",no_url_line)  #replaced unicode quote value with quote
    no_unicode_line=re.sub(r'\\x[0-9a-f]{2}',r'',no_unicode_line)   #removes unicode characters
    lines.extend(no_unicode_line.split('\\n')) #splits text file from '\n' into multi lines
    
    
#Removes punctuations from line and splits lines to form dictionary containing all words and their count    
for line in lines: 
    list1=line.strip().split()
    for w in list1:
        w1=w.strip("-\(.!,?\) '\"/:_;\\")
        if len(w1)> 0:
            if w1 not in dictionary:
                dictionary[w1]=1
            else:
                dictionary[w1]=dictionary[w1]+1
                
                
total_words=0
for v in dictionary.values():
    total_words+=v
print('Total number of words after data cleaning: ', total_words)

Total number of words after data cleaning:  56264


In [4]:
#Removing stop words from dictionary

stop_file = open('stop_words.txt','r')
stop_words=set()
for line in stop_file:
    stop_words.add(line.rstrip('\n'))
    
unique_stop=0
total_stop=0
for s in stop_words:
    if len(s) == 0 or s in dictionary:
        total_stop+=dictionary[s]
        del dictionary[s]
        unique_stop+=1

print('Unique stop words:',unique_stop)
print('Total stop words: ', total_stop)

Unique stop words: 586
Total stop words:  28221


In [5]:
#Word count after removing stop words

word_count_without_stopwords=0
for v in dictionary.values():
    word_count_without_stopwords+=v

print('Word count after removing stop words: ', word_count_without_stopwords)

Word count after removing stop words:  28043


In [6]:
#Counting number of positive, negative and other words

positive_file = open('positive.txt','r')
positive_words=set()
for line in positive_file:
    positive_words.add(line.rstrip('\n'))
unique_positive=0
total_positive=0

negative_file = open('negative.txt','r')
negative_words=set()
for line in negative_file:
    negative_words.add(line.rstrip('\n'))
unique_negative=0
total_negative=0

unique_others=0
total_others=0

for d in dictionary:
    if d in positive_words:
        unique_positive+=1
        total_positive+=dictionary[d]
    elif d in negative_words:
        unique_negative-=1
        total_negative-=dictionary[d]
    else:
        unique_others+=1
        total_others+=dictionary[d]
        
print('Unique positive words:', unique_positive)
print('Total positive words: ', total_positive)

print('Unique negative words:',unique_negative)
print('Total negative words: ', total_negative)

print('Unique other words: ',unique_others)
print('Total other words: ', total_others)

Unique positive words: 211
Total positive words:  2611
Unique negative words: -479
Total negative words:  -2346
Unique other words:  4834
Total other words:  23086


In [7]:
#Checking total

total2 = total_positive+abs(total_negative)+total_others+total_stop
print(total2)

56264


## <div class="alert alert-block alert-info"> Ratio:</div>

In [8]:
def ratio(a,b):
    r = (a/b)
    return round(r, 3)

print('Ratio of positive words to total words: ', ratio(total_positive,total_words))

print('Ratio of negative to total words: ', ratio(abs(total_negative), total_words))

print('Ratio of stop words to total words: ', ratio(total_stop, total_words))

print('Ratio of other words to total words: ', ratio(total_others, total_words))

print('Ratio of positive words to negative words: ', ratio(total_positive, abs(total_negative)))

Ratio of positive words to total words:  0.046
Ratio of negative to total words:  0.042
Ratio of stop words to total words:  0.502
Ratio of other words to total words:  0.41
Ratio of positive words to negative words:  1.113


In [9]:
#Closing files

positive_file.close()
negative_file.close()
stop_file.close()
tweets_file.close()

## <div class="alert alert-block alert-info">Analysis: </div> 

1. What’s the word count for positive/negative/stop word/others?

    - Positive words: 2611
    - Negative words: 2346
    - Stop words: 28221
    - Other words: 23086
    
    
2. What’s the ratio of positive/negative/stop word/others compare to the total word count?

    - Ratio of positive words to total words: 0.046
    - Ratio of negative words to total words: 0.042
    - Ratio of stop words to total words: 0.502
    - Ratio of other words to total words: 0.41
    
    
3. What’s the ratio for positive vs negative word count?

    - Ratio of positive to negative words: 1.113
    
    
4. Do you think that the general sentiment is negative or positive? Weakly or strongly?

    - The positive sentiment score is 2611 and negative sentiment score is -2346. Here, positive sentiment score is more than negative sentiment score which is also reflected in their ratio, i.e., 1.113. This means that positive sentiment is more by 11.3% than negative sentiment which meakes a weak inclination towards positive sentiments.

## <div class="alert alert-block alert-info">Conclusion: </div> 

From above analysis it can be said that the overall sentimental analysis of this data set is positive. However, this analysis is only based on calculations and not on analyzing real sentiment behind each word or phrases. For example, 'He is not bad.' This line has a positive meaning but uses two negative words. 
Also, the fundamental flaw in this problem is the word 'trump'. Trump itself is a negative word which distorts the sentiment analysis to some extent. 