-
Notifications
You must be signed in to change notification settings - Fork 1
/
sentimentanalysis_chunks_final.py
73 lines (56 loc) · 2.5 KB
/
sentimentanalysis_chunks_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#open Google Cloud NL module downloaded via pip3
from google.cloud import language
import json
#before you run this, download your .json key file from Google Cloud Natural Language (requires a Google Cloud account)
#instructions: https://www.freecodecamp.org/news/how-to-make-your-own-sentiment-analyzer-using-python-and-googles-natural-language-api-9e91e1c493e/
#once you have the json key file, point to it from the terminal before running this script:
#export GOOGLE_APPLICATION_CREDENTIALS=path/to/the/json.key
#this is the function that talks to the API; it returns a dictionary with the text, score and magnitude
def analyze_text_sentiment(text, client=None):
    """Run sentiment analysis on *text* with the Google Cloud Natural Language API.

    Args:
        text: Plain text to analyze; it is sent as a PLAIN_TEXT document.
        client: Optional ``language.LanguageServiceClient`` to reuse across
            calls. When omitted, a new client is created for this call, so
            existing one-argument callers behave as before.

    Returns:
        A dict with three keys: ``'text'`` (the input text unchanged),
        ``'score'`` and ``'magnitude'`` (both read from the response's
        ``document_sentiment``).
    """
    # Building a client per call is wasteful when analyzing many chunks;
    # callers that loop can pass one shared client instead.
    if client is None:
        client = language.LanguageServiceClient()
    document = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )
    response = client.analyze_sentiment(document=document)
    sentiment = response.document_sentiment
    return {
        'text': text,
        'score': sentiment.score,
        'magnitude': sentiment.magnitude,
    }
#here is where you use a function to break the screenplay text into 'chunks'
def chunks(lst, n):
    """Yield successive n-sized chunks from lst.

    Args:
        lst: A sliceable sequence with a length (list, tuple, str, ...).
        n: Maximum number of items per chunk; must be at least 1.

    Yields:
        Slices of ``lst`` of length ``n``, in order. The final slice is
        shorter when ``len(lst)`` is not a multiple of ``n``. An empty
        ``lst`` yields nothing.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject non-positive sizes explicitly.
    if n < 1:
        raise ValueError("chunk size n must be at least 1")
    for start in range(0, len(lst), n):
        yield lst[start:start + n]
#open the json screenplay database file you want to run sentiment analysis on
#change for different movies
INPUT_PATH = 'ItHappenedOneNight.json'
#designate new file name +"_with_score.json" for new results dictionary
OUTPUT_PATH = 'ItHappenedOneNight_with_score.json'
#you can change the amount of lines you want in the chunk per the number below
LINES_PER_CHUNK = 175

#read as UTF-8 explicitly so the result does not depend on the platform default encoding
with open(INPUT_PATH, 'r', encoding='utf-8') as database:
    data = json.load(database)

for screen_play in data:
    screen_play_text = screen_play['screenplay_text']

    #clean the text: collapse runs of whitespace, then skip every line that is
    #unchanged by upper() (all-caps lines, and also blank or digit/punctuation-only lines)
    all_text_lines_clean = []
    for text_line in screen_play_text:
        text_line = " ".join(text_line.split())
        if text_line.upper() == text_line:
            continue
        all_text_lines_clean.append(text_line)

    #break the cleaned lines into chunks; each chunk becomes one API request
    clean_chunks = list(chunks(all_text_lines_clean, LINES_PER_CHUNK))

    #make a new place to store all the results
    screen_play['line_results'] = []
    try:
        for counter, line in enumerate(clean_chunks, start=1):
            line = " ".join(line)
            #send it to the API
            api_results = analyze_text_sentiment(line)
            print(api_results)
            screen_play['line_results'].append(api_results)
            #progress is counted in chunks, so report it against the number of chunks
            print('on chunk', counter, 'of', len(clean_chunks))
    finally:
        #NOTE(review): the original indentation was lost, so it is unclear whether
        #the results were written after every chunk or once at the end. Writing in
        #a finally block saves them once, and still saves partial results if an
        #API call fails part-way through.
        with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
            json.dump(data, out, indent=2)

    #only do one title per file (if more than one), remove this to keep going
    break