In [7]:
import requests
import json
import pandas as pd
import numpy as np
import yaml
import nltk

# Get the API key from a yaml file

with open("api_key.yaml", "r") as f:
    config = yaml.safe_load(f)
API_KEY = config["API_KEY"]

# Get the data from the API

url = f"https://api.themoviedb.org/3/discover/movie?api_key={API_KEY}&sort_by=popularity.desc"
response = requests.get(url)


In [8]:
data = response.json()

In [17]:
def generator_review(movie_id):
    reviews = []
    for page in range(1, 6):
        url_reviews = f"https://api.themoviedb.org/3/movie/{movie_id}/reviews?api_key={API_KEY}&page={page}"
        response_reviews = requests.get(url_reviews)
        data_reviews = response_reviews.json()
        for review in data_reviews["results"]:
            reviews.append(review["content"])
    return reviews

In [32]:
nltk.tokenize.sent_tokenize("Hi here")

['Hi here']

In [46]:
all_sentences = []
for review in generator_review(502356):
    for sentence in nltk.tokenize.sent_tokenize(review.replace(' -', ' .').replace(',', '.').replace(':', '.')):
        all_sentences.append(sentence)

In [64]:
from nltk.sentiment import SentimentIntensityAnalyzer, vader

sia = SentimentIntensityAnalyzer()
sia.polarity_scores("I love this movie")
print(sia.polarity_scores("But the film is massively unfunny."))

via = vader.SentimentIntensityAnalyzer()
via.polarity_scores("I love this movie")
print(via.polarity_scores("But the film is massively unfunny."))

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [77]:
import transformers

classifier = transformers.pipeline('sentiment-analysis')


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Downloading config.json: 100%|██████████| 629/629 [00:00<00:00, 210kB/s]
Downloading pytorch_model.bin: 100%|██████████| 255M/255M [00:09<00:00, 29.1MB/s] 
Downloading tokenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<00:00, 24.0kB/s]
Downloading vocab.txt: 100%|██████████| 226k/226k [00:00<00:00, 2.72MB/s]


In [85]:
def positivity_score(sentence):
    sign = 1 if classifier(sentence)[0]["label"] == "POSITIVE" else -1
    return classifier(sentence)[0]["score"] * sign

sentence_scores = [(sentence, positivity_score(sentence)) for sentence in all_sentences]


In [87]:
sentence_scores.sort(key=lambda x: x[1], reverse=True)

In [91]:
ordered_reviews = pd.DataFrame(sentence_scores, columns=["sentence", "score"])

In [55]:
sia.polarity_scores("I love this movie")

{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}

In [76]:


def model(sentence):
    return sia.polarity_scores(sentence)["compound"]

reviews = generator_review(502356)

explainer = shap.Explainer(model)
shap_values = explainer(reviews)

AttributeError: 'str' object has no attribute 'shape'

In [48]:
all_sentences

['_The Super Mario Bros. Movie_ is like Fruit Stripe Gum.',
 'It’s super colorful and eyecatching.',
 'but it seems to instantly lose its flavor and charm.',
 'The film is visually stunning and Jack Black is outstanding as Bowser.',
 'The big action sequences are like big budget versions of the Mario video games playthroughs with little welcome surprises thrown in.',
 'But the film is massively unfunny.',
 'the characters are extremely flat.',
 'and the flimsy writing is about as complex as an unkempt mustache.',
 '**Full review.',
 '** https.//boundingintocomics.com/2023/04/06/the-super-mario-bros-movie-review-plunging-rainbow-colored-nostalgia-to-death/',
 "I am not the demographic and to be honest this isn't really a film that I was ever going to like either.",
 'The story is beyond simple and the game-based animation .',
 'though expertly crafted .',
 'did absolutely nothing for me as ninety minutes rolled along devoid of characterisations.',
 'thrill or much sense of adventure.',


In [None]:
nltk.sent_tokenize(text)

In [25]:
# Create a dataframe from the data

df = pd.DataFrame(data['results'])

In [26]:
df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/9n2tJBplPbgR2ca05hS5CKXwP2c.jpg,"[16, 12, 10751, 14, 35]",502356,en,The Super Mario Bros. Movie,"While working underground to fix a water main,...",9065.306,/qNBAXBIQlnOThrVvA6mA2B5ggV6.jpg,2023-04-05,The Super Mario Bros. Movie,False,7.5,631
1,False,/5i6SjyDbDWqyun8klUuCxrlFbyw.jpg,"[18, 28]",677179,en,Creed III,"After dominating the boxing world, Adonis Cree...",5253.149,/cvsXj3I9Q2iyyIo95AecSd1tad7.jpg,2023-03-01,Creed III,False,7.3,924
2,False,/ovM06PdF3M8wvKb06i4sjW3xoww.jpg,"[878, 12, 28]",76600,en,Avatar: The Way of Water,Set more than a decade after the events of the...,4810.649,/t6HIqrRAclMCA60NsSmeqe9RmNV.jpg,2022-12-14,Avatar: The Way of Water,False,7.7,6947
3,False,/bT3IpP7OopgiVuy6HCPOWLuaFAd.jpg,"[35, 9648, 28]",638974,en,Murder Mystery 2,"After starting their own detective agency, Nic...",3571.437,/wdffZv8gIiWy6xr4t7hWBWtUwpl.jpg,2023-03-28,Murder Mystery 2,False,6.5,616
4,False,/wybmSmviUXxlBmX44gtpow5Y9TB.jpg,"[28, 35, 14]",594767,en,Shazam! Fury of the Gods,"Billy Batson and his foster siblings, who tran...",5827.392,/A3ZbZsmsvNGdprRi2lKgGEeVLEH.jpg,2023-03-15,Shazam! Fury of the Gods,False,7.0,723
5,False,/vSUls0b7dNhC7tJoExF1MBYWWyh.jpg,"[16, 35, 10751, 12, 14]",816904,es,Momias,"Through a series of unfortunate events, three ...",1725.865,/qVdrYN8qu7xUtsdEFeGiIVIaYd.jpg,2023-01-05,Mummies,False,7.1,143
6,False,/i8dshLvq4LE3s0v8PrkDdUyb1ae.jpg,"[28, 53, 80]",603692,en,John Wick: Chapter 4,"With the price on his head ever increasing, Jo...",1840.121,/vZloFAK7NmvMGKE7VkF5UHaz0I.jpg,2023-03-22,John Wick: Chapter 4,False,8.0,971
7,False,/a2tys4sD7xzVaogPntGsT1ypVoT.jpg,"[53, 35, 80]",804150,en,Cocaine Bear,"Inspired by a true story, an oddball group of ...",1550.773,/gOnmaxHo0412UVr1QM5Nekv1xPi.jpg,2023-02-22,Cocaine Bear,False,6.5,729
8,False,/5Y5pz0NX7SZS9036I733F7uNcwK.jpg,"[27, 9648]",758323,en,The Pope's Exorcist,"Father Gabriele Amorth, Chief Exorcist of the ...",1768.271,/9JBEPLTPSm0d1mbEcLxULjJq9Eh.jpg,2023-04-05,The Pope's Exorcist,False,7.1,38
9,False,/ouB7hwclG7QI3INoYJHaZL4vOaa.jpg,"[16, 10751, 14, 12, 35]",315162,en,Puss in Boots: The Last Wish,Puss in Boots discovers that his passion for a...,1878.108,/kuf6dutpsT0vSVehic3EZIqkOBt.jpg,2022-12-07,Puss in Boots: The Last Wish,False,8.3,5144
