In [None]:
# Load the MongoDB connection string from a local credentials file.

import json
import os

# The location can be overridden with the MONGODB_CREDENTIALS_PATH environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original path is used, so existing setups keep working.
credentials_path = os.environ.get(
    'MONGODB_CREDENTIALS_PATH',
    '/Users/wasilhassan/Desktop/sort/credentials.json',
)

with open(credentials_path) as f:
    data = json.load(f)

# The connection string is stored under the "mongodb" key of the JSON file.
secret_key = data['mongodb']

# Print only the length so the secret itself never appears in the cell output.
print(f"My secret key is {len(secret_key)} characters in length.")

In [None]:
import pymongo
import certifi
import pandas as pd
import re

# Reuse `secret_key` from the credentials cell rather than indexing `data`
# again, so this cell does not depend on `data` still being the credentials
# dict when it is re-run.
mongo_connection_string = secret_key

# certifi.where() supplies the CA bundle used to verify the TLS connection.
client = pymongo.MongoClient(mongo_connection_string, tlsCAFile=certifi.where())

# Fetch the database named "my_database"
db = client.my_database

# Within the database we have "collections". Think of them as tables in SQL.
allCollections = db.list_collection_names()

# Report the server version and the collections found in the database
print(f"Using MongoDB version {client.server_info()['version']}.")
print(f"This database has the collections {allCollections}")

In [None]:
# Load every document from "try_1" (described as a pipeline view) and keep
# only the entries released in 2010.
db = client['my_database']
view = db['try_1']
# A dedicated name keeps `data` (the credentials dict) from being overwritten.
records = list(view.find({}))

# Convert the documents into a data frame
df = pd.DataFrame(records)
# Unparseable dates become NaT (same policy as the Metacritic dates below);
# NaT rows never satisfy the year filter, so they are dropped instead of
# aborting the whole cell.
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
df = df[df['release_date'].dt.year == 2010]

# Find data in Metacritic for the same year: the regex matches every
# release_date string that contains "2010".
metacritic_view = pd.DataFrame(db.metacritic.find({"release_date": re.compile("2010")}))

# Convert the values to numeric / datetime types. Bracket assignment always
# targets the column, whereas attribute assignment can silently create a plain
# attribute when the column is missing.
metacritic_view['score'] = pd.to_numeric(metacritic_view['score'], errors='coerce')
metacritic_view['release_date'] = pd.to_datetime(metacritic_view['release_date'], errors='coerce')

print(df)



In [None]:
# Inner-join the two frames on "title": only movies present in both are kept

unified_view = df.merge(metacritic_view, on="title", how="inner")
unified_view.head()

In [None]:
import transformers
# Fill-mask pipeline backed by DistilBERT; it proposes tokens for the [MASK]
# placeholder of a prompt.
classifier = transformers.pipeline(
    task="fill-mask",
    model="distilbert-base-uncased",
)

In [None]:
# Try the pipeline on a tiny prompt and show every candidate it returns
prompt = "movies is [MASK]"
result = classifier(prompt)

# For each candidate print, in order: the completed sentence, the token, its score
for prediction in result:
    for field in ("sequence", "token_str", "score"):
        print(prediction[field])

In [None]:
# Ask the fill-mask model how each movie ends, one prompt per plot summary.
predictions = []
# Only the "description" column is needed, so iterate over it directly
# instead of materialising every row with iterrows().
for plot_summary in unified_view["description"]:
    # The model fills in [MASK]; it may pick any token, not only "happy" or "sad".
    prompt = f"This movie is about {plot_summary}. The movie has a [MASK] ending."

    # Run the prompt through the fill-mask pipeline
    result = classifier(prompt)

    # Keep the token of the first candidate (the pipeline's top-scoring one)
    # as the label; its score is not used anywhere, so it is not stored.
    predictions.append(result[0]["token_str"])

# Add the predictions to the dataset as a new column
unified_view["Sentiment"] = predictions

# Print the updated dataset
print(unified_view)

In [None]:
import plotly.express as px

# Rescale the score on a derived frame: dividing unified_view['score'] in place
# would shrink it by another factor of 10 every time this cell is re-run.
# NOTE(review): the division presumably brings `score` onto the same scale as
# `user_rating` - confirm against the source data.
ratings_view = unified_view.assign(score=unified_view['score'] / 10)

# Reshape to long form: one row per (movie, rating type) pair
df_melt = pd.melt(ratings_view, id_vars='Sentiment', value_vars=['user_rating', 'score'],
                  var_name='rating_type', value_name='rating_value')

# Create boxplot: one box per rating type within each sentiment
fig = px.box(df_melt, x="Sentiment", y="rating_value", color="rating_type")
fig.show()

In [None]:
#This graph shows the user rating and score of movies with different types of endings. From it I can deduce that movies with happy endings have a median score of 5.2 and a median user rating of 6.3.


In [None]:
import pandas as pd
import plotly.express as px

# Make sure 'release_date_y' holds datetimes before pulling the month out of it
release_dates = pd.to_datetime(unified_view['release_date_y'])
unified_view['release_date_y'] = release_dates

# Month number of each release, stored as a new 'Month' column
unified_view['Month'] = release_dates.dt.month

# Number of movies for every (Month, Sentiment) pair
df_grouped = (
    unified_view
    .groupby(['Month', 'Sentiment'])
    .size()
    .reset_index(name='Count')
)

# Grouped bar chart: one bar per sentiment within each month
fig = px.bar(df_grouped, x='Month', y='Count', color='Sentiment', barmode='group')
fig.show()


In [None]:
#From this graph I can tell that December was the month when the most movies with a tragic ending were released, with a total of 18 movies, and June was the month when the most movies with a happy ending were released, with a total of 9 movies.

In [None]:
import plotly.express as px

# Box plot of movie runtime, one box per predicted ending
fig = px.box(data_frame=unified_view, x="Sentiment", y="runtime")
fig.show()

In [None]:
#This graph tells us about the runtime of movies with different types of endings. 