In [1]:
# Loading and reading CSV files

In [2]:
# Add Matplotlib inline magic command
%matplotlib inline

# Add dependencies
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Relative paths of the four raw score CSVs, one per media category
(
    games_data_to_load,
    movies_data_to_load,
    music_data_to_load,
    tv_data_to_load,
) = (
    "../Data/games_scores.csv",
    "../Data/movies_scores.csv",
    "../Data/music_scores.csv",
    "../Data/tv_scores.csv",
)


In [4]:
# Load each raw CSV into its own DataFrame (same order as the paths above)
games_data_df, movies_data_df, music_data_df, tv_data_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        games_data_to_load,
        movies_data_to_load,
        music_data_to_load,
        tv_data_to_load,
    )
)

In [5]:
# Preview the first five rows of the raw music table
music_data_df.head(5)

Unnamed: 0,id,artist,metascore,release_date,sort_no,summary,title,user_score
0,13354,Wadada Leo Smith,99,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,4.2
1,37516,Fiona Apple,98,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,8.1
2,1671,Brian Wilson,97,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,7.9
3,1538,Loretta Lynn,97,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,8.4
4,22163,Kendrick Lamar,96,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,8.9


In [6]:
# Keep only rows whose user_score is an actual score: rows marked "tbd"
# cannot take part in the weighted average computed later.
games, movies, music, tv = (
    raw_df[raw_df["user_score"] != "tbd"]
    for raw_df in (games_data_df, movies_data_df, music_data_df, tv_data_df)
)

In [7]:
# Remove the "id" column, which is not needed for the ranking.
# The frames are boolean-filtered slices of the raw data, so dropping with
# inplace=True raises pandas' SettingWithCopyWarning. Rebinding each name to the
# new frame returned by drop() avoids mutating a slice.
games = games.drop(columns=["id"])
movies = movies.drop(columns=["id"])
music = music.drop(columns=["id"])
tv = tv.drop(columns=["id"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [8]:
# Keep the source ordering under a clearer name ("original_metarank") so it
# can be compared with the recomputed rank later on.
games, movies, music, tv = (
    frame.rename(columns={'sort_no': 'original_metarank'})
    for frame in (games, movies, music, tv)
)

In [9]:
# Preview the music table after dropping "id" and renaming "sort_no"
music.head(5)

Unnamed: 0,artist,metascore,release_date,original_metarank,summary,title,user_score
0,Wadada Leo Smith,99,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,4.2
1,Fiona Apple,98,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,8.1
2,Brian Wilson,97,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,7.9
3,Loretta Lynn,97,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,8.4
4,Kendrick Lamar,96,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,8.9


In [10]:
# Check column data types: user_score is still stored as object at this point,
# so it has to be converted before it can be used in arithmetic.
music.dtypes

artist               object
metascore             int64
release_date         object
original_metarank     int64
summary              object
title                object
user_score           object
dtype: object

In [11]:
# Convert the user_score column from text to a numeric dtype in every table
for frame in (games, movies, music, tv):
    frame["user_score"] = pd.to_numeric(frame["user_score"])

In [12]:
# Multiply the user score by 10 so it is on the same scale as the metascore
for frame in (games, movies, music, tv):
    frame["user_score"] = frame["user_score"].multiply(10)

In [13]:
# Convert the metascore column to float64 so it matches the user score dtype
for frame in (games, movies, music, tv):
    frame["metascore"] = frame["metascore"].astype(np.float64)

In [14]:
# Confirm that metascore and user_score are both float64 after the conversions
music.dtypes

artist                object
metascore            float64
release_date          object
original_metarank      int64
summary               object
title                 object
user_score           float64
dtype: object

In [15]:
# Preview the music table with both score columns as floats on the same scale
music.head(5)

Unnamed: 0,artist,metascore,release_date,original_metarank,summary,title,user_score
0,Wadada Leo Smith,99.0,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,42.0
1,Fiona Apple,98.0,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,81.0
2,Brian Wilson,97.0,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,79.0
3,Loretta Lynn,97.0,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,84.0
4,Kendrick Lamar,96.0,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,89.0


In [16]:
#Calculate the weighted average of metascores and user scores combined
# Blend weights are named once here so the user/critic balance can be tuned in
# a single place: user scores contribute 60% and metascores 40%.
USER_SCORE_WEIGHT = .6
METASCORE_WEIGHT = .4

games_weight_average = games["user_score"] * USER_SCORE_WEIGHT + games["metascore"] * METASCORE_WEIGHT
movies_weight_average = movies["user_score"] * USER_SCORE_WEIGHT + movies["metascore"] * METASCORE_WEIGHT
music_weight_average = music["user_score"] * USER_SCORE_WEIGHT + music["metascore"] * METASCORE_WEIGHT
tv_weight_average = tv["user_score"] * USER_SCORE_WEIGHT + tv["metascore"] * METASCORE_WEIGHT

In [17]:
# Attach each category's weighted average as a new "weighted_average" column
games, movies, music, tv = (
    frame.assign(weighted_average=combined_score)
    for frame, combined_score in zip(
        (games, movies, music, tv),
        (
            games_weight_average,
            movies_weight_average,
            music_weight_average,
            tv_weight_average,
        ),
    )
)

In [18]:
# Preview the music table with the new weighted_average column
music.head(5)

Unnamed: 0,artist,metascore,release_date,original_metarank,summary,title,user_score,weighted_average
0,Wadada Leo Smith,99.0,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,42.0,64.8
1,Fiona Apple,98.0,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,81.0,87.8
2,Brian Wilson,97.0,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,79.0,86.2
3,Loretta Lynn,97.0,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,84.0,89.2
4,Kendrick Lamar,96.0,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,89.0,91.8


In [19]:
# Confirm the weighted_average column was added with a float64 dtype
music.dtypes

artist                object
metascore            float64
release_date          object
original_metarank      int64
summary               object
title                 object
user_score           float64
weighted_average     float64
dtype: object

In [20]:
# Show the music records ordered from highest to lowest weighted average
# (display only: the sorted frame is not stored)
music.sort_values('weighted_average', ascending=False)

Unnamed: 0,artist,metascore,release_date,original_metarank,summary,title,user_score,weighted_average
4,Kendrick Lamar,96.0,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,89.0,91.8
729,RAYE,88.0,"February 3, 2023",141,The debut full-length release for London-based...,My 21st Century Blues,93.0,91.0
8,Outkast,95.0,"October 31, 2000",9,"Featuring the hit singles ""B.O.B."" and ""Ms. Ja...",Stankonia,88.0,90.8
16,Madvillain,93.0,"March 23, 2004",17,'Madvillainy' is a collaboration between rappe...,Madvillainy,89.0,90.6
38,Weyes Blood,91.0,"April 5, 2019",39,The fourth full-length release for Natalie Mer...,Titanic Rising,90.0,90.4
...,...,...,...,...,...,...,...,...
13144,Viva Brother,34.0,"August 1, 2011",13318,"The debut album for the British pop rock band,...",Famous First Words,25.0,28.6
13142,Dirty Vegas,35.0,"November 30, 2004",13316,The UK dance trio returns with a sophomore rel...,One,24.0,28.4
13136,6ix9ine,38.0,"November 27, 2018",13310,The debut full-length studio release for the r...,DUMMY BOY,18.0,26.0
13119,Soulja Boy Tell Em,42.0,"December 16, 2008",13292,The follow-up to Souljboytellem.com was produc...,iSouljaBoyTellem,14.0,25.2


In [21]:
# Rank every title by its weighted average, with rank 1 as the highest score.
# method="average" is pandas' default tie rule, spelled out here because it is
# why tied titles share fractional ranks such as 5.5.
for frame in (games, movies, music, tv):
    frame['adjusted_metarank'] = frame['weighted_average'].rank(
        method="average", ascending=False
    )

In [22]:
# Show the music table ordered by adjusted metarank, best first
music.sort_values('adjusted_metarank')

Unnamed: 0,artist,metascore,release_date,original_metarank,summary,title,user_score,weighted_average,adjusted_metarank
4,Kendrick Lamar,96.0,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,89.0,91.8,1.0
729,RAYE,88.0,"February 3, 2023",141,The debut full-length release for London-based...,My 21st Century Blues,93.0,91.0,2.0
8,Outkast,95.0,"October 31, 2000",9,"Featuring the hit singles ""B.O.B."" and ""Ms. Ja...",Stankonia,88.0,90.8,3.0
16,Madvillain,93.0,"March 23, 2004",17,'Madvillainy' is a collaboration between rappe...,Madvillainy,89.0,90.6,4.0
38,Weyes Blood,91.0,"April 5, 2019",39,The fourth full-length release for Natalie Mer...,Titanic Rising,90.0,90.4,5.5
...,...,...,...,...,...,...,...,...,...
13144,Viva Brother,34.0,"August 1, 2011",13318,"The debut album for the British pop rock band,...",Famous First Words,25.0,28.6,9407.5
13142,Dirty Vegas,35.0,"November 30, 2004",13316,The UK dance trio returns with a sophomore rel...,One,24.0,28.4,9409.0
13136,6ix9ine,38.0,"November 27, 2018",13310,The debut full-length studio release for the r...,DUMMY BOY,18.0,26.0,9410.0
13119,Soulja Boy Tell Em,42.0,"December 16, 2008",13292,The follow-up to Souljboytellem.com was produc...,iSouljaBoyTellem,14.0,25.2,9411.0


In [23]:
# Show the movies table ordered by adjusted metarank, best first
movies.sort_values('adjusted_metarank')

Unnamed: 0,metascore,rating,release_date,original_metarank,summary,title,user_score,weighted_average,adjusted_metarank
101,100.0,R,"March 24, 1972",2,Francis Ford Coppola's epic features Marlon Br...,The Godfather,92.0,95.2,1.0
104,100.0,TV-PG,"January 23, 1943",5,"A Casablanca, Morocco casino owner in 1941 she...",Casablanca,88.0,92.8,2.0
103,100.0,TV-G,"September 1, 1954",4,A wheelchair-bound photographer spies on his n...,Rear Window,87.0,92.2,3.0
145,96.0,PG,"September 20, 2002",46,"A young girl, Chihiro, becomes trapped in a st...",Spirited Away,89.0,91.8,4.0
111,99.0,Passed,"March 7, 1931",12,The Tramp (Charlie Chaplin) struggles to help ...,City Lights,87.0,91.8,5.0
...,...,...,...,...,...,...,...,...,...
15276,11.0,PG-13,"April 22, 2005",15277,When a wealthy businessman is faced with a div...,King's Ransom,14.0,12.8,12112.0
15319,1.0,,"November 14, 2003",15320,"Two lovers, killed during the Holocaust, are r...",The Singing Forest,20.0,12.4,12113.0
15304,8.0,Not Rated,"July 18, 2014",15305,An annual 4th of July weekend hosted by troubl...,Among Ravens,9.0,8.6,12114.0
15320,1.0,PG,"August 22, 1987",15321,Dodger must confront the struggles of life as ...,The Garbage Pail Kids Movie,7.0,4.6,12115.5


In [24]:
# Show the games table ordered by adjusted metarank, best first
games.sort_values('adjusted_metarank')

Unnamed: 0,metascore,platform,release_date,original_metarank,summary,title,user_score,weighted_average,adjusted_metarank
100,99.0,Nintendo 64,"November 23, 1998",1,"As a young boy, Link is tricked by Ganondorf, ...",The Legend of Zelda: Ocarina of Time,91.0,94.2,1.5
142,96.0,PlayStation 2,"October 25, 2005",43,In Resident Evil 4 players are reacquainted wi...,Resident Evil 4,93.0,94.2,1.5
64,91.0,PlayStation,"November 11, 1999",365,A month and a half have passed since the mansi...,Resident Evil 3: Nemesis,96.0,94.0,4.0
21,91.0,GameCube,"April 30, 2002",322,Raccoon City has been completely overrun by mu...,Resident Evil (2002),96.0,94.0,4.0
608,94.0,Dreamcast,"February 29, 2000",109,Resident Evil Code: Veronica takes up the stor...,Resident Evil Code: Veronica,94.0,94.0,4.0
...,...,...,...,...,...,...,...,...,...
20007,22.0,Xbox 360,"May 21, 2013",20008,Fast & Furious: Showdown takes some of the fra...,Fast & Furious: Showdown,13.0,16.6,18636.0
20016,17.0,Xbox 360,"April 5, 2013",20017,A remake the 80's Double Dragon II game. Team ...,Double Dragon II: Wander of the Dragons,16.0,16.4,18637.0
20013,19.0,PC,"June 11, 2015",20014,A dire curse has shrouded the town of Lorwich ...,Alone in the Dark: Illumination,13.0,15.4,18638.5
20014,19.0,Xbox 360,"June 25, 2013",20015,The game is set in the last years of the roari...,Ride to Hell: Retribution,13.0,15.4,18638.5


In [25]:
# Show the TV table ordered by adjusted metarank, best first
tv.sort_values('adjusted_metarank')

Unnamed: 0,metascore,release_date,original_metarank,summary,title,user_score,weighted_average,adjusted_metarank
4,99.0,"July 15, 2012",5,The final season for the award-winning drama b...,Breaking Bad: Season 5,97.0,97.8,1.0
33,95.0,"July 11, 2022",34,The second half of the sixth and final season ...,Better Call Saul: Season 6.5,97.0,96.2,2.0
21,96.0,"July 17, 2011",22,Walter (Bryan Cranston) has more trouble on hi...,Breaking Bad: Season 4,95.0,95.4,3.0
37,94.0,"April 18, 2022",38,The sixth and final season of the Breaking Bad...,Better Call Saul: Season 6,94.0,94.0,4.0
9,98.0,"September 19, 2004",10,In chronicling a multi-generational family bus...,The Wire: Season 3,91.0,93.8,5.5
...,...,...,...,...,...,...,...,...
3134,39.0,"September 25, 2017",3135,The morning talk show hosted by former Fox New...,Megyn Kelly Today: Season 1,9.0,21.0,2993.5
3240,30.0,"September 14, 2011",3241,Mario Lopez hosts this reality show where cele...,H8R: Season 1,15.0,21.0,2993.5
3108,40.0,"January 24, 2020",3109,The documentary series hosted by Gwyneth Paltr...,The Goop Lab with Gwyneth Paltrow: Season 1,8.0,20.8,2995.0
3190,35.0,"September 18, 2018",3191,Former Celebrity Apprentice contestant Tom Arn...,The Hunt for the Trump Tapes with Tom Arnold: ...,9.0,19.4,2996.0


In [26]:
#Files to save

# `Path` comes from the notebook's top-level import cell rather than being
# imported here, so all dependencies are declared in one place.
# Write the re-ranked games table, creating the target folder if it is missing.
filepath1 = Path("../Data/adjusted_games_scores.csv")
filepath1.parent.mkdir(parents=True, exist_ok=True)
games.to_csv(filepath1)

In [27]:
# Write the re-ranked movies table, creating the target folder if it is missing
filepath2 = Path("../Data/adjusted_movies_scores.csv")
filepath2.parent.mkdir(exist_ok=True, parents=True)
movies.to_csv(path_or_buf=filepath2)

In [28]:
# Write the re-ranked music table, creating the target folder if it is missing
filepath3 = Path("../Data/adjusted_music_scores.csv")
filepath3.parent.mkdir(exist_ok=True, parents=True)
music.to_csv(path_or_buf=filepath3)

In [29]:
# Write the re-ranked TV table, creating the target folder if it is missing
filepath4 = Path("../Data/adjusted_tv_scores.csv")
filepath4.parent.mkdir(exist_ok=True, parents=True)
tv.to_csv(path_or_buf=filepath4)