In [1]:
#Loading and Reading CSV Files

In [2]:
# Add Matplotlib inline magic command so figures render in the cell output
%matplotlib inline

# Add dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [3]:
#Files to load
# All four score files live in the same folder; naming it once means a moved
# data directory only needs this single value changed.
DATA_DIR = "../Data"

# Relative paths to each category's scores CSV (values are unchanged from before).
games_data_to_load = f"{DATA_DIR}/games_scores.csv"
movies_data_to_load = f"{DATA_DIR}/movies_scores.csv"
music_data_to_load = f"{DATA_DIR}/music_scores.csv"
tv_data_to_load = f"{DATA_DIR}/tv_scores.csv"


In [4]:
# Load each category's scores CSV into its own raw DataFrame.
# These *_data_df frames are the unmodified source tables for the cells below.
games_data_df = pd.read_csv(filepath_or_buffer=games_data_to_load)
movies_data_df = pd.read_csv(filepath_or_buffer=movies_data_to_load)
music_data_df = pd.read_csv(filepath_or_buffer=music_data_to_load)
tv_data_df = pd.read_csv(filepath_or_buffer=tv_data_to_load)

In [5]:
# Display the first rows of the raw music table as a quick sanity check of the load
music_data_df.head()

Unnamed: 0,id,artist,metascore,release_date,sort_no,summary,title,user_score
0,13354,Wadada Leo Smith,99,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,4.2
1,37516,Fiona Apple,98,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,8.1
2,1671,Brian Wilson,97,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,7.9
3,1538,Loretta Lynn,97,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,8.4
4,22163,Kendrick Lamar,96,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,8.9


In [6]:
#Drop "tbd" user_score rows before calculating the weighted average
# .copy() gives every filtered frame its own data, so column assignments made on
# these frames in later cells cannot be treated as writes into a slice of the raw
# *_data_df frames (the situation pandas flags with SettingWithCopyWarning).
# Bracket access is used instead of attribute access so the column lookup can
# never be shadowed by a DataFrame attribute or method.
games = games_data_df[games_data_df["user_score"] != "tbd"].copy()
movies = movies_data_df[movies_data_df["user_score"] != "tbd"].copy()
music = music_data_df[music_data_df["user_score"] != "tbd"].copy()
tv = tv_data_df[tv_data_df["user_score"] != "tbd"].copy()

In [7]:
# Keep the source ranking under the name "original_sort_no" so it can still be
# referenced later on.
rank_column_map = {'sort_no': 'original_sort_no'}
music = music.rename(columns=rank_column_map)

In [8]:
# Preview the first rows to confirm the rank column now appears as original_sort_no
music.head()

Unnamed: 0,id,artist,metascore,release_date,original_sort_no,summary,title,user_score
0,13354,Wadada Leo Smith,99,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,4.2
1,37516,Fiona Apple,98,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,8.1
2,1671,Brian Wilson,97,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,7.9
3,1538,Loretta Lynn,97,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,8.4
4,22163,Kendrick Lamar,96,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,8.9


In [9]:
# Check column data types to see which columns need converting before doing
# arithmetic on the scores
music.dtypes

id                   int64
artist              object
metascore            int64
release_date        object
original_sort_no     int64
summary             object
title               object
user_score          object
dtype: object

In [10]:
# Convert the user_score column to a numeric dtype.
# errors="raise" is pandas' default, spelled out here: any value that cannot be
# parsed stops the cell instead of being silently altered.
music["user_score"] = pd.to_numeric(music["user_score"], errors="raise")

In [11]:
#Multiply the user score column by 10 to match the metascore
# Recompute from the untouched raw column instead of scaling `music` in place:
# an in-place multiply compounds (x10, x100, ...) every time this cell is re-run.
# `music` was only filtered and renamed from music_data_df, so it still carries
# that frame's row labels; selecting by music.index picks exactly the rows kept
# after the "tbd" filter.
music["user_score"] = pd.to_numeric(
    music_data_df.loc[music.index, "user_score"]
).multiply(10)

In [12]:
# Convert the metascore column to float64 so its dtype matches user_score
metascore_as_float = music["metascore"].astype("float64")
music["metascore"] = metascore_as_float

In [13]:
# Re-check the dtypes: metascore and user_score should both be float64 now
music.dtypes

id                    int64
artist               object
metascore           float64
release_date         object
original_sort_no      int64
summary              object
title                object
user_score          float64
dtype: object

In [14]:
# Preview the converted frame: user_score should now be a float on the same
# scale as metascore
music.head()

Unnamed: 0,id,artist,metascore,release_date,original_sort_no,summary,title,user_score
0,13354,Wadada Leo Smith,99.0,"May 22, 2012",1,The four-disc set from the jazz trumpeter was ...,Ten Freedom Summers,42.0
1,37516,Fiona Apple,98.0,"April 17, 2020",2,The fifth full-length studio release for the a...,Fetch the Bolt Cutters,81.0
2,1671,Brian Wilson,97.0,"September 28, 2004",3,"Well, better 37 years late than never. Origina...",SMiLE,79.0
3,1538,Loretta Lynn,97.0,"April 27, 2004",4,It's been over 40 years since she released her...,Van Lear Rose,84.0
4,22163,Kendrick Lamar,96.0,"March 16, 2015",5,The third full-length studio release from the ...,To Pimp A Butterfly,89.0
