In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import font_manager,rc

import random

# Point matplotlib at the Windows "malgun.ttf" font file (presumably so Korean
# labels render correctly -- confirm) and use its family name as the default.
# The path is a raw string: the previous non-raw literal contained "\W", "\F"
# and "\m", which are invalid escape sequences (SyntaxWarning on Python 3.12+).
# The runtime value of the path is unchanged.
font_location = r"C:\Windows\Fonts\malgun.ttf"
font_name = font_manager.FontProperties(fname=font_location).get_name()
rc('font',family=font_name)
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus
# sign, which some fonts lack a glyph for.
plt.rcParams['axes.unicode_minus'] = False

In [6]:
# Read the best-selling-artists CSV into a DataFrame named `artist`.
file_path = '../data/best_selling_artists.csv'
artist = pd.read_csv(filepath_or_buffer=file_path)

### 데이터의 기본정보 확인

In [7]:
# Print column names, dtypes and non-null counts for the loaded frame.
artist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Artist         121 non-null    object
 1   Country        121 non-null    object
 2   period_active  121 non-null    object
 3   Year           121 non-null    int64 
 4   Genre          121 non-null    object
 5   TCU            121 non-null    object
 6   Sales          121 non-null    object
dtypes: int64(1), object(6)
memory usage: 6.7+ KB


### 열 정의
* Artist          : 아티스트 이름
* Country         : 아티스트의 출신 국가
* period_active   : 아티스트의 활동 기간
* Year            : 음반 차트 첫 진입 연도
* Genre           : 아티스트의 음악 장르
* TCU             : 인증받은 판매량 (Total Certified Units)
* Sales           : 알려진(인증되지 않은) 판매량

In [8]:
# (number of rows, number of columns) of the loaded frame.
artist.shape

(121, 7)

In [9]:
# Preview the first ten rows.
artist.head(10)

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU,Sales
0,The Beatles,United Kingdom,1960–1970,1962,Rock / pop,290.4 million,600 million500 million
1,Elvis Presley,United States,1954–1977,1956,Rock and roll / pop / country,231.2 million,500 million
2,Michael Jackson,United States,1964–2009,1971,Pop / rock / dance / soul / R&B,277.3 million,400 million
3,Elton John,United Kingdom,1964–present,1970,Pop / rock,208 million,300 million250 million
4,Queen,United Kingdom,1971–present,1973,Rock,184 million,300 million250 million
5,Madonna,United States,1979–present,1983,Pop / dance / electronica,181.7 million,300 million250 million
6,Led Zeppelin,United Kingdom,1968–1980,1969,Hard rock / blues rock / folk rock,142.4 million,300 million200 million
7,Rihanna,Barbados,2005–present,2005,R&B / pop / dance / hip-hop,335.3 million,250 million230 million
8,Pink Floyd,United Kingdom,"1965–1996, 2005, 2012–2014",1967,Progressive rock / psychedelic rock,123.8 million,250 million200 million
9,Eminem,United States,1996–present,1999,Hip-hop,325.7 million,220 million


In [10]:
# Preview the last ten rows.
artist.tail(10)

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU,Sales
111,Kenny G,United States,1982–present,1984,Smooth jazz,53.9 million,75 million
112,Green Day,United States,1987–present,1994,Punk rock / pop punk / alternative rock,53 million,75 million
113,Enya,Ireland,1982–present,1987,New-age / Celtic,51.9 million,75 million
114,Bryan Adams,Canada,1979–present,1979,Rock,49.9 million,75 million
115,Bob Marley,Jamaica,1962–1981,1975,Reggae,46.3 million,75 million
116,The Police,United Kingdom,1977–19862007–2008,1978,Pop / rock,42.2 million,75 million
117,Gloria Estefan,United States,1975–present,1984,Latin pop / dance pop,37.6 million,75 million
118,Barry Manilow,United States,1973–present,1973,Pop / soft rock,36.7 million,75 million
119,Kiss,United States,1972–present,1974,Hard rock / heavy metal,28.7 million,75 million
120,Aretha Franklin,United States,1956–2018,1961,Soul / jazz / blues / R&B,26.6 million,75 million


In [11]:
# List the column labels.
artist.columns

Index(['Artist', 'Country', 'period_active', 'Year', 'Genre', 'TCU', 'Sales'], dtype='object')

#### => 'Sales'는 인증되지 않은 판매량이므로 인증된 수치인 'TCU'만 사용

In [12]:
# Drop the 'Sales' column; only 'TCU' is used from here on.
# errors='ignore' keeps the cell idempotent: because `artist` is reassigned,
# re-running the cell would otherwise raise KeyError once 'Sales' is gone.
artist = artist.drop(columns=['Sales'], errors='ignore')

In [13]:
# Display the frame (pandas elides the middle rows of long frames).
artist

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
0,The Beatles,United Kingdom,1960–1970,1962,Rock / pop,290.4 million
1,Elvis Presley,United States,1954–1977,1956,Rock and roll / pop / country,231.2 million
2,Michael Jackson,United States,1964–2009,1971,Pop / rock / dance / soul / R&B,277.3 million
3,Elton John,United Kingdom,1964–present,1970,Pop / rock,208 million
4,Queen,United Kingdom,1971–present,1973,Rock,184 million
...,...,...,...,...,...,...
116,The Police,United Kingdom,1977–19862007–2008,1978,Pop / rock,42.2 million
117,Gloria Estefan,United States,1975–present,1984,Latin pop / dance pop,37.6 million
118,Barry Manilow,United States,1973–present,1973,Pop / soft rock,36.7 million
119,Kiss,United States,1972–present,1974,Hard rock / heavy metal,28.7 million


In [14]:
# Keep the leading number of strings such as "290.4 million" (the text before
# the first space) and store it as float64.
# NOTE: runs once only -- after this the column is numeric and `.str` no longer applies.
artist['TCU'] = (
    artist['TCU']
    .str.split(' ')
    .str.get(0)
    .astype(float)
)

In [15]:
# Scale the parsed figure by one million (the source strings read "... million").
# NOTE: not idempotent -- every re-run multiplies the column again.
artist['TCU'] = artist['TCU'].mul(1_000_000)

In [16]:
# Inspect the converted TCU column.
artist['TCU']

0      290400000.0
1      231200000.0
2      277300000.0
3      208000000.0
4      184000000.0
          ...     
116     42200000.0
117     37600000.0
118     36700000.0
119     28700000.0
120     26600000.0
Name: TCU, Length: 121, dtype: float64

In [17]:
# Count missing values per column.
artist.isnull().sum()

Artist           0
Country          0
period_active    0
Year             0
Genre            0
TCU              0
dtype: int64

In [18]:
# List the distinct Genre strings exactly as they appear in the data.
# NOTE(review): the output contains both 'Pop / rock' and 'Pop / Rock', so any
# exact-match counting treats them as different genres -- consider normalizing case.
artist['Genre'].unique()

array(['Rock / pop', 'Rock and roll / pop / country',
       'Pop / rock / dance / soul / R&B', 'Pop / rock', 'Rock',
       'Pop / dance / electronica', 'Hard rock / blues rock / folk rock',
       'R&B / pop / dance / hip-hop',
       'Progressive rock / psychedelic rock', 'Hip-hop',
       'R&B / pop / soul / hip-hop',
       'Pop / country / rock / folk / alternative', 'R&B / pop',
       'R&B / soul / pop / gospel', 'Pop / Rock',
       'Hard rock / blues rock / rock and roll', 'Rock / blues rock',
       'Hip-hop / R&B / pop', 'Country', 'Hip-hop / electronic / pop',
       'Pop / teen pop / dance pop', 'Pop / folk pop', 'Pop rock / R&B',
       'Hard rock', 'Rock / progressive rock / adult contemporary',
       'Pop / adult contemporary', 'Pop / disco',
       'Pop / swing / easy listening', 'Pop', 'Pop / R&B / hip-hop',
       'Heavy metal / thrash metal', 'Pop / dance / electronic',
       'Pop rock / funk rock / dance-pop', 'Pop / soul',
       'Funk rock / alternative rock',

In [19]:
# Show rows whose Country string is shorter than three characters
# (presumably a sanity check for truncated or placeholder values).
# `.str.len()` is the vectorized form of `map(len)`; it yields NaN for missing
# values instead of raising TypeError, and NaN < 3 is False, so such rows are
# simply not selected.
artist.loc[artist['Country'].str.len() < 3]

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU


* 나라별 인기 있는 장르
* 연도별로 어떤 장르가 인기 있었는지
* 오랜 시간 동안 차트인하지 못한 가수

In [20]:
# Preview the first five rows after the TCU conversion.
artist.head()

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
0,The Beatles,United Kingdom,1960–1970,1962,Rock / pop,290400000.0
1,Elvis Presley,United States,1954–1977,1956,Rock and roll / pop / country,231200000.0
2,Michael Jackson,United States,1964–2009,1971,Pop / rock / dance / soul / R&B,277300000.0
3,Elton John,United Kingdom,1964–present,1970,Pop / rock,208000000.0
4,Queen,United Kingdom,1971–present,1973,Rock,184000000.0


In [23]:
# Check the dtype of Year before it is used in the numeric range filters below.
artist['Year'].dtype

dtype('int64')

#### 1960년대

In [30]:
# Artists whose Year falls in 1960-1969; `between` includes both endpoints.
artist_year_1960 = artist.loc[artist['Year'].between(1960, 1969)]

artist_year_1960

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
0,The Beatles,United Kingdom,1960–1970,1962,Rock / pop,290400000.0
6,Led Zeppelin,United Kingdom,1968–1980,1969,Hard rock / blues rock / folk rock,142400000.0
8,Pink Floyd,United Kingdom,"1965–1996, 2005, 2012–2014",1967,Progressive rock / psychedelic rock,123800000.0
17,The Rolling Stones,United Kingdom,1962–present,1963,Rock / blues rock,102700000.0
29,Barbra Streisand,United States,1960–present,1963,Pop / adult contemporary,98400000.0
41,Fleetwood Mac,United Kingdom United States,1967–present,1968,Rock / pop,94400000.0
43,Rod Stewart,United Kingdom,1964–present,1969,Rock / pop,84900000.0
44,Bee Gees,United Kingdom Australia[238][239],1963–20032009–2012,1963,Pop / disco,74700000.0
56,Neil Diamond,United States,1966–present,1966,Pop / rock,71100000.0
62,Santana,United States,1966–present,1969,Rock,62800000.0


In [31]:
# Count 1960s artists per Genre label. Labels are compared as whole strings,
# so composite genres such as 'Rock / pop' are not split into their parts.
artist_year_1960['Genre'].value_counts()

Rock / pop                             6
Pop                                    2
Folk / rock                            1
Rock / Hard rock/ Pop rock             1
Rock / pop / surf rock                 1
Rock / folk / country                  1
Funk / R&B / soul                      1
Art rock / glam rock / pop             1
Progressive rock / pop rock            1
Pop / rock / dance / folk              1
Psychedelic rock                       1
Hard rock / blues rock / folk rock     1
Latin                                  1
Folk rock                              1
Rock                                   1
Pop / rock                             1
Pop / disco                            1
Pop / adult contemporary               1
Rock / blues rock                      1
Progressive rock / psychedelic rock    1
Soul / jazz / blues / R&B              1
Name: Genre, dtype: int64

#### 1970년대

In [32]:
# Artists whose Year falls in 1970-1979; `between` includes both endpoints.
artist_year_1970 = artist.loc[artist['Year'].between(1970, 1979)]

artist_year_1970

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
2,Michael Jackson,United States,1964–2009,1971,Pop / rock / dance / soul / R&B,277300000.0
3,Elton John,United Kingdom,1964–present,1970,Pop / rock,208000000.0
4,Queen,United Kingdom,1971–present,1973,Rock,184000000.0
14,Eagles,United States,"1971–1980, 1994–present",1972,Rock,152200000.0
16,AC/DC,Australia,1973–present,1975,Hard rock / blues rock / rock and roll,130200000.0
21,Billy Joel,United States,1964–present,1971,Pop / rock,134400000.0
25,Bruce Springsteen,United States,1972–present,1973,Rock,127900000.0
27,Aerosmith,United States,1970–present,1973,Hard rock,104800000.0
30,ABBA,Sweden,"1972–1982, 2016–present",1972,Pop / disco,69000000.0
55,Eric Clapton,United Kingdom,1962–present,1970,Rock / blues,71600000.0


In [34]:
# Count 1970s artists per Genre label. Labels are compared as whole strings,
# so composite genres such as 'Pop / rock' are not split into their parts.
artist_year_1970['Genre'].value_counts()

Rock                                      7
Hard rock / heavy metal                   3
Pop / rock                                3
Rock / hard rock                          2
Pop / rock / dance / soul / R&B           1
Reggae                                    1
Pop / disco / R&B                         1
Rock / pop                                1
Funk / R&B / soul                         1
Rock / soft rock                          1
Country / pop                             1
Funk / R&B / pop / soul / rock            1
Rock / blues                              1
Pop / disco                               1
Hard rock                                 1
Hard rock / blues rock / rock and roll    1
Pop / soft rock                           1
Name: Genre, dtype: int64

#### 1980년대

In [35]:
# Artists whose Year falls in 1980-1989; `between` includes both endpoints.
artist_year_1980 = artist.loc[artist['Year'].between(1980, 1989)]

artist_year_1980

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
5,Madonna,United States,1979–present,1983,Pop / dance / electronica,181700000.0
13,Whitney Houston,United States,1977–2012,1984,R&B / soul / pop / gospel,155300000.0
15,Celine Dion,Canada,1981–present,1981,Pop / Rock,143000000.0
19,Garth Brooks,United States,1989–present,1989,Country,165300000.0
26,U2,Ireland,1976–present,1980,Rock,114600000.0
28,Phil Collins,United Kingdom,"1980–2011, 2015–present",1981,Rock / progressive rock / adult contemporary,99400000.0
35,Metallica,United States,1981–present,1983,Heavy metal / thrash metal,105600000.0
40,Red Hot Chili Peppers,United States,1983–present,1987,Funk rock / alternative rock,99800000.0
42,Bon Jovi,United States,1983–present,1983,Hard rock / glam metal,88700000.0
48,George Strait,United States,1981–present,1984,Country,99800000.0


In [36]:
# Count 1980s artists per Genre label. Labels are compared as whole strings,
# so composite genres such as 'Pop / Rock' are not split into their parts.
artist_year_1980['Genre'].value_counts()

Country                                         2
Pop / dance / electronica                       1
Hard rock / heavy metal                         1
New-age / Celtic                                1
Smooth jazz                                     1
Country / pop rock                              1
Alternative rock                                1
Pop / R&B                                       1
Pop                                             1
R&B / pop                                       1
Rock / pop rock / hard rock                     1
R&B / soul / pop / gospel                       1
Hard rock / glam metal                          1
Funk rock / alternative rock                    1
Heavy metal / thrash metal                      1
Rock / progressive rock / adult contemporary    1
Rock                                            1
Pop / Rock                                      1
Latin pop / dance pop                           1
Name: Genre, dtype: int64

#### 1990년대

In [39]:
# Artists whose Year falls in 1990-1999; `between` includes both endpoints.
artist_year_1990 = artist.loc[artist['Year'].between(1990, 1999)]

artist_year_1990

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
9,Eminem,United States,1996–present,1999,Hip-hop,325700000.0
10,Mariah Carey,United States,1988–present,1990,R&B / pop / soul / hip-hop,205800000.0
34,Jay-Z,United States,1996–present,1996,Hip-hop,136400000.0
37,Lil Wayne,United States,1996–present,1999,Hip-hop,188700000.0
46,Coldplay,United Kingdom,1996–present,1999,Alternative rock / pop rock / pop,109400000.0
50,Britney Spears,United States,1998–present,1998,Pop / dance / dance-pop,88700000.0
52,Shania Twain,Canada,1993–present,1993,Country pop,84800000.0
54,Backstreet Boys,United States,1993–present,1995,Pop,78000000.0
86,Christina Aguilera,United States,1993–present,1998,R&B / pop / rock,76600000.0
94,Usher,United States,1991–present,1994,R&B / pop,76600000.0


In [40]:
# Count 1990s artists per Genre label. Labels are compared as whole strings,
# so composite genres such as 'R&B / pop' are not split into their parts.
artist_year_1990['Genre'].value_counts()

Hip-hop                                    4
R&B / pop / soul / hip-hop                 1
Alternative rock / pop rock / pop          1
Pop / dance / dance-pop                    1
Country pop                                1
Pop                                        1
R&B / pop / rock                           1
R&B / pop                                  1
Latin pop / pop / pop rock                 1
Country                                    1
Hip-hop / R&B / dance                      1
J-pop / pop / dance / electronic           1
R&B / soul / hip-hop                       1
Grunge / alternative rock                  1
Pop rock                                   1
Punk rock / pop punk / alternative rock    1
Name: Genre, dtype: int64

#### 2000년 이후

In [41]:
# Artists whose Year is 2000 or later (no upper bound).
artist_year_2000 = artist.loc[artist['Year'].ge(2000)]

artist_year_2000

Unnamed: 0,Artist,Country,period_active,Year,Genre,TCU
7,Rihanna,Barbados,2005–present,2005,R&B / pop / dance / hip-hop,335300000.0
11,Taylor Swift,United States,2006–present,2006,Pop / country / rock / folk / alternative,250400000.0
12,Beyoncé,United States,1997–present,2002,R&B / pop,226500000.0
18,Drake,Canada,2001–present,2009,Hip-hop / R&B / pop,413060000.0
20,Kanye West,United States,1996–present,2003,Hip-hop / electronic / pop,258300000.0
22,Justin Bieber,CanadaUnited States,2008–present,2009,Pop / teen pop / dance pop,277900000.0
23,Ed Sheeran,United Kingdom,2004–present,2011,Pop / folk pop,246700000.0
24,Bruno Mars,United States,2004–present,2010,Pop rock / R&B,203600000.0
32,Katy Perry,United States,2001–present,2008,Pop,172900000.0
33,Chris Brown,United States,2005–present,2005,Pop / R&B / hip-hop,199700000.0


In [38]:
# Count artists from 2000 onward per Genre label. Labels are compared as whole
# strings, so composite genres such as 'Pop / R&B' are not split into their parts.
artist_year_2000['Genre'].value_counts()

Hip-hop / R&B / pop                          2
Pop / R&B                                    2
Pop / R&B / hip-hop                          2
R&B / pop / dance / hip-hop                  1
Pop / soul                                   1
Pop rock / electropop / pop                  1
Hip-hop / hip house /EDM                     1
Pop / R&B / neo soul                         1
Pop / pop rock / R&B                         1
Alternative rock / nu metal / rap rock       1
Hip-hop / pop                                1
Pop / dance / electronic                     1
Pop rock / funk rock / dance-pop             1
Pop / country / rock / folk / alternative    1
Pop                                          1
Pop rock / R&B                               1
Pop / folk pop                               1
Pop / teen pop / dance pop                   1
Hip-hop / electronic / pop                   1
R&B / pop                                    1
Country / country pop                        1
Name: Genre, 