## 예제 2-24 데이터프레임 칼럼추가

In [1]:
import pandas as pd

In [2]:
# 특정 열을 지정하여 데이터프레임 생성

In [4]:
# Load only the 'Film' and 'Year' columns from the CSV.
wanted_columns = ['Film', 'Year']
movies = pd.read_csv("../data/movies.csv", usecols=wanted_columns)

In [5]:
movies.head()

Unnamed: 0,Film,Year
0,Zack and Miri Make a Porno,2008
1,Youth in Revolt,2010
2,You Will Meet a Tall Dark Stranger,2010
3,When in Rome,2010
4,What Happens in Vegas,2008


In [8]:
# With no usecols argument, read_csv loads every column of the file.
movies_add = pd.read_csv("../data/movies.csv")

In [9]:
movies_add.head()

Unnamed: 0,Film,Genre,Lead Studio,Audience score %,Profitability,Rotten Tomatoes %,Worldwide Gross,Year
0,Zack and Miri Make a Porno,Romance,The Weinstein Company,70,1.747542,64,$41.94,2008
1,Youth in Revolt,Comedy,The Weinstein Company,52,1.09,68,$19.62,2010
2,You Will Meet a Tall Dark Stranger,Comedy,Independent,35,1.211818,43,$26.66,2010
3,When in Rome,Comedy,Disney,44,0.0,15,$43.04,2010
4,What Happens in Vegas,Comedy,Fox,72,6.267647,28,$219.37,2008


In [10]:
# has_seen 열을 추가하고 모든 값의 초기값을 0으로 한다.

In [38]:
# df[new column name] = initial value
# => creates the column and sets every row of it to that value.
movies['has_seen'] = 0

In [39]:
movies.head()

Unnamed: 0,Film,Year,has_seen
0,Zack and Miri Make a Porno,2008,0
1,Youth in Revolt,2010,0
2,You Will Meet a Tall Dark Stranger,2010,0
3,When in Rome,2010,0
4,What Happens in Vegas,2008,0


In [26]:
# Remove a leftover 'has_been' column if one exists. errors='ignore' keeps
# this cell from raising KeyError when the column is absent (only
# 'has_seen' has been created so far), so the notebook runs top to bottom.
movies.drop(columns=['has_been'], errors='ignore', inplace=True)

In [27]:
movies

Unnamed: 0,Film,Year,has_seen
0,Zack and Miri Make a Porno,2008,0
1,Youth in Revolt,2010,0
2,You Will Meet a Tall Dark Stranger,2010,0
3,When in Rome,2010,0
4,What Happens in Vegas,2008,0
...,...,...,...
72,Across the Universe,2007,0
73,A Serious Man,2009,0
74,A Dangerous Method,2011,0
75,27 Dresses,2008,0


In [28]:
movies_add.columns

Index(['Film', 'Genre', 'Lead Studio', 'Audience score %', 'Profitability',
       'Rotten Tomatoes %', 'Worldwide Gross', 'Year'],
      dtype='object')

In [29]:
movies_add.head()

Unnamed: 0,Film,Genre,Lead Studio,Audience score %,Profitability,Rotten Tomatoes %,Worldwide Gross,Year
0,Zack and Miri Make a Porno,Romance,The Weinstein Company,70,1.747542,64,$41.94,2008
1,Youth in Revolt,Comedy,The Weinstein Company,52,1.09,68,$19.62,2010
2,You Will Meet a Tall Dark Stranger,Comedy,Independent,35,1.211818,43,$26.66,2010
3,When in Rome,Comedy,Disney,44,0.0,15,$43.04,2010
4,What Happens in Vegas,Comedy,Fox,72,6.267647,28,$219.37,2008


In [44]:
# Store the floor-divided mean of the audience and Rotten Tomatoes scores
# in the 'has_seen' column.
audience_score = movies_add['Audience score %']
tomato_score = movies_add['Rotten Tomatoes %']
movies['has_seen'] = (audience_score + tomato_score) // 2

In [31]:
# 첫번째 행의 결과를 보면 (70+64) // 2 => 67

In [45]:
movies.head()

Unnamed: 0,Film,Year,has_seen
0,Zack and Miri Make a Porno,2008,67
1,Youth in Revolt,2010,60
2,You Will Meet a Tall Dark Stranger,2010,39
3,When in Rome,2010,29
4,What Happens in Vegas,2008,50


In [46]:
movies['has_seen'].isnull().sum()

0

In [47]:
movies_add['Worldwide Gross'].dtype # object 타입

dtype('O')

In [37]:
# Worldwide Gross 열의 모든 $ 기호를 없앤다. (정규식을 사용하지 않는다.)
# 그리고 astype 함수를 사용해 타입을 float64로 바꾼다.

In [48]:
# To strip a character, replace it with '' (the empty string).
# A string (object) column exposes .str.replace for this; regex=False makes
# '$' a literal character rather than a regex anchor.
# astype then converts the cleaned strings to float64.
gross_text = movies_add['Worldwide Gross'].str.replace('$', '', regex=False)
d = gross_text.astype('float64')

In [49]:
d

0      41.94
1      19.62
2      26.66
3      43.04
4     219.37
       ...  
72     29.37
73     30.68
74      8.97
75    160.31
76     60.72
Name: Worldwide Gross, Length: 77, dtype: float64

In [50]:
movies.head()

Unnamed: 0,Film,Year,has_seen
0,Zack and Miri Make a Porno,2008,67
1,Youth in Revolt,2010,60
2,You Will Meet a Tall Dark Stranger,2010,39
3,When in Rome,2010,29
4,What Happens in Vegas,2008,50


In [60]:
# Get the integer position of the 'has_seen' column.
movies.columns.get_loc('has_seen')

2

In [None]:
# insert (삽입될 열의 위치, 삽입될 열의 이름, 삽입될 열의 값)

In [61]:
# Insert the new column right after 'has_seen', holding d * 1200
# ("원화환산" = converted to KRW; 1200 is presumably the USD->KRW rate — confirm).
# DataFrame.insert raises ValueError if the column already exists, so the
# guard lets this cell be re-run safely.
if "원화환산" not in movies.columns:
    movies.insert(movies.columns.get_loc('has_seen') + 1, "원화환산", d * 1200)

In [62]:
movies.head()

Unnamed: 0,Film,Year,has_seen,원화환산
0,Zack and Miri Make a Porno,2008,67,50328.0
1,Youth in Revolt,2010,60,23544.0
2,You Will Meet a Tall Dark Stranger,2010,39,31992.0
3,When in Rome,2010,29,51648.0
4,What Happens in Vegas,2008,50,263244.0


## 예제 2-25 시리즈와 데이터프레임 내의 특정원소 변경

In [64]:
movies.loc[74,'has_seen']

84

In [65]:
import numpy as np

In [66]:
# NOTE(review): 'has_been' is not an existing column here, so this assignment
# adds a new 'has_been' column (NaN in every row) instead of changing the
# 'has_seen' value read above. If the goal is to modify that element, the
# label should presumably be 'has_seen' — confirm.
movies.loc[74,'has_been'] = np.nan

In [67]:
movies.loc[74,'has_been']

nan

In [69]:
movies['has_been'].isnull().sum()

77

In [70]:
movies.columns

Index(['Film', 'Year', 'has_seen', '원화환산', 'has_been'], dtype='object')

In [71]:
movies

Unnamed: 0,Film,Year,has_seen,원화환산,has_been
0,Zack and Miri Make a Porno,2008,67,50328.0,
1,Youth in Revolt,2010,60,23544.0,
2,You Will Meet a Tall Dark Stranger,2010,39,31992.0,
3,When in Rome,2010,29,51648.0,
4,What Happens in Vegas,2008,50,263244.0,
...,...,...,...,...,...
72,Across the Universe,2007,69,35244.0,
73,A Serious Man,2009,76,36816.0,
74,A Dangerous Method,2011,84,10764.0,
75,27 Dresses,2008,55,192372.0,


In [72]:
movies_ser_at = movies['Year']

In [73]:
movies_ser_at.head()

0    2008
1    2010
2    2010
3    2010
4    2008
Name: Year, dtype: int64

In [74]:
movies_ser_at.get(0)

2008

In [75]:
movies_ser_at.head()

0    2008
1    2010
2    2010
3    2010
4    2008
Name: Year, dtype: int64

## 예제 2-26 시리즈와 데이터프레임의 행과열 삭제

In [76]:
movies_del = pd.read_csv("../data/movies.csv")

In [77]:
movies_del.shape

(77, 8)

In [79]:
movies_del.head()

Unnamed: 0,Film,Genre,Lead Studio,Audience score %,Profitability,Rotten Tomatoes %,Worldwide Gross,Year
0,Zack and Miri Make a Porno,Romance,The Weinstein Company,70,1.747542,64,$41.94,2008
1,Youth in Revolt,Comedy,The Weinstein Company,52,1.09,68,$19.62,2010
2,You Will Meet a Tall Dark Stranger,Comedy,Independent,35,1.211818,43,$26.66,2010
3,When in Rome,Comedy,Disney,44,0.0,15,$43.04,2010
4,What Happens in Vegas,Comedy,Fox,72,6.267647,28,$219.37,2008


In [80]:
# 'Rotten Tomatoes %'열을 삭제 axis = 1 => 삭제 대상이 열.

In [81]:
# Drop the 'Rotten Tomatoes %' column; drop returns a new frame and
# movies_del itself is not modified.
movies_del1 = movies_del.drop(columns='Rotten Tomatoes %')

In [82]:
movies_del1.head()

Unnamed: 0,Film,Genre,Lead Studio,Audience score %,Profitability,Worldwide Gross,Year
0,Zack and Miri Make a Porno,Romance,The Weinstein Company,70,1.747542,$41.94,2008
1,Youth in Revolt,Comedy,The Weinstein Company,52,1.09,$19.62,2010
2,You Will Meet a Tall Dark Stranger,Comedy,Independent,35,1.211818,$26.66,2010
3,When in Rome,Comedy,Disney,44,0.0,$43.04,2010
4,What Happens in Vegas,Comedy,Fox,72,6.267647,$219.37,2008


In [83]:
movies_del1.shape

(77, 7)

In [None]:
# 1,2,3,4행 삭제, axis = 0 <= 삭제 대상이 행.

In [84]:
# Drop the rows labelled 1-4 into a new frame.
rows_to_remove = [1, 2, 3, 4]
movies_del2 = movies_del.drop(index=rows_to_remove)

In [85]:
movies_del2.shape

(73, 8)

In [87]:
# Rows 1, 2, 3 and 4 are gone: the index jumps from 0 to 5.
movies_del2.head()

Unnamed: 0,Film,Genre,Lead Studio,Audience score %,Profitability,Rotten Tomatoes %,Worldwide Gross,Year
0,Zack and Miri Make a Porno,Romance,The Weinstein Company,70,1.747542,64,$41.94,2008
5,Water For Elephants,Drama,20th Century Fox,72,3.081421,60,$117.09,2011
6,WALL-E,Animation,Disney,89,2.896019,96,$521.28,2008
7,Waitress,Romance,Independent,67,11.089742,89,$22.18,2007
8,Waiting For Forever,Romance,Independent,53,0.005,6,$0.03,2011


In [88]:
movies_del.shape

(77, 8)

In [89]:
# Drop two columns at once into a new frame.
unwanted_columns = ['Lead Studio', 'Worldwide Gross']
movies_del3 = movies_del.drop(columns=unwanted_columns)

In [90]:
movies_del3.shape

(77, 6)

In [92]:
# 'Lead Studio' and 'Worldwide Gross' are no longer among the columns.
movies_del3.head()

Unnamed: 0,Film,Genre,Audience score %,Profitability,Rotten Tomatoes %,Year
0,Zack and Miri Make a Porno,Romance,70,1.747542,64,2008
1,Youth in Revolt,Comedy,52,1.09,68,2010
2,You Will Meet a Tall Dark Stranger,Comedy,35,1.211818,43,2010
3,When in Rome,Comedy,44,0.0,15,2010
4,What Happens in Vegas,Comedy,72,6.267647,28,2008


In [93]:
# inplace=True removes the columns from movies_del itself rather than
# returning a new frame.
movies_del.drop(columns=['Lead Studio', 'Worldwide Gross'], inplace=True)

In [94]:
movies_del.head()

Unnamed: 0,Film,Genre,Audience score %,Profitability,Rotten Tomatoes %,Year
0,Zack and Miri Make a Porno,Romance,70,1.747542,64,2008
1,Youth in Revolt,Comedy,52,1.09,68,2010
2,You Will Meet a Tall Dark Stranger,Comedy,35,1.211818,43,2010
3,When in Rome,Comedy,44,0.0,15,2010
4,What Happens in Vegas,Comedy,72,6.267647,28,2008


In [95]:
movies_ser = movies_del['Genre']

In [96]:
movies_ser.head()

0    Romance
1     Comedy
2     Comedy
3     Comedy
4     Comedy
Name: Genre, dtype: object

In [103]:
# Remove the entries labelled 1-4 from the Series in place.
# errors='ignore' skips labels that are already gone, so re-running this
# cell no longer raises KeyError.
movies_ser.drop([1, 2, 3, 4], errors='ignore', inplace=True)

In [98]:
movies_ser.head()

0      Romance
5        Drama
6    Animation
7      Romance
8      Romance
Name: Genre, dtype: object

In [99]:
movies_ser

0       Romance
5         Drama
6     Animation
7       Romance
8       Romance
        ...    
72      romance
73        Drama
74        Drama
75       Comedy
76       comedy
Name: Genre, Length: 73, dtype: object