# Chapter 1: Pandas 基礎

## レシピ
* [DataFrameの解剖学](#レシピ1-DataFrameの解剖学)
* [DataFrame主要素へのアクセス](#レシピ2-DataFrame主要素へのアクセス)
* [データ型の理解](#レシピ3-データ型の理解)
* [データのカラムをSeriesとして選択取得](#レシピ4-データのカラムをSeriesとして選択取得)
* [Seriesのメソッド呼び出し](#レシピ5-Seriesのメソッド呼び出し)
* [Seriesの演算子の動き](#レシピ6-Seriesの演算子の動き)
* [Seriesのメソッドチェイニング](#レシピ7-Seriesのメソッドチェイニング)
* [分かりやすいインデックスに置き換え](#レシピ8-分かりやすいインデックスに置き換え)
* [行とカラムの名前変更](#レシピ9-行とカラムの名前変更)
* [カラムの作成と削除](#レシピ10-カラムの作成と削除)

In [None]:
import numpy as np
import pandas as pd

# レシピ1 DataFrameの解剖学

In [2]:
# Load the movie dataset; no index column is specified here.
movie = pd.read_csv('data/movie.csv')
# Preview the first rows of the frame.
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0


In [3]:
movie.tail()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
4911,Color,Scott Smith,1.0,87.0,2.0,318.0,Daphne Zuniga,637.0,,Comedy|Drama,...,6.0,English,Canada,,,2013.0,470.0,7.7,,84
4912,Color,,43.0,43.0,,319.0,Valorie Curry,841.0,,Crime|Drama|Mystery|Thriller,...,359.0,English,USA,TV-14,,,593.0,7.5,16.0,32000
4913,Color,Benjamin Roberds,13.0,76.0,0.0,0.0,Maxwell Moody,0.0,,Drama|Horror|Thriller,...,3.0,English,USA,,1400.0,2013.0,0.0,6.3,,16
4914,Color,Daniel Hsia,14.0,100.0,0.0,489.0,Daniel Henney,946.0,10443.0,Comedy|Drama|Romance,...,9.0,English,USA,PG-13,,2012.0,719.0,6.3,2.35,660
4915,Color,Jon Gunn,43.0,90.0,16.0,16.0,Brian Herzlinger,86.0,85222.0,Documentary,...,84.0,English,USA,PG,1100.0,2004.0,23.0,6.6,1.85,456


# レシピ2 DataFrame主要素へのアクセス

In [4]:
# Pull out the three components of the DataFrame in a single unpacking:
# row labels, column labels, and the underlying values.
index, columns, data = movie.index, movie.columns, movie.values

In [5]:
index

RangeIndex(start=0, stop=4916, step=1)

In [6]:
columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [7]:
data

array([['Color', 'James Cameron', 723.0, ..., 7.9, 1.78, 33000],
       ['Color', 'Gore Verbinski', 302.0, ..., 7.1, 2.35, 0],
       ['Color', 'Sam Mendes', 602.0, ..., 6.8, 2.35, 85000],
       ...,
       ['Color', 'Benjamin Roberds', 13.0, ..., 6.3, nan, 16],
       ['Color', 'Daniel Hsia', 14.0, ..., 6.3, 2.35, 660],
       ['Color', 'Jon Gunn', 43.0, ..., 6.6, 1.85, 456]], dtype=object)

In [8]:
type(index)

pandas.core.indexes.range.RangeIndex

In [9]:
type(columns)

pandas.core.indexes.base.Index

In [10]:
type(data)

numpy.ndarray

In [11]:
# Is RangeIndex a subclass of Index?
issubclass(pd.RangeIndex, pd.Index)

True

In [12]:
index.values

array([   0,    1,    2, ..., 4913, 4914, 4915], dtype=int64)

In [13]:
columns.values

array(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes',
       'actor_2_name', 'actor_1_facebook_likes', 'gross', 'genres',
       'actor_1_name', 'movie_title', 'num_voted_users',
       'cast_total_facebook_likes', 'actor_3_name',
       'facenumber_in_poster', 'plot_keywords', 'movie_imdb_link',
       'num_user_for_reviews', 'language', 'country', 'content_rating',
       'budget', 'title_year', 'actor_2_facebook_likes', 'imdb_score',
       'aspect_ratio', 'movie_facebook_likes'], dtype=object)

# レシピ3 データ型の理解

In [14]:
movie.dtypes

color                         object
director_name                 object
num_critic_for_reviews       float64
duration                     float64
director_facebook_likes      float64
actor_3_facebook_likes       float64
actor_2_name                  object
actor_1_facebook_likes       float64
gross                        float64
genres                        object
actor_1_name                  object
movie_title                   object
num_voted_users                int64
cast_total_facebook_likes      int64
actor_3_name                  object
facenumber_in_poster         float64
plot_keywords                 object
movie_imdb_link               object
num_user_for_reviews         float64
language                      object
country                       object
content_rating                object
budget                       float64
title_year                   float64
actor_2_facebook_likes       float64
imdb_score                   float64
aspect_ratio                 float64
movie_facebook_likes           int64
dtype: object

In [15]:
# Count how many columns have each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in 1.0;
# counting the values of `dtypes` gives the same per-dtype tally (rows are
# ordered by count rather than by dtype name).
movie.dtypes.value_counts()

  """Entry point for launching an IPython kernel.


float64    13
int64       3
object     12
dtype: int64

# レシピ4 データのカラムをSeriesとして選択取得

In [16]:
movie['director_name']

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4             Doug Walker
              ...        
4911          Scott Smith
4912                  NaN
4913     Benjamin Roberds
4914          Daniel Hsia
4915             Jon Gunn
Name: director_name, Length: 4916, dtype: object

In [17]:
# A column can also be selected with dot (attribute) notation, but this is not recommended.
movie.director_name

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4             Doug Walker
              ...        
4911          Scott Smith
4912                  NaN
4913     Benjamin Roberds
4914          Daniel Hsia
4915             Jon Gunn
Name: director_name, Length: 4916, dtype: object

In [18]:
type(movie['director_name'])

pandas.core.series.Series

In [19]:
director = movie['director_name']
director.name

'director_name'

### SeriesをDataFrameに変換する

In [20]:
director.to_frame()

Unnamed: 0,director_name
0,James Cameron
1,Gore Verbinski
2,Sam Mendes
3,Christopher Nolan
4,Doug Walker
...,...
4911,Scott Smith
4912,
4913,Benjamin Roberds
4914,Daniel Hsia


# レシピ5 Seriesのメソッド呼び出し

### Seriesの全属性、メソッドについて調べるにはdir関数を使う

In [21]:
# Gather every attribute and method name exposed by Series into a set, then count them.
s_attr_methods = {*dir(pd.Series)}
len(s_attr_methods)

465

In [22]:
# Gather every attribute and method name exposed by DataFrame into a set, then count them.
df_attr_methods = {*dir(pd.DataFrame)}
len(df_attr_methods)

451

In [23]:
# Number of attribute/method names that Series and DataFrame have in common.
len(s_attr_methods.intersection(df_attr_methods))

394

In [24]:
# Bind the two columns used throughout this recipe in one unpacking.
director, actor_1_fb_likes = (movie['director_name'],
                              movie['actor_1_facebook_likes'])

In [25]:
director.head()

0        James Cameron
1       Gore Verbinski
2           Sam Mendes
3    Christopher Nolan
4          Doug Walker
Name: director_name, dtype: object

In [26]:
actor_1_fb_likes.head()

0     1000.0
1    40000.0
2    11000.0
3    27000.0
4      131.0
Name: actor_1_facebook_likes, dtype: float64

### 異なる値の個数を数えるvalue_counts

In [27]:
director.value_counts()

Steven Spielberg    26
Woody Allen         22
Martin Scorsese     20
Clint Eastwood      20
Spike Lee           16
                    ..
James Melkonian      1
Ed Decter            1
Hans Canosa          1
Richard Ayoade       1
Ellory Elkayem       1
Name: director_name, Length: 2397, dtype: int64

In [28]:
actor_1_fb_likes.value_counts()

1000.0     436
11000.0    206
2000.0     189
3000.0     150
12000.0    131
          ... 
362.0        1
216.0        1
859.0        1
225.0        1
334.0        1
Name: actor_1_facebook_likes, Length: 877, dtype: int64

### Seriesの要素数を数えるにはsize,shape,lenを使う

In [29]:
director.size

4916

In [30]:
director.shape

(4916,)

In [31]:
len(director)

4916

### 非欠損値の個数を数えるcount

In [32]:
director.count()

4814

In [33]:
actor_1_fb_likes.count()

4909

### 基本要約統計量に、min,max,mean,median,std,sumがある

In [34]:
# Collect six summary statistics of the column into one tuple:
# minimum, maximum, mean, median, standard deviation and sum.
(
    actor_1_fb_likes.min(),
    actor_1_fb_likes.max(),
    actor_1_fb_likes.mean(),
    actor_1_fb_likes.median(),
    actor_1_fb_likes.std(),
    actor_1_fb_likes.sum(),
)

(0.0, 640000.0, 6494.488490527602, 982.0, 15106.986883848309, 31881444.0)

### 基本統計量を簡略化して表示させるのにdescribeを使う

In [35]:
actor_1_fb_likes.describe()

count      4909.000000
mean       6494.488491
std       15106.986884
min           0.000000
25%         607.000000
50%         982.000000
75%       11000.000000
max      640000.000000
Name: actor_1_facebook_likes, dtype: float64

In [36]:
director.describe()

count                 4814
unique                2397
top       Steven Spielberg
freq                    26
Name: director_name, dtype: object

### 数値データの正確な四分位を計算するにはquantile

In [37]:
actor_1_fb_likes.quantile(.2)

510.0

In [38]:
# Request the nine levels 0.1 through 0.9 in one call; passing a list of
# levels produces one result per level.
actor_1_fb_likes.quantile([k / 10 for k in range(1, 10)])

0.1      240.0
0.2      510.0
0.3      694.0
0.4      854.0
0.5      982.0
0.6     1000.0
0.7     8000.0
0.8    13000.0
0.9    18000.0
Name: actor_1_facebook_likes, dtype: float64

### isnullメソッドでBooleanで欠損値かどうかわかる

In [39]:
director.isnull()

0       False
1       False
2       False
3       False
4       False
        ...  
4911    False
4912     True
4913    False
4914    False
4915    False
Name: director_name, Length: 4916, dtype: bool

### fillnaメソッドでSeries内の欠損値を置き換えできる

In [40]:
actor_1_fb_likes_filled = actor_1_fb_likes.fillna(0)
actor_1_fb_likes_filled.count()

4916

### Seriesから欠損値を削除するにはdropnaを使う

In [41]:
actor_1_fb_likes_dropped = actor_1_fb_likes.dropna()
actor_1_fb_likes_dropped.size

4909

### value_countsにnormalizeパラメータをTrueとすると、相対度数を返し、分布についての情報が得られる

In [42]:
director.value_counts(normalize=True)

Steven Spielberg    0.005401
Woody Allen         0.004570
Martin Scorsese     0.004155
Clint Eastwood      0.004155
Spike Lee           0.003324
                      ...   
James Melkonian     0.000208
Ed Decter           0.000208
Hans Canosa         0.000208
Richard Ayoade      0.000208
Ellory Elkayem      0.000208
Name: director_name, Length: 2397, dtype: float64

### 欠損値があるかどうか直接的に調べるにはhasnans

In [43]:
director.hasnans

True

### 非欠損値に対してTrueを返すnotnull

In [44]:
director.notnull()

0        True
1        True
2        True
3        True
4        True
        ...  
4911     True
4912    False
4913     True
4914     True
4915     True
Name: director_name, Length: 4916, dtype: bool

# レシピ6 Seriesの演算子の動き

In [45]:
imdb_score = movie['imdb_score']
imdb_score

0       7.9
1       7.1
2       6.8
3       8.5
4       7.1
       ... 
4911    7.7
4912    7.5
4913    6.3
4914    6.3
4915    6.6
Name: imdb_score, Length: 4916, dtype: float64

### 加算演算子を使うとSeriesの各要素に1追加出来る

In [46]:
imdb_score + 1

0       8.9
1       8.1
2       7.8
3       9.5
4       8.1
       ... 
4911    8.7
4912    8.5
4913    7.3
4914    7.3
4915    7.6
Name: imdb_score, Length: 4916, dtype: float64

### 減算(-),乗算(*),除算(/),べき乗(**)も可能

In [47]:
imdb_score * 2.5

0       19.75
1       17.75
2       17.00
3       21.25
4       17.75
        ...  
4911    19.25
4912    18.75
4913    15.75
4914    15.75
4915    16.50
Name: imdb_score, Length: 4916, dtype: float64

### 切り捨て除算(//),余りを返す(%)も可能

In [48]:
imdb_score // 7

0       1.0
1       1.0
2       0.0
3       1.0
4       1.0
       ... 
4911    1.0
4912    1.0
4913    0.0
4914    0.0
4915    0.0
Name: imdb_score, Length: 4916, dtype: float64

### 大なり(>),小なり(<),以上(>=),以下(<=),等価(==), 不等価(!=)をSeriesの条件比較に施すとTrueかFalseのSeriesになる

In [49]:
imdb_score > 7

0        True
1        True
2       False
3        True
4        True
        ...  
4911     True
4912     True
4913    False
4914    False
4915    False
Name: imdb_score, Length: 4916, dtype: bool

In [50]:
director == 'James Cameron'

0        True
1       False
2       False
3       False
4       False
        ...  
4911    False
4912    False
4913    False
4914    False
4915    False
Name: director_name, Length: 4916, dtype: bool

### 演算全てにメソッドがある

In [51]:
imdb_score.add(1)              # imdb_score + 1

0       8.9
1       8.1
2       7.8
3       9.5
4       8.1
       ... 
4911    8.7
4912    8.5
4913    7.3
4914    7.3
4915    7.6
Name: imdb_score, Length: 4916, dtype: float64

In [52]:
imdb_score.mul(2.5)            # imdb_score * 2.5

0       19.75
1       17.75
2       17.00
3       21.25
4       17.75
        ...  
4911    19.25
4912    18.75
4913    15.75
4914    15.75
4915    16.50
Name: imdb_score, Length: 4916, dtype: float64

In [53]:
imdb_score.floordiv(7)            # imdb_score // 7

0       1.0
1       1.0
2       0.0
3       1.0
4       1.0
       ... 
4911    1.0
4912    1.0
4913    0.0
4914    0.0
4915    0.0
Name: imdb_score, Length: 4916, dtype: float64

In [54]:
imdb_score.gt(7)            # imdb_score > 7

0        True
1        True
2       False
3        True
4        True
        ...  
4911     True
4912     True
4913    False
4914    False
4915    False
Name: imdb_score, Length: 4916, dtype: bool

In [55]:
director.eq('James Cameron')            # director == 'James Cameron'

0        True
1       False
2       False
3       False
4       False
        ...  
4911    False
4912    False
4913    False
4914    False
4915    False
Name: director_name, Length: 4916, dtype: bool

# レシピ7 Seriesのメソッドチェイニング

### ドット表記を用いたメソッドの順次呼び出しはメソッドチェイニングと呼ばれる

In [56]:
director.value_counts().head(3)

Steven Spielberg    26
Woody Allen         22
Martin Scorsese     20
Name: director_name, dtype: int64

### 欠損値の個数を数えるにはisnullの後にsumメソッドをチェイニングする

In [57]:
actor_1_fb_likes.isnull().sum()

7

### 欠損値のある数値カラムはデータ型がfloatになる

In [58]:
actor_1_fb_likes.dtype

dtype('float64')

### 可読性をあげる為、１つの式を１行に詰め込まず、バックスラッシュを使って複数行に分けて書く

In [59]:
# One method per line, continued with a trailing backslash:
# replace missing values with 0, cast to int, then show the first rows.
actor_1_fb_likes.fillna(0)\
                .astype(int)\
                .head()

0     1000
1    40000
2    11000
3    27000
4      131
Name: actor_1_facebook_likes, dtype: int32

### バックスラッシュではなく括弧でもOK

In [60]:
(actor_1_fb_likes.fillna(0)
                .astype(int)
                .head())

0     1000
1    40000
2    11000
3    27000
4      131
Name: actor_1_facebook_likes, dtype: int32

# レシピ8 分かりやすいインデックスに置き換え

### set_indexメソッドを使って、インデックスを変える

In [61]:
# Build a frame whose row labels are the movie titles; the result is bound to a new name.
movie2 = movie.set_index('movie_title')
movie2

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Signed Sealed Delivered,Color,Scott Smith,1.0,87.0,2.0,318.0,Daphne Zuniga,637.0,,Comedy|Drama,...,6.0,English,Canada,,,2013.0,470.0,7.7,,84
The Following,Color,,43.0,43.0,,319.0,Valorie Curry,841.0,,Crime|Drama|Mystery|Thriller,...,359.0,English,USA,TV-14,,,593.0,7.5,16.00,32000
A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,0.0,0.0,Maxwell Moody,0.0,,Drama|Horror|Thriller,...,3.0,English,USA,,1400.0,2013.0,0.0,6.3,,16
Shanghai Calling,Color,Daniel Hsia,14.0,100.0,0.0,489.0,Daniel Henney,946.0,10443.0,Comedy|Drama|Romance,...,9.0,English,USA,PG-13,,2012.0,719.0,6.3,2.35,660


### reset_indexメソッドでインデックスをカラムにすることが出来る

In [62]:
movie2.reset_index()

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,...,,,,,,,12.0,7.1,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4911,Signed Sealed Delivered,Color,Scott Smith,1.0,87.0,2.0,318.0,Daphne Zuniga,637.0,,...,6.0,English,Canada,,,2013.0,470.0,7.7,,84
4912,The Following,Color,,43.0,43.0,,319.0,Valorie Curry,841.0,,...,359.0,English,USA,TV-14,,,593.0,7.5,16.00,32000
4913,A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,0.0,0.0,Maxwell Moody,0.0,,...,3.0,English,USA,,1400.0,2013.0,0.0,6.3,,16
4914,Shanghai Calling,Color,Daniel Hsia,14.0,100.0,0.0,489.0,Daniel Henney,946.0,10443.0,...,9.0,English,USA,PG-13,,2012.0,719.0,6.3,2.35,660


# レシピ9 行とカラムの名前変更

### DataFrameのrenameメソッドは辞書形式で古い値を新しい値にマッピングする

In [67]:
movie = pd.read_csv('data/movie.csv', index_col='movie_title')

In [68]:
# Old-label -> new-label mappings for the row index and for the columns.
idx_rename = dict(Avatar='Ratava', Spectre='Ertceps')
col_rename = dict(director_name='Director Name',
                  num_critic_for_reviews='Critical Reviews')

In [69]:
movie_renamed = movie.rename(index=idx_rename, columns=col_rename)
movie_renamed.head()

Unnamed: 0_level_0,color,Director Name,Critical Reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ratava,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
Ertceps,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0


### リストを使って直接代入して名前を変更する方法

In [72]:
# Reload the data with the movie titles as row labels.
movie = pd.read_csv('data/movie.csv', index_col='movie_title')

# Row labels: copy to a plain list, patch two entries by position, assign back.
index = movie.index
index_list = index.tolist()
index_list[0], index_list[2] = 'Ratava', 'Ertceps'

# Column labels: same approach, replacing positions 1 and 2 in one slice assignment.
columns = movie.columns
column_list = columns.tolist()
column_list[1:3] = ['Director Name', 'Critical Reviews']

# Assigning the edited lists to the attributes relabels the frame in place.
movie.index = index_list
movie.columns = column_list
movie.head(3)

Unnamed: 0,color,Director Name,Critical Reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
Ratava,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
Ertceps,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000


# レシピ10 カラムの作成と削除

In [73]:
movie = pd.read_csv('data/movie.csv')

### カラムを追加する

In [74]:
movie['has_seen'] = 0

In [76]:
# Add up the Facebook likes of the three actors and the director, row by row.
# Series.add is the method form of `+`, applied here in the same left-to-right order.
movie['actor_director_facebook_likes'] = (
    movie['actor_1_facebook_likes']
    .add(movie['actor_2_facebook_likes'])
    .add(movie['actor_3_facebook_likes'])
    .add(movie['director_facebook_likes'])
)

### 新規カラムに欠損値があるか調べて欠損値を0にする

In [77]:
movie['actor_director_facebook_likes'].isnull().sum()

122

In [78]:
# Replace the missing totals with 0 and store the result back in the same column.
movie['actor_director_facebook_likes'] = (
    movie['actor_director_facebook_likes'].fillna(0)
)

### cast_total_facebook_likesがactor_director_facebook_likesより小さくなる事がないか確認

In [81]:
# Flag the rows where the cast total is at least the actor-plus-director total.
movie['is_cast_likes_more'] = movie['cast_total_facebook_likes'].ge(
    movie['actor_director_facebook_likes']
)

### is_cast_likes_moreが全てTrueかどうかallで調べる

In [82]:
movie['is_cast_likes_more'].all()

False

### actor_director_facebook_likesカラムを削除する

In [83]:
# Remove the helper column and rebind `movie` to the frame returned by drop.
movie = movie.drop(columns='actor_director_facebook_likes')

### 俳優のいいねの総和だけのカラムを作る

In [84]:
# Add up the Facebook likes of the three actors only (director excluded).
movie['actor_total_facebook_likes'] = (
    movie['actor_1_facebook_likes']
    .add(movie['actor_2_facebook_likes'])
    .add(movie['actor_3_facebook_likes'])
)

In [86]:
# Replace the missing totals with 0 and store the result back in the same column.
movie['actor_total_facebook_likes'] = (
    movie['actor_total_facebook_likes'].fillna(0)
)

### cast_total_facebook_likesがactor_total_facebook_likesより小さくないか調べる

In [87]:
# Recompute the flag against the actor-only total, then check that it holds for every row.
movie['is_cast_likes_more'] = movie['cast_total_facebook_likes'].ge(
    movie['actor_total_facebook_likes']
)
movie['is_cast_likes_more'].all()

True

### cast_total_facebook_likesの何パーセントがactor_total_facebook_likesから来たか計算する

In [88]:
# Share of the cast total that comes from the three actors
# (Series.div is the method form of `/`).
movie['pct_actor_cast_like'] = movie['actor_total_facebook_likes'].div(
    movie['cast_total_facebook_likes']
)

### pct_actor_cast_likeの最小値と最大値が0と1の間にあることを検証する

In [89]:
# Smallest and largest ratio, shown together as a (min, max) tuple.
(movie['pct_actor_cast_like'].min(),
movie['pct_actor_cast_like'].max())

(0.0, 1.0)

### Seriesとして出力する

In [90]:
# Label the rows by movie title, keep the first rows, then pick the ratio column as a Series.
movie.set_index('movie_title').head()['pct_actor_cast_like']

movie_title
Avatar                                        0.577369
Pirates of the Caribbean: At World's End      0.951396
Spectre                                       0.987521
The Dark Knight Rises                         0.683783
Star Wars: Episode VII - The Force Awakens    0.000000
Name: pct_actor_cast_like, dtype: float64

- DataFrameの末尾でなく所定の位置に新規カラムを挿入できるinsertメソッド
- insertメソッドは第1引数に挿入する位置を整数で、第2引数にカラム名、第3引数に値
- カラム名の整数位置を知るには、Indexオブジェクトのget_locメソッドを使う

In [91]:
# get_loc gives the integer position of the 'gross' column; adding 1 targets the slot right after it.
profit_index = movie.columns.get_loc('gross') + 1
profit_index

9

In [92]:
# Insert a `profit` column (gross minus budget) at position `profit_index`.
# DataFrame.insert raises ValueError when the column already exists, so the
# membership check keeps this cell safe to re-run on the same kernel.
if 'profit' not in movie.columns:
    movie.insert(loc=profit_index,
                 column='profit',
                 value=movie['gross'] - movie['budget'])