In [21]:
import pandas as pd
# Load the Titanic training data from the local data directory.
train = pd.read_csv("./data/titanic_train.csv")
# Truncate DataFrame/Series displays to 6 rows so cell outputs stay short.
pd.set_option("display.max_rows", 6)

### 1. 일반적인 방법 - DataFrame [ ] 안에 조건식을 넣는다.

In [22]:
# Boolean mask: True where Survived is 0.
# Attribute access (train.Survived == 0) builds the same mask.
train['Survived'].eq(0)

0       True
1      False
2      False
       ...  
888     True
889    False
890     True
Name: Survived, Length: 891, dtype: bool

In [23]:
# Index the frame with a boolean mask to keep only rows where Survived is 0.
survived_is_zero = train['Survived'] == 0
train[survived_is_zero]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


In [24]:
# Select a single column for the filtered rows.
# One .loc call (row mask, column label) replaces attribute access followed by
# a second [] lookup, so rows and column are chosen in a single indexing step.
train.loc[train['Survived'] == 0, 'Age']

0      22.0
4      35.0
5       NaN
       ... 
886    27.0
888     NaN
890    32.0
Name: Age, Length: 549, dtype: float64

In [25]:
# Multiple conditions: combine boolean masks with & (and) or | (or).
# Each comparison must be wrapped in its own parentheses, because & and |
# bind more tightly than comparison operators such as > and ==.
train[(train['Age'] > 30) & (train['Survived'] == 0)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
13,14,0,3,"Andersson, Mr. Anders Johan",male,39.0,1,5,347082,31.2750,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,,Q
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


### 2. query를 활용한 방법
    일반적인 방법과 computing time에는 큰 차이가 없으며, 메모리 활용 면에서는 오히려 더 효율적이라고 한다.

참고 : https://jakevdp.github.io/PythonDataScienceHandbook/03.12-performance-eval-and-query.html

In [26]:
# Same filter as boolean-mask indexing, written as a query expression string.
train.query(expr='Survived == 0')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


#### 변수 값을 query 안에 넣어서 활용

In [27]:
# A Python variable can be referenced inside the query string with the @ prefix.
survived = 1
train.query(expr='Survived == @survived')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
...,...,...,...,...,...,...,...,...,...,...,...,...
880,881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25.0,0,1,230433,26.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


#### 다중 조건 - and나 or 도 사용 가능 / ( ) 없이 사용 가능

In [28]:
# Inside a query string, conditions can be joined with `and` / `or`
# and do not need parentheses around each comparison.
# Alternative spelling with &: train.query('Age>30 & Survived==0')
train.query(expr='Age>30 and Survived==0')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
13,14,0,3,"Andersson, Mr. Anders Johan",male,39.0,1,5,347082,31.2750,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,,Q
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


#### 추가 기능

#### * in, not in

In [29]:
# Membership test inside a query string; the list is referenced with @.
# Boolean-mask equivalent: train[train['Embarked'].isin(check_list)]
check_list = ['S', 'Q']
train.query(expr='Embarked in @check_list')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.250,,S
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.100,C123,S
...,...,...,...,...,...,...,...,...,...,...,...,...
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.450,,S
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.750,,Q


#### * index 자체를 조건으로 활용

In [30]:
# `index` inside the query string refers to the frame's index;
# this keeps the rows whose index label is 101 or greater.
train.query(expr='index > 100')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
101,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
102,103,0,1,"White, Mr. Richard Frasar",male,21.0,0,1,35281,77.2875,D26,S
103,104,0,3,"Johansson, Mr. Gustaf Joel",male,33.0,0,0,7540,8.6542,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


multi index의 경우도 가능

In [31]:
# Build a frame indexed by (Sex, Pclass).
# set_index returns a new DataFrame, so `train` is left untouched and there is
# no need to slice-copy it first and then mutate the copy with inplace=True.
multi_index_train = train.set_index(['Sex', 'Pclass'])

In [32]:
# The index level names (Sex, Pclass) are used in the query string
# the same way column names are.
multi_index_train.query(expr='Sex=="male" and Pclass==3')

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Survived,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Sex,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
male,3,1,0,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.2500,,S
male,3,5,0,"Allen, Mr. William Henry",35.0,0,0,373450,8.0500,,S
male,3,6,0,"Moran, Mr. James",,0,0,330877,8.4583,,Q
male,...,...,...,...,...,...,...,...,...,...,...
male,3,882,0,"Markun, Mr. Johann",33.0,0,0,349257,7.8958,,S
male,3,885,0,"Sutehall, Mr. Henry Jr",25.0,0,0,SOTON/OQ 392076,7.0500,,S
male,3,891,0,"Dooley, Mr. Patrick",32.0,0,0,370376,7.7500,,Q


### 3. filter - 컬럼이 많거나 index가 특별한 기준일 때 유용
    - column과 index 기준으로 필터링 기능
    - 정규식도 활용 가능

In [91]:
# Small demo frame: an independent copy of the first rows of `train`,
# with the Name column used as the index so index filtering can be shown.
test = train.head().copy().set_index('Name')
test

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Braund, Mr. Owen Harris",1,0,3,male,22.0,1,0,A/5 21171,7.25,,S
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
"Heikkinen, Miss. Laina",3,1,3,female,26.0,0,0,STON/O2. 3101282,7.925,,S
"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,1,1,female,35.0,1,0,113803,53.1,C123,S
"Allen, Mr. William Henry",5,0,3,male,35.0,0,0,373450,8.05,,S


#### axis = 0 - index 필터링,  axis = 1 - column 필터링

like : 단어 포함 여부

In [92]:
# Keep the columns whose label contains the substring 'ge'.
test.filter(like='ge', axis='columns')

Unnamed: 0_level_0,PassengerId,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Braund, Mr. Owen Harris",1,22.0
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,38.0
"Heikkinen, Miss. Laina",3,26.0
"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,35.0
"Allen, Mr. William Henry",5,35.0


In [93]:
# Keep the rows whose index label contains the substring 'Mr'.
# This is a plain substring match, so labels containing 'Mrs.' are kept too.
test.filter(like='Mr', axis='index')

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Braund, Mr. Owen Harris",1,0,3,male,22.0,1,0,A/5 21171,7.25,,S
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,1,1,female,35.0,1,0,113803,53.1,C123,S
"Allen, Mr. William Henry",5,0,3,male,35.0,0,0,373450,8.05,,S


정규식

In [94]:
# Regex applied to column labels: keep the columns whose name ends with 's'.
test.filter(regex='s$', axis='columns')

Unnamed: 0_level_0,Pclass
Name,Unnamed: 1_level_1
"Braund, Mr. Owen Harris",3
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1
"Heikkinen, Miss. Laina",3
"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1
"Allen, Mr. William Henry",3


In [90]:
# Regex applied to index labels: keep the rows whose label ends with 's'.
test.filter(regex='s$', axis='index')

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Braund, Mr. Owen Harris",1,0,3,male,22.0,1,0,A/5 21171,7.25,,S
