# Pandas
## 1. pandas 특징

* Numpy를 내부적으로 활용함(numpy 의 특징을 그대로 가짐)
* 데이터 분석에 특화된 데이터 구조 제공
* 다양한 데이터 분석 함수 제공
* 데이터베이스에 쉽게 연결 가능
* json 데이터나 table 요소를 데이터프레임으로 손쉽게 변형 가능

In [1]:
# 설치
# !pip install pandas
# !conda install pandas

## 2. Pandas 에서 다루는 데이터 타입
1) DataFrame : 2차원의 표형식 데이터  --> key : [value1, value2]
2) Series : 1차원 벡터형식 데이터  --> list, tuple, ndarray

In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame(
    {'이름': ['홍길동', '장마철', '소나기', '더워요'],  # 한줄 한줄이 series 시리즈임
     'Age' : [23, 55, 24, 16], 
     '성별' : ['male', 'female', 'female', 'male']}
)
df

Unnamed: 0,이름,Age,성별
0,홍길동,23,male
1,장마철,55,female
2,소나기,24,female
3,더워요,16,male


In [3]:
df['이름']

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

In [4]:
# display 통해 표형식으로 예쁘게 출력 가능함

display(df)

Unnamed: 0,이름,Age,성별
0,홍길동,23,male
1,장마철,55,female
2,소나기,24,female
3,더워요,16,male


In [5]:
display(df['이름'])

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

In [6]:
print(type(df['이름']))

<class 'pandas.core.series.Series'>


### pandas = 2차원 matrix 
### series = vector 

In [7]:
display(df['이름'])
print(type(df['이름']))
print('shape:' , df['이름'].shape)
print()



display(df[['이름']])
print(type(df[['이름']]))
print('shape:' , df[['이름']].shape)
print('ndim:' ,df[['이름']].ndim)
# vector에서 대괄호를 하나 더 씌웠다는 것은 matrix 가된다

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

<class 'pandas.core.series.Series'>
shape: (4,)



Unnamed: 0,이름
0,홍길동
1,장마철
2,소나기
3,더워요


<class 'pandas.core.frame.DataFrame'>
shape: (4, 1)
ndim: 2


## * 시리즈 만들기 pd.Series([리스트자료], name = '컬럼명')
* 이름을 가지고 있는 벡터

In [8]:
ages = pd.Series([22, 35, 58],name = "Age")
ages

0    22
1    35
2    58
Name: Age, dtype: int64

In [9]:
ages[1:]

1    35
2    58
Name: Age, dtype: int64

In [10]:
ages.max()

np.int64(58)

In [11]:
ages.mean()

np.float64(38.333333333333336)

In [12]:
ages.median()

np.float64(35.0)

## * pandas 에서 자료 불러오기  + 저장하기 

* csv, tsv, excel, json, html 
* pd.read_ 확장자

In [13]:
!pip install openpyxl



In [14]:
# 엑셀파일 열기 1

df1 = pd.read_excel("./data/Online Retail.xlsx")
df1


KeyboardInterrupt



In [None]:
# 엑셀파일 열기 2

df2 = pd.read_excel("./data/아파트(매매)_실거래가_20240806113828.xlsx", header = 12, index_col=0)
df2.head(13)

# 13번째부터 불러와라?

## * csv 파일 읽기
* pd.read_csv(파일명, 옵션)

In [None]:
df3 = pd.read_csv("./data/06고객이탈예측.csv", encoding = 'cp949')
df3

## * json 파일 읽기

In [30]:
df4 = pd.read_json("./data/Chicken_shops", encoding = 'cp949')

UnicodeDecodeError: 'cp949' codec can't decode byte 0xe1 in position 106: illegal multibyte sequence

## * html 파일 읽기

In [28]:
df5 = pd.read_html("./data/corpList.htm")
df5

[    Unnamed: 0                             Unnamed: 1  \
 0     한국제15호스팩                             금융 지원 서비스업   
 1       에스오에스랩  측정, 시험, 항해, 제어 및 기타 정밀기기 제조업; 광학기기 제외   
 2   미래에셋비전스팩6호                             금융 지원 서비스업   
 3   에이치엠씨제7호스팩                             금융 지원 서비스업   
 4        파라다이스                     유원지 및 기타 오락관련 서비스업   
 5       한중엔시에스      전동기, 발전기 및 전기 변환 · 공급 · 제어 장치 제조업   
 6     KB제29호스팩                             금융 지원 서비스업   
 7   미래에셋비전스팩5호                             금융 지원 서비스업   
 8     씨어스테크놀로지                             의료용 기기 제조업   
 9     한국제14호스팩                             금융 지원 서비스업   
 10   디비금융스팩12호                                 기타 금융업   
 11        라메디텍                             의료용 기기 제조업   
 12       그리드위즈                 그외 기타 전문, 과학 및 기술 서비스업   
 13       다원넥스뷰                        사진장비 및 광학기기 제조업   
 14  미래에셋비전스팩4호                             금융 지원 서비스업   
 
                                            Unnamed: 2  Unnamed: 3 Unn

In [29]:
df5 = pd.read_html("./data/corpList.htm")
df5[0]

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,한국제15호스팩,금융 지원 서비스업,기업인수합병,2024-06-26,12월,유한,,서울특별시
1,에스오에스랩,"측정, 시험, 항해, 제어 및 기타 정밀기기 제조업; 광학기기 제외",산업용 및 차량용 라이다(LiDAR),2024-06-25,12월,정지성,홈페이지 보기,광주광역시
2,미래에셋비전스팩6호,금융 지원 서비스업,기업인수합병,2024-06-24,12월,정명훈,,서울특별시
3,에이치엠씨제7호스팩,금융 지원 서비스업,기타금융,2024-06-24,12월,강신명,,서울특별시
4,파라다이스,유원지 및 기타 오락관련 서비스업,"카지노, 호텔, 복합리조트",2024-06-24,12월,최종환..,홈페이지 보기,서울특별시
5,한중엔시에스,"전동기, 발전기 및 전기 변환 · 공급 · 제어 장치 제조업","수냉식 냉각시스템 ESS Parts, 공랭식 ESS Module Parts, EV ...",2024-06-24,12월,김환식,홈페이지 보기,경상북도
6,KB제29호스팩,금융 지원 서비스업,기업인수합병,2024-06-21,12월,서영화,,서울특별시
7,미래에셋비전스팩5호,금융 지원 서비스업,기업인수합병,2024-06-19,12월,김대호,,서울특별시
8,씨어스테크놀로지,의료용 기기 제조업,심전도검사솔루션 입원환자모니터링솔루션,2024-06-19,12월,이영신,홈페이지 보기,경기도
9,한국제14호스팩,금융 지원 서비스업,기업인수합병,2024-06-19,12월,변성환,,서울특별시


In [10]:
import os
from sqlalchemy import create_engine
import pymysql
import pandas as pd
from dotenv import load_dotenv
 
pymysql.install_as_MySQLdb()

load_dotenv(dotenv_path="../05data_scraping/.env_db")



True

In [36]:
# MySQL에서 테이블 불러오기 

engine = create_engine(f"{os.getenv('db')}+{os.getenv('dbtype')}://{os.getenv('id')}:{os.getenv('pw')}@{os.getenv('host')}/{os.getenv('database')}")
conn = engine.connect()

data_07 = pd.read_sql('2024_07_stock_price_info', con=conn)
data_08 = pd.read_sql('2024_08_stock_price_info', con=conn)
conn.close()

In [38]:
data_07.head(10)

Unnamed: 0,수집일,회사명,종목코드,현재가,변동금액,변화율,전일,고가,거래량,시가,저가,거래대금
0,2024-07-30,산일전기,62040,55500,5300,10.56,50200,61300,12682123,55200,50900,718098
1,2024-07-30,에이치에스효성,487570,65700,16900,-20.46,82600,75900,372304,75400,64800,26079
2,2024-07-30,엔에이치스팩31호,481890,2035,15,0.74,2020,2040,313965,2025,2020,637
3,2024-07-30,SK증권제13호스팩,473950,2080,0,0.0,2080,2085,214025,2075,2075,445
4,2024-07-30,엑셀세라퓨틱스,373110,6780,180,2.73,6600,7160,1359481,6830,6550,9329
5,2024-07-30,이베스트스팩6호,478110,2060,0,0.0,2060,2065,61185,2055,2045,126
6,2024-07-30,시프트업,462870,67300,3200,4.99,64100,67800,832883,64100,64100,55607
7,2024-07-30,하스,450330,12560,40,-0.32,12600,13250,697300,12650,12330,8916
8,2024-07-30,이노스페이스,462350,22750,1250,-5.21,24000,24000,122218,24000,22750,2835
9,2024-07-30,신한글로벌액티브리츠,481850,2850,15,-0.52,2865,2855,266692,2855,2835,758


In [39]:
data_08.head(10)

Unnamed: 0,수집일,회사명,종목코드,현재가,변동금액,변화율,전일,고가,거래량,시가,저가,거래대금
0,2024-08-02,산일전기,62040,52500,400,0.77,52100,55300,1648658,54200,51800,88568
1,2024-08-02,에이치에스효성,487570,55000,5100,-8.49,60100,58400,63677,58400,54500,3598
2,2024-08-02,엔에이치스팩31호,481890,2045,10,-0.49,2055,2055,76286,2055,2045,156
3,2024-08-02,SK증권제13호스팩,473950,2090,10,-0.48,2100,2100,66668,2100,2090,140
4,2024-08-02,엑셀세라퓨틱스,373110,6080,320,-5.0,6400,6300,319509,6270,6000,1955
5,2024-08-02,이베스트스팩6호,478110,2060,5,-0.24,2065,2065,12245,2065,2055,25
6,2024-08-02,시프트업,462870,68700,1200,-1.72,69900,69500,220973,68500,67000,15114
7,2024-08-02,하스,450330,13440,660,-4.68,14100,14900,1226414,13920,13420,17516
8,2024-08-02,이노스페이스,462350,19830,2870,-12.64,22700,20350,402382,19730,19200,7978
9,2024-08-02,신한글로벌액티브리츠,481850,2800,10,-0.36,2810,2815,70562,2815,2790,197


## * MySQL 에서 titanic DB - passenger, surv, ticket 테이블 로드하기 

In [72]:
load_dotenv(dotenv_path="../05data_scraping/.env_db")

engine = create_engine(f"{os.getenv('db')}+{os.getenv('dbtype')}://{os.getenv('id')}:{os.getenv('pw')}@{os.getenv('host')}/titanic")
conn = engine.connect()

# 테이블 3개 불러오기 
passenger = pd.read_sql('passenger', con=conn)
ticket = pd.read_sql('ticket', con=conn)
surv = pd.read_sql('surv', con=conn)


conn.close()

In [73]:
passenger

# 기준으로 삼는다. 데이터가 제일 많기 때문

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0
1,192,"Carbines, Mr. William",male,19.0,0,0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0
...,...,...,...,...,...,...
618,580,"Jussila, Mr. Eiriik",male,32.0,0,0
619,503,"O'Sullivan, Miss. Bridget Mary",female,,0,0
620,538,"LeRoy, Miss. Bertha",female,30.0,0,0
621,197,"Mernagh, Mr. Robert",male,,0,0


In [74]:
ticket

Unnamed: 0,PassengerId,Ticket,Pclass,Fare,Cabin,Embarked
0,486,4133,3,25.4667,,S
1,119,PC 17558,1,247.5208,B58 B60,C
2,836,PC 17756,1,83.1583,E49,C
3,528,PC 17483,1,221.7792,C95,S
4,396,350052,3,7.7958,,S
...,...,...,...,...,...,...
440,692,349256,3,13.4167,,C
441,585,3411,3,8.7125,,C
442,265,382649,3,7.7500,,Q
443,328,28551,2,13.0000,D,S


In [75]:
surv

Unnamed: 0,PassengerId,Survived
0,762,0
1,665,1
2,809,0
3,332,0
4,21,0
...,...,...
441,698,1
442,778,1
443,157,1
444,350,0


## * 3개의 데이터프레임을 1개로 합치기
* sql에서의 조인과 같다
* join( )
* merge( ) : 조인과 동일
* concat( ) 

In [76]:
test1 = pd.concat([passenger, ticket, surv], axis = 1)
test1.head() 

# 합쳐진 것을 보면 엉망으로 합쳐짐

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,PassengerId.1,Ticket,Pclass,Fare,Cabin,Embarked,PassengerId.2,Survived
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,486.0,4133,3.0,25.4667,,S,762.0,0.0
1,192,"Carbines, Mr. William",male,19.0,0,0,119.0,PC 17558,1.0,247.5208,B58 B60,C,665.0,1.0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,836.0,PC 17756,1.0,83.1583,E49,C,809.0,0.0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,528.0,PC 17483,1.0,221.7792,C95,S,332.0,0.0
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,396.0,350052,3.0,7.7958,,S,21.0,0.0


In [77]:
test1 = pd.concat([passenger, ticket, surv], keys = 'PassengerId', axis = 1)
test1.head() 


  test1 = pd.concat([passenger, ticket, surv], keys = 'PassengerId', axis = 1)


Unnamed: 0_level_0,P,P,P,P,P,P,a,a,a,a,a,a,s,s
Unnamed: 0_level_1,PassengerId,Name,Sex,Age,SibSp,Parch,PassengerId,Ticket,Pclass,Fare,Cabin,Embarked,PassengerId,Survived
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,486.0,4133,3.0,25.4667,,S,762.0,0.0
1,192,"Carbines, Mr. William",male,19.0,0,0,119.0,PC 17558,1.0,247.5208,B58 B60,C,665.0,1.0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,836.0,PC 17756,1.0,83.1583,E49,C,809.0,0.0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,528.0,PC 17483,1.0,221.7792,C95,S,332.0,0.0
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,396.0,350052,3.0,7.7958,,S,21.0,0.0


In [78]:
test2 = pd.merge(passenger, ticket, how='inner', on='PassengerId')
test2.head()

# join 보다 merge 를 더 많이 사용함
# 2개의 테이블을 제대로 합치는 법

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked
0,192,"Carbines, Mr. William",male,19.0,0,0,28424,2,13.0,,S
1,715,"Greenberg, Mr. Samuel",male,52.0,0,0,250647,2,13.0,,S
2,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,2690,3,7.2292,,C
3,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,A/5. 3337,3,14.5,,S
4,597,"Leitch, Miss. Jessie Wills",female,,0,0,248727,2,33.0,,S


In [79]:
test3 = pd.merge(left = test2, right=surv, how='inner', on='PassengerId')
test3.head()

# 3개의 테이블을 제대로 합치는 법
# 겹치는 게 없어서 아무것도 뜨지 않는다. 

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked,Survived


In [80]:
ticket = ticket.set_index("PassengerId")
ticket 

Unnamed: 0_level_0,Ticket,Pclass,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
486,4133,3,25.4667,,S
119,PC 17558,1,247.5208,B58 B60,C
836,PC 17756,1,83.1583,E49,C
528,PC 17483,1,221.7792,C95,S
396,350052,3,7.7958,,S
...,...,...,...,...,...
692,349256,3,13.4167,,C
585,3411,3,8.7125,,C
265,382649,3,7.7500,,Q
328,28551,2,13.0000,D,S


In [81]:
passenger.join(ticket)

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,,,,,
1,192,"Carbines, Mr. William",male,19.0,0,0,A/5 21171,3.0,7.250,,S
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,,,,,
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,STON/O2. 3101282,3.0,7.925,,S
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,113803,1.0,53.100,C123,S
...,...,...,...,...,...,...,...,...,...,...,...
618,580,"Jussila, Mr. Eiriik",male,32.0,0,0,A/5. 3336,3.0,16.100,,S
619,503,"O'Sullivan, Miss. Bridget Mary",female,,0,0,230136,2.0,39.000,F4,S
620,538,"LeRoy, Miss. Bertha",female,30.0,0,0,31028,2.0,10.500,,S
621,197,"Mernagh, Mr. Robert",male,,0,0,,,,,


# ==========================================

## * 깃허브에서 파일 바로 연동시키기 

In [82]:
data = pd.read_csv("https://raw.githubusercontent.com/haram4th/ablearn/main/Taitanic_train.csv")
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임이 어떻게 생겼는지 조회하는 명령어

* 앞쪽 5개 행을 읽는 head()    head(행갯수)
* 뒤쪽 5개 행을 읽는 tail()    tail(행갯수)

* head는 60을 넘어가면 자동 축약 출력, 즉 60개까지만 출력됨

In [83]:
data.head()


# .head() : 앞에서 5개만 출력함

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [84]:
data.tail()


# .tail() : 뒤에서 5개만 출력함

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [85]:
data.head(2)

# 앞에서 2개만 보기 

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [86]:
data.tail(7)

# 뒤에서 7개만 보기 

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [87]:
# head는 60을 넘어가면 자동 축약 출력, 즉 60개까지만 전부 출력됨

data.head(60)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [88]:
# head는 60을 넘어가면 자동 축약 출력
# 0,1,2,3,4, ......... 60,61,62 등

data.head(65)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
60,61,0,3,"Sirayanian, Mr. Orsen",male,22.0,0,0,2669,7.2292,,C
61,62,1,1,"Icard, Miss. Amelie",female,38.0,0,0,113572,80.0000,B28,
62,63,0,1,"Harris, Mr. Henry Birkhardt",male,45.0,1,0,36973,83.4750,C83,S
63,64,0,3,"Skoog, Master. Harald",male,4.0,3,2,347088,27.9000,,S


## * 데이터프레임의 컬럼명, 결측치, 데이터타입을 같이 표시하는 명령어
* .info( )

In [89]:
data.info()

# 결측값, 칼럼, 데이터타입 등 다양한 정보를 알 수 있음 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## * 데이터프레임에서 숫자 데이터의 통계를 보여주는 명령어

* .describe( ) 

In [90]:
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


## * pandas에서 일부 자료만 추출하기 

### 1) 데이터프레임에서 1개 컬럼만 가져오기 

In [91]:
data.head(1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [92]:
data['Name']

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [93]:
display(data['Name'])
print(display(data['Name']))

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

None


### 2) 데이터프레임에서 2개 이상 컬럼 가져오기 

In [94]:
data[['Name', 'Sex']]

Unnamed: 0,Name,Sex
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female
4,"Allen, Mr. William Henry",male
...,...,...
886,"Montvila, Rev. Juozas",male
887,"Graham, Miss. Margaret Edith",female
888,"Johnston, Miss. Catherine Helen ""Carrie""",female
889,"Behr, Mr. Karl Howell",male


## * 데이터프레임 컬럼 순서 바꾸기 

In [95]:
data = data[['PassengerId','Pclass', 'Survived', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']]
data

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,3,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임에서 일부 컬럼만 가져오기 2 

* loc : 인덱스 이름과 컬럼명으로 일부를 추출해서 가져오기 
* iloc : 슬라이싱을 이용해서 가져옴

In [96]:
data.loc[3:5,['Ticket', 'Name']]

# 앞쪽에는 인덱싱 번호, 뒷부분에는 출력할 컬럼만 

Unnamed: 0,Ticket,Name
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
5,330877,"Moran, Mr. James"


In [97]:
data.loc[:,['Ticket', 'Name']]

# 전체를 불러오기 

Unnamed: 0,Ticket,Name
0,A/5 21171,"Braund, Mr. Owen Harris"
1,PC 17599,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,STON/O2. 3101282,"Heikkinen, Miss. Laina"
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
...,...,...
886,211536,"Montvila, Rev. Juozas"
887,112053,"Graham, Miss. Margaret Edith"
888,W./C. 6607,"Johnston, Miss. Catherine Helen ""Carrie"""
889,111369,"Behr, Mr. Karl Howell"


In [98]:
data.loc[100:130,['Ticket', 'Name']]

Unnamed: 0,Ticket,Name
100,349245,"Petranec, Miss. Matilda"
101,349215,"Petroff, Mr. Pastcho (""Pentcho"")"
102,35281,"White, Mr. Richard Frasar"
103,7540,"Johansson, Mr. Gustaf Joel"
104,3101276,"Gustafsson, Mr. Anders Vilhelm"
105,349207,"Mionoff, Mr. Stoytcho"
106,343120,"Salkjelsvik, Miss. Anna Kristine"
107,312991,"Moss, Mr. Albert Johan"
108,349249,"Rekic, Mr. Tido"
109,371110,"Moran, Miss. Bertha"


In [99]:
data.iloc[0:10]

# 인덱스로만 가져오고 싶을 때는 iloc 를 사용하기 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,3,0,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,1,0,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,3,0,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,3,1,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,2,1,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [100]:
data.loc[:130,['Ticket', 'Name']]

# 0 ~ 130 까지 

Unnamed: 0,Ticket,Name
0,A/5 21171,"Braund, Mr. Owen Harris"
1,PC 17599,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,STON/O2. 3101282,"Heikkinen, Miss. Laina"
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
...,...,...
126,370372,"McMahon, Mr. Martin"
127,C 17369,"Madsen, Mr. Fridtjof Arne"
128,2668,"Peter, Miss. Anna"
129,347061,"Ekstrom, Mr. Johan"


In [101]:
data.loc[500:,['Ticket', 'Name']]

# 500 ~ 끝까지

Unnamed: 0,Ticket,Name
500,315086,"Calic, Mr. Petar"
501,364846,"Canavan, Miss. Mary"
502,330909,"O'Sullivan, Miss. Bridget Mary"
503,4135,"Laitinen, Miss. Kristina Sofia"
504,110152,"Maioni, Miss. Roberta"
...,...,...
886,211536,"Montvila, Rev. Juozas"
887,112053,"Graham, Miss. Margaret Edith"
888,W./C. 6607,"Johnston, Miss. Catherine Helen ""Carrie"""
889,111369,"Behr, Mr. Karl Howell"


In [102]:
data.iloc[:20, 4:]

# 완전히 인덱스 기반인 iloc

Unnamed: 0,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,male,22.0,1,0,A/5 21171,7.25,,S
1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,female,35.0,1,0,113803,53.1,C123,S
4,male,35.0,0,0,373450,8.05,,S
5,male,,0,0,330877,8.4583,,Q
6,male,54.0,0,0,17463,51.8625,E46,S
7,male,2.0,3,1,349909,21.075,,S
8,female,27.0,0,2,347742,11.1333,,S
9,female,14.0,1,0,237736,30.0708,,C


In [103]:
data.iloc[:20:2, ::2]

# 2칸식 건너뛰기

Unnamed: 0,PassengerId,Survived,Sex,SibSp,Ticket,Cabin
0,1,0,male,1,A/5 21171,
2,3,1,female,0,STON/O2. 3101282,
4,5,0,male,0,373450,
6,7,0,male,0,17463,E46
8,9,1,female,0,347742,
10,11,1,female,1,PP 9549,G6
12,13,0,male,0,A/5. 2151,
14,15,0,female,0,350406,
16,17,0,male,4,382652,
18,19,0,female,1,345763,


In [104]:
data.iloc[:20:2, ::-1]


# 콤마 앞 부분 :20:2  의 뜻 : 20번까지 불러오는데 2의 배수로만 출력
# 콤마 뒷 부분 ::-1  의 뜻 : 컬럼명을 역순으로 출력 -1 이기 때문

Unnamed: 0,Embarked,Cabin,Fare,Ticket,Parch,SibSp,Age,Sex,Name,Survived,Pclass,PassengerId
0,S,,7.25,A/5 21171,0,1,22.0,male,"Braund, Mr. Owen Harris",0,3,1
2,S,,7.925,STON/O2. 3101282,0,0,26.0,female,"Heikkinen, Miss. Laina",1,3,3
4,S,,8.05,373450,0,0,35.0,male,"Allen, Mr. William Henry",0,3,5
6,S,E46,51.8625,17463,0,0,54.0,male,"McCarthy, Mr. Timothy J",0,1,7
8,S,,11.1333,347742,2,0,27.0,female,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",1,3,9
10,S,G6,16.7,PP 9549,1,1,4.0,female,"Sandstrom, Miss. Marguerite Rut",1,3,11
12,S,,8.05,A/5. 2151,0,0,20.0,male,"Saundercock, Mr. William Henry",0,3,13
14,S,,7.8542,350406,0,0,14.0,female,"Vestrom, Miss. Hulda Amanda Adolfina",0,3,15
16,Q,,29.125,382652,1,4,2.0,male,"Rice, Master. Eugene",0,3,17
18,S,,18.0,345763,0,1,31.0,female,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",0,3,19


## * 특정 컬럼에서 유일값 요소 출력하기 
* .unique( ) 

In [96]:
data['Embarked'].unique()


# nan : 결측값 - 나중에 없애줘야함

array(['S', 'C', 'Q', nan], dtype=object)

In [98]:
data['Pclass'].unique()

array([3, 1, 2])

## * 특정 칼럼에서 유일값 개수 출력
* .nunique( )

In [99]:
data['Name'].nunique()

891

In [100]:
data['Embarked'].nunique()

3

## * 카테고리 컬럼에서 유일값 별 개수를 세는 함수
* .value_counts( )

In [102]:
data['Pclass'].value_counts()

Pclass
3    491
1    216
2    184
Name: count, dtype: int64

In [103]:
data['Embarked'].value_counts()

Embarked
S    644
C    168
Q     77
Name: count, dtype: int64

## * 데이터를 정렬하는 함수 
* sort_values(by="컬럼명")

In [104]:
data.sort_values(by="Name")

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
845,846,3,0,"Abbing, Mr. Anthony",male,42.0,0,0,C.A. 5547,7.5500,,S
746,747,3,0,"Abbott, Mr. Rossmore Edward",male,16.0,1,1,C.A. 2673,20.2500,,S
279,280,3,1,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35.0,1,1,C.A. 2673,20.2500,,S
308,309,2,0,"Abelson, Mr. Samuel",male,30.0,1,0,P/PP 3381,24.0000,,C
874,875,2,1,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0000,,C
...,...,...,...,...,...,...,...,...,...,...,...,...
286,287,3,1,"de Mulder, Mr. Theodore",male,30.0,0,0,345774,9.5000,,S
282,283,3,0,"de Pelsmaeker, Mr. Alfons",male,16.0,0,0,345778,9.5000,,S
361,362,2,0,"del Carlo, Mr. Sebastiano",male,29.0,1,0,SC/PARIS 2167,27.7208,,C
153,154,3,0,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5000,,S


In [107]:
data.sort_values(by="Age")

# 나이가 어린 순으로 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
803,804,3,1,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
755,756,2,1,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5000,,S
644,645,3,1,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
469,470,3,1,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
78,79,2,1,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,3,0,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,3,0,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,3,0,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,3,0,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [39]:
data.sort_values(by="Age", ascending  = False)

# 나이가 많은 순으로 
# NaN : 결측값이기 때문에 맨 마지막

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [40]:
data.sort_values(by=["Age","Name"], ascending  = False)

# age 랑 name 둘다 내림차순이다. 
# ascending = False : 내림차순

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
507,508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.5500,,S
593,594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.7500,,Q
140,141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
598,599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.2250,,C


In [41]:
data.sort_values(by=["Age","Name"], ascending  = [False, True])

# age : 내림차순 , ascending  = False
# name : 오름차순 , ascending  = True

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5000,C52,S
354,355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.2250,,C
495,496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C
240,241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C


## *  컬럼을 인덱스로 지정하기
* set_index("컬럼명") 

In [42]:
data.set_index("PassengerId")

# PassengerId 를 인덱스로 지정하는 거 

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [43]:
data = data.set_index(["PassengerId", "Name"])
data 

# PassengerId 랑 Name 을 멀티인덱스로 지정하는 거 

Unnamed: 0_level_0,Unnamed: 1_level_0,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S
2,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S
888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S
890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 인덱스를 숫자로 초기화 
* reset_index(drop = "원래 인덱스를 유지? 삭제) 

In [105]:
data

# 현재 여기서 PassengerId / Name 은 인덱스로 지정되어 있다.

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,3,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [45]:
data.reset_index(drop=True)

# 지정한 인덱스 삭제하기 

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,0,3,male,22.0,1,0,A/5 21171,7.2500,,S
1,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,1,female,35.0,1,0,113803,53.1000,C123,S
4,0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,211536,13.0000,,S
887,1,1,female,19.0,0,0,112053,30.0000,B42,S
888,0,3,female,,1,2,W./C. 6607,23.4500,,S
889,1,1,male,26.0,0,0,111369,30.0000,C148,C


In [46]:
data = data.reset_index()
data

Unnamed: 0,PassengerId,Name,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 컬럼 이름 출력 
* .columns 

In [47]:
data.columns[0:5]

Index(['PassengerId', 'Name', 'Survived', 'Pclass', 'Sex'], dtype='object')

In [48]:
col = data.columns
col

Index(['PassengerId', 'Name', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

## * 컬럼 이름 변경하기 
* rename({이름1 : 변경할 이름})
* columns = [컬럼이름]   <br>
이렇게 덮어쓰면 이름이 변경됨

In [131]:
data = data.rename(columns = {"PassengerId" : "승객번호"})
data 

# 컬럼명을 "PassengerId" -->  "승객번호"  로 변경하기 

Unnamed: 0,승객번호,Name,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [130]:
data

Unnamed: 0,승객번호,Name,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [132]:
data = data.rename(columns = {"Name" : "승객이름"})
data 

# 컬럼명을 "Name" ---> "승객이름" 로 변경하기 

Unnamed: 0,승객번호,승객이름,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [53]:
data.columns = ['승객번호', '이름', '선실등급', 'Survived', '성별', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

In [54]:
data.columns

Index(['승객번호', '이름', '선실등급', 'Survived', '성별', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [55]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 컬럼 추가/삭제하기 

In [56]:
data['가족수'] = data['SibSp']

In [57]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S,1
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [58]:
data['테스트'] = '테스트'
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수,테스트
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S,1,테스트
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1,테스트
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0,테스트
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1,테스트
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S,0,테스트
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S,0,테스트
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0,테스트
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S,1,테스트
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0,테스트


## * 컬럼 삭제하기 
* del 변수명['컬럼명']
* 변수명.drop("컬럼명", axis = 1) <br>  변수명.drop(["컬럼명1", "컬럼명2"], axis = 1)

In [59]:
del data['테스트']
data

# '테스트' 칼럼 지우기 

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S,1
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [60]:
data.drop('가족수', axis = 1)

# '가족수' 칼럼 삭제하기 

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임의 칼럼과 칼럼을 연산해서 계산 가능

In [61]:
data['가족수'] = data['SibSp'] + data['Parch'] 

In [62]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",0,3,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",0,3,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",0,2,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",0,3,female,,1,2,W./C. 6607,23.4500,,S,3
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [63]:
data['승차권/좌석'] = data['Ticket'] + data['Cabin'] 

# 둘다 문자열이기 때문에 더해지는거 가능함 

In [71]:
data

0        1.0
1        2.0
2        3.0
3        4.0
4        5.0
       ...  
886    887.0
887    888.0
888    889.0
889    890.0
890    891.0
Name: 승객번호, Length: 891, dtype: float32

## * 데이터타입 바꾸기 

In [65]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   승객번호      891 non-null    int64  
 1   이름        891 non-null    object 
 2   선실등급      891 non-null    int64  
 3   Survived  891 non-null    int64  
 4   성별        891 non-null    object 
 5   Age       714 non-null    float64
 6   SibSp     891 non-null    int64  
 7   Parch     891 non-null    int64  
 8   Ticket    891 non-null    object 
 9   Fare      891 non-null    float64
 10  Cabin     204 non-null    object 
 11  Embarked  889 non-null    object 
 12  가족수       891 non-null    int64  
 13  승차권/좌석    204 non-null    object 
dtypes: float64(2), int64(6), object(6)
memory usage: 97.6+ KB


In [66]:
data = data['승객번호'].astype('float32')

In [49]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Name         891 non-null    object 
 2   Survived     891 non-null    int64  
 3   Pclass       891 non-null    int64  
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## * 판다스 데이터 프레임에서 조건에 맞는 행만 가져오기 
* 데이터프레임[데이터프레임['컬럼명'] 조건식] ==> 조건식이 참인 것만 가져옴
* 조건식 연산자 > , < , ==, != , >=. <=, &(and), |(or) 

In [106]:
data

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,3,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [107]:
data[data['Age'] > 35]

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
6,7,1,0,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
13,14,3,0,"Andersson, Mr. Anders Johan",male,39.0,1,5,347082,31.2750,,S
15,16,2,1,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
865,866,2,1,"Bystrom, Mrs. (Karolina)",female,42.0,0,0,236852,13.0000,,S
871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
873,874,3,0,"Vander Cruyssen, Mr. Victor",male,47.0,0,0,345765,9.0000,,S
879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C


In [111]:
data[(data['Age'] > 35) & (data['Sex'] == 'female')]

# 나이가 35세 이상인 여성 출력하기 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
15,16,2,1,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0000,,S
25,26,3,1,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...",female,38.0,1,5,347077,31.3875,,S
40,41,3,0,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40.0,1,0,7546,9.4750,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
862,863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Ba...",female,48.0,0,0,17466,25.9292,D17,S
865,866,2,1,"Bystrom, Mrs. (Karolina)",female,42.0,0,0,236852,13.0000,,S
871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C


In [112]:
data[(data['Pclass'] == 1) & (data['Sex'] == 'male') & (data['Survived'] == 1)]

# 성별이 남자이고, Pclass 가 1등급, 생존여부가 1인 사람 출력하기 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S
55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S
97,98,1,1,"Greenfield, Mr. William Bertram",male,23.0,0,1,PC 17759,63.3583,D10 D12,C
187,188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45.0,0,0,111428,26.55,,S
209,210,1,1,"Blank, Mr. Henry",male,40.0,0,0,112277,31.0,A31,C
224,225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38.0,1,0,19943,90.0,C93,S
248,249,1,1,"Beckwith, Mr. Richard Leonard",male,37.0,1,1,11751,52.5542,D35,S
298,299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S
305,306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
370,371,1,1,"Harder, Mr. George Achilles",male,25.0,1,0,11765,55.4417,E50,C


In [114]:
data[data['Fare'] == 0]

# 승차권 금액이 0원인 사람

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
179,180,3,0,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
263,264,1,0,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
271,272,3,1,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S
277,278,2,0,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
302,303,3,0,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
413,414,2,0,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
466,467,2,0,"Campbell, Mr. William",male,,0,0,239853,0.0,,S
481,482,2,0,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0.0,,S
597,598,3,0,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S
633,634,1,0,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0.0,,S


In [116]:
data[data['Ticket'] == '239853']

# 3명의 친구가 티켓번호 239853, 누군가 한번에 티켓을 구매한 경우같음

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
277,278,2,0,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
413,414,2,0,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
466,467,2,0,"Campbell, Mr. William",male,,0,0,239853,0.0,,S


In [117]:
data[(data['Sex'] == 'male') & (data['Survived'] == 1)]

# 남자면서 생존한사람

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
17,18,2,1,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0000,,S
21,22,2,1,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0000,D56,S
23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5000,A6,S
36,37,3,1,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C
55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5000,C52,S
...,...,...,...,...,...,...,...,...,...,...,...,...
838,839,3,1,"Chip, Mr. Chang",male,32.0,0,0,1601,56.4958,,S
839,840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7000,C47,C
857,858,1,1,"Daly, Mr. Peter Denis",male,51.0,0,0,113055,26.5500,E17,S
869,870,3,1,"Johnson, Master. Harold Theodor",male,4.0,1,1,347742,11.1333,,S


In [118]:
data[((data['Sex'] == 'female') & (data['Survived'] == 1)) & ((data['Pclass'] == 1) |(data['Pclass'] == 2))]

# 선실등급이 1 이거나 2이면서, 생존한 여성

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
9,10,2,1,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
15,16,2,1,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
874,875,2,1,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0000,,C
879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C
880,881,2,1,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25.0,0,1,230433,26.0000,,S


* 선실등급이 1 or 2 이면서 생존한 여성 중에서 이름 컬럼만 보고 싶을 때
<br><br>
* 데이터프레임.loc[데이터프레임['컬럼명'] 조건. '원하는 컬럼명']  ---> 공식 ver
* 데이터프레임[데이터프레임['컬럼명'] 조건]['원하는컬럼명']  ---> 편법 ver


In [122]:
# 선실등급이 1 or 2 이면서 생존한 여성 중에서 이름 컬럼만 보고 싶을 때

data.loc[((data['Sex'] == 'female') & (data['Survived'] == 1)) & ((data['Pclass'] == 1) |(data['Pclass'] == 2)), 'Name']



1      Cumings, Mrs. John Bradley (Florence Briggs Th...
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
9                    Nasser, Mrs. Nicholas (Adele Achem)
11                              Bonnell, Miss. Elizabeth
15                      Hewlett, Mrs. (Mary D Kingcome) 
                             ...                        
871     Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
874                Abelson, Mrs. Samuel (Hannah Wizosky)
879        Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
880         Shelley, Mrs. William (Imanita Parrish Hall)
887                         Graham, Miss. Margaret Edith
Name: Name, Length: 161, dtype: object

In [127]:
# 선실등급(Pclass)이 1인 사람의 'Name' 칼럼만 출력하기 

data.loc[(data['Pclass'] == 1), 'Name']

1      Cumings, Mrs. John Bradley (Florence Briggs Th...
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
6                                McCarthy, Mr. Timothy J
11                              Bonnell, Miss. Elizabeth
23                          Sloper, Mr. William Thompson
                             ...                        
871     Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
872                             Carlsson, Mr. Frans Olof
879        Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
887                         Graham, Miss. Margaret Edith
889                                Behr, Mr. Karl Howell
Name: Name, Length: 216, dtype: object

In [133]:
# 선실등급(Pclass)이 1인 사람의 'Name' 칼럼, 'Pclass' 칼럼 출력하기 
# 공식 ver

data.loc[(data['Pclass'] == 1), ['Name', 'Pclass']]

Unnamed: 0,Name,Pclass
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1
6,"McCarthy, Mr. Timothy J",1
11,"Bonnell, Miss. Elizabeth",1
23,"Sloper, Mr. William Thompson",1
...,...,...
871,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",1
872,"Carlsson, Mr. Frans Olof",1
879,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",1
887,"Graham, Miss. Margaret Edith",1


In [130]:
data[data['Pclass'] == 1]

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
6,7,1,0,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5000,A6,S
...,...,...,...,...,...,...,...,...,...,...,...,...
871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
872,873,1,0,"Carlsson, Mr. Frans Olof",male,33.0,0,0,695,5.0000,B51 B53 B55,S
879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S


In [132]:
data[data['Pclass'] == 1][['Name', 'Pclass']]

# 편법 ver

Unnamed: 0,Name,Pclass
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1
6,"McCarthy, Mr. Timothy J",1
11,"Bonnell, Miss. Elizabeth",1
23,"Sloper, Mr. William Thompson",1
...,...,...
871,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",1
872,"Carlsson, Mr. Frans Olof",1
879,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",1
887,"Graham, Miss. Margaret Edith",1


isin([조건1, 조건2])

In [136]:
# data[(data['선실등급'] == 1) | (data['선실등급'] == 2)]

data[data['Pclass'].isin([1,2])]

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
6,7,1,0,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
9,10,2,1,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
...,...,...,...,...,...,...,...,...,...,...,...,...
880,881,2,1,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25.0,0,1,230433,26.0000,,S
883,884,2,0,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5000,,S
886,887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S


## * 특정 단어를 포함한 행을 찾기 
* ex) 이름에 Joseph 이 들어간 사람
<br><br>
* like %단어% - SQL ver
* .str.contains("찾을 단어")

In [137]:
data[data['Name'].str.contains("Joseph")]

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
20,21,2,0,"Fynney, Mr. Joseph J",male,35.0,0,0,239865,26.0,,S
34,35,1,0,"Meyer, Mr. Edgar Joseph",male,28.0,1,0,PC 17604,82.1708,,C
95,96,3,0,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S
140,141,3,0,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
145,146,2,0,"Nicholls, Mr. Joseph Charles",male,19.0,1,1,C.A. 33112,36.75,,S
194,195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44.0,0,0,PC 17610,27.7208,B4,C
375,376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C
454,455,3,0,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S
532,533,3,0,"Elias, Mr. Joseph Jr",male,17.0,1,1,2690,7.2292,,C
559,560,3,1,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36.0,1,0,345572,17.4,,S


In [138]:
# 승차권 번호에 'PC' 가 들어간 사람들

data[data['Ticket'].str.contains("PC")]

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
30,31,1,0,"Uruchurtu, Don. Manuel E",male,40.0,0,0,PC 17601,27.7208,,C
31,32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C
34,35,1,0,"Meyer, Mr. Edgar Joseph",male,28.0,1,0,PC 17604,82.1708,,C
52,53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C
64,65,1,0,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C
96,97,1,0,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
97,98,1,1,"Greenfield, Mr. William Bertram",male,23.0,0,1,PC 17759,63.3583,D10 D12,C
118,119,1,0,"Baxter, Mr. Quigg Edmond",male,24.0,0,1,PC 17558,247.5208,B58 B60,C
139,140,1,0,"Giglio, Mr. Victor",male,24.0,0,0,PC 17593,79.2,B86,C


## * 승객 이름에서 ( ) 괄호 없애기 

 ex) Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
 
 * 방법1 : 기본적인 반복분 사용
 * 방법2 : data['Name'].apply(lambda x : x.replace("(","").replace(")",""))
 * 방법3 : data['Name'].str.replace("(","").replace(")","")

In [141]:
data['Name']

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [148]:
data['Name'].replace("(","").replace(")","")

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [150]:
for name in data['Name']:
    print(name.replace("(","").replace(")",""))

Braund, Mr. Owen Harris
Cumings, Mrs. John Bradley Florence Briggs Thayer
Heikkinen, Miss. Laina
Futrelle, Mrs. Jacques Heath Lily May Peel
Allen, Mr. William Henry
Moran, Mr. James
McCarthy, Mr. Timothy J
Palsson, Master. Gosta Leonard
Johnson, Mrs. Oscar W Elisabeth Vilhelmina Berg
Nasser, Mrs. Nicholas Adele Achem
Sandstrom, Miss. Marguerite Rut
Bonnell, Miss. Elizabeth
Saundercock, Mr. William Henry
Andersson, Mr. Anders Johan
Vestrom, Miss. Hulda Amanda Adolfina
Hewlett, Mrs. Mary D Kingcome 
Rice, Master. Eugene
Williams, Mr. Charles Eugene
Vander Planke, Mrs. Julius Emelia Maria Vandemoortele
Masselmani, Mrs. Fatima
Fynney, Mr. Joseph J
Beesley, Mr. Lawrence
McGowan, Miss. Anna "Annie"
Sloper, Mr. William Thompson
Palsson, Miss. Torborg Danira
Asplund, Mrs. Carl Oscar Selma Augusta Emilia Johansson
Emir, Mr. Farred Chehab
Fortune, Mr. Charles Alexander
O'Dwyer, Miss. Ellen "Nellie"
Todoroff, Mr. Lalio
Uruchurtu, Don. Manuel E
Spencer, Mrs. William Augustus Marie Eugenie
Glynn, M

In [153]:
# 방법 1

result = []
for name in data['Name']:
    result.append(name.replace("(","").replace(")",""))
result

['Braund, Mr. Owen Harris',
 'Cumings, Mrs. John Bradley Florence Briggs Thayer',
 'Heikkinen, Miss. Laina',
 'Futrelle, Mrs. Jacques Heath Lily May Peel',
 'Allen, Mr. William Henry',
 'Moran, Mr. James',
 'McCarthy, Mr. Timothy J',
 'Palsson, Master. Gosta Leonard',
 'Johnson, Mrs. Oscar W Elisabeth Vilhelmina Berg',
 'Nasser, Mrs. Nicholas Adele Achem',
 'Sandstrom, Miss. Marguerite Rut',
 'Bonnell, Miss. Elizabeth',
 'Saundercock, Mr. William Henry',
 'Andersson, Mr. Anders Johan',
 'Vestrom, Miss. Hulda Amanda Adolfina',
 'Hewlett, Mrs. Mary D Kingcome ',
 'Rice, Master. Eugene',
 'Williams, Mr. Charles Eugene',
 'Vander Planke, Mrs. Julius Emelia Maria Vandemoortele',
 'Masselmani, Mrs. Fatima',
 'Fynney, Mr. Joseph J',
 'Beesley, Mr. Lawrence',
 'McGowan, Miss. Anna "Annie"',
 'Sloper, Mr. William Thompson',
 'Palsson, Miss. Torborg Danira',
 'Asplund, Mrs. Carl Oscar Selma Augusta Emilia Johansson',
 'Emir, Mr. Farred Chehab',
 'Fortune, Mr. Charles Alexander',
 'O\'Dwyer, Miss

In [154]:
data['Name']

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [157]:
# 방법 2

data['Name'].apply(lambda x : x.replace("(","").replace(")",""))

# 재할당까지 해줘야 변경됨!

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley Florence Briggs Thayer
2                                 Heikkinen, Miss. Laina
3             Futrelle, Mrs. Jacques Heath Lily May Peel
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [158]:
# 방법 3 : .str.replace("(","").replace(")","")
# apply 와 같은 역할을 함 

data['Name'].str.replace("(","").replace(")","")

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley Florence Briggs Tha...
2                                 Heikkinen, Miss. Laina
3            Futrelle, Mrs. Jacques Heath Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [161]:
# 함수 사용해서 이름 대문자로 바꾸기 

data['Name'].str.upper()

0                                BRAUND, MR. OWEN HARRIS
1      CUMINGS, MRS. JOHN BRADLEY (FLORENCE BRIGGS TH...
2                                 HEIKKINEN, MISS. LAINA
3           FUTRELLE, MRS. JACQUES HEATH (LILY MAY PEEL)
4                               ALLEN, MR. WILLIAM HENRY
                             ...                        
886                                MONTVILA, REV. JUOZAS
887                         GRAHAM, MISS. MARGARET EDITH
888             JOHNSTON, MISS. CATHERINE HELEN "CARRIE"
889                                BEHR, MR. KARL HOWELL
890                                  DOOLEY, MR. PATRICK
Name: Name, Length: 891, dtype: object

In [162]:
# 함수 사용해서 이름 소문자로 바꾸기 

data['Name'].str.lower()

0                                braund, mr. owen harris
1      cumings, mrs. john bradley (florence briggs th...
2                                 heikkinen, miss. laina
3           futrelle, mrs. jacques heath (lily may peel)
4                               allen, mr. william henry
                             ...                        
886                                montvila, rev. juozas
887                         graham, miss. margaret edith
888             johnston, miss. catherine helen "carrie"
889                                behr, mr. karl howell
890                                  dooley, mr. patrick
Name: Name, Length: 891, dtype: object

In [166]:
# 성빼고 이름만 출력하기 

data['Name'].str.split(",")[0][0]

'Braund'

In [170]:
# 성빼고 이름만 출력하기 

for name in data['Name'].str.split(","):
    print(name[0])

Braund
Cumings
Heikkinen
Futrelle
Allen
Moran
McCarthy
Palsson
Johnson
Nasser
Sandstrom
Bonnell
Saundercock
Andersson
Vestrom
Hewlett
Rice
Williams
Vander Planke
Masselmani
Fynney
Beesley
McGowan
Sloper
Palsson
Asplund
Emir
Fortune
O'Dwyer
Todoroff
Uruchurtu
Spencer
Glynn
Wheadon
Meyer
Holverson
Mamee
Cann
Vander Planke
Nicola-Yarred
Ahlin
Turpin
Kraeff
Laroche
Devaney
Rogers
Lennon
O'Driscoll
Samaan
Arnold-Franchi
Panula
Nosworthy
Harper
Faunthorpe
Ostby
Woolner
Rugg
Novel
West
Goodwin
Sirayanian
Icard
Harris
Skoog
Stewart
Moubarek
Nye
Crease
Andersson
Kink
Jenkin
Goodwin
Hood
Chronopoulos
Bing
Moen
Staneff
Moutal
Caldwell
Dowdell
Waelens
Sheerlinck
McDermott
Carrau
Ilett
Backstrom
Ford
Slocovski
Fortune
Celotti
Christmann
Andreasson
Chaffee
Dean
Coxon
Shorney
Goldschmidt
Greenfield
Doling
Kantor
Petranec
Petroff
White
Johansson
Gustafsson
Mionoff
Salkjelsvik
Moss
Rekic
Moran
Porter
Zabour
Barton
Jussila
Attalah
Pekoniemi
Connors
Turpin
Baxter
Andersson
Hickman
Moore
Nasser
Webber
Whi

In [None]:

result = []
for name in data['Name']:
    result.append(name.replace("(","").replace(")",""))
result

In [140]:
# 이 데이터로 연습하기 

data = pd.read_csv("https://raw.githubusercontent.com/haram4th/ablearn/main/Taitanic_train.csv")
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## * groupby 
* 데이터프레임[['컬럼1','컬럼2']].groupby("기준컬럼").집합연산함수(mean, max, min, count, value_counts)
* 데이터프레임.groupby("기준컬럼").집합연산함수(mean., max, min, count, value_counts) 



In [171]:
data[['Sex', 'Age']].groupby('Sex').mean()

Unnamed: 0_level_0,Age
Sex,Unnamed: 1_level_1
female,27.915709
male,30.726645


In [172]:
data[['Sex', 'Age']].groupby('Sex').count()

Unnamed: 0_level_0,Age
Sex,Unnamed: 1_level_1
female,261
male,453


In [173]:
data[['Sex', 'Age']].groupby('Sex').max()

Unnamed: 0_level_0,Age
Sex,Unnamed: 1_level_1
female,63.0
male,80.0


In [175]:
data[['Sex', 'Age']].groupby('Sex').value_counts()

# 성별 별 나이별로 몇명씩 있는지 알 수 있음

Sex     Age 
female  24.0    16
        18.0    13
        22.0    12
        30.0    11
        35.0     8
                ..
male    57.0     1
        66.0     1
        70.5     1
        74.0     1
        80.0     1
Name: count, Length: 145, dtype: int64

In [178]:
# 성별 별 생존자수 

data[['Sex', 'Survived']].groupby('Sex').count()

Unnamed: 0_level_0,Survived
Sex,Unnamed: 1_level_1
female,314
male,577


In [179]:
# 성별 별 생존자수 

data[['Sex', 'Survived']].groupby('Sex').value_counts()

Sex     Survived
female  1           233
        0            81
male    0           468
        1           109
Name: count, dtype: int64

In [183]:
# 선실등급별 생존여부 Pclass
# count 대신 value_counts 를 사용해야 한다. 

data[['Pclass', 'Survived']].groupby('Pclass').value_counts()

Pclass  Survived
1       1           136
        0            80
2       0            97
        1            87
3       0           372
        1           119
Name: count, dtype: int64

In [182]:
# 선실등급별 생존여부 Pclass
# 그냥 count를 사용하게 되면, surviced 1 과 0 둘다 합쳐져서 집계할 수 X

data[['Pclass', 'Survived']].groupby('Pclass').count()

Unnamed: 0_level_0,Survived
Pclass,Unnamed: 1_level_1
1,216
2,184
3,491


In [184]:
data[['Pclass', 'Survived']].groupby('Pclass').value_counts(sort=False)

Pclass  Survived
1       0            80
        1           136
2       0            97
        1            87
3       0           372
        1           119
Name: count, dtype: int64

In [186]:
# 성별별, 선실등급별 생존여부 


data[['Sex','Pclass', 'Survived']].groupby(['Sex','Pclass']).value_counts(sort=False)

Sex     Pclass  Survived
female  1       0             3
                1            91
        2       0             6
                1            70
        3       0            72
                1            72
male    1       0            77
                1            45
        2       0            91
                1            17
        3       0           300
                1            47
Name: count, dtype: int64

In [187]:
# 여성이면서 선실등급 1

91 / (91+3)

0.9680851063829787

In [188]:
# 여성이면서 선실등급 2

70 / (70+6)

0.9210526315789473

In [189]:
# 여성이면서 선실등급 3

72 / (72+72)

0.5