# Pandas
## 1. pandas 특징

* Numpy를 내부적으로 활용함(numpy 의 특징을 그대로 가짐)
* 데이터 분석에 특화된 데이터 구조 제공
* 다양한 데이터 분석 함수 제공
* 데이터베이스에 쉽게 연결 가능
* json 데이터나 table 요소를 데이터프레임으로 손쉽게 변형 가능

In [1]:
# 설치
# !pip install pandas
# !conda install pandas

## 2. Pandas 에서 다루는 데이터 타입
1) DataFrame : 2차원의 표형식 데이터  --> key : [value1, value2]
2) Series : 1차원 벡터형식 데이터  --> list, tuple, ndarray

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(
    {'이름': ['홍길동', '장마철', '소나기', '더워요'],  # 한줄 한줄이 series 시리즈임
     'Age' : [23, 55, 24, 16], 
     '성별' : ['male', 'female', 'female', 'male']}
)
df

Unnamed: 0,이름,Age,성별
0,홍길동,23,male
1,장마철,55,female
2,소나기,24,female
3,더워요,16,male


In [4]:
df['이름']

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

In [5]:
# display 통해 표형식으로 예쁘게 출력 가능함

display(df)

Unnamed: 0,이름,Age,성별
0,홍길동,23,male
1,장마철,55,female
2,소나기,24,female
3,더워요,16,male


In [6]:
display(df['이름'])

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

In [7]:
print(type(df['이름']))

<class 'pandas.core.series.Series'>


### pandas = 2차원 matrix 
### series = vector 

In [8]:
display(df['이름'])
print(type(df['이름']))
print('shape:' , df['이름'].shape)
print()



display(df[['이름']])
print(type(df[['이름']]))
print('shape:' , df[['이름']].shape)
print('ndim:' ,df[['이름']].ndim)
# vector에서 대괄호를 하나 더 씌웠다는 것은 matrix 가된다

0    홍길동
1    장마철
2    소나기
3    더워요
Name: 이름, dtype: object

<class 'pandas.core.series.Series'>
shape: (4,)



Unnamed: 0,이름
0,홍길동
1,장마철
2,소나기
3,더워요


<class 'pandas.core.frame.DataFrame'>
shape: (4, 1)
ndim: 2


## * 시리즈 만들기 pd.Series([리스트자료], name = '컬럼명')
* 이름을 가지고 있는 벡터

In [9]:
ages = pd.Series([22, 35, 58],name = "Age")
ages

0    22
1    35
2    58
Name: Age, dtype: int64

In [10]:
ages[1:]

1    35
2    58
Name: Age, dtype: int64

In [11]:
ages.max()

np.int64(58)

In [12]:
ages.mean()

np.float64(38.333333333333336)

In [14]:
ages.median()

np.float64(35.0)

## * pandas 에서 자료 불러오기  + 저장하기 

* csv, tsv, excel, json, html 
* pd.read_ 확장자

In [17]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
   ---------------------------------------- 0.0/250.9 kB ? eta -:--:--
   - -------------------------------------- 10.2/250.9 kB ? eta -:--:--
   ----------------- ---------------------- 112.6/250.9 kB 2.2 MB/s eta 0:00:01
   ---------------------------------------- 250.9/250.9 kB 3.9 MB/s eta 0:00:00
Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5


In [18]:
# 엑셀파일 열기 1

df1 = pd.read_excel("./data/Online Retail.xlsx")
df1

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France


In [23]:
# 엑셀파일 열기 2

df2 = pd.read_excel("./data/아파트(매매)_실거래가_20240806113828.xlsx", header = 12, index_col=0)
df2.head(13)

# 13번째부터 불러와라?

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0_level_0,시군구,번지,본번,부번,단지명,전용면적(㎡),계약년월,계약일,거래금액(만원),동,층,매수자,매도자,건축년도,도로명,해제사유발생일,거래유형,중개사소재지,등기일자,주택유형
NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,서울특별시 영등포구 당산동5가,42,42,0,당산삼성래미안4차,115.78,202407,31,139000,-,4,개인,개인,2003,당산로 214,-,직거래,-,-,아파트
2,강원특별자치도 원주시 지정면 가곡리,1512,1512,0,원주롯데캐슬골드파크1차(5단지),84.9644,202407,31,38900,-,10,개인,개인,2019,신지정로 250,20240802,중개거래,강원 원주시,-,아파트
3,대구광역시 달성군 화원읍 구라리,1650,1650,0,대곡역래미안,84.5958,202407,31,29500,-,11,개인,개인,2007,비슬로539길 35,-,중개거래,대구 달성군,-,아파트
4,광주광역시 남구 진월동,312-7,312,7,광명,70.625,202407,31,16900,-,1,개인,개인,1998,광복마을길 63,-,중개거래,광주 남구,-,아파트
5,광주광역시 남구 진월동,331-93,331,93,중흥,84.84,202407,31,17500,102,13,개인,개인,1992,서문대로749번마길 30,-,직거래,-,24.07.31,아파트
6,경기도 양평군 양평읍 양근리,140-5,140,5,양평현대,59.7,202407,31,22500,-,14,개인,개인,1996,관문길 60,-,중개거래,경기 양평군,-,아파트
7,서울특별시 동대문구 장안동,587,587,0,동대문더퍼스트데시앙,59.8074,202407,31,80500,-,10,개인,개인,2019,장한로27가길 37,-,중개거래,서울 동대문구,-,아파트
8,대구광역시 달성군 화원읍 천내리,690-1,690,1,화원파크뷰우방아이유쉘,69.6413,202407,31,38000,-,17,개인,개인,2022,명천로21길 55,-,중개거래,대구 달서구,-,아파트
9,충청남도 천안시서북구 성거읍 신월리,2-26,2,26,천안삼환나우빌,84.9132,202407,31,22800,-,7,개인,개인,2005,봉주로 120,-,중개거래,충남 천안시서북구,-,아파트
10,경기도 양평군 양평읍 창대리,844,844,0,한화포레나양평,59.8551,202407,31,31500,-,19,개인,개인,2023,남북로 53,-,중개거래,경기 양평군,-,아파트


## * csv 파일 읽기
* pd.read_csv(파일명, 옵션)

In [25]:
df3 = pd.read_csv("./data/06고객이탈예측.csv", encoding = 'cp949')
df3

Unnamed: 0,회원ID,성별,고연령,배우자,피부양자,가입기간,전화서비스,2회선이상,인터넷서비스,온라인보안,...,기기보호서비스,기술지원,스트리밍TV,스트리밍Movies,약정옵션,온라인고지서,지불수단,월요금,합산요금,이탈여부
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


## * json 파일 읽기

In [30]:
df4 = pd.read_json("./data/Chicken_shops", encoding = 'cp949')

UnicodeDecodeError: 'cp949' codec can't decode byte 0xe1 in position 106: illegal multibyte sequence

## * html 파일 읽기

In [28]:
df5 = pd.read_html("./data/corpList.htm")
df5

[    Unnamed: 0                             Unnamed: 1  \
 0     한국제15호스팩                             금융 지원 서비스업   
 1       에스오에스랩  측정, 시험, 항해, 제어 및 기타 정밀기기 제조업; 광학기기 제외   
 2   미래에셋비전스팩6호                             금융 지원 서비스업   
 3   에이치엠씨제7호스팩                             금융 지원 서비스업   
 4        파라다이스                     유원지 및 기타 오락관련 서비스업   
 5       한중엔시에스      전동기, 발전기 및 전기 변환 · 공급 · 제어 장치 제조업   
 6     KB제29호스팩                             금융 지원 서비스업   
 7   미래에셋비전스팩5호                             금융 지원 서비스업   
 8     씨어스테크놀로지                             의료용 기기 제조업   
 9     한국제14호스팩                             금융 지원 서비스업   
 10   디비금융스팩12호                                 기타 금융업   
 11        라메디텍                             의료용 기기 제조업   
 12       그리드위즈                 그외 기타 전문, 과학 및 기술 서비스업   
 13       다원넥스뷰                        사진장비 및 광학기기 제조업   
 14  미래에셋비전스팩4호                             금융 지원 서비스업   
 
                                            Unnamed: 2  Unnamed: 3 Unn

In [29]:
df5 = pd.read_html("./data/corpList.htm")
df5[0]

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,한국제15호스팩,금융 지원 서비스업,기업인수합병,2024-06-26,12월,유한,,서울특별시
1,에스오에스랩,"측정, 시험, 항해, 제어 및 기타 정밀기기 제조업; 광학기기 제외",산업용 및 차량용 라이다(LiDAR),2024-06-25,12월,정지성,홈페이지 보기,광주광역시
2,미래에셋비전스팩6호,금융 지원 서비스업,기업인수합병,2024-06-24,12월,정명훈,,서울특별시
3,에이치엠씨제7호스팩,금융 지원 서비스업,기타금융,2024-06-24,12월,강신명,,서울특별시
4,파라다이스,유원지 및 기타 오락관련 서비스업,"카지노, 호텔, 복합리조트",2024-06-24,12월,최종환..,홈페이지 보기,서울특별시
5,한중엔시에스,"전동기, 발전기 및 전기 변환 · 공급 · 제어 장치 제조업","수냉식 냉각시스템 ESS Parts, 공랭식 ESS Module Parts, EV ...",2024-06-24,12월,김환식,홈페이지 보기,경상북도
6,KB제29호스팩,금융 지원 서비스업,기업인수합병,2024-06-21,12월,서영화,,서울특별시
7,미래에셋비전스팩5호,금융 지원 서비스업,기업인수합병,2024-06-19,12월,김대호,,서울특별시
8,씨어스테크놀로지,의료용 기기 제조업,심전도검사솔루션 입원환자모니터링솔루션,2024-06-19,12월,이영신,홈페이지 보기,경기도
9,한국제14호스팩,금융 지원 서비스업,기업인수합병,2024-06-19,12월,변성환,,서울특별시


In [1]:
import os
from sqlalchemy import create_engine
import pymysql
import pandas as pd
from dotenv import load_dotenv
 
pymysql.install_as_MySQLdb()

load_dotenv(dotenv_path="../05data_scraping/.env_db")



True

In [36]:
# MySQL에서 테이블 불러오기 

engine = create_engine(f"{os.getenv('db')}+{os.getenv('dbtype')}://{os.getenv('id')}:{os.getenv('pw')}@{os.getenv('host')}/{os.getenv('database')}")
conn = engine.connect()

data_07 = pd.read_sql('2024_07_stock_price_info', con=conn)
data_08 = pd.read_sql('2024_08_stock_price_info', con=conn)
conn.close()

In [38]:
data_07.head(10)

Unnamed: 0,수집일,회사명,종목코드,현재가,변동금액,변화율,전일,고가,거래량,시가,저가,거래대금
0,2024-07-30,산일전기,62040,55500,5300,10.56,50200,61300,12682123,55200,50900,718098
1,2024-07-30,에이치에스효성,487570,65700,16900,-20.46,82600,75900,372304,75400,64800,26079
2,2024-07-30,엔에이치스팩31호,481890,2035,15,0.74,2020,2040,313965,2025,2020,637
3,2024-07-30,SK증권제13호스팩,473950,2080,0,0.0,2080,2085,214025,2075,2075,445
4,2024-07-30,엑셀세라퓨틱스,373110,6780,180,2.73,6600,7160,1359481,6830,6550,9329
5,2024-07-30,이베스트스팩6호,478110,2060,0,0.0,2060,2065,61185,2055,2045,126
6,2024-07-30,시프트업,462870,67300,3200,4.99,64100,67800,832883,64100,64100,55607
7,2024-07-30,하스,450330,12560,40,-0.32,12600,13250,697300,12650,12330,8916
8,2024-07-30,이노스페이스,462350,22750,1250,-5.21,24000,24000,122218,24000,22750,2835
9,2024-07-30,신한글로벌액티브리츠,481850,2850,15,-0.52,2865,2855,266692,2855,2835,758


In [39]:
data_08.head(10)

Unnamed: 0,수집일,회사명,종목코드,현재가,변동금액,변화율,전일,고가,거래량,시가,저가,거래대금
0,2024-08-02,산일전기,62040,52500,400,0.77,52100,55300,1648658,54200,51800,88568
1,2024-08-02,에이치에스효성,487570,55000,5100,-8.49,60100,58400,63677,58400,54500,3598
2,2024-08-02,엔에이치스팩31호,481890,2045,10,-0.49,2055,2055,76286,2055,2045,156
3,2024-08-02,SK증권제13호스팩,473950,2090,10,-0.48,2100,2100,66668,2100,2090,140
4,2024-08-02,엑셀세라퓨틱스,373110,6080,320,-5.0,6400,6300,319509,6270,6000,1955
5,2024-08-02,이베스트스팩6호,478110,2060,5,-0.24,2065,2065,12245,2065,2055,25
6,2024-08-02,시프트업,462870,68700,1200,-1.72,69900,69500,220973,68500,67000,15114
7,2024-08-02,하스,450330,13440,660,-4.68,14100,14900,1226414,13920,13420,17516
8,2024-08-02,이노스페이스,462350,19830,2870,-12.64,22700,20350,402382,19730,19200,7978
9,2024-08-02,신한글로벌액티브리츠,481850,2800,10,-0.36,2810,2815,70562,2815,2790,197


## * MySQL 에서 titanic DB - passenger, surv, ticket 테이블 로드하기 

In [42]:
load_dotenv(dotenv_path="../05data_scraping/.env_db")

engine = create_engine(f"{os.getenv('db')}+{os.getenv('dbtype')}://{os.getenv('id')}:{os.getenv('pw')}@{os.getenv('host')}/titanic")
conn = engine.connect()

# 테이블 3개 불러오기 
passenger = pd.read_sql('passenger', con=conn)
ticket = pd.read_sql('ticket', con=conn)
surv = pd.read_sql('surv', con=conn)


conn.close()

In [43]:
passenger

# 기준으로 삼는다. 데이터가 제일 많기 때문

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0
1,192,"Carbines, Mr. William",male,19.0,0,0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0
...,...,...,...,...,...,...
618,580,"Jussila, Mr. Eiriik",male,32.0,0,0
619,503,"O'Sullivan, Miss. Bridget Mary",female,,0,0
620,538,"LeRoy, Miss. Bertha",female,30.0,0,0
621,197,"Mernagh, Mr. Robert",male,,0,0


In [44]:
ticket

Unnamed: 0,PassengerId,Ticket,Pclass,Fare,Cabin,Embarked
0,486,4133,3,25.4667,,S
1,119,PC 17558,1,247.5208,B58 B60,C
2,836,PC 17756,1,83.1583,E49,C
3,528,PC 17483,1,221.7792,C95,S
4,396,350052,3,7.7958,,S
...,...,...,...,...,...,...
440,692,349256,3,13.4167,,C
441,585,3411,3,8.7125,,C
442,265,382649,3,7.7500,,Q
443,328,28551,2,13.0000,D,S


In [45]:
surv

Unnamed: 0,PassengerId,Survived
0,762,0
1,665,1
2,809,0
3,332,0
4,21,0
...,...,...
441,698,1
442,778,1
443,157,1
444,350,0


## * 3개의 데이터프레임을 1개로 합치기
* sql에서의 조인과 같다
* join( )
* merge( ) : 조인과 동일
* concat( ) 

In [46]:
test1 = pd.concat([passenger, ticket, surv], axis = 1)
test1.head() 

# 합쳐진 것을 보면 엉망으로 합쳐짐

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,PassengerId.1,Ticket,Pclass,Fare,Cabin,Embarked,PassengerId.2,Survived
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,486.0,4133,3.0,25.4667,,S,762.0,0.0
1,192,"Carbines, Mr. William",male,19.0,0,0,119.0,PC 17558,1.0,247.5208,B58 B60,C,665.0,1.0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,836.0,PC 17756,1.0,83.1583,E49,C,809.0,0.0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,528.0,PC 17483,1.0,221.7792,C95,S,332.0,0.0
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,396.0,350052,3.0,7.7958,,S,21.0,0.0


In [47]:
test1 = pd.concat([passenger, ticket, surv], keys = 'PassengerId', axis = 1)
test1.head() 


  test1 = pd.concat([passenger, ticket, surv], keys = 'PassengerId', axis = 1)


Unnamed: 0_level_0,P,P,P,P,P,P,a,a,a,a,a,a,s,s
Unnamed: 0_level_1,PassengerId,Name,Sex,Age,SibSp,Parch,PassengerId,Ticket,Pclass,Fare,Cabin,Embarked,PassengerId,Survived
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,486.0,4133,3.0,25.4667,,S,762.0,0.0
1,192,"Carbines, Mr. William",male,19.0,0,0,119.0,PC 17558,1.0,247.5208,B58 B60,C,665.0,1.0
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,836.0,PC 17756,1.0,83.1583,E49,C,809.0,0.0
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,528.0,PC 17483,1.0,221.7792,C95,S,332.0,0.0
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,396.0,350052,3.0,7.7958,,S,21.0,0.0


In [48]:
test2 = pd.merge(passenger, ticket, how='inner', on='PassengerId')
test2.head()

# join 보다 merge 를 더 많이 사용함
# 2개의 테이블을 제대로 합치는 법

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked
0,192,"Carbines, Mr. William",male,19.0,0,0,28424,2,13.0,,S
1,715,"Greenberg, Mr. Samuel",male,52.0,0,0,250647,2,13.0,,S
2,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,2690,3,7.2292,,C
3,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,A/5. 3337,3,14.5,,S
4,597,"Leitch, Miss. Jessie Wills",female,,0,0,248727,2,33.0,,S


In [49]:
test3 = pd.merge(left = test2, right=surv, how='inner', on='PassengerId')
test3.head()

# 3개의 테이블을 제대로 합치는 법
# 겹치는 게 없어서 아무것도 뜨지 않는다. 

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked,Survived


In [50]:
ticket = ticket.set_index("PassengerId")
ticket 

Unnamed: 0_level_0,Ticket,Pclass,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
486,4133,3,25.4667,,S
119,PC 17558,1,247.5208,B58 B60,C
836,PC 17756,1,83.1583,E49,C
528,PC 17483,1,221.7792,C95,S
396,350052,3,7.7958,,S
...,...,...,...,...,...
692,349256,3,13.4167,,C
585,3411,3,8.7125,,C
265,382649,3,7.7500,,Q
328,28551,2,13.0000,D,S


In [51]:
passenger.join(ticket)

Unnamed: 0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Pclass,Fare,Cabin,Embarked
0,193,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19.0,1,0,,,,,
1,192,"Carbines, Mr. William",male,19.0,0,0,A/5 21171,3.0,7.250,,S
2,715,"Greenberg, Mr. Samuel",male,52.0,0,0,,,,,
3,533,"Elias, Mr. Joseph Jr",male,17.0,1,1,STON/O2. 3101282,3.0,7.925,,S
4,133,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,113803,1.0,53.100,C123,S
...,...,...,...,...,...,...,...,...,...,...,...
618,580,"Jussila, Mr. Eiriik",male,32.0,0,0,A/5. 3336,3.0,16.100,,S
619,503,"O'Sullivan, Miss. Bridget Mary",female,,0,0,230136,2.0,39.000,F4,S
620,538,"LeRoy, Miss. Bertha",female,30.0,0,0,31028,2.0,10.500,,S
621,197,"Mernagh, Mr. Robert",male,,0,0,,,,,


# ==========================================

## * 깃허브에서 파일 바로 연동시키기 

In [52]:
data = pd.read_csv("https://raw.githubusercontent.com/haram4th/ablearn/main/Taitanic_train.csv")
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임이 어떻게 생겼는지 조회하는 명령어

* 앞쪽 5개 행을 읽는 head()    head(행갯수)
* 뒤쪽 5개 행을 읽는 tail()    tail(행갯수)

* head는 60을 넘어가면 자동 축약 출력, 즉 60개까지만 출력됨

In [57]:
data.head()


# .head() : 앞에서 5개만 출력함

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [58]:
data.tail()


# .tail() : 뒤에서 5개만 출력함

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [59]:
data.head(2)

# 앞에서 2개만 보기 

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [61]:
data.tail(7)

# 뒤에서 7개만 보기 

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [64]:
# head는 60을 넘어가면 자동 축약 출력, 즉 60개까지만 전부 출력됨

data.head(60)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [67]:
# head는 60을 넘어가면 자동 축약 출력
# 0,1,2,3,4, ......... 60,61,62 등

data.head(65)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
60,61,0,3,"Sirayanian, Mr. Orsen",male,22.0,0,0,2669,7.2292,,C
61,62,1,1,"Icard, Miss. Amelie",female,38.0,0,0,113572,80.0000,B28,
62,63,0,1,"Harris, Mr. Henry Birkhardt",male,45.0,1,0,36973,83.4750,C83,S
63,64,0,3,"Skoog, Master. Harald",male,4.0,3,2,347088,27.9000,,S


## * 데이터프레임의 컬럼명, 결측치, 데이터타입을 같이 표시하는 명령어
* .info( )

In [69]:
data.info()

# 결측값, 칼럼, 데이터타입 등 다양한 정보를 알 수 있음 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## * 데이터프레임에서 숫자 데이터의 통계를 보여주는 명령어

* .describe( ) 

In [70]:
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


## * pandas에서 일부 자료만 추출하기 

### 1) 데이터프레임에서 1개 컬럼만 가져오기 

In [71]:
data.head(1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [73]:
data['Name']

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [74]:
display(data['Name'])
print(display(data['Name']))

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

None


### 2) 데이터프레임에서 2개 이상 컬럼 가져오기 

In [75]:
data[['Name', 'Sex']]

Unnamed: 0,Name,Sex
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female
4,"Allen, Mr. William Henry",male
...,...,...
886,"Montvila, Rev. Juozas",male
887,"Graham, Miss. Margaret Edith",female
888,"Johnston, Miss. Catherine Helen ""Carrie""",female
889,"Behr, Mr. Karl Howell",male


## * 데이터프레임 컬럼 순서 바꾸기 

In [77]:
data = data[['PassengerId','Pclass', 'Survived', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']]
data

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,3,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임에서 일부 컬럼만 가져오기 2 

* loc : 인덱스 이름과 컬럼명으로 일부를 추출해서 가져오기 
* iloc : 슬라이싱을 이용해서 가져옴

In [85]:
data.loc[3:5,['Ticket', 'Name']]

# 앞쪽에는 인덱싱 번호, 뒷부분에는 출력할 컬럼만 

Unnamed: 0,Ticket,Name
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
5,330877,"Moran, Mr. James"


In [83]:
data.loc[:,['Ticket', 'Name']]

# 전체를 불러오기 

Unnamed: 0,Ticket,Name
0,A/5 21171,"Braund, Mr. Owen Harris"
1,PC 17599,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,STON/O2. 3101282,"Heikkinen, Miss. Laina"
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
...,...,...
886,211536,"Montvila, Rev. Juozas"
887,112053,"Graham, Miss. Margaret Edith"
888,W./C. 6607,"Johnston, Miss. Catherine Helen ""Carrie"""
889,111369,"Behr, Mr. Karl Howell"


In [84]:
data.loc[100:130,['Ticket', 'Name']]

Unnamed: 0,Ticket,Name
100,349245,"Petranec, Miss. Matilda"
101,349215,"Petroff, Mr. Pastcho (""Pentcho"")"
102,35281,"White, Mr. Richard Frasar"
103,7540,"Johansson, Mr. Gustaf Joel"
104,3101276,"Gustafsson, Mr. Anders Vilhelm"
105,349207,"Mionoff, Mr. Stoytcho"
106,343120,"Salkjelsvik, Miss. Anna Kristine"
107,312991,"Moss, Mr. Albert Johan"
108,349249,"Rekic, Mr. Tido"
109,371110,"Moran, Miss. Bertha"


In [87]:
data.iloc[0:10]

# 인덱스로만 가져오고 싶을 때는 iloc 를 사용하기 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,3,0,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,1,0,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,3,0,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,3,1,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,2,1,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [88]:
data.loc[:130,['Ticket', 'Name']]

# 0 ~ 130 까지 

Unnamed: 0,Ticket,Name
0,A/5 21171,"Braund, Mr. Owen Harris"
1,PC 17599,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,STON/O2. 3101282,"Heikkinen, Miss. Laina"
3,113803,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,373450,"Allen, Mr. William Henry"
...,...,...
126,370372,"McMahon, Mr. Martin"
127,C 17369,"Madsen, Mr. Fridtjof Arne"
128,2668,"Peter, Miss. Anna"
129,347061,"Ekstrom, Mr. Johan"


In [90]:
data.loc[500:,['Ticket', 'Name']]

# 500 ~ 끝까지

Unnamed: 0,Ticket,Name
500,315086,"Calic, Mr. Petar"
501,364846,"Canavan, Miss. Mary"
502,330909,"O'Sullivan, Miss. Bridget Mary"
503,4135,"Laitinen, Miss. Kristina Sofia"
504,110152,"Maioni, Miss. Roberta"
...,...,...
886,211536,"Montvila, Rev. Juozas"
887,112053,"Graham, Miss. Margaret Edith"
888,W./C. 6607,"Johnston, Miss. Catherine Helen ""Carrie"""
889,111369,"Behr, Mr. Karl Howell"


In [91]:
data.iloc[:20, 4:]

Unnamed: 0,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,male,22.0,1,0,A/5 21171,7.25,,S
1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,female,35.0,1,0,113803,53.1,C123,S
4,male,35.0,0,0,373450,8.05,,S
5,male,,0,0,330877,8.4583,,Q
6,male,54.0,0,0,17463,51.8625,E46,S
7,male,2.0,3,1,349909,21.075,,S
8,female,27.0,0,2,347742,11.1333,,S
9,female,14.0,1,0,237736,30.0708,,C


In [93]:
data.iloc[:20:2, ::2]

# 2칸식 건너뛰기

Unnamed: 0,PassengerId,Survived,Sex,SibSp,Ticket,Cabin
0,1,0,male,1,A/5 21171,
2,3,1,female,0,STON/O2. 3101282,
4,5,0,male,0,373450,
6,7,0,male,0,17463,E46
8,9,1,female,0,347742,
10,11,1,female,1,PP 9549,G6
12,13,0,male,0,A/5. 2151,
14,15,0,female,0,350406,
16,17,0,male,4,382652,
18,19,0,female,1,345763,


In [94]:
data.iloc[:20:2, ::-1]

Unnamed: 0,Embarked,Cabin,Fare,Ticket,Parch,SibSp,Age,Sex,Name,Survived,Pclass,PassengerId
0,S,,7.25,A/5 21171,0,1,22.0,male,"Braund, Mr. Owen Harris",0,3,1
2,S,,7.925,STON/O2. 3101282,0,0,26.0,female,"Heikkinen, Miss. Laina",1,3,3
4,S,,8.05,373450,0,0,35.0,male,"Allen, Mr. William Henry",0,3,5
6,S,E46,51.8625,17463,0,0,54.0,male,"McCarthy, Mr. Timothy J",0,1,7
8,S,,11.1333,347742,2,0,27.0,female,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",1,3,9
10,S,G6,16.7,PP 9549,1,1,4.0,female,"Sandstrom, Miss. Marguerite Rut",1,3,11
12,S,,8.05,A/5. 2151,0,0,20.0,male,"Saundercock, Mr. William Henry",0,3,13
14,S,,7.8542,350406,0,0,14.0,female,"Vestrom, Miss. Hulda Amanda Adolfina",0,3,15
16,Q,,29.125,382652,1,4,2.0,male,"Rice, Master. Eugene",0,3,17
18,S,,18.0,345763,0,1,31.0,female,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",0,3,19


## * 특정 컬럼에서 유일값 요소 출력하기 
* .unique( ) 

In [96]:
data['Embarked'].unique()

array(['S', 'C', 'Q', nan], dtype=object)

In [98]:
data['Pclass'].unique()

array([3, 1, 2])

## * 특정 칼럼에서 유일값 개수 출력
* .nunique( )

In [99]:
data['Name'].nunique()

891

In [100]:
data['Embarked'].nunique()

3

## * 카테고리 컬럼에서 유일값 별 개수를 세는 함수
* .value_counts( )

In [102]:
data['Pclass'].value_counts()

Pclass
3    491
1    216
2    184
Name: count, dtype: int64

In [103]:
data['Embarked'].value_counts()

Embarked
S    644
C    168
Q     77
Name: count, dtype: int64

## * 데이터를 정렬하는 함수 
* sort_values(by="컬럼명")

In [104]:
data.sort_values(by="Name")

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
845,846,3,0,"Abbing, Mr. Anthony",male,42.0,0,0,C.A. 5547,7.5500,,S
746,747,3,0,"Abbott, Mr. Rossmore Edward",male,16.0,1,1,C.A. 2673,20.2500,,S
279,280,3,1,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35.0,1,1,C.A. 2673,20.2500,,S
308,309,2,0,"Abelson, Mr. Samuel",male,30.0,1,0,P/PP 3381,24.0000,,C
874,875,2,1,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0000,,C
...,...,...,...,...,...,...,...,...,...,...,...,...
286,287,3,1,"de Mulder, Mr. Theodore",male,30.0,0,0,345774,9.5000,,S
282,283,3,0,"de Pelsmaeker, Mr. Alfons",male,16.0,0,0,345778,9.5000,,S
361,362,2,0,"del Carlo, Mr. Sebastiano",male,29.0,1,0,SC/PARIS 2167,27.7208,,C
153,154,3,0,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5000,,S


In [107]:
data.sort_values(by="Age")

# 나이가 어린 순으로 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
803,804,3,1,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
755,756,2,1,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5000,,S
644,645,3,1,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
469,470,3,1,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
78,79,2,1,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,3,0,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,3,0,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,3,0,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,3,0,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [108]:
data.sort_values(by="Age", ascending  = False)

# 나이가 많은 순으로 

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,3,0,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,1,0,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,1,0,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,3,0,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,3,0,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,3,0,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,3,0,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,3,0,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [111]:
data.sort_values(by=["Age","Name"], ascending  = False)

# age 랑 name 둘다 내림차순이다. 
# ascending = False : 내림차순

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,3,0,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
96,97,1,0,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
493,494,1,0,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
116,117,3,0,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
507,508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.5500,,S
593,594,3,0,"Bourke, Miss. Mary",female,,0,2,364848,7.7500,,Q
140,141,3,0,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
598,599,3,0,"Boulos, Mr. Hanna",male,,0,0,2664,7.2250,,C


In [113]:
data.sort_values(by=["Age","Name"], ascending  = [False, True])

# age : 내림차순 , ascending  = False
# name : 오름차순 , ascending  = True

Unnamed: 0,PassengerId,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,3,0,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,1,0,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,1,0,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,3,0,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5000,C52,S
354,355,3,0,"Yousif, Mr. Wazli",male,,0,0,2647,7.2250,,C
495,496,3,0,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C
240,241,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C


## *  컬럼을 인덱스로 지정하기
* set_index("컬럼명") 

In [118]:
data.set_index("PassengerId")

# PassengerId 를 인덱스로 지정하는 거 

Unnamed: 0_level_0,Pclass,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,3,0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,3,1,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
5,3,0,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,2,0,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,3,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [119]:
data = data.set_index(["PassengerId", "Name"])
data 

# PassengerId 랑 Name 을 멀티인덱스로 지정하는 거 

Unnamed: 0_level_0,Unnamed: 1_level_0,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
2,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 인덱스를 숫자로 초기화 
* reset_index(drop = "원래 인덱스를 유지? 삭제) 

In [120]:
data

# 현재 여기서 PassengerId, Name 는 인덱스로 지정되어 있다.

Unnamed: 0_level_0,Unnamed: 1_level_0,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
2,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [122]:
data.reset_index(drop=True)

# 지정한 인덱스 삭제하기 

Unnamed: 0,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,1,female,35.0,1,0,113803,53.1000,C123,S
4,3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...
886,2,0,male,27.0,0,0,211536,13.0000,,S
887,1,1,female,19.0,0,0,112053,30.0000,B42,S
888,3,0,female,,1,2,W./C. 6607,23.4500,,S
889,1,1,male,26.0,0,0,111369,30.0000,C148,C


In [123]:
data = data.reset_index()
data

Unnamed: 0,PassengerId,Name,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 컬럼 이름 출력 
* .columns 

In [125]:
data.columns[0:5]

Index(['PassengerId', 'Name', 'Pclass', 'Survived', 'Sex'], dtype='object')

In [126]:
col = data.columns
col

Index(['PassengerId', 'Name', 'Pclass', 'Survived', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

## * 컬럼 이름 변경하기 
* rename({이름1 : 변경할 이름})
* columns = [컬럼이름]   <br>
이렇게 덮어쓰면 이름이 변경됨

In [131]:
data = data.rename(columns = {"PassengerId" : "승객번호"})
data 

# 컬럼명을 "PassengerId" -->  "승객번호"  로 변경하기 

Unnamed: 0,승객번호,Name,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [130]:
data

Unnamed: 0,승객번호,Name,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [132]:
data = data.rename(columns = {"Name" : "승객이름"})
data 

# 컬럼명을 "Name" ---> "승객이름" 로 변경하기 

Unnamed: 0,승객번호,승객이름,Pclass,Survived,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


In [133]:
data.columns = ['승객번호', '이름', '선실등급', 'Survived', '성별', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

In [134]:
data.columns

Index(['승객번호', '이름', '선실등급', 'Survived', '성별', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [135]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 컬럼 추가/삭제하기 

In [136]:
data['가족수'] = data['SibSp']

In [137]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S,1
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [143]:
data['테스트'] = '테스트'
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수,테스트
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S,1,테스트
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1,테스트
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0,테스트
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1,테스트
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S,0,테스트
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S,0,테스트
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0,테스트
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S,1,테스트
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0,테스트


## * 컬럼 삭제하기 
* del 변수명['컬럼명']
* 변수명.drop("컬럼명", axis = 1) <br>  변수명.drop(["컬럼명1", "컬럼명2"], axis = 1)

In [144]:
del data['테스트']
data

# '테스트' 칼럼 지우기 

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S,1
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [146]:
data.drop('가족수', axis = 1)

# '가족수' 칼럼 삭제하기 

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C


## * 데이터프레임의 칼럼과 칼럼을 연산해서 계산 가능

In [147]:
data['가족수'] = data['SibSp'] + data['Parch'] 

In [148]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S,1
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S,0
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S,3
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0


In [151]:
data['승차권/좌석'] = data['Ticket'] + data['Cabin'] 

# 둘다 문자열이기 때문에 더해지는거 가능함 

In [152]:
data

Unnamed: 0,승객번호,이름,선실등급,Survived,성별,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,가족수,승차권/좌석
0,1,"Braund, Mr. Owen Harris",3,0,male,22.0,1,0,A/5 21171,7.2500,,S,1,
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,1,female,38.0,1,0,PC 17599,71.2833,C85,C,1,PC 17599C85
2,3,"Heikkinen, Miss. Laina",3,1,female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0,
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,1,female,35.0,1,0,113803,53.1000,C123,S,1,113803C123
4,5,"Allen, Mr. William Henry",3,0,male,35.0,0,0,373450,8.0500,,S,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,"Montvila, Rev. Juozas",2,0,male,27.0,0,0,211536,13.0000,,S,0,
887,888,"Graham, Miss. Margaret Edith",1,1,female,19.0,0,0,112053,30.0000,B42,S,0,112053B42
888,889,"Johnston, Miss. Catherine Helen ""Carrie""",3,0,female,,1,2,W./C. 6607,23.4500,,S,3,
889,890,"Behr, Mr. Karl Howell",1,1,male,26.0,0,0,111369,30.0000,C148,C,0,111369C148


## * 데이터타입 바꾸기 

In [153]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 891 entries, 0 to 890
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   승객번호      891 non-null    int64  
 1   이름        891 non-null    object 
 2   선실등급      891 non-null    int64  
 3   Survived  891 non-null    int64  
 4   성별        891 non-null    object 
 5   Age       714 non-null    float64
 6   SibSp     891 non-null    int64  
 7   Parch     891 non-null    int64  
 8   Ticket    891 non-null    object 
 9   Fare      891 non-null    float64
 10  Cabin     204 non-null    object 
 11  Embarked  889 non-null    object 
 12  가족수       891 non-null    int64  
 13  승차권/좌석    204 non-null    object 
dtypes: float64(2), int64(6), object(6)
memory usage: 104.4+ KB


In [158]:
data = data['승객번호'].astype('float32')

In [159]:
data.info()

<class 'pandas.core.series.Series'>
Index: 891 entries, 0 to 890
Series name: 승객번호
Non-Null Count  Dtype  
--------------  -----  
891 non-null    float32
dtypes: float32(1)
memory usage: 10.4 KB
