# DataFrame 객체

4.1 개요

In [220]:
# 시작하기
import pandas as pd
import numpy as np

In [221]:
city_data = {
    "City" : ['New York', 'Paris', 'Barcelona', 'Rome'],
    "Country" : ['United States', 'France', 'Spain', 'Italy'],
    "Population" : pd.Series([8600000, 2141000, 5515000, 2873000])
}

cities = pd.DataFrame(city_data)
cities

Unnamed: 0,City,Country,Population
0,New York,United States,8600000
1,Paris,France,2141000
2,Barcelona,Spain,5515000
3,Rome,Italy,2873000


In [222]:
# 열 <-> 행 인덱스(레이블) 교환하기!
cities.transpose()
cities.T  # same result

Unnamed: 0,0,1,2,3
City,New York,Paris,Barcelona,Rome
Country,United States,France,Spain,Italy
Population,8600000,2141000,5515000,2873000


In [223]:
random_data = np.random.randint(1,101,[3,5])   # 3행 5열의 1~101 무작위 정수 추출
random_data

array([[98, 56, 10, 15, 97],
       [13,  2, 88, 45, 37],
       [60, 11, 56, 42, 87]])

In [224]:
pd.DataFrame( data=random_data )   # 행열 데이터를 바로 DF 할 수 있음

Unnamed: 0,0,1,2,3,4
0,98,56,10,15,97
1,13,2,88,45,37
2,60,11,56,42,87


In [225]:
row_labels = ["Morning", 'Afternoon', 'Evening']
temperatures = pd.DataFrame(
    data = random_data, index = row_labels   # 인덱스를 부여하기
)
temperatures

Unnamed: 0,0,1,2,3,4
Morning,98,56,10,15,97
Afternoon,13,2,88,45,37
Evening,60,11,56,42,87


In [226]:
row_labels = ["Morning", 'Afternoon', 'Evening']
column_labels = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday"
)
pd.DataFrame(
    data = random_data, 
    index = row_labels,
    columns = column_labels   # column 라벨도 입력
)

Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday
Morning,98,56,10,15,97
Afternoon,13,2,88,45,37
Evening,60,11,56,42,87


In [227]:
# 중복된 값이 있을 때 : 그래도 잘 들어간다 (나중에 인식이 될란가)
row_labels = ["Morning", 'Afternoon', 'Morning']   #모닝 두 개
column_labels = (
    "Monday",
    "Tuesday",  # 화요일 두 개
    "Wednesday",
    "Tuesday",
    "Friday"
)
pd.DataFrame(
    data = random_data, 
    index = row_labels,
    columns = column_labels 
)

Unnamed: 0,Monday,Tuesday,Wednesday,Tuesday.1,Friday
Morning,98,56,10,15,97
Afternoon,13,2,88,45,37
Morning,60,11,56,42,87


### 4.2 Series와 DataFrame의 유사점

In [228]:
# .read_csv
pd.read_csv("nba.csv")

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
1,Christian Wood,Detroit Pistons,PF,9/27/95,1645357
2,PJ Washington,Charlotte Hornets,PF,8/23/98,3831840
3,Derrick Rose,Detroit Pistons,PG,10/4/88,7317074
4,Marial Shayok,Philadelphia 76ers,G,7/26/95,79568
...,...,...,...,...,...
445,Austin Rivers,Houston Rockets,PG,8/1/92,2174310
446,Harry Giles,Sacramento Kings,PF,4/22/98,2578800
447,Robin Lopez,Milwaukee Bucks,C,4/1/88,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1/4/99,4764960


In [229]:
# parse_dates : 값을 날짜/시간 유형으로 강제 변환
nba = pd.read_csv("nba.csv", parse_dates = ["Birthday"])
nba

  nba = pd.read_csv("nba.csv", parse_dates = ["Birthday"])


Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...,...
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


In [230]:
# 2. Series와 DataFrame 속성의 유사점과 차이점
print(pd.Series([1,2,3]).dtype,'\n')
print(nba.dtypes)   # Series 타입으로 반환, 열을 인덱스 라벨로 표현

int64 

Name                object
Team                object
Position            object
Birthday    datetime64[ns]
Salary               int64
dtype: object


In [231]:
# .value_counts() : 값의 개수 반환
nba.dtypes.value_counts()  # 타입의 종류가 각 몇 개인지

object            3
datetime64[ns]    1
int64             1
Name: count, dtype: int64

In [232]:
print(nba.columns,'\n')
print(nba.ndim,'\n')
print(nba.shape,'\n')
print(nba.size,'\n')     # 전체 값의 개수
print(nba.count(),'\n')  # 열별 유효한 값의 개수 반환

Index(['Name', 'Team', 'Position', 'Birthday', 'Salary'], dtype='object') 

2 

(450, 5) 

2250 

Name        450
Team        450
Position    450
Birthday    450
Salary      450
dtype: int64 



In [233]:
nba.count().sum()   # count()의 합계 = size() (결측값이 없기 때문)

2250

In [234]:
# 결측값이 있다면
data = {
    "A" : [1, np.nan],
    "B" : [2, 3]
}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B
0,1.0,2
1,,3


In [235]:
df.size, df.count()   # size는 결측값 포함, count는 결측값 제외!

(4,
 A    1
 B    2
 dtype: int64)

In [236]:
# 3. Series와 DataFrame의 공통 메서드
# head, tail, sample, nunique
print(nba.head(2))
print()
print(nba.tail(n=3))
print()
print(nba.tail())
print()
print(nba.sample(3))
print()
print(nba.nunique())
print()

             Name                Team Position   Birthday   Salary
0    Shake Milton  Philadelphia 76ers       SG 1996-09-26  1445697
1  Christian Wood     Detroit Pistons       PF 1995-09-27  1645357

              Name                 Team Position   Birthday    Salary
447    Robin Lopez      Milwaukee Bucks        C 1988-04-01   4767000
448  Collin Sexton  Cleveland Cavaliers       PG 1999-01-04   4764960
449    Ricky Rubio         Phoenix Suns       PG 1990-10-21  16200000

              Name                 Team Position   Birthday    Salary
445  Austin Rivers      Houston Rockets       PG 1992-08-01   2174310
446    Harry Giles     Sacramento Kings       PF 1998-04-22   2578800
447    Robin Lopez      Milwaukee Bucks        C 1988-04-01   4767000
448  Collin Sexton  Cleveland Cavaliers       PG 1999-01-04   4764960
449    Ricky Rubio         Phoenix Suns       PG 1990-10-21  16200000

                Name                Team Position   Birthday   Salary
244         Miye Oni      

In [237]:
print(nba.max())
print()
print(nba.min())

Name             Zylan Cheatham
Team         Washington Wizards
Position                     SG
Birthday    2000-12-23 00:00:00
Salary                 40231758
dtype: object

Name               Aaron Gordon
Team              Atlanta Hawks
Position                      C
Birthday    1977-01-26 00:00:00
Salary                    79568
dtype: object


In [238]:
# nlargest, .nsmallest : 해당 column 의 가장 큰/작은 원소를 n 개 출력 
nba.nlargest(n=4, columns="Salary")   # Salary 열에서 가장 큰 순서대로 출력

Unnamed: 0,Name,Team,Position,Birthday,Salary
205,Stephen Curry,Golden State Warriors,PG,1988-03-14,40231758
38,Chris Paul,Oklahoma City Thunder,PG,1985-05-06,38506482
219,Russell Westbrook,Houston Rockets,PG,1988-11-12,38506482
251,John Wall,Washington Wizards,PG,1990-09-06,38199000


In [239]:
nba.nsmallest(n=3, columns=['Birthday'])    # Birthday 열에서 작은 순서대로

Unnamed: 0,Name,Team,Position,Birthday,Salary
98,Vince Carter,Atlanta Hawks,PF,1977-01-26,2564753
196,Udonis Haslem,Miami Heat,C,1980-06-09,2564753
262,Kyle Korver,Milwaukee Bucks,PF,1981-03-17,6004753


In [240]:
nba.sum(numeric_only=True)  # 숫자 값만 합하기 / object는 문자열 합처럼 출력된다

Salary    3444112694
dtype: int64

In [241]:
print(nba.mean(numeric_only=True))
print()
print(nba.median(numeric_only=True))
print()
print(nba.mode(numeric_only=True))  # 최빈값
print()
print(nba.std(numeric_only=True))
print()

Salary    7.653584e+06
dtype: float64

Salary    3303074.5
dtype: float64

   Salary
0   79568

Salary    9.288810e+06
dtype: float64



### 4.3 DataFrame 정렬

In [242]:
# 알파벳 순 정렬
nba.sort_values("Name")
nba.sort_values(by="Name")

Unnamed: 0,Name,Team,Position,Birthday,Salary
52,Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
101,Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
437,Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
81,Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
...,...,...,...,...,...
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440


In [243]:
nba.sort_values("Name", ascending=False).head()   # ascending 변수 가능 / .head() 가능

Unnamed: 0,Name,Team,Position,Birthday,Salary
248,Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000


In [244]:
nba.sort_values("Birthday", ascending=False).head()  # Birthday 기준

Unnamed: 0,Name,Team,Position,Birthday,Salary
136,Sekou Doumbouya,Detroit Pistons,SF,2000-12-23,3285120
432,Talen Horton-Tucker,Los Angeles Lakers,GF,2000-11-25,898310
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
313,RJ Barrett,New York Knicks,SG,2000-06-14,7839960
392,Jalen Lecque,Phoenix Suns,G,2000-06-13,898310


In [245]:
nba.sort_values(by=["Team","Name"], ascending=False)  
# 다중 열 기준으로 정렬; Team 기준으로 정렬한 다음 Name 기준 정렬함; 내림차순 정렬 (ascending)

Unnamed: 0,Name,Team,Position,Birthday,Salary
36,Thomas Bryant,Washington Wizards,C,1997-07-31,8000000
21,Rui Hachimura,Washington Wizards,PF,1998-02-08,4469160
428,Moritz Wagner,Washington Wizards,C,1997-04-26,2063520
273,Justin Robinson,Washington Wizards,PG,1997-10-12,898310
418,Jordan McRae,Washington Wizards,PG,1991-03-28,1645357
...,...,...,...,...,...
194,Cam Reddish,Atlanta Hawks,SF,1999-09-01,4245720
438,Bruno Fernando,Atlanta Hawks,C,1998-08-15,1400000
276,Brandon Goodwin,Atlanta Hawks,PG,1995-10-02,79568
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000


In [246]:
nba = nba.sort_values(by=["Team","Salary"], ascending=[True, False])    # 이렇게 한 변수만 내림차순 정렬도 가능
nba
# 변수 할당

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000
...,...,...,...,...,...
80,Isaac Bonga,Washington Wizards,PG,1999-11-08,1416852
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
273,Justin Robinson,Washington Wizards,PG,1997-10-12,898310
283,Garrison Mathews,Washington Wizards,SG,1996-10-24,79568


### 4.4 인덱스별 정렬 .sort_index

In [247]:
nba.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000


In [248]:
nba.sort_index().head() 
nba.sort_index(ascending=True).head()  # 두 값은 동일, 인덱스 기준 오름차순 정렬

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


In [249]:
nba.sort_index(ascending=False).head()  # 내림차순 정렬

Unnamed: 0,Name,Team,Position,Birthday,Salary
449,Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310


In [250]:
# 처음 상태로 되돌리기
nba = nba.sort_index()

In [251]:
# 열을 순서대로 정리하기; 똑같이 sort_index(), 대신 axis="columns" or =1
nba.sort_index(axis="columns").head()  # 교재가 이상한데...?

Unnamed: 0,Birthday,Name,Position,Salary,Team
0,1996-09-26,Shake Milton,SG,1445697,Philadelphia 76ers
1,1995-09-27,Christian Wood,PF,1645357,Detroit Pistons
2,1998-08-23,PJ Washington,PF,3831840,Charlotte Hornets
3,1988-10-04,Derrick Rose,PG,7317074,Detroit Pistons
4,1995-07-26,Marial Shayok,G,79568,Philadelphia 76ers


In [252]:
nba.sort_index(axis="columns", ascending=False).head()  # 내림차순도 가능

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26
1,Detroit Pistons,1645357,PF,Christian Wood,1995-09-27
2,Charlotte Hornets,3831840,PF,PJ Washington,1998-08-23
3,Detroit Pistons,7317074,PG,Derrick Rose,1988-10-04
4,Philadelphia 76ers,79568,G,Marial Shayok,1995-07-26


### 4.5 새 인덱스 설정 .set_index()

In [253]:
nba.set_index(keys = "Name")
nba = nba.set_index("Name") # 변수 설정
nba

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...
Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


In [254]:
# 다시 이전 데이터로 복원
nba = pd.read_csv("nba.csv", parse_dates=["Birthday"], index_col="Name")

  nba = pd.read_csv("nba.csv", parse_dates=["Birthday"], index_col="Name")


### 4.6 DataFrame에서 열과 행 선택

In [255]:
# 단일 열 선택
nba.Salary   # .col 로 열 선택 가능

Name
Shake Milton       1445697
Christian Wood     1645357
PJ Washington      3831840
Derrick Rose       7317074
Marial Shayok        79568
                    ...   
Austin Rivers      2174310
Harry Giles        2578800
Robin Lopez        4767000
Collin Sexton      4764960
Ricky Rubio       16200000
Name: Salary, Length: 450, dtype: int64

In [256]:
nba["Position"]  # DF[ ] 통해서도 가능

Name
Shake Milton      SG
Christian Wood    PF
PJ Washington     PF
Derrick Rose      PG
Marial Shayok      G
                  ..
Austin Rivers     PG
Harry Giles       PF
Robin Lopez        C
Collin Sexton     PG
Ricky Rubio       PG
Name: Position, Length: 450, dtype: object

In [257]:
nba["Player Position"]   # 이렇게 공백이 있을 때 [ ] 사용하면 됨

KeyError: 'Player Position'

In [None]:
# 다중 열 선택
nba[["Salary", "Birthday"]].head()  # 여러 열을 한 번에 추출

Unnamed: 0_level_0,Salary,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,1445697,1996-09-26
Christian Wood,1645357,1995-09-27
PJ Washington,3831840,1998-08-23
Derrick Rose,7317074,1988-10-04
Marial Shayok,79568,1995-07-26


In [None]:
nba[[ "Birthday", "Salary"]].head()  # 리스트 순서에 따라 추출

Unnamed: 0_level_0,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,1996-09-26,1445697
Christian Wood,1995-09-27,1645357
PJ Washington,1998-08-23,3831840
Derrick Rose,1988-10-04,7317074
Marial Shayok,1995-07-26,79568


In [None]:
nba.select_dtypes(include="object")  # 데이터 타입에 따라 선택한다: object 포함

Unnamed: 0_level_0,Team,Position
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,Philadelphia 76ers,SG
Christian Wood,Detroit Pistons,PF
PJ Washington,Charlotte Hornets,PF
Derrick Rose,Detroit Pistons,PG
Marial Shayok,Philadelphia 76ers,G
...,...,...
Austin Rivers,Houston Rockets,PG
Harry Giles,Sacramento Kings,PF
Robin Lopez,Milwaukee Bucks,C
Collin Sexton,Cleveland Cavaliers,PG


In [None]:
nba.select_dtypes(exclude=["object", "int"])  # 제외하기: exclude=

Unnamed: 0_level_0,Birthday
Name,Unnamed: 1_level_1
Shake Milton,1996-09-26
Christian Wood,1995-09-27
PJ Washington,1998-08-23
Derrick Rose,1988-10-04
Marial Shayok,1995-07-26
...,...
Austin Rivers,1992-08-01
Harry Giles,1998-04-22
Robin Lopez,1988-04-01
Collin Sexton,1999-01-04


### 4.7 DataFrame 에서 행 선택 .loc .iloc

In [None]:
# 인덱스 레이블로 행 추출
nba.loc["LeBron James"]   # .loc[index label]

Team         Los Angeles Lakers
Position                     PF
Birthday    1984-12-30 00:00:00
Salary                 37436858
Name: LeBron James, dtype: object

In [None]:
nba.loc[["Kawhi Leonard", "Paul George"]]  # 여러 레코드를 DF로 추출

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kawhi Leonard,Los Angeles Clippers,SF,1991-06-29,32742000
Paul George,Los Angeles Clippers,SF,1990-05-02,33005556


In [None]:
nba.loc[["Paul George", "Kawhi Leonard"]]  # 순서대로 추출된다

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Paul George,Los Angeles Clippers,SF,1990-05-02,33005556
Kawhi Leonard,Los Angeles Clippers,SF,1991-06-29,32742000


In [None]:
# 슬라이싱
nba.sort_index().loc["Otto Porter":"Patrick Beverley"]  # 정렬 후 슬라이싱; 끝값 포함

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Otto Porter,Chicago Bulls,SF,1993-06-03,27250576
PJ Dozier,Denver Nuggets,PG,1996-10-25,79568
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Pascal Siakam,Toronto Raptors,PF,1994-04-02,2351838
Pat Connaughton,Milwaukee Bucks,SG,1993-01-06,1723050
Patrick Beverley,Los Angeles Clippers,PG,1988-07-12,12345680


In [None]:
player = ["AAA", "BBB", "CCC"]
player[0:2]   # 리스트 슬라이싱엔 끝값을 포함하지 않음

['AAA', 'BBB']

In [None]:
nba.sort_index().loc["Zach Collins":]  # 끝까지 추출

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zach Collins,Portland Trail Blazers,C,1997-11-19,4240200
Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568


In [None]:
nba.sort_index().loc[:"Al Horford"]  # 처음부터 추출

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
Al Horford,Philadelphia 76ers,C,1986-06-03,28000000


In [None]:
# nba.loc["Bugs Bunny"]   # 없으면 오류

In [None]:
# 인덱스 위치로 행 추출 : .iloc
nba.iloc[300]

Team             Denver Nuggets
Position                     PF
Birthday    1999-04-03 00:00:00
Salary                  1416852
Name: Jarred Vanderbilt, dtype: object

In [None]:
nba.iloc[[100, 200, 300, 400]]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brian Bowen,Indiana Pacers,SG,1998-10-02,79568
Marco Belinelli,San Antonio Spurs,SF,1986-03-25,5846154
Jarred Vanderbilt,Denver Nuggets,PF,1999-04-03,1416852
Louis King,Detroit Pistons,F,1999-04-06,79568


In [None]:
nba.iloc[400:404]  # 리스트 슬라이싱과 같다: 끝 값 포함 안함

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Louis King,Detroit Pistons,F,1999-04-06,79568
Kostas Antetokounmpo,Los Angeles Lakers,PF,1997-11-20,79568
Rodions Kurucs,Brooklyn Nets,PF,1998-02-05,1699236
Spencer Dinwiddie,Brooklyn Nets,PG,1993-04-06,10605600


In [None]:
nba.iloc[:2]  # 0 1 인덱스만 추출

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [None]:
nba.iloc[447:]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000


In [None]:
nba.iloc[-10:-6]  # 음수 인덱스도 가능

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jared Dudley,Los Angeles Lakers,PF,1985-07-10,2564753
Max Strus,Chicago Bulls,SG,1996-03-28,79568
Kevon Looney,Golden State Warriors,C,1996-02-06,4464286
Willy Hernangomez,Charlotte Hornets,C,1994-05-27,1557250


In [None]:
nba.iloc[0:10:2]  # step 도 정할 수 있음 ( 0 2 4 6 8 )

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
Kendrick Nunn,Miami Heat,SG,1995-08-03,1416852
Brook Lopez,Milwaukee Bucks,C,1988-04-01,12093024


In [None]:
# .loc , .iloc [ row : col ]
nba.loc["Giannis Antetokounmpo", "Team"]   # .loc[행 (이름) , 열 (팀) ]

'Milwaukee Bucks'

In [None]:
nba.loc["James Harden", ["Position", "Birthday"]]

Position                     PG
Birthday    1989-08-26 00:00:00
Name: James Harden, dtype: object

In [None]:
nba.loc[ 
    ["Russell Westbrook", "Anthony Davis"],   # row
    ["Team", "Salary"]                        # col
]

Unnamed: 0_level_0,Team,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Russell Westbrook,Houston Rockets,38506482
Anthony Davis,Los Angeles Lakers,27093019


In [None]:
nba.loc["James Harden", "Position":"Salary"]   # 슬라이싱 가능: [ ] 는 필요 없다

Position                     PG
Birthday    1989-08-26 00:00:00
Salary                 38199000
Name: James Harden, dtype: object

In [None]:
# iloc 로 행과 열 추출
nba.iloc[57, 3]

796806

In [None]:
nba.iloc[100:104, :3]   # 행열 슬라이싱; 열 인덱스=(0 1 2)

Unnamed: 0_level_0,Team,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brian Bowen,Indiana Pacers,SG,1998-10-02
Aaron Holiday,Indiana Pacers,PG,1996-09-30
Troy Daniels,Los Angeles Lakers,SG,1991-07-15
Buddy Hield,Sacramento Kings,SG,1992-12-17


In [None]:
# 단일 값을 찾는 메서드가 따로 있다: 더 빠름
nba.at["Austin Rivers", "Birthday"]  # .at

Timestamp('1992-08-01 00:00:00')

In [None]:
nba.iat[263, 1]  # .iat : index로 찾기

'PF'

In [None]:
%%timeit             # 코드 계산하는 시간을 계산
nba.at["Austin Rivers", "Birthday"]

4.62 µs ± 58.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [None]:
%%timeit             
nba.loc["Austin Rivers", "Birthday"]  # 시간이 약 1.6배 느림

7.63 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [None]:
%%timeit    
nba.iat[263, 1]

8.85 µs ± 239 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [None]:
%%timeit    
nba.iloc[263, 1]  # 암튼 더 느리다

11.8 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


### 4.8 Series 값 추출

In [None]:
# .loc, .iloc, .at, .iat 는 Series에서도 가능
print(nba["Salary"].loc["Damian Lillard"])
print()
print(nba["Salary"].at["Damian Lillard"])
print()
print(nba["Salary"].iloc[234])
print()
print(nba["Salary"].iat[234])
print()

29802321

29802321

2033160

2033160



### 4.9 열 또는 행 이름 바꾸기

In [None]:
nba.columns

Index(['Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [None]:
# 바로 대입하여 열 이름 바꾸기
nba.columns = ['Team', 'Position', 'Date of Birthday', 'Pay']
nba.head(1)

Unnamed: 0_level_0,Team,Position,Date of Birthday,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697


In [None]:
# .rename
nba = nba.rename( columns= { "Date of Birth" : "Birthday"})   # .rename( columns={old:new})
nba

Unnamed: 0_level_0,Team,Position,Date of Birthday,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...
Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


In [None]:
nba.loc["Giannis Antetokounmpo"]

Team                    Milwaukee Bucks
Position                             PF
Date of Birthday    1994-12-06 00:00:00
Pay                            25842697
Name: Giannis Antetokounmpo, dtype: object

In [None]:
nba = nba.rename( index= { "Giannis Antetokounmpo" : "Greek Freak"})  # index를 변경할 수도 있다!
nba.loc["Greek Freak"]

Team                    Milwaukee Bucks
Position                             PF
Date of Birthday    1994-12-06 00:00:00
Pay                            25842697
Name: Greek Freak, dtype: object

### 4.10 인덱스 재설정

In [None]:
nba.set_index("Team").head()

Unnamed: 0_level_0,Position,Date of Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Philadelphia 76ers,SG,1996-09-26,1445697
Detroit Pistons,PF,1995-09-27,1645357
Charlotte Hornets,PF,1998-08-23,3831840
Detroit Pistons,PG,1988-10-04,7317074
Philadelphia 76ers,G,1995-07-26,79568


In [None]:
# .reset_index()
nba.reset_index().head()  # Name 인덱스가 다시 열로 돌아옴

Unnamed: 0,Name,Team,Position,Date of Birthday,Pay
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


In [None]:
nba.reset_index().set_index("Team").head()  # Team 을 인덱스로 넣기

Unnamed: 0_level_0,Name,Position,Date of Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Philadelphia 76ers,Shake Milton,SG,1996-09-26,1445697
Detroit Pistons,Christian Wood,PF,1995-09-27,1645357
Charlotte Hornets,PJ Washington,PF,1998-08-23,3831840
Detroit Pistons,Derrick Rose,PG,1988-10-04,7317074
Philadelphia 76ers,Marial Shayok,G,1995-07-26,79568


In [None]:
nba = nba.reset_index().set_index("Team")

# 4.11 코딩 첼린지

In [None]:
# 1. nfl.csv 파일 가져오기, Birthday열의 값을 날짜, 시간으로 변환하는 방법
data = pd.read_csv("nfl.csv", parse_dates=["Birthday"])   # data에 불러오기; parse_dates는 여기다 저장
nfl = pd.DataFrame(data)
nfl

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
1,Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
2,Adam Butler,New England Patriots,DT,1994-04-12,645000
3,Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
4,Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000
...,...,...,...,...,...
1650,Bashaud Breeland,Kansas City Chiefs,CB,1992-01-30,805000
1651,Craig James,Philadelphia Eagles,CB,1996-04-29,570000
1652,Jonotthan Harrison,New York Jets,C,1991-08-25,1500000
1653,Chuma Edoga,New York Jets,OT,1997-05-25,495000


In [None]:
# 2. 선수 이름을 DataFrame 인덱스로 지정하는 2가지 방법

# 2.1 .set_index
nfl.set_index("Name")

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000
...,...,...,...,...
Bashaud Breeland,Kansas City Chiefs,CB,1992-01-30,805000
Craig James,Philadelphia Eagles,CB,1996-04-29,570000
Jonotthan Harrison,New York Jets,C,1991-08-25,1500000
Chuma Edoga,New York Jets,OT,1997-05-25,495000


In [None]:
# 2.2 직접 입력
nfl.index = ["Tremon Smith":"Tajae Sharpe"] # 안됨

SyntaxError: invalid syntax (1156169552.py, line 2)

In [None]:
# 정답 : 데이터 호출에서 인덱스 열을 지정하기
nfl = pd.read_csv("nfl.csv", index_col = "Name", parse_dates=["Birthday"])
nfl

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000
...,...,...,...,...
Bashaud Breeland,Kansas City Chiefs,CB,1992-01-30,805000
Craig James,Philadelphia Eagles,CB,1996-04-29,570000
Jonotthan Harrison,New York Jets,C,1991-08-25,1500000
Chuma Edoga,New York Jets,OT,1997-05-25,495000


In [None]:
# 3. 데이터셋에서 팀당 선수가 몇 명인지 계산하기
nfl["Team"].value_counts()
# nfl.Team. 도 가능!

Team
New York Jets           58
Kansas City Chiefs      56
Washington Redskins     56
New Orleans Saints      55
San Francisco 49Ers     55
Denver Broncos          54
Minnesota Vikings       54
Los Angeles Chargers    54
Seattle Seahawks        53
Dallas Cowboys          53
Buffalo Bills           53
Atlanta Falcons         53
Detroit Lions           53
Chicago Bears           53
Los Angeles Rams        52
New York Giants         52
Philadelphia Eagles     52
Houston Texans          52
Arizona Cardinals       51
Cincinnati Bengals      51
Green Bay Packers       51
Oakland Raiders         51
Jacksonville Jaguars    50
Cleveland Browns        49
Miami Dolphins          49
Indianapolis Colts      49
Carolina Panthers       49
New England Patriots    49
Baltimore Ravens        48
Pittsburgh Steelers     47
Tampa Bay Buccaneers    47
Tennessee Titans        46
Name: count, dtype: int64

In [None]:
# 4. 가장 높은 연봉을 받는 5명의 선수
nfl.sort_values("Salary", ascending=False).head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kirk Cousins,Minnesota Vikings,QB,1988-08-19,27500000
Jameis Winston,Tampa Bay Buccaneers,QB,1994-01-06,20922000
Marcus Mariota,Tennessee Titans,QB,1993-10-30,20922000
Derek Carr,Oakland Raiders,QB,1991-03-28,19900000
Jimmy Garoppolo,San Francisco 49Ers,QB,1991-11-02,17200000


In [None]:
# 5. 팀을 알파벳 순서로 정렬한 다음 연봉을 내림차순으로 정렬하기
nfl.sort_values(["Team", "Salary"], ascending=[True, False])
# by=

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chandler Jones,Arizona Cardinals,OLB,1990-02-27,16500000
Patrick Peterson,Arizona Cardinals,CB,1990-07-11,11000000
Larry Fitzgerald,Arizona Cardinals,WR,1983-08-31,11000000
David Johnson,Arizona Cardinals,RB,1991-12-16,5700000
Justin Pugh,Arizona Cardinals,G,1990-08-15,5000000
...,...,...,...,...
Ross Pierschbacher,Washington Redskins,C,1995-05-05,495000
Kelvin Harmon,Washington Redskins,WR,1996-12-15,495000
Wes Martin,Washington Redskins,G,1996-05-09,495000
Jimmy Moreland,Washington Redskins,CB,1995-08-26,495000


In [261]:
# 6. New York Jets 에서 가장 나이가 많은 선수는 누구이며 그의 생일은 언제인가요?
nflNYJ = nfl.reset_index().set_index(keys = "Team").loc["New York Jets"]
# 인덱스 리셋 후 Team을 인덱스로 지정, loc로 NYJ만 검색
nflNYJ
nflNYJ.sort_values("Birthday").head(1)
# Birthday 정렬 기준 맨 위의 값 출력

Unnamed: 0_level_0,Name,Position,Birthday,Salary
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
New York Jets,Ryan Kalil,C,1985-03-29,2400000
