In [1]:
import pandas as pd

In [2]:
# Column-oriented city data, written as one (name, population, country)
# record per city and transposed so that every key maps to a list holding
# one entry per city (13 cities in total).
cities = {
    field: list(values)
    for field, values in zip(
        ("name", "population", "country"),
        zip(*[
            ("London", 8615246, "England"),
            ("Berlin", 3562166, "Germany"),
            ("Madrid", 3165235, "Spain"),
            ("Rome", 2874038, "Italy"),
            ("Paris", 2273305, "France"),
            ("Vienna", 1805681, "Austria"),
            ("Bucharest", 1803425, "Romania"),
            ("Hamburg", 1760433, "Germany"),
            ("Budapest", 1754000, "Hungary"),
            ("Warsaw", 1740119, "Poland"),
            ("Barcelona", 1602386, "Spain"),
            ("Munich", 1493900, "Germany"),
            ("Milan", 1350680, "Italy"),
        ]),
    )
}


In [3]:
# Build a frame from the dict of equal-length lists. No index is passed, so
# the rows receive automatic integer labels starting at 0.
city_frame = pd.DataFrame(data=cities)

In [4]:
# Display the frame; the recorded output lists the columns as
# country, name, population with integer row labels.
city_frame

Unnamed: 0,country,name,population
0,England,London,8615246
1,Germany,Berlin,3562166
2,Spain,Madrid,3165235
3,Italy,Rome,2874038
4,France,Paris,2273305
5,Austria,Vienna,1805681
6,Romania,Bucharest,1803425
7,Germany,Hamburg,1760433
8,Hungary,Budapest,1754000
9,Poland,Warsaw,1740119


## 인덱스를 추가해서 생성자에 할당하기

In [5]:
# One row label per city, in the same order as the lists in `cities`.
# NOTE: "eigth" and "twelvth" are misspelled; they are kept verbatim because
# they are the actual index labels shown in the recorded outputs.
ordinals = [
    "first",
    "second",
    "third",
    "fourth",
    "fifth",
    "sixth",
    "seventh",
    "eigth",
    "ninth",
    "tenth",
    "eleventh",
    "twelvth",
    "thirteenth",
]
# Same data as before, but the constructor is given explicit row labels
# (one ordinal per city) instead of the default integer index.
city_frame1 = pd.DataFrame(data=cities, index=ordinals)

In [6]:
# Display the frame; the recorded output shows the ordinal strings as row labels.
city_frame1

Unnamed: 0,country,name,population
first,England,London,8615246
second,Germany,Berlin,3562166
third,Spain,Madrid,3165235
fourth,Italy,Rome,2874038
fifth,France,Paris,2273305
sixth,Austria,Vienna,1805681
seventh,Romania,Bucharest,1803425
eigth,Germany,Hamburg,1760433
ninth,Hungary,Budapest,1754000
tenth,Poland,Warsaw,1740119


## 칼럼의 위치를 조정하기

In [7]:
# Same data and ordinal row labels; `columns` pins the left-to-right column
# order to name, country, population.
city_frame2 = pd.DataFrame(
    data=cities,
    index=ordinals,
    columns=["name", "country", "population"],
)

In [8]:
# Display the frame; the recorded output follows the requested column order
# (name, country, population).
city_frame2

Unnamed: 0,name,country,population
first,London,England,8615246
second,Berlin,Germany,3562166
third,Madrid,Spain,3165235
fourth,Rome,Italy,2874038
fifth,Paris,France,2273305
sixth,Vienna,Austria,1805681
seventh,Bucharest,Romania,1803425
eigth,Hamburg,Germany,1760433
ninth,Budapest,Hungary,1754000
tenth,Warsaw,Poland,1740119


## 특정 칼럼을 인덱스로 지정하고 나머지는 칼럼으로 정의하기

In [9]:
# Use the country list as the row labels and keep only name and population
# as columns. The labels are not unique: several cities share a country
# (e.g. "Germany" labels both Berlin and Hamburg).
city_frame3 = pd.DataFrame(
    data=cities,
    index=cities["country"],
    columns=["name", "population"],
)

In [10]:
# Display the frame; the recorded output shows country names as row labels,
# with "Germany" appearing more than once.
city_frame3

Unnamed: 0,name,population
England,London,8615246
Germany,Berlin,3562166
Spain,Madrid,3165235
Italy,Rome,2874038
France,Paris,2273305
Austria,Vienna,1805681
Romania,Bucharest,1803425
Germany,Hamburg,1760433
Hungary,Budapest,1754000
Poland,Warsaw,1740119


## 이미 만들어진 데이터 프레임에서 특정 칼럼을 인덱스로 지정하기

### 특정 인덱스가 존재할 경우 

In [13]:
# Show the starting point: city_frame1 with its ordinal row labels.
# NOTE(review): this cell carries execution count 13 although it sits above
# cells 11 and 12, so the notebook was not last run strictly top to bottom.
city_frame1

Unnamed: 0,country,name,population
first,England,London,8615246
second,Germany,Berlin,3562166
third,Spain,Madrid,3165235
fourth,Italy,Rome,2874038
fifth,France,Paris,2273305
sixth,Austria,Vienna,1805681
seventh,Romania,Bucharest,1803425
eigth,Germany,Hamburg,1760433
ninth,Hungary,Budapest,1754000
tenth,Poland,Warsaw,1740119


### 특정 칼럼을 새로운 인덱스로 지정하면 기존 인덱스는 사라진다

In [11]:
# Promote the "country" column to the row index. Without inplace=True,
# set_index returns a new frame, so the result is bound to a new name; the
# ordinal labels are not carried over into the result.
city_frame4 = city_frame1.set_index(keys="country")

In [12]:
# Display the frame; the recorded output shows "country" as the index name
# and only name and population as columns.
city_frame4

Unnamed: 0_level_0,name,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
England,London,8615246
Germany,Berlin,3562166
Spain,Madrid,3165235
Italy,Rome,2874038
France,Paris,2273305
Austria,Vienna,1805681
Romania,Bucharest,1803425
Germany,Hamburg,1760433
Hungary,Budapest,1754000
Poland,Warsaw,1740119


### 인덱스 지정을 해제해도 원래의 인덱스로는 돌아가지 않는다. 

In [14]:
# Preview only: the result of reset_index() is displayed but not stored.
# The recorded output shows "country" back as a column and integer row labels
# starting at 0 rather than the earlier ordinal labels.
city_frame4.reset_index()

Unnamed: 0,country,name,population
0,England,London,8615246
1,Germany,Berlin,3562166
2,Spain,Madrid,3165235
3,Italy,Rome,2874038
4,France,Paris,2273305
5,Austria,Vienna,1805681
6,Romania,Bucharest,1803425
7,Germany,Hamburg,1760433
8,Hungary,Budapest,1754000
9,Poland,Warsaw,1740119


In [18]:
# Keep the reset result under its own name. drop=False (the default) turns
# the "country" index back into a regular column instead of discarding it.
city_frame5 = city_frame4.reset_index(drop=False)

In [19]:
# Display the frame; the recorded output shows integer row labels and
# "country" as an ordinary column.
city_frame5

Unnamed: 0,country,name,population
0,England,London,8615246
1,Germany,Berlin,3562166
2,Spain,Madrid,3165235
3,Italy,Rome,2874038
4,France,Paris,2273305
5,Austria,Vienna,1805681
6,Romania,Bucharest,1803425
7,Germany,Hamburg,1760433
8,Hungary,Budapest,1754000
9,Poland,Warsaw,1740119


### 기존 인덱스의 값을 인덱스에 배정하기

In [20]:
# Replace the integer row labels with the ordinal labels by assigning to the
# .index attribute. This mutates city_frame5 itself; the number of labels
# must equal the number of rows.
city_frame5.index = pd.Index(ordinals)

In [21]:
# Display the frame; the recorded output shows the ordinal row labels restored.
city_frame5

Unnamed: 0,country,name,population
first,England,London,8615246
second,Germany,Berlin,3562166
third,Spain,Madrid,3165235
fourth,Italy,Rome,2874038
fifth,France,Paris,2273305
sixth,Austria,Vienna,1805681
seventh,Romania,Bucharest,1803425
eigth,Germany,Hamburg,1760433
ninth,Hungary,Budapest,1754000
tenth,Poland,Warsaw,1740119


## 데이터 프레임 내부의 값을 변경하기 

###  메소드 내부에 inplace=True로 지정하면 새로운 데이터 프레임이 만들어지지 않고 기존 것을 갱신한다

In [22]:
# inplace=True modifies city_frame5 itself, so nothing is assigned here.
# NOTE: this cell is not re-runnable on its own. Once "country" has moved
# into the index it is no longer a column, and a second run raises KeyError.
city_frame5.set_index(keys="country", inplace=True)

In [23]:
# Display the frame; the recorded output shows "country" as the index of the
# same city_frame5 object.
city_frame5

Unnamed: 0_level_0,name,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
England,London,8615246
Germany,Berlin,3562166
Spain,Madrid,3165235
Italy,Rome,2874038
France,Paris,2273305
Austria,Vienna,1805681
Romania,Bucharest,1803425
Germany,Hamburg,1760433
Hungary,Budapest,1754000
Poland,Warsaw,1740119


## 데이터 프레임에 칼럼을 추가하기

### 특정 칼럼명을 인덱싱하고 내부의 값을 누적해서 할당하면 칼럼이 추가되는 것을 볼 수 있다.

In [24]:
# Assigning to a column label that does not exist yet appends a new column.
# The values are the running total of "population" down the rows.
city_frame5["cum_population"] = city_frame5["population"].cumsum(axis=0)


In [25]:
# Display the frame; the recorded output shows the new cum_population column
# as the rightmost column.
city_frame5

Unnamed: 0_level_0,name,population,cum_population
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
England,London,8615246,8615246
Germany,Berlin,3562166,12177412
Spain,Madrid,3165235,15342647
Italy,Rome,2874038,18216685
France,Paris,2273305,20489990
Austria,Vienna,1805681,22295671
Romania,Bucharest,1803425,24099096
Germany,Hamburg,1760433,25859529
Hungary,Budapest,1754000,27613529
Poland,Warsaw,1740119,29353648


### 처음 생성할 때부터 columns에 새로운 칼럼명을 넣을 경우

    매칭되는 칼럼이 없으므로 값을 전부 NaN으로 처리하는 것을 확인할 수 있다.

In [26]:
# City names become the row labels. "area" is requested as a column although
# the source dict has no such key, so the recorded output shows that column
# empty (missing values).
city_frame6 = pd.DataFrame(
    data=cities,
    index=cities["name"],
    columns=["country", "area", "population"],
)

In [27]:
# Display the frame; the recorded output shows an empty "area" column between
# country and population.
city_frame6

Unnamed: 0,country,area,population
London,England,,8615246
Berlin,Germany,,3562166
Madrid,Spain,,3165235
Rome,Italy,,2874038
Paris,France,,2273305
Vienna,Austria,,1805681
Bucharest,Romania,,1803425
Hamburg,Germany,,1760433
Budapest,Hungary,,1754000
Warsaw,Poland,,1740119


### 인덱싱을 통해 칼럼에 새로운 값을 할당하면 전체 칼럼값을 동일하게 처리

In [29]:
# Assigning a single scalar to a column fills every row with that value
# (the recorded output shows 1572 for every city). This mutates city_frame6.
city_frame6["area"] = 1572

In [30]:
# Display the frame; every row of "area" now shows 1572.
city_frame6

Unnamed: 0,country,area,population
London,England,1572,8615246
Berlin,Germany,1572,3562166
Madrid,Spain,1572,3165235
Rome,Italy,1572,2874038
Paris,France,1572,2273305
Vienna,Austria,1572,1805681
Bucharest,Romania,1572,1803425
Hamburg,Germany,1572,1760433
Budapest,Hungary,1572,1754000
Warsaw,Poland,1572,1740119


### 리스트로 각 값을 넣을 경우 개별적인 값이 갱신되는 것을 볼 수 있다.

In [31]:
# One area value per city, in the same order as cities["name"]. The trailing
# comments name the row each value lands on when the list is assigned
# positionally. Units are not stated in this notebook.
area = [
    1572,    # London
    891.85,  # Berlin
    605.77,  # Madrid
    1285,    # Rome
    105.4,   # Paris
    414.6,   # Vienna
    228,     # Bucharest
    755,     # Hamburg
    525.2,   # Budapest
    517,     # Warsaw
    101.9,   # Barcelona
    310.4,   # Munich
    181.8,   # Milan
]
# Assigning a list gives each row its own value, matched by position (the
# list needs one entry per row). The recorded output shows the column as
# floats, e.g. 1572.0 for London.
city_frame6["area"] = area

In [32]:
# Display the frame; "area" now holds a different value for each city.
city_frame6

Unnamed: 0,country,area,population
London,England,1572.0,8615246
Berlin,Germany,891.85,3562166
Madrid,Spain,605.77,3165235
Rome,Italy,1285.0,2874038
Paris,France,105.4,2273305
Vienna,Austria,414.6,1805681
Bucharest,Romania,228.0,1803425
Hamburg,Germany,755.0,1760433
Budapest,Hungary,525.2,1754000
Warsaw,Poland,517.0,1740119


## 파일에 쓰고 그 파일을 읽어오기

In [33]:
import numpy as np

# Column labels (one per person) and row labels (one per month) for the
# demo frame.
names = ['Frank', 'Eve', 'Stella', 'Guido', 'Lara']
index = ["January", "February", "March",
         "April", "May", "June",
         "July", "August", "September",
         "October", "November", "December"]

# Standard-normal samples scaled by 1000. They come from a locally seeded
# generator, so Restart & Run All reproduces the same numbers and the global
# np.random state is left untouched. The shape is derived from the label
# lists so the data always matches the row and column labels.
df11 = pd.DataFrame(
    np.random.RandomState(seed=42).randn(len(index), len(names)) * 1000,
    columns=names,
    index=index,
)

In [34]:
import os

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists before writing (a no-op when it already exists).
os.makedirs('data', exist_ok=True)

# The row labels (month names) are written as the first CSV column, which
# has no header name.
df11.to_csv('data/counties.csv')

### 파일을 그대로 읽어오면 저장된 인덱스는 'Unnamed: 0' 칼럼이 되고, 새로운 숫자 인덱스가 생긴다

In [39]:
# Read the file back with no index column selected (index_col=None is the
# default). The saved month labels come back as an ordinary column and the
# rows get a fresh integer index.
df12 = pd.read_csv('data/counties.csv', index_col=None)

In [40]:
# Display the frame; the recorded output shows the month names in a column
# labelled "Unnamed: 0" next to integer row labels.
df12

Unnamed: 0.1,Unnamed: 0,Frank,Eve,Stella,Guido,Lara
0,January,-537.48393,-401.722082,-1290.901285,104.978371,-476.301459
1,February,-407.370217,639.670123,193.557624,-1329.294593,1428.893616
2,March,2014.667248,78.456725,-851.510533,-1091.416342,-1372.914278
3,April,460.311002,599.461422,676.919149,-204.573884,1374.374723
4,May,1205.642429,-1335.645827,1389.212124,-884.296915,1568.831914
5,June,-483.780589,-343.399247,642.027952,344.461119,666.57461
6,July,592.899853,-176.629759,-42.629643,-209.828634,1320.378019
7,August,335.865803,477.15496,1193.752539,-1088.623754,496.17917
8,September,675.255044,161.435864,-1548.024484,-664.526571,1121.209035
9,October,246.898238,-1377.341852,372.164561,2071.037293,-103.449627


In [43]:
# Inspect the row labels; the recorded output is RangeIndex(start=0, stop=12, step=1).
# NOTE(review): execution count 43 precedes cells 41 and 42 below, so the
# notebook was not last run strictly top to bottom.
df12.index

RangeIndex(start=0, stop=12, step=1)

In [44]:
# Inspect the column labels; the recorded output includes 'Unnamed: 0', the
# column holding the month names that were saved as the index.
df12.columns

Index(['Unnamed: 0', 'Frank', 'Eve', 'Stella', 'Guido', 'Lara'], dtype='object')

### 특정 칼럼을 인덱스로 처리하기

In [41]:
# index_col=0 tells read_csv to use the first CSV column (the saved month
# names) as the row index instead of loading it as a data column.
df13 = pd.read_csv(
    'data/counties.csv',
    index_col=0,
)

In [42]:
# Display the frame; the recorded output shows month names as row labels and
# only the five person columns.
df13

Unnamed: 0,Frank,Eve,Stella,Guido,Lara
January,-537.48393,-401.722082,-1290.901285,104.978371,-476.301459
February,-407.370217,639.670123,193.557624,-1329.294593,1428.893616
March,2014.667248,78.456725,-851.510533,-1091.416342,-1372.914278
April,460.311002,599.461422,676.919149,-204.573884,1374.374723
May,1205.642429,-1335.645827,1389.212124,-884.296915,1568.831914
June,-483.780589,-343.399247,642.027952,344.461119,666.57461
July,592.899853,-176.629759,-42.629643,-209.828634,1320.378019
August,335.865803,477.15496,1193.752539,-1088.623754,496.17917
September,675.255044,161.435864,-1548.024484,-664.526571,1121.209035
October,246.898238,-1377.341852,372.164561,2071.037293,-103.449627


In [45]:
# Inspect the row labels; the recorded output is an object-dtype Index of the
# twelve month names.
df13.index

Index(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype='object')

In [46]:
# Inspect the column labels; the recorded output lists only the five person
# names, with no 'Unnamed: 0' entry.
df13.columns

Index(['Frank', 'Eve', 'Stella', 'Guido', 'Lara'], dtype='object')