# 데이터 구조

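- Series :         동일한 데이터 타입의 값을 담는 1차원(1D) 배열(array)
- DataFrame :    2차원(2D) 테이블 구조. 각 컬럼은 서로 다른 데이터 타입을 가질 수 있음.
- Panel :         3차원(3D) 테이블 구조. (pandas 0.20에서 deprecated, 0.25에서 제거됨 — 대신 MultiIndex를 사용하는 DataFrame 또는 xarray를 사용)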

### 라이브러리

In [33]:
import pandas as pd

In [34]:
import numpy as np

In [35]:
import matplotlib.pyplot as plt

### 객체 생성

##### 1. Series

In [36]:
# Build a 1-D Series from a plain list; the np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [37]:
s  # display the Series (a bare name as the last line of a cell shows its repr)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

##### 2. DataFrame

In [38]:
# Create a DataFrame by specifying the row (index) and column data

In [39]:
# Seven consecutive calendar days starting on 2016-10-01.
dates = pd.date_range(start='20161001', periods=7, freq='D')

In [40]:
dates  # display the generated DatetimeIndex

DatetimeIndex(['2016-10-01', '2016-10-02', '2016-10-03', '2016-10-04',
               '2016-10-05', '2016-10-06', '2016-10-07'],
              dtype='datetime64[ns]', freq='D')

In [41]:
# 7x4 table of random samples rounded to two decimals, with `dates` as the
# row index and the letters A-D as column labels.
df = pd.DataFrame(
    data=np.round(np.random.rand(7, 4), 2),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

In [42]:
df  # display the DataFrame

Unnamed: 0,A,B,C,D
2016-10-01,0.76,0.15,0.33,0.95
2016-10-02,0.22,0.42,0.91,0.3
2016-10-03,0.67,0.82,0.07,0.18
2016-10-04,0.65,0.08,0.65,0.78
2016-10-05,0.05,0.54,0.15,0.61
2016-10-06,0.53,0.19,0.03,0.63
2016-10-07,0.09,0.06,0.67,0.91


In [46]:
# Convert a dictionary into a DataFrame

In [50]:
# Three index-aligned lists: entry i of each list describes the same country.
names = [
    'United States', 'Australia', 'Japan', 'India', 'Russia',
    'Morocco', 'Egypt', 'Korea', 'China', 'England',
]
dr = [
    True, False, False, False, True,
    True, True, True, True, True,
]
cpc = [
    809, 731, 588, 18, 200,
    70, 45, 122, 397, 255,
]

# Map each future column name to its list of values.
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

In [51]:
# Each dictionary key becomes a column; each list supplies that column's values.
cars = pd.DataFrame(data=my_dict)

In [52]:
cars  # display the DataFrame

Unnamed: 0,cars_per_cap,country,drives_right
0,809,United States,True
1,731,Australia,False
2,588,Japan,False
3,18,India,False
4,200,Russia,True
5,70,Morocco,True
6,45,Egypt,True
7,122,Korea,True
8,397,China,True
9,255,England,True


In [53]:
# Inspect the DataFrame's structure

In [54]:
cars.dtypes   # data type of each column

cars_per_cap     int64
country         object
drives_right      bool
dtype: object

### 데이터 조회

In [55]:
cars.head()   # first rows of the DataFrame (5 when no count is given)

Unnamed: 0,cars_per_cap,country,drives_right
0,809,United States,True
1,731,Australia,False
2,588,Japan,False
3,18,India,False
4,200,Russia,True


In [56]:
cars.tail(3)   # last 3 rows of the DataFrame

Unnamed: 0,cars_per_cap,country,drives_right
7,122,Korea,True
8,397,China,True
9,255,England,True


In [57]:
cars.index   # index labels of the rows

RangeIndex(start=0, stop=10, step=1)

In [58]:
cars.columns   # column names

Index(['cars_per_cap', 'country', 'drives_right'], dtype='object')

In [59]:
cars.values   # all of the data, returned as a NumPy array

array([[809, 'United States', True],
       [731, 'Australia', False],
       [588, 'Japan', False],
       [18, 'India', False],
       [200, 'Russia', True],
       [70, 'Morocco', True],
       [45, 'Egypt', True],
       [122, 'Korea', True],
       [397, 'China', True],
       [255, 'England', True]], dtype=object)

In [60]:
cars.describe()   # summary statistics (only the numeric column cars_per_cap is summarized here)

Unnamed: 0,cars_per_cap
count,10.0
mean,323.5
std,293.035929
min,18.0
25%,83.0
50%,227.5
75%,540.25
max,809.0


In [61]:
df.describe()   # summary statistics for each of the columns A-D

Unnamed: 0,A,B,C,D
count,7.0,7.0,7.0,7.0
mean,0.424286,0.322857,0.401429,0.622857
std,0.296865,0.282649,0.343678,0.2929
min,0.05,0.06,0.03,0.18
25%,0.155,0.115,0.11,0.455
50%,0.53,0.19,0.33,0.63
75%,0.66,0.48,0.66,0.845
max,0.76,0.82,0.91,0.95


In [63]:
cars.T   # transpose: rows become columns and columns become rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
cars_per_cap,809,731,588,18,200,70,45,122,397,255
country,United States,Australia,Japan,India,Russia,Morocco,Egypt,Korea,China,England
drives_right,True,False,False,False,True,True,True,True,True,True


In [74]:
df.sort_index(axis=1, ascending=False)   # sort the columns by label in descending order (D, C, B, A)

Unnamed: 0,D,C,B,A
2016-10-01,0.95,0.33,0.15,0.76
2016-10-02,0.3,0.91,0.42,0.22
2016-10-03,0.18,0.07,0.82,0.67
2016-10-04,0.78,0.65,0.08,0.65
2016-10-05,0.61,0.15,0.54,0.05
2016-10-06,0.63,0.03,0.19,0.53
2016-10-07,0.91,0.67,0.06,0.09


In [75]:
cars.sort_values(by='country')   # sort the rows by the values of a given column

Unnamed: 0,cars_per_cap,country,drives_right
1,731,Australia,False
8,397,China,True
6,45,Egypt,True
9,255,England,True
3,18,India,False
2,588,Japan,False
7,122,Korea,True
5,70,Morocco,True
4,200,Russia,True
0,809,United States,True
