# 데이터 구조

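- Series : 동일한 데이터 타입의 값을 담는 1차원(1D) 배열(array).
- DataFrame : 2차원(2D) 테이블 구조. 각 컬럼은 서로 다른 데이터 타입을 가질 수 있음.
- Panel : 3차원(3D) 테이블 구조. (pandas 0.20에서 deprecated, 0.25에서 제거됨. 최신 버전에서는 MultiIndex를 사용한 DataFrame 등으로 대체)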

### 라이브러리

In [1]:
import pandas as pd
import numpy as np

### 객체 생성

##### 1. Series

In [2]:
# One-dimensional Series; the NaN entry makes pandas store the values as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

##### 2. DataFrame

In [3]:
# Create a DataFrame by specifying the row and column data

In [4]:
# Seven consecutive calendar days starting on 2016-10-01.
dates = pd.date_range(start='2016-10-01', periods=7, freq='D')
dates

DatetimeIndex(['2016-10-01', '2016-10-02', '2016-10-03', '2016-10-04',
               '2016-10-05', '2016-10-06', '2016-10-07'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 7x4 table of random numbers rounded to two decimals: one row per entry of
# `dates`, columns labelled A-D. No seed is set, so values change on every run.
df = pd.DataFrame(
    data=np.round(np.random.rand(7, 4), 2),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2016-10-01,0.25,0.1,0.6,1.0
2016-10-02,0.75,0.12,0.1,0.26
2016-10-03,0.51,0.73,0.88,0.09
2016-10-04,0.84,0.95,0.73,0.85
2016-10-05,0.93,0.05,0.03,0.02
2016-10-06,0.64,0.34,0.93,0.53
2016-10-07,0.82,0.48,1.0,0.24


In [6]:
# Convert a dictionary to a DataFrame

In [7]:
# Column data for ten countries; every list uses the same country order.
names = [
    'United States', 'Australia', 'Japan', 'India', 'Russia',
    'Morocco', 'Egypt', 'Korea', 'China', 'England',
]
dr = [True, False, False, False, True, True, True, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45, 122, 397, 255]

# Map each column label to its list of values.
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

In [8]:
# Convert the dictionary to a DataFrame. The column order is pinned explicitly:
# older pandas sorted dict keys alphabetically while newer versions keep the
# insertion order, so without `columns=` the layout depends on the version.
cars = pd.DataFrame(my_dict, columns=['cars_per_cap', 'country', 'drives_right'])
cars

Unnamed: 0,cars_per_cap,country,drives_right
0,809,United States,True
1,731,Australia,False
2,588,Japan,False
3,18,India,False
4,200,Russia,True
5,70,Morocco,True
6,45,Egypt,True
7,122,Korea,True
8,397,China,True
9,255,England,True


In [9]:
# Inspect the structure of the DataFrame

In [10]:
cars.dtypes   # data type of each column

cars_per_cap     int64
country         object
drives_right      bool
dtype: object

### 데이터 조회

In [11]:
cars.head()   # first rows of the DataFrame (five when no count is given)

Unnamed: 0,cars_per_cap,country,drives_right
0,809,United States,True
1,731,Australia,False
2,588,Japan,False
3,18,India,False
4,200,Russia,True


In [12]:
cars.tail(3)   # last three rows of the DataFrame

Unnamed: 0,cars_per_cap,country,drives_right
7,122,Korea,True
8,397,China,True
9,255,England,True


In [13]:
cars.index   # index (row labels) of the DataFrame

RangeIndex(start=0, stop=10, step=1)

In [14]:
cars.columns   # column names

Index(['cars_per_cap', 'country', 'drives_right'], dtype='object')

In [15]:
cars.values   # all of the data as a NumPy array (mixed column types give dtype=object)

array([[809, 'United States', True],
       [731, 'Australia', False],
       [588, 'Japan', False],
       [18, 'India', False],
       [200, 'Russia', True],
       [70, 'Morocco', True],
       [45, 'Egypt', True],
       [122, 'Korea', True],
       [397, 'China', True],
       [255, 'England', True]], dtype=object)

In [16]:
cars.describe()   # summary statistics; only the numeric column (cars_per_cap) is reported

Unnamed: 0,cars_per_cap
count,10.0
mean,323.5
std,293.035929
min,18.0
25%,83.0
50%,227.5
75%,540.25
max,809.0


In [17]:
df.describe()   # summary statistics (count, mean, std, min, quartiles, max) for each column of df

Unnamed: 0,A,B,C,D
count,7.0,7.0,7.0,7.0
mean,0.677143,0.395714,0.61,0.427143
std,0.233789,0.344522,0.395474,0.378581
min,0.25,0.05,0.03,0.02
25%,0.575,0.11,0.35,0.165
50%,0.75,0.34,0.73,0.26
75%,0.83,0.605,0.905,0.69
max,0.93,0.95,1.0,1.0


In [18]:
cars.T   # transpose: rows become columns and columns become rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
cars_per_cap,809,731,588,18,200,70,45,122,397,255
country,United States,Australia,Japan,India,Russia,Morocco,Egypt,Korea,China,England
drives_right,True,False,False,False,True,True,True,True,True,True


In [19]:
df.sort_index(axis=1, ascending=False)   # sort the column labels in descending order (D, C, B, A)

Unnamed: 0,D,C,B,A
2016-10-01,1.0,0.6,0.1,0.25
2016-10-02,0.26,0.1,0.12,0.75
2016-10-03,0.09,0.88,0.73,0.51
2016-10-04,0.85,0.73,0.95,0.84
2016-10-05,0.02,0.03,0.05,0.93
2016-10-06,0.53,0.93,0.34,0.64
2016-10-07,0.24,1.0,0.48,0.82


In [20]:
cars.sort_values(by='country')   # sort the rows by the values of a given column

Unnamed: 0,cars_per_cap,country,drives_right
1,731,Australia,False
8,397,China,True
6,45,Egypt,True
9,255,England,True
3,18,India,False
2,588,Japan,False
7,122,Korea,True
5,70,Morocco,True
4,200,Russia,True
0,809,United States,True
