# Import

In [42]:
import numpy as np
import pandas as pd

# Series

In [43]:
# A Series built from a plain list gets the default 0..n-1 integer index;
# mixing ints and floats yields a single float64 dtype.
s = pd.Series(data=[0, 1, 1.5, 2, 3.2])
s

0    0.0
1    1.0
2    1.5
3    2.0
4    3.2
dtype: float64

In [44]:
# One print call, one item per line: the raw values, the index, then three selections.
print(
    s.values,  # underlying NumPy array
    s.index,  # the default RangeIndex
    s[1],  # the element whose label is 1
    s[1:5],  # integer slice: rows 1 up to, but not including, 5
    s > 1.6,  # element-wise comparison -> boolean Series
    sep='\n',
)

[0.  1.  1.5 2.  3.2]
RangeIndex(start=0, stop=5, step=1)
1.0
1    1.0
2    1.5
3    2.0
4    3.2
dtype: float64
0    False
1    False
2    False
3     True
4     True
dtype: bool


In [45]:
# Same values as before, now labelled 'a'..'e' instead of the default integer positions.
s = pd.Series(
    data=[0, 1, 1.5, 2, 3.2],
    index=list('abcde'),
)
s

a    0.0
b    1.0
c    1.5
d    2.0
e    3.2
dtype: float64

In [46]:
# Selection on a string-labelled Series, one result per line.
print(
    s['c'],  # single label -> scalar
    s[1:2],  # integer slice is positional: only the second row
    s['b':'e'],  # label slice includes BOTH endpoints
    s[['a', 'c', 'e']],  # list of labels -> sub-Series in that order
    'b' in s,  # membership tests the index labels, not the values
    sep='\n',
)

1.5
b    1.0
dtype: float64
b    1.0
c    1.5
d    2.0
e    3.2
dtype: float64
a    0.0
c    1.5
e    3.2
dtype: float64
True


In [47]:
# User-defined index: the labels are the even integers 2..14, so labels != positions.
s = pd.Series(
    data=[0, 1, 1.5, 2, 3.2, 2, 1.5],
    index=list(range(2, 16, 2)),
)
s

2     0.0
4     1.0
6     1.5
8     2.0
10    3.2
12    2.0
14    1.5
dtype: float64

In [48]:
# The index holds integers (2, 4, ..., 14), so bare `s[...]` is ambiguous:
# `s[2]` looks up the LABEL 2, whereas `s[2:4]` slices by POSITION, and pandas
# has warned about that slice form on integer indexes. Spell out the intent
# with .loc (labels) and .iloc (positions); the printed values are unchanged.
print(s.loc[2])  # label 2 -> 0.0
print(s.iloc[2:4])  # positions 2 and 3 -> the rows labelled 6 and 8

print(s.unique())  # distinct values in order of first appearance
print(s.value_counts())  # how many times each value occurs
print(s.isin([0.25, 1.5]))  # boolean mask: is each value one of the listed ones?

0.0
6    1.5
8    2.0
dtype: float64
[0.  1.  1.5 2.  3.2]
1.5    2
2.0    2
0.0    1
1.0    1
3.2    1
dtype: int64
2     False
4     False
6      True
8     False
10    False
12    False
14     True
dtype: bool


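FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
  print(s[2:4])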


In [49]:
# A dict turns into a Series whose index is the dict's keys, in insertion order.
fruit = {
    '사과': 10,
    '배': 5,
    '딸기': 20,
    '바나나': 3,
    '오렌지': 8,
}
s = pd.Series(data=fruit)
s

사과     10
배       5
딸기     20
바나나     3
오렌지     8
dtype: int64

In [50]:
# Look up a single value by its index label.
s.loc['배']

5

In [51]:
# Label slice: every row from the first label through the last one, end label included.
s.loc['배':'오렌지']

배       5
딸기     20
바나나     3
오렌지     8
dtype: int64

# DataFrame

In [52]:
# One dict per row; the columns are the union of all keys, and a key that is
# missing from a row shows up as NaN in that row.
df = pd.DataFrame(
    data=[
        {'A': 2, 'B': 4, 'D': 3},
        {'A': 4, 'B': 5, 'C': 7},
    ]
)
df

Unnamed: 0,A,B,D,C
0,2,4,3.0,
1,4,5,,7.0


In [53]:
# Draw the 5x5 table from a seeded generator so the values are identical on every
# Restart & Run All (the unseeded np.random.rand produced a different table each run).
# Re-execute the cell to refresh the displayed output.
df = pd.DataFrame(
    np.random.default_rng(0).random((5, 5)),  # uniform floats in [0, 1)
    columns=list('ABCDE'),
    index=range(1, 6),  # row labels 1..5 instead of the default 0..4
)
df

Unnamed: 0,A,B,C,D,E
1,0.510173,0.39806,0.25709,0.227845,0.924245
2,0.628575,0.811926,0.397341,0.722758,0.88936
3,0.893929,0.032527,0.433519,0.583336,0.522526
4,0.109233,0.958448,0.729668,0.93117,0.243578
5,0.982903,0.217162,0.259312,0.963398,0.662894


In [54]:
# Count of fresh fruit per kind, as a Series indexed by fruit name.
fruit_fresh = pd.Series(
    {
        '사과': 846,
        '배': 343,
        '바나나': 17,
        '딸기': 1954,
        '오렌지': 440,
    }
)
fruit_fresh

사과      846
배       343
바나나      17
딸기     1954
오렌지     440
dtype: int64

In [55]:
# Count of rotten fruit per kind, indexed by the same fruit names as fruit_fresh.
fruit_rotten = pd.Series(
    {
        '사과': 232,
        '배': 123,
        '바나나': 45,
        '딸기': 2,
        '오렌지': 30,
    }
)
fruit_rotten

사과     232
배      123
바나나     45
딸기       2
오렌지     30
dtype: int64

In [56]:
# Put the two Series side by side as columns (rows are aligned on the fruit names) ...
fruit_df = pd.DataFrame(
    {
        '싱싱한 과일': fruit_fresh,
        '썩은 과일': fruit_rotten,
    }
)
# ... then append a per-row total across both columns.
fruit_df['모든 과일'] = fruit_df.sum(axis=1)
fruit_df

Unnamed: 0,싱싱한 과일,썩은 과일,모든 과일
사과,846,232,1078
배,343,123,466
바나나,17,45,62
딸기,1954,2,1956
오렌지,440,30,470


In [57]:
# Basic DataFrame anatomy and selection, one result per line.
print(
    fruit_df.index,  # row labels
    fruit_df.columns,  # column labels
    fruit_df.values,  # the data as a 2-D NumPy array
    fruit_df['싱싱한 과일'],  # one column -> Series
    fruit_df['썩은 과일']['바나나'],  # column first, then row label -> scalar
    fruit_df['배':'딸기'],  # a slice inside [] selects ROWS; end label included
    sep='\n',
)

Index(['사과', '배', '바나나', '딸기', '오렌지'], dtype='object')
Index(['싱싱한 과일', '썩은 과일', '모든 과일'], dtype='object')
[[ 846  232 1078]
 [ 343  123  466]
 [  17   45   62]
 [1954    2 1956]
 [ 440   30  470]]
사과      846
배       343
바나나      17
딸기     1954
오렌지     440
Name: 싱싱한 과일, dtype: int64
45
     싱싱한 과일  썩은 과일  모든 과일
배       343    123    466
바나나      17     45     62
딸기     1954      2   1956


In [58]:
# Show the totals column and confirm that selecting a single column yields a Series.
print(fruit_df['모든 과일'], type(fruit_df['모든 과일']), sep='\n')

# Divide every row by that row's total (axis='index' matches the divisor on row
# labels), turning counts into shares. Note: this rebinds fruit_df to the ratio table.
fruit_df = fruit_df.divide(fruit_df['모든 과일'], axis='index')
fruit_df.transpose()

사과     1078
배       466
바나나      62
딸기     1956
오렌지     470
Name: 모든 과일, dtype: int64
<class 'pandas.core.series.Series'>


Unnamed: 0,사과,배,바나나,딸기,오렌지
싱싱한 과일,0.784787,0.736052,0.274194,0.998978,0.93617
썩은 과일,0.215213,0.263948,0.725806,0.001022,0.06383
모든 과일,1.0,1.0,1.0,1.0,1.0


# Index

In [59]:
# An Index can be created on its own, independent of any Series or DataFrame.
idx = pd.Index(list(range(2, 11, 2)))
idx

Int64Index([2, 4, 6, 8, 10], dtype='int64')

In [60]:
# Array-like attributes of an Index: element count, shape tuple, dimensions, dtype.
print(idx.size, idx.shape, idx.ndim, idx.dtype, sep='\n')

5
(5,)
1
int64


In [61]:
idx1 = pd.Index(list(range(1, 6)))
idx2 = pd.Index(list(range(4, 9)))

# Index.append returns a NEW Index (duplicates are kept) and leaves idx1 untouched,
# unlike list.append, which mutates the list in place and returns None.
print(idx1.append(idx2))

# Set-style operations between the two indexes; each one returns a new Index.
print(
    idx1.difference(idx2),  # in idx1 but not in idx2
    idx1.intersection(idx2),  # in both
    idx1.union(idx2),  # in either, without duplicates
    sep='\n',
)

# Removing entries, then the elements found in exactly one of the two indexes.
print(
    idx1.delete(0),  # remove by position
    idx1.drop(1),  # remove by value (label)
    idx1.symmetric_difference(idx2),
    sep='\n',
)

Int64Index([1, 2, 3, 4, 5, 4, 5, 6, 7, 8], dtype='int64')
Int64Index([1, 2, 3], dtype='int64')
Int64Index([4, 5], dtype='int64')
Int64Index([1, 2, 3, 4, 5, 6, 7, 8], dtype='int64')
Int64Index([2, 3, 4, 5], dtype='int64')
Int64Index([2, 3, 4, 5], dtype='int64')
Int64Index([1, 2, 3, 6, 7, 8], dtype='int64')


# Series 인덱싱

# DataFrame 인덱싱