In [1]:
import numpy as np

xarr = np.arange(1, 6)
yarr = np.arange(2, 7)
cond = np.array([1, 0, 1, 1, 0], dtype=bool)

# Element-wise selection with a plain Python loop: take the value from xarr
# where cond is True, otherwise the value from yarr. Every element is handled
# by the interpreter, which makes this slow for large arrays.
res = [
    x_val if use_x else y_val
    for x_val, y_val, use_x in zip(xarr, yarr, cond)
]

In [2]:
res

[1, 3, 3, 4, 6]

### Vectorization: SIMD Optimization

In [4]:
# Vectorized selection: take xarr where cond is True, otherwise yarr,
# in a single NumPy call instead of a Python-level loop.
res = np.where(cond, xarr, yarr)
res

array([1, 3, 3, 4, 6])

In [51]:
arr = np.random.randn(4, 4)
arr

array([[-1.39834167, -0.94883125,  0.16023536,  0.98786892],
       [-1.9609109 ,  0.02842034,  2.13109147,  1.29821109],
       [ 0.77548424,  1.81610514,  1.21157999,  1.34845582],
       [-0.53830052,  0.58776341,  0.50885801,  0.6528141 ]])

In [49]:
# Map every value of arr that is greater than 0 to 2, and every other value to 0.
# The result is bound to a new name instead of rebinding `arr`, so cells that
# read the original float values of `arr` keep working and this cell can be
# re-run without changing its own input.
arr_thresholded = np.where(arr > 0, 2, 0)
arr_thresholded

array([[0, 2, 0, 2],
       [0, 0, 2, 0],
       [0, 0, 0, 0],
       [2, 0, 2, 0]])

In [52]:
# Where arr is greater than 0 use 2; otherwise keep the original arr value.
np.where(arr > 0, 2, arr)

array([[-1.39834167, -0.94883125,  2.        ,  2.        ],
       [-1.9609109 ,  2.        ,  2.        ,  2.        ],
       [ 2.        ,  2.        ,  2.        ,  2.        ],
       [-0.53830052,  2.        ,  2.        ,  2.        ]])

In [58]:
arr = np.arange(9)
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36])

In [66]:
arr = arr.reshape(3, -1)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [67]:
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36])

In [68]:
arr.cumsum(axis=1)

array([[ 0,  1,  3],
       [ 3,  7, 12],
       [ 6, 13, 21]])

In [69]:
arr.cumsum(axis=0)

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])

In [70]:
arr.cumprod()

array([0, 0, 0, 0, 0, 0, 0, 0, 0])

In [71]:
arr.cumprod(axis=1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

In [72]:
arr.cumprod(axis=0)

array([[ 0,  1,  2],
       [ 0,  4, 10],
       [ 0, 28, 80]])

### 불린 배열 메서드

In [80]:
arr = np.random.randn(100)


In [81]:
(arr > 0).sum()  # number of elements satisfying the condition (each True counts as 1)

45

In [82]:
bools = np.array([0, 0, 1, 0])
has_any = bool(bools.any())  # True when at least one element is truthy (logical OR)
has_all = bool(bools.all())  # True only when every element is truthy (logical AND)
# A cell displays only its last expression, so show both results together
# instead of silently discarding the any() result.
has_any, has_all

(True, False)

### NumPy Sort vs. Python Sort

```
arr.sort()   -> 파이썬 list.sort()와 동일 : arr 자체가 정렬됨 (in-place, 반환값 None)
np.sort(arr) -> 파이썬 sorted(x)와 동일 : 정렬된 새로운 배열을 반환 (원본 arr는 그대로)
```

In [83]:
arr = np.random.randn(5, 3)
arr.sort()
arr

array([[ 0.15525448,  0.90057136,  1.01026964],
       [-0.31121276,  0.2323727 ,  0.61670751],
       [-0.82042136,  0.45635914,  1.94464205],
       [-1.59611045, -1.06893429,  1.1266116 ],
       [-1.23725426, -1.12528077, -0.42076595]])

In [86]:
arr.sort(0)
arr

array([[-1.59611045, -1.12528077, -0.42076595],
       [-1.23725426, -1.06893429,  0.61670751],
       [-0.82042136,  0.2323727 ,  1.01026964],
       [-0.31121276,  0.45635914,  1.1266116 ],
       [ 0.15525448,  0.90057136,  1.94464205]])

In [87]:
arr.sort(1)
arr

array([[-1.59611045, -1.12528077, -0.42076595],
       [-1.23725426, -1.06893429,  0.61670751],
       [-0.82042136,  0.2323727 ,  1.01026964],
       [-0.31121276,  0.45635914,  1.1266116 ],
       [ 0.15525448,  0.90057136,  1.94464205]])

In [97]:
x = np.array([3,2,5,4,1,9,0])

In [98]:
# Return, for x sorted in ascending order, the index in the original x
# of each element of the sorted result.
np.argsort(x)

array([6, 4, 1, 0, 3, 2, 5])

In [99]:
# Descending order: argsort the negated values.
np.argsort(-x)

array([5, 2, 3, 0, 1, 4, 6])

In [101]:
x[np.argsort(-x)]

array([9, 5, 4, 3, 2, 1, 0])

In [102]:
x = np.arange(1, 10).reshape(3, -1)

In [103]:
np.sort(x, axis=1)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [104]:
# Sort within each row, then [::-1] reverses the first axis, i.e. the order of
# the rows; the values inside each row stay ascending.
# NOTE(review): if a descending sort within each row was intended, the slice
# would be [:, ::-1] — confirm the intent.
np.sort(x, axis=1)[::-1]

array([[7, 8, 9],
       [4, 5, 6],
       [1, 2, 3]])

In [108]:
x = np.array([[2,1,6], [0,7,4], [5,3,2]])
x

array([[2, 1, 6],
       [0, 7, 4],
       [5, 3, 2]])

In [109]:
# Sort each column in ascending order, then reverse the row order, which
# leaves every column sorted in descending order.
np.sort(x, axis=0)[::-1]

array([[5, 7, 6],
       [2, 3, 4],
       [0, 1, 2]])

### np.save() np.load()

In [152]:
x = np.array([0, 1, 2])
np.save("xsave", x)

In [153]:
xsave = np.load("xsave.npy")

#### 여러개의 배열을 바이너리로 저장
`np.savez()`

In [154]:
y = np.array([3,4,5])
np.savez("xysave", x=x, y=y)

In [155]:
xyload=np.load("xysave.npz")
xyload

<numpy.lib.npyio.NpzFile at 0x7fc4036dc3a0>

In [156]:
print(xyload['x'], xyload['y'])

[0 1 2] [3 4 5]


In [157]:
xyload.close()

### 여러 개의 배열을 텍스트 파일로 저장하고 다시 불러오기
`np.savetxt: 여러개의 배열을 텍스트 파일로 저장`  
`np.loadtxt: 텍스트 파일을 배열로 불러오기`

In [162]:
x = np.arange(1, 4)
y = np.arange(4, 7)

np.savetxt("xytxt.txt", (x, y))

In [163]:
np.loadtxt("xytxt.txt")

array([[1., 2., 3.],
       [4., 5., 6.]])

In [166]:
np.savetxt("xytxt.txt", (x, y), fmt="%.f")

In [169]:
np.loadtxt("xytxt.txt")

array([[1., 2., 3.],
       [4., 5., 6.]])

### NumPy 배열을 여러개로 분할

`np.hsplit(ary, indices_or_sections)`  
`np.vsplit()`

`indices_or_sections`: 정수 -> 그 개수만큼 균등 분할, tuple -> 해당 인덱스 위치에서 분할  
`np.hsplit(arr, (2, 4, 7)) -> [0:2] [2:4] [4:7] [7:] <- 구간을 분할`

In [208]:
arr = np.arange(18).reshape(3, -1)
arr

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

In [221]:
# An integer splits the columns into that many equal parts;
# np.split(..., axis=1) is the same operation spelled with an explicit axis.
np.hsplit(arr, 3)
np.split(arr, 3, axis=1)

# A tuple gives the column indices to cut at. Only the last expression of the
# cell is displayed.
np.hsplit(arr, (2, 4)) # column slices 0:2, 2:4, 4:6
np.split(arr, (2, 4), axis=1)

[array([[ 0,  1],
        [ 6,  7],
        [12, 13]]),
 array([[ 2,  3],
        [ 8,  9],
        [14, 15]]),
 array([[ 4,  5],
        [10, 11],
        [16, 17]])]

In [193]:
vsplit = np.vsplit(arr, 3)

In [206]:
for row in vsplit:
    print(*row)

[0 1 2 3 4 5]
[ 6  7  8  9 10 11]
[12 13 14 15 16 17]


In [223]:
arr = np.arange(10).reshape(2, 5)
arr

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [224]:
arr[::-1]

array([[5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4]])

In [225]:
np.flip(arr, axis=0)

array([[5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4]])

In [227]:
np.flip(arr, axis=1)

array([[4, 3, 2, 1, 0],
       [9, 8, 7, 6, 5]])

In [229]:
arr = np.array([[3,7,8,5,2],[10,11,1,0,15]])
arr

array([[ 3,  7,  8,  5,  2],
       [10, 11,  1,  0, 15]])

In [230]:
np.flip(arr)

array([[15,  0,  1, 11, 10],
       [ 2,  5,  8,  7,  3]])

In [231]:
np.flip(arr, axis=0)

array([[10, 11,  1,  0, 15],
       [ 3,  7,  8,  5,  2]])

In [232]:
np.flip(arr, axis=1)

array([[ 2,  5,  8,  7,  3],
       [15,  0,  1, 11, 10]])

In [233]:
arr = np.arange(10).reshape(2, 5)
arr

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [239]:
temp = np.flip(arr, axis=0)
np.flip(temp, axis=1)

array([[9, 8, 7, 6, 5],
       [4, 3, 2, 1, 0]])

In [241]:
temp = arr[:, ::-1]
temp[::-1, :]

array([[9, 8, 7, 6, 5],
       [4, 3, 2, 1, 0]])

In [250]:
a = np.arange(-5, 5)
np.where(a < 0, 0, a)

array([0, 0, 0, 0, 0, 0, 1, 2, 3, 4])

In [247]:
[0 if elem < 0 else elem for elem in a]

[0, 0, 0, 0, 0, 0, 1, 2, 3, 4]

In [248]:
a[a < 0] = 0
a

array([0, 0, 0, 0, 0, 0, 1, 2, 3, 4])

In [255]:
np.where(a < 0, 0, np.where(a > 2, 2, a))

array([0, 0, 0, 0, 0, 0, 1, 2, 2, 2])

### np.clip()
`np.clip(ary, min, max)`

interval: [min, max]

In [259]:
# Clip every value into the closed interval [0, 2]. Without `out=` the result
# is a new array and `a` itself is left unchanged.
np.clip(a, 0, 2)

array([0, 0, 0, 0, 0, 0, 1, 2, 2, 2])

In [261]:
np.clip(a, 0, 2, out=a) # inplace
a

array([0, 0, 0, 0, 0, 0, 1, 2, 2, 2])

###  array() vs. asarray() 
* array(): 기본값 copy=True -> 입력이 이미 ndarray여도 항상 새 복사본을 생성
* asarray(): 입력이 이미 (dtype이 같은) ndarray이면 복사 없이 그대로 반환, 리스트 등은 새 배열로 변환

In [262]:
a = [1, 2]
b = np.array([1, 2])
c = np.asarray(a) 

In [266]:
b is c

False

In [272]:
np.asarray(b) is b

True

### 단위행렬(항등행렬): Identity Matrix
* 대각성분 '1', 나머지 '0'

In [274]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [275]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Distributions

In [285]:
# Population 0..4: draw 3 random values with replacement, every value equally
# likely (uniform distribution).
np.random.choice(5, 3)      # 5 -> population is arange(5), 3 -> size
np.random.randint(0, 5, 3)  # same kind of draw; only this last expression is displayed

array([2, 4, 4])

In [299]:
# Non-uniform distributions
p = [0.1, 0.2, 0.2, 0.4, 0.1]  # probability of each element being drawn as a sample
np.random.choice(5, 3, p=p)

array([1, 3, 4])

In [308]:
x = np.arange(20)
x = x.reshape(10,-1)
y = np.arange(10)

In [309]:
from sklearn.model_selection import train_test_split

train_test_split(x,
                 y,
                 test_size=0.4)

[array([[14, 15],
        [ 4,  5],
        [ 6,  7],
        [18, 19],
        [16, 17],
        [ 0,  1]]),
 array([[12, 13],
        [10, 11],
        [ 8,  9],
        [ 2,  3]]),
 array([7, 2, 3, 9, 8, 0]),
 array([6, 5, 4, 1])]

In [326]:
train_test_split(x,
                 y,
                 test_size=0.3,
                 shuffle=True,
                 random_state=1004)

[array([[18, 19],
        [ 2,  3],
        [ 8,  9],
        [ 6,  7],
        [14, 15],
        [10, 11],
        [ 4,  5]]),
 array([[ 0,  1],
        [12, 13],
        [16, 17]]),
 array([9, 1, 4, 3, 7, 5, 2]),
 array([0, 6, 8])]

In [327]:
# Unpack the four arrays returned by train_test_split: the train/test parts of
# x followed by the train/test parts of y. train_size=0.7 asks for 70% of the
# rows in the training part; a fixed random_state makes the shuffle repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(x, 
                                                y, 
                                                train_size=0.7, 
                                                shuffle=True, 
                                                random_state=1004)

# Broadcasting
* NumPy의 원소별 연산은 원칙적으로 모양(shape)이 같은 배열끼리만 가능
* 어떤 조건일 때 모양이 다르더라도 배열간 연산이 가능할까? (뒤쪽 차원부터 차례로 비교)
    * 해당 차원의 크기가 1일 경우 (크기 1인 쪽이 상대 크기에 맞춰 확장됨)
    * 해당 차원의 크기가 서로 같을 경우 (차원의 짝이 맞을 경우)

In [329]:
a = np.array([1, 2, 3])
b = np.array([1, 2])
# Shapes (3,) and (2,) cannot be broadcast together, so the addition raises
# ValueError. The error is the point of this cell: catch it and print it so
# the notebook still runs from top to bottom.
try:
    a + b
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [331]:
np.arange(3) + 5

array([5, 6, 7])

In [335]:
np.arange(11, 20).reshape(3, -1) + np.arange(3)

array([[11, 13, 15],
       [14, 16, 18],
       [17, 19, 21]])

In [342]:
np.arange(3).reshape(3, -1) + np.arange(3)

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [344]:
np.arange(24).reshape(3,4,2) + np.arange(8).reshape(4, -1)

array([[[ 0,  2],
        [ 4,  6],
        [ 8, 10],
        [12, 14]],

       [[ 8, 10],
        [12, 14],
        [16, 18],
        [20, 22]],

       [[16, 18],
        [20, 22],
        [24, 26],
        [28, 30]]])

In [355]:
arr3d = np.arange(24).reshape(3, 4, -1)
arr2d = np.arange(8).reshape(4, -1)

arr3d + arr2d

array([[[ 0,  2],
        [ 4,  6],
        [ 8, 10],
        [12, 14]],

       [[ 8, 10],
        [12, 14],
        [16, 18],
        [20, 22]],

       [[16, 18],
        [20, 22],
        [24, 26],
        [28, 30]]])