# NumPy : Numerical Python
### 특징
* 다차원 배열(ndarray) 계산
* 대용량 배열을 효율적으로 처리
* Broadcasting 기능을 제공
    * 모양이 다른 배열들 간의 연산이 어떤 조건을 만족했을 때 가능해지도록 배열을 자동적으로 변환하는 것
* 명시적인 반복문 없이 배열 전체에 연산을 적용(벡터화) -> 매우 빠른 연산이 가능
    * 정렬, 원소 찾기, 집합 연산
    * 통계, 데이터 요약(기술 통계 등)
    * 병합, 관계 표현
    * 벡터 데이터 가공, 정제, 필터링, 변형과 같은 연산들을 할 수 있는 기능들을 제공
* 선형대수(Linear Algebra) 연산을 비롯해 난수 생성기, 푸리에 변환 등의 기능을 제공
* C언어 코드와 연결

### NumPy Arrays vs. Python Lists

In [55]:
import time
import numpy as np

SIZE_OF_VEC = 1000000

def python_time(size=None):
    """Time an element-wise subtraction of two pure-Python lists.

    Builds two lists ``[0, 1, ..., size - 1]`` and subtracts them element by
    element in a Python-level loop. List construction is part of the
    measured interval.

    Args:
        size: Number of elements in each list. When omitted, the
            module-level ``SIZE_OF_VEC`` is used.

    Returns:
        Elapsed time in seconds, as a float.
    """
    if size is None:
        size = SIZE_OF_VEC
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump if the system clock
    # is adjusted while the benchmark runs.
    start = time.perf_counter()
    lhs = list(range(size))
    rhs = list(range(size))
    # The result is discarded on purpose: only the cost of computing it matters.
    _ = [x - y for x, y in zip(lhs, rhs)]
    return time.perf_counter() - start

def numpy_time(size=None):
    """Time an element-wise subtraction of two NumPy arrays.

    Builds two arrays with ``np.arange(size)`` and subtracts them with a
    single vectorized expression. Array construction is part of the
    measured interval.

    Args:
        size: Number of elements in each array. When omitted, the
            module-level ``SIZE_OF_VEC`` is used.

    Returns:
        Elapsed time in seconds, as a float.
    """
    if size is None:
        size = SIZE_OF_VEC
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump if the system clock
    # is adjusted while the benchmark runs.
    start = time.perf_counter()
    lhs = np.arange(size)
    rhs = np.arange(size)
    # The result is discarded on purpose: only the cost of computing it matters.
    _ = lhs - rhs
    return time.perf_counter() - start


# Same-sized NumPy array and Python list, reused by the %time cells below
# that repeatedly double every element.
my_arr = np.arange(SIZE_OF_VEC)
my_list = list(range(SIZE_OF_VEC))


# Run each benchmark once; both return elapsed seconds.
py_time = python_time()
np_time = numpy_time()

# Printed as: <pure-Python seconds> <NumPy seconds>
print(py_time, np_time)

0.1497328281402588 0.007515907287597656


In [33]:
%time for _ in range(30): my_arr *= 2

CPU times: user 22.7 ms, sys: 1.09 ms, total: 23.8 ms
Wall time: 22.2 ms


In [30]:
%time for _ in range(30): my_list = [elem * 2 for elem in my_list]

CPU times: user 2.35 s, sys: 492 ms, total: 2.85 s
Wall time: 2.85 s


### 난수 생성
* randn: returns a sample (or samples) from the **standard normal** distribution

In [41]:
np.random.randn(2, 3) # 2 x 3 matrix

array([[ 0.53062191, -0.34006235,  0.78742941],
       [-1.4697668 , -0.93032811,  0.57913635]])

In [46]:
data = np.random.randn(2, 3)
data

array([[-2.1216786 , -0.0715139 , -0.40670422],
       [ 2.74001906, -0.45640613,  0.64175394]])

In [47]:
data * 10

array([[-21.21678601,  -0.71513905,  -4.06704217],
       [ 27.40019061,  -4.56406133,   6.41753939]])

In [48]:
data + data

array([[-4.2433572 , -0.14302781, -0.81340843],
       [ 5.48003812, -0.91281227,  1.28350788]])

### NumPy Methods & Attributes

In [78]:
print(data.shape, data.dtype, data.ndim)

(2, 3) float64 2


In [60]:
# Mixed ints and floats in one list.
data1 = [3, 1.5, 2, 0]
arr1 = np.array(data1)
arr1 # NumPy infers one common dtype from the data and converts every element (here the ints become floats)

array([3. , 1.5, 2. , 0. ])

In [61]:
data2 = [[1, 2, 3], [4, 5, 6]]
data2

[[1, 2, 3], [4, 5, 6]]

In [68]:
arr2 = np.array(data2)
arr2

array([[1, 2, 3],
       [4, 5, 6]])

In [76]:
np.zeros(2)

array([0., 0.])

In [80]:
np.zeros((2, 3)) # or np.zeros([2, 3]) 

array([[0., 0., 0.],
       [0., 0., 0.]])

In [85]:
d1 = np.array([1, 2, 3], dtype=np.float64) # dtype gives the data a meaning (metadata == data about the data)
d2 = np.array([1, 2, 3], dtype=np.int32)

# Same input values, different element types.
print(d1.dtype, d2.dtype)

float64 int32


In [91]:
# Type conversion (casting): astype returns a new array with the requested
# dtype; the original array keeps its own dtype.
arr = np.array([1, 2, 3])
float_arr = arr.astype(np.float64)

print(arr.dtype, float_arr.dtype)

int64 float64


In [93]:
# Type conversion (explicit casting) with loss of precision:
# float -> int32 drops the fractional part (2.8 -> 2, 3.5 -> 3); it does not round.
arr = np.array([1.2, 2.8, 3.5])
int_arr = arr.astype(np.int32)

print(arr, int_arr)

[1.2 2.8 3.5] [1 2 3]


`< : Little Endian : LSB First (right to left | bottom to top)` 

`> : Big Endian : MSB First (left to right | top to bottom)`

In [103]:
arr = np.array('1.5') # 0-d string array: array('1.5', dtype='<U3'); '<U3' = little-endian Unicode string of length 3
arr.astype(np.float64) 

array(1.5)

### NumPy Operations
* Supports arithmetic operations between an array and a scalar value

In [108]:
arr = np.array([[1,2,3],[4,5,6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

#### Division

In [109]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

#### Comparison

In [110]:
arr2 = np.array([[1, 0, 3], [6, 2, 9]])
arr > arr2

array([[False,  True, False],
       [False,  True, False]])

### Broadcasting

In [127]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [128]:
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

#### Reference

In [129]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [130]:
# Reference
arr_slice[1] = 999
arr_slice

array([ 12, 999,  12])

In [131]:
arr

array([  0,   1,   2,   3,   4,  12, 999,  12,   8,   9])

In [132]:
arr_slice[:] = 777
arr_slice

array([777, 777, 777])

In [133]:
arr

array([  0,   1,   2,   3,   4, 777, 777, 777,   8,   9])

In [138]:
arr_slice2 = arr[5:8].copy() # copy() allocates new memory and copies the slice into it
arr_slice2[0] = 333

# Only the copy changes; arr_slice (a view) and arr are unaffected.
print(arr_slice2, arr_slice, arr)

[333 777 777] [777 777 777] [  0   1   2   3   4 777 777 777   8   9]


In [122]:
arr_slice = np.array(arr[5:8])
arr_slice[1] = 12
arr_slice

array([12, 12, 12])

In [139]:
arr2 = np.array([[1, 2], 
                 [3, 4], 
                 [5, 6]])

In [147]:
# Index a 1-D row out of the 2-D array
print(arr2[2])

# Index a single value inside that row; chained indexing arr2[2][0] and
# tuple indexing arr2[2, 0] give the same element
print(arr2[2][0], arr2[2, 0])


[5 6]
5 5


### reshape
```python
numpy.reshape(a, newshape, order='C')
```

In [227]:
arr = np.array(list(range(1, 25)))
arr.shape

(24,)

In [228]:
arr3d = arr.reshape(2, 3, 4)
arr3d

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [183]:
arr3d[0]

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [222]:
# Work on an independent copy of the first 3x4 block.
old_values = arr3d[0].copy()
# Scalar assignment to a row fills the whole row with 100.
old_values[0] = 100 
arr3d[0] = old_values # values are copied into arr3d[0]; arr3d does not keep a reference to old_values

In [221]:
arr3d

array([[[100, 100, 100, 100],
        [  5,   6,   7,   8],
        [  9,  10,  11,  12]],

       [[ 13,  14,  15,  16],
        [ 17,  18,  19,  20],
        [ 21,  22,  23,  24]]])

In [185]:
arr3d[0] = 99
arr3d

array([[[99, 99, 99, 99],
        [99, 99, 99, 99],
        [99, 99, 99, 99]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [186]:
arr3d[0] = old_values
arr3d[0]

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [189]:
arr3d[-1][-2][-2]

19

In [206]:
arr[1:10]

array([[ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24]])

In [207]:
arr = arr.reshape(6, 4)
arr

array([[100,   2,   3,   4],
       [  5,   6,   7,   8],
       [  9,  10,  11,  12],
       [ 13,  14,  15,  16],
       [ 17,  18,  19,  20],
       [ 21,  22,  23,  24]])

In [208]:
arr[4]

array([17, 18, 19, 20])

In [209]:
arr2 = arr

In [210]:
arr2

array([[100,   2,   3,   4],
       [  5,   6,   7,   8],
       [  9,  10,  11,  12],
       [ 13,  14,  15,  16],
       [ 17,  18,  19,  20],
       [ 21,  22,  23,  24]])

In [224]:
a = [1, 3, 5]
a = a[::-1]
a

[5, 3, 1]

In [225]:
for i in reversed(a):
    print(i)

1
3
5


In [226]:
for i in range(len(a) // 2):
    a[i], a[len(a) - i - 1] = a[len(a) - i - 1], a[i]

a

[1, 3, 5]

In [229]:
arr3d

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [230]:
arr3d[0][:2]

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [240]:
arr3d[1][:1, 2:]

array([[15, 16]])