### numpy 모듈의 호출

In [2]:
import numpy as np

### array의 생성

In [3]:
a = [1, 2, 3, 4, 5]
b = [5, 4, 3, 2, 1]

In [4]:
a = np.array(a, int)
a

array([1, 2, 3, 4, 5])

In [5]:
test_array = np.array(["1", "4", 5, 8], float)
test_array

array([1., 4., 5., 8.])

In [7]:
type(test_array)

numpy.ndarray

In [8]:
type(test_array[3])

numpy.float64

In [9]:
a = [1, 2, 3, 4, 5]
b = [5, 4, 3, 2, 1]
a[0] is b[-1]

True

In [10]:
a = np.array(a)
b = np.array(b)
a[0] is b[-1]

False

In [11]:
test_array.dtype

dtype('float64')

### data shape

In [24]:
matrix = [[1, 2, 3, 4], [4, 5, 6, 7], [4, 5, 6, 7]]
tensor = [
    [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
    [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
    [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
]

In [25]:
np.array(matrix).shape

(3, 4)

In [26]:
np.array(tensor).shape

(3, 3, 4)

In [27]:
np.array(matrix, int).ndim

2

In [28]:
np.array(tensor, int).ndim

3

In [29]:
np.array(matrix, int).size

12

In [31]:
np.array(tensor, int).size

36

### numpy dtype

In [32]:
a = np.array([[1, 2, 3], [4.5, 5, 6]], dtype=np.int8)

In [33]:
a.dtype

dtype('int8')

In [36]:
np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.float32).nbytes

24

In [39]:
np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.int32).nbytes

24

In [37]:
np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.float64).nbytes

48

In [38]:
np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.int8).nbytes

6

### reshape
* array의 shape(크기)를 변경함, element의 개수는 동일

In [3]:
test_matrix = [[1, 2, 3, 4], [1, 2, 5, 8]]
np.array(test_matrix).shape

(2, 4)

In [4]:
np.array(test_matrix).reshape(8,)

array([1, 2, 3, 4, 1, 2, 5, 8])

In [5]:
np.array(test_matrix).reshape(8,).shape

(8,)

* -1 : 전체 size를 기반으로 해당 차원(row 또는 column)의 개수를 자동으로 선정

In [6]:
np.array(test_matrix).reshape(2,4).shape

(2, 4)

In [8]:
np.array(test_matrix).reshape(-1,2).shape    # element가 8개인 matrix에 column이 2이므로 row 자동으로 4로 설정

(4, 2)

### flatten
* 다차원 array를 1차원 array로 변환

In [10]:
test_matrix = [[[1, 2, 3, 4], [1, 2, 5, 8]], [[1, 2, 3, 4], [1, 2, 5, 8]]]
np.array(test_matrix)

array([[[1, 2, 3, 4],
        [1, 2, 5, 8]],

       [[1, 2, 3, 4],
        [1, 2, 5, 8]]])

In [16]:
test_matrix = np.array(test_matrix)
test_matrix.shape

(2, 2, 4)

In [17]:
np.array(test_matrix).flatten()

array([1, 2, 3, 4, 1, 2, 5, 8, 1, 2, 3, 4, 1, 2, 5, 8])

In [19]:
np.array(test_matrix).flatten().shape

(16,)

### indexing

In [20]:
test_example = np.array([[1, 2, 3], [4.5, 5, 6]], int)
test_example

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
test_example[0][2]

3

In [22]:
test_example[0, 2]

3

In [23]:
test_example[0, 0] = 10
test_example

array([[10,  2,  3],
       [ 4,  5,  6]])

In [24]:
test_example[1, 2] = 5
test_example[1, 2]

5

In [25]:
test_example

array([[10,  2,  3],
       [ 4,  5,  5]])

### slicing

In [33]:
a = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [1, 2, 5, 8, 9], [1, 2, 5, 8, 9]], int)
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [ 1,  2,  5,  8,  9],
       [ 1,  2,  5,  8,  9]])

In [34]:
a[:, 2:] # 전체 Row의 2열 이상

array([[ 3,  4,  5],
       [ 8,  9, 10],
       [ 5,  8,  9],
       [ 5,  8,  9]])

In [35]:
a[1, 1:3] # 1 Row의 1열 ~ 2열

array([7, 8])

In [36]:
a[1:3] # 1 Row ~ 2 Row의 전체

array([[ 6,  7,  8,  9, 10],
       [ 1,  2,  5,  8,  9]])

In [37]:
a[1] # 1차원 결과

array([ 6,  7,  8,  9, 10])

In [39]:
a[1:2] # 2차원 결과

array([[ 6,  7,  8,  9, 10]])

In [41]:
b = np.arange(100).reshape(10, 10)
b

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [42]:
b[:, -1]

array([ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99])

## creation function
### arange : 범위를 지정하여, 해당 값들로 이루어진 array를 생성하는 함수

In [43]:
np.arange(30)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [44]:
np.arange(0, 5, 0.5)

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

In [45]:
np.arange(30).reshape(5,6)

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

### ones, zeros and empty

In [46]:
np.zeros(shape=(10,), dtype=np.int8)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

In [47]:
np.zeros((2, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [48]:
np.ones(shape=(10,), dtype=np.int8)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)

In [49]:
np.ones((2, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [57]:
np.empty(shape=(10,), dtype=np.int8) # 메모리 공간만 할당할뿐 초기화하진 않음 -> 기존 메모리에 있던 값들이 출력되곤 함

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

In [51]:
np.empty((10, 5))

array([[-0.00000000e+000, -0.00000000e+000,  1.23516411e-322,
         0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000,
         0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000,
         0.00000000e+000,  2.12199579e-314],
       [ 0.00000000e+000, -0.00000000e+000, -1.72723382e-077,
         2.13251900e-314,  2.31903296e-314],
       [ 2.13126702e-314,  2.13254681e-314,  2.13126698e-314,
         2.27040520e-314,  2.13254684e-314],
       [ 2.26458811e-314, -0.00000000e+000, -0.00000000e+000,
         2.27288234e-314,  2.27290953e-314],
       [ 2.13076324e-314,  2.26778348e-314,  2.27290957e-314,
         2.27290961e-314, -0.00000000e+000],
       [-2.68156175e+154, -3.95252517e-323, -0.00000000e+000,
         2.44190117e-314,  2.26847631e-314],
       [ 2.31989054e-314,  2.44137854e-314,  2.44137854e-314,
         2.31988513e-314,  2.31988513e-314],
       [ 2.26832140e-314, -0.00000000

### identity
* 단위 행렬을 생성

In [58]:
np.identity(n=3, dtype=np.int8)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=int8)

In [59]:
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### eye
* 대각선이 1인 행렬, k값으로 대각선의 시작 index를 변경할 수 있음

In [60]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [61]:
np.eye(3, 5, k=2)

array([[0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [62]:
np.eye(N=3, M=5, dtype=np.int8)

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0]], dtype=int8)

### diag
* 행렬의 대각 성분 값을 추출, k값으로 추출할 대각선의 시작 index를 변경할 수 있음

In [66]:
matrix = np.arange(9).reshape(3, 3)
matrix

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [67]:
np.diag(matrix)

array([0, 4, 8])

In [68]:
np.diag(matrix, k=1)

array([1, 5])

### random sampling
* 데이터 분포에 따른 sampling으로 array를 생성

In [72]:
np.random.uniform(0, 1, 10).reshape(2, 5)  # 균등분포

array([[0.02613698, 0.87925931, 0.29115614, 0.18625349, 0.7766089 ],
       [0.58684929, 0.91426734, 0.57201986, 0.09006587, 0.23026954]])

In [74]:
np.random.normal(0, 1, 10).reshape(2, 5)  # 정규분포

array([[ 0.8292201 ,  2.25667828,  0.32310979,  0.85418069, -0.0769385 ],
       [ 1.04895761,  0.4707688 ,  0.33149999, -0.222488  , -0.74394403]])

## operation function

In [75]:
test_array = np.arange(1, 13).reshape(3, 4)
test_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [76]:
test_array.sum(axis=1)

array([10, 26, 42])

In [77]:
test_array.sum(axis=0)

array([15, 18, 21, 24])

In [78]:
third_order_tensor = np.array([test_array, test_array, test_array])
third_order_tensor

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]]])

In [79]:
third_order_tensor.sum(axis=2)

array([[10, 26, 42],
       [10, 26, 42],
       [10, 26, 42]])

In [80]:
third_order_tensor.sum(axis=0)

array([[ 3,  6,  9, 12],
       [15, 18, 21, 24],
       [27, 30, 33, 36]])

### concatenate
* numpy array를 합치는 함수

In [81]:
a = np.array([1, 2, 3])
b = np.array([2, 3, 4])
np.vstack((a,b))

array([[1, 2, 3],
       [2, 3, 4]])

In [82]:
a = np.array([[1],[2],[3]])
b = np.array([[2],[3],[4]])
np.hstack((a,b))

array([[1, 2],
       [2, 3],
       [3, 4]])

In [83]:
a = np.array([[1, 2, 3]])
b = np.array([[2, 3, 4]])
np.concatenate((a, b), axis=0)

array([[1, 2, 3],
       [2, 3, 4]])

In [84]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
np.concatenate((a, b.T), axis=1)

array([[1, 2, 5],
       [3, 4, 6]])

In [None]:
def scalar_vector_product(scalar, vector):
    """Return a new list holding ``scalar * element`` for each element of ``vector``.

    The explicit ``for`` loop with ``append`` is intentional: this function is
    timed with ``%timeit`` as the plain-loop baseline, so it is not collapsed
    into a list comprehension.

    Args:
        scalar: The multiplier applied to every element.
        vector: Any iterable of values that support ``scalar * element``.

    Returns:
        A list of the products, in iteration order.
    """
    products = []
    for element in vector:
        product = scalar * element
        products.append(product)
    return products

# Number of elements used by the benchmarks in this section.
# NOTE(review): "iternation_max" looks like a typo for "iteration_max"; the name is
# kept because the later %timeit cells refer to it.
iternation_max = 100000000

# Benchmark inputs: the integers 0..iternation_max-1 materialised as a Python
# list, and the multiplier applied to each of them.
vector = list(range(iternation_max))
scalar = 2

%timeit scalar_vector_product(scalar, vector) # performance using a for loop

In [92]:
%timeit [scalar * value for value in range(iternation_max)] # list comprehension을 이용한 성능

4.61 s ± 45.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [93]:
%timeit np.arange(iternation_max) * scalar # numpy를 이용한 성능

143 ms ± 3.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
