# Creating NumPy arrays

## From a list

In [2]:
import numpy as np

In [279]:
data1 = [6, 7.5, 8, 0, 1]   # a list
arr1 = np.array(data1, float)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [15]:
print(type(arr1))
print(arr1)

<class 'numpy.ndarray'>
[6.  7.5 8.  0.  1. ]


In [18]:
print(arr1.dtype)
# dtype returns the data type of the array

print(arr1.shape)
# note the shape is a tuple of single element.
# you may think of it as a 5x1 matrix

float64
(5,)


In [19]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
print(arr2)

[[1 2 3 4]
 [5 6 7 8]]


In [20]:
print(arr2.dtype)
print(arr2.size)
print(np.size(arr2))
print(arr2.ndim)
print(arr2.shape)

int32
8
8
2
(2, 4)


In [21]:
arr3 = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr3.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [22]:
arr4 = np.array([0, 1, 2, 3, 4, 5]).reshape((2, 3))
arr4

array([[0, 1, 2],
       [3, 4, 5]])

## Special Arrays

In [20]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [21]:
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [22]:
np.empty((2,3,2))

array([[[1.14963019e-311, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 1.38073109e-071]],

       [[3.60436775e+175, 6.94087641e+169],
        [8.76175557e+169, 5.15361695e-062],
        [1.82794975e+184, 2.00467035e-076]]])

## Arrays of Fixed Intervals

In [23]:
np.arange(8)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [24]:
np.arange(0, 8, 2)

array([0, 2, 4, 6])

In [25]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [26]:
np.linspace(0, 10, num = 5, endpoint = True, dtype = float)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [27]:
np.linspace(0, 10, num = 5, endpoint = False, dtype = int)

array([0, 2, 4, 6, 8])

## Arrays of Random Numbers

In [28]:
np.random.rand(5)

array([0.56765599, 0.05515259, 0.83561558, 0.38032477, 0.1806717 ])

In [29]:
np.random.rand(4, 2)

array([[0.43567847, 0.16690481],
       [0.59766532, 0.3384431 ],
       [0.80420291, 0.09959448],
       [0.8130855 , 0.6401008 ]])

In [30]:
np.random.seed(40)
np.random.rand(3)

array([0.40768703, 0.05536604, 0.78853488])

In [31]:
np.random.rand(3)

array([0.28730518, 0.45035059, 0.30391231])

In [32]:
np.random.seed(40)
np.random.rand(4)

array([0.40768703, 0.05536604, 0.78853488, 0.28730518])

In [33]:
for _ in range(5):
    np.random.seed(100)
    print(np.random.rand(1))

[0.54340494]
[0.54340494]
[0.54340494]
[0.54340494]
[0.54340494]


In [34]:
np.random.seed(100)
for _ in range(5):
    print(np.random.rand(1))

[0.54340494]
[0.27836939]
[0.42451759]
[0.84477613]
[0.00471886]


In [35]:
data = np.random.randn(5, 3)
data

array([[ 0.35467445, -0.78606433, -0.2318722 ],
       [ 0.20797568,  0.93580797,  0.17957831],
       [-0.5771615 , -0.53337271, -0.22540212],
       [-0.31491934,  0.42299678, -0.43882681],
       [-0.50141346, -1.65870284, -1.0075773 ]])

In [36]:
# to generate a random sample with mean of 80 and std of 2.5
mu = 80
sigma = 2.5
sample = mu + sigma * np.random.rand(10)
sample

array([80.93208012, 80.01422127, 80.63106588, 81.98915627, 80.03813743,
       81.49710844, 81.50951135, 80.26286921, 80.95485861, 80.09119014])

In [37]:
# alternatively, call normal method
np.random.normal(mu, sigma, 10)   # three arguments are: mean, standard deviation, and size

array([78.72816392, 83.91794696, 81.24256179, 82.88968084, 84.59652919,
       83.83931654, 80.63749432, 77.88960687, 77.54264136, 79.23475542])

In [105]:
np.random.randint(0, 5, (2, 4))

array([[3, 1, 0, 4],
       [0, 4, 4, 0]])

In [28]:
np.random.choice(range(10, 100), size = 5, replace=True)

array([79, 24, 78, 35, 25])

In [276]:
# # simulate flipping a coin 10 times, tested 1000 times.
np.random.binomial(10, 0.5, 1000)

array([5, 5, 5, 5, 3, 6, 4, 4, 2, 5, 4, 5, 4, 6, 3, 4, 5, 5, 4, 2, 4, 5,
       3, 3, 6, 4, 7, 5, 3, 5, 6, 7, 5, 2, 5, 4, 5, 5, 4, 6, 5, 5, 5, 4,
       5, 5, 6, 4, 4, 5, 5, 6, 3, 4, 4, 5, 4, 3, 9, 5, 7, 4, 6, 5, 6, 3,
       6, 9, 4, 3, 6, 3, 5, 4, 6, 6, 3, 6, 4, 5, 4, 4, 3, 6, 6, 5, 5, 4,
       8, 5, 8, 6, 7, 5, 5, 5, 5, 6, 3, 6, 3, 5, 4, 7, 3, 7, 4, 3, 6, 7,
       7, 6, 6, 7, 5, 6, 7, 7, 4, 6, 5, 4, 5, 2, 3, 4, 4, 4, 5, 3, 6, 4,
       3, 4, 7, 3, 7, 5, 3, 3, 6, 7, 3, 4, 6, 6, 6, 9, 5, 4, 6, 2, 6, 2,
       6, 6, 5, 8, 4, 6, 4, 5, 5, 5, 5, 5, 6, 3, 6, 4, 4, 5, 5, 4, 5, 5,
       4, 6, 6, 7, 6, 4, 7, 7, 5, 4, 7, 4, 9, 6, 4, 4, 7, 4, 3, 4, 5, 3,
       5, 2, 6, 6, 3, 6, 4, 3, 4, 5, 5, 6, 3, 3, 3, 8, 6, 5, 6, 5, 3, 6,
       5, 5, 5, 7, 6, 6, 2, 3, 5, 8, 6, 4, 7, 8, 3, 7, 3, 3, 4, 7, 3, 6,
       5, 6, 4, 7, 6, 7, 7, 6, 6, 5, 3, 4, 8, 5, 2, 4, 3, 6, 4, 7, 4, 3,
       7, 5, 5, 3, 3, 5, 6, 6, 4, 4, 7, 5, 4, 6, 3, 5, 4, 2, 5, 3, 6, 5,
       6, 4, 5, 5, 6, 3, 5, 3, 6, 4, 5, 4, 4, 2, 4,

## `NaN`

In [3]:
# NaN: means not a number
# it indicates missing value

print(np.nan)
print(type(np.nan))
# note the type of NaN is float.

nan
<class 'float'>


In [5]:
# NaN is a trouble maker
np.nan + 12

nan

In [6]:
# np.isnan: to find whether there is NaN
np.isnan([12, 0.9, 5, np.nan])

array([False, False, False,  True])

In [7]:
# np.sum: to find out how many NaNs

np.isnan([12, 0.9, 5, np.nan]).sum()

# True value is equivalent to 1
# False value is equivalent to 0

1

In [43]:
# np.argwhere: to find out the indices of non-zero elements

np.argwhere(np.arange(4).reshape((2,2)))

array([[0, 1],
       [1, 0],
       [1, 1]], dtype=int64)

In [44]:
np.argwhere(np.isnan([12, 0.9, 5, np.nan]))

array([[3]], dtype=int64)

In [21]:
# np.any: returns True if there is any True value in the array

np.any([1, 0, 0, 0])

# this is equivalent to testing
# np.any([True, False, False, False])

True

In [20]:
# we use np.any to determine whether there is any NaN value in the row/column
np.isnan([12, 0.9, 5, np.nan]).any()

True

In [16]:
# np.all: returns True if all the array elements are True
np.all([1, 1, 1])

# this is equivalent to testing
# np.all([True, True, True])

True

In [17]:
np.all([1, 1, 0])

False

In [19]:
# we use np.all to determine whether the entire row/column is empty
np.isnan([np.nan, np.nan, np.nan]).all()

True

# Array indexing & slicing

In [30]:
arr5 = np.arange(30).reshape(5,6)
arr5

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

In [31]:
arr5[0]
# retrieves the first row, result is still an array, one dimensional.

array([0, 1, 2, 3, 4, 5])

In [32]:
arr5[0:1]
# also retrieves the first row, but result is two dimensional

array([[0, 1, 2, 3, 4, 5]])

In [34]:
arr5[1, 1]
# retrieves the value from second row and second column
# the index should follow the axes 0, 1

7

In [35]:
arr5[1:2, 1:2]
# also retrieves value from the first row, second column, but result is a matrix.

array([[7]])

In [36]:
arr5[:, 1:2]
# retrieves all row values v
# in the second column

array([[ 1],
       [ 7],
       [13],
       [19],
       [25]])

In [45]:
arr5[[1, 3]]

array([[ 6,  7,  8,  9, 10, 11],
       [18, 19, 20, 21, 22, 23]])

In [55]:
arr5[(0, 2, 4),(1, 2, 3)]
# if two sequences are given, the length must match.

array([ 1, 14, 27])

In [58]:
arr5[2] > 15

array([False, False, False, False,  True,  True])

In [59]:
arr5[3, arr5[2] > 15]

array([22, 23])

In [66]:
mask = np.array([1, 1, 0, 0, 1], dtype=np.bool)
mask

array([ True,  True, False, False,  True])

In [67]:
arr5[mask, 5]

array([ 5, 11, 29])

# Array operations

## Basic Operations

In [68]:
arr6 = np.array([[1, 2, 3],[4, 5, 6]])
arr6

array([[1, 2, 3],
       [4, 5, 6]])

In [69]:
arr6 * arr6

array([[ 1,  4,  9],
       [16, 25, 36]])

In [70]:
arr6 - arr6

array([[0, 0, 0],
       [0, 0, 0]])

In [71]:
1 / arr6

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [72]:
arr6 * 0.5

array([[0.5, 1. , 1.5],
       [2. , 2.5, 3. ]])

In [114]:
np.append(arr6, [[-100, -999, 0]], axis=0)
# the value to add must have the same shape if axis is specified.
# otherwise, the array is flattened before use.

array([[   1,    2,    3],
       [   4,    5,    6],
       [-100, -999,    0]])

In [91]:
arr6.tolist()

[[1, 2, 3], [4, 5, 6]]

In [99]:
arr6.astype(np.string_)

array([[b'1', b'2', b'3'],
       [b'4', b'5', b'6']], dtype='|S11')

In [73]:
zero_arr = np.zeros(5, int)
zero_arr

array([0, 0, 0, 0, 0])

In [101]:
zero_arr.fill(4)
zero_arr

array([4, 4, 4, 4, 4])

In [75]:
zero_arr[0] = 5.8
zero_arr   # since the datatype is int, 5.8 will be truncated

array([5, 4, 4, 4, 4])

## Transposing Arrays

In [76]:
arr7 = np.arange(15).reshape((3, 5))
arr7

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [77]:
arr7.T   
# transpose returns a view without copying
# note that transpose does not work on a one-dimensional array

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [78]:
arr7  
# so the array stays unchanged

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [79]:
arr8 = np.random.randn(6, 3)
np.dot(arr8.T, arr8)

array([[ 4.01031118,  0.55498018, -0.91171373],
       [ 0.55498018,  1.56922107,  0.36407164],
       [-0.91171373,  0.36407164,  2.86606973]])

## Mathematical and Statistical Methods

In [80]:
arr9 = np.arange(10)
print(arr9)

[0 1 2 3 4 5 6 7 8 9]


In [81]:
np.sqrt(arr9)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [82]:
np.exp(arr9)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [106]:
x, y = np.random.randn(8), np.random.randn(8)
print(x)
print(y)

[-0.18161764 -0.23743719 -0.85902543 -1.79471315 -0.79147502 -0.38578128
  0.49049107 -0.35733781]
[ 0.75863701  0.3422803   0.37755075 -0.13551252 -1.63023107  0.02406023
  1.18129151 -2.15994325]


In [108]:
np.maximum(x, y)

array([ 0.75863701,  0.3422803 ,  0.37755075, -0.13551252, -0.79147502,
        0.02406023,  1.18129151, -0.35733781])

In [86]:
arr10 = np.arange(15).reshape((3, 5))
arr10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [90]:
print(arr10.mean())
print(arr10.sum())
print(arr10.std())

print(arr10.mean(axis=1))
# taking the average along axis 1
print(arr10.sum(axis=0))
# taking the sum along axis 0

7.0
105
4.320493798938574
[ 2.  7. 12.]
[15 18 21 24 27]


# File I/O

In [234]:
values1 = np.random.random((10, 5))
np.savetxt('../data_out/nparray.txt', values1)

In [235]:
values2 = np.loadtxt('../data_in/nparray.txt', dtype = str)
print(values2)

[['8.596048874892303626e-01' '4.397795212767580120e-01'
  '5.604446606574813305e-01' '8.174052622969787407e-01'
  '8.900629672033200102e-02']
 ['7.014877271171647832e-01' '3.855721221192063286e-01'
  '1.742254466209747221e-02' '5.096687040757574838e-02'
  '4.350717385994705699e-01']
 ['9.768438671574116716e-01' '9.378330928207601058e-02'
  '5.515195019569686385e-01' '6.431031429474873828e-01'
  '4.402546325769717273e-01']
 ['7.888738215073869409e-01' '7.044575392805060288e-01'
  '7.204583621931595072e-01' '4.967767328238511526e-01'
  '7.335955750517419993e-01']
 ['8.745325095997327214e-01' '5.598994154123709821e-01'
  '6.804146773400364667e-01' '3.684308474776365472e-01'
  '5.911287959179065687e-01']
 ['4.790627436827468166e-01' '4.504165801405808356e-01'
  '2.465452945466365398e-01' '7.866969077463757953e-01'
  '4.405400007284641095e-01']
 ['7.001220508861707437e-01' '8.210630043828290869e-01'
  '3.717355661842847159e-01' '9.525241379773723427e-01'
  '9.754348212777587124e-01']
 ['4.2