# Learning the NumPy Library

In [2]:
import numpy as np
np.__version__

'1.23.3'

## Arrays

In [16]:
# integer array
print(np.array([2, 4, 6, 8, 10]))

# upcast array (to float32)
print(np.array([2, np.sqrt(2), 17, 14.9]))

# arrays have a fixed type, inferred from values or set explicityly
print(np.array([2, np.sqrt(2), 17, 14.9], dtype='float64'))

# multidimensional
print(np.array([range(i, i + 4) for i in [1, 2, 3]]))

[ 2  4  6  8 10]
[ 2.          1.41421356 17.         14.9       ]
[ 2.          1.41421356 17.         14.9       ]
[[1 2 3 4]
 [2 3 4 5]
 [3 4 5 6]]


## Built-in Arrays

In [38]:
print("array of 0's, length 10:", np.zeros(10, dtype=int), end="\n\n")
print("2x3 array of 1's:", np.ones((2, 3), dtype=float), end="\n\n")
print("array of values [0, 30), multiples of 3:", np.arange(0, 30, 3), end="\n\n")
print("array of four values [0, 2], even spaced:", np.linspace(0, 2, 4), end="\n\n")
print("random 3x3 array, values [0, 1):", np.random.random((3, 3)), end="\n\n")
print("random 3x3 array of normally distributed values:", np.random.normal(0, 1, (3, 3)), end="\n\n")
print("random 3x3 array of integer values [0, 10):", np.random.randint(0, 10, (3,3)), end="\n\n")
print("4x4 identity matrix:", np.eye(4), end="\n\n")
print("array from [0, 10):", np.arange(10))

array of 0's, length 10: [0 0 0 0 0 0 0 0 0 0]

2x3 array of 1's: [[1. 1. 1.]
 [1. 1. 1.]]

array of values [0, 30), multiples of 3: [ 0  3  6  9 12 15 18 21 24 27]

array of four values [0, 2], even spaced: [0.         0.66666667 1.33333333 2.        ]

random 3x3 array, values [0, 1): [[0.70388858 0.10022689 0.91948261]
 [0.7142413  0.99884701 0.1494483 ]
 [0.86812606 0.16249293 0.61555956]]

random 3x3 array of normally distributed values: [[ 0.05564534  0.29823817  1.3263859 ]
 [-0.69456786 -0.14963454 -0.43515355]
 [ 1.84926373  0.67229476  0.40746184]]

random 3x3 array of integer values [0, 10): [[9 7 7]
 [5 1 2]
 [2 8 1]]

4x4 identity matrix: [[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

array from [0, 10): [0 1 2 3 4 5 6 7 8 9]


## Array Attributes

In [36]:
np.random.seed(0) # seed for reproducibility

x1 = np.random.randint(10, size=5)
x2 = np.random.randint(10, size=(5, 5))
x3 = np.random.randint(10, size=(5, 5, 5))

# slicing is [start:end:increment]
print("access index 0 of x1:", x1[0])
print("access last value of x1:", x1[-1])
print("access [0][3] value of x2:", x2[0, 3])
print("access slice [2][3:5] of x2:", x2[2, 3:5])

print()

print("x3 dimension:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
print("x3 dtype:", x3.dtype)
print(f"{x3.nbytes} x {x3.size} = {x3.nbytes}")


## TODO
# we can get copies of arrays with .copy()
# we can reshape arrays with .reshape()
# we can concatenate arrays with np.concatenate()
# we can stack arrays with np.hstack() and np.vstack()
# we can split arrays with np.split(), np.hsplit(), and np.vsplit()

access index 0 of x1: 5
access last value of x1: 7
access [0][3] value of x2: 2
access slice [2][3:5] of x2: [8 1]

x3 dimension: 3
x3 shape: (5, 5, 5)
x3 size: 125
x3 dtype: int64
1000 x 125 = 1000


## UFuncs (Universal Functions)

In [34]:
## NumPy implements ufuncs, which are a faster, vectorized verson of standard Python operations

x = np.array([0, 1, 2, 3])
y = np.array([100, 90, 80, 70])

print("add:", x + y)   # same thing as np.add(x, y)
print("subtract:", y - x)   # same thing as np.subtract(x, y)
print("negative:", -y)   # unary operation: same thing as np.negative(y)
print("multiply:", x * y)   # same thing as np.multiply(x, y)
print("divide:", x / y)   # same thing as np.divide(x, y)
print("floor division:", x // y)   # same thing as np.floor_divide(x, y)
print("exponent:", y ** x)   # same thing as np.power(y, x). related: np.exp(x), np.exp2(x)
print("mod:", x % y)   # same thing as np.mod(x, y)
print("absolute value:", abs(-x))   # same thing as np.abs(-x) or np.absolute(-x)

## NumPy also has functions for np.sin(), np.cos(), np.tan()
##     also np.arcsin(), np.arccos(), np.arctan()
## NumPy also has functions for logs: np.log(x), np.log2(x), np.log10(x))

print("multiplication, reduced:", np.multiply.reduce(x))   # multiplies all elements of x together, until it is one value
print("addition, accumulated:", np.add.accumulate(y))   # adds all elements together, but holds onto previous values

# we can also use boolean operations
print("x < y:", x < y)   # np.less(), np.less_equal()
print("x >= y:", x >= y)   # np.greater_equal(), np.greater
print("x != y:", x != y)   # np.not_equal()
print("x * 2 == y / 100:", (x * 2) == (y / 100))   #np.equal()

# counting boolean arrays
print("how many x < y:", np.sum(x < y))   # same as np.count_nonzero(x<y)

## more operator ufuncs
# &, np.bitwise_and()
# ^, np.bitwise_xor()
# ~, np.bitwise_not()
# |, np.bitwise_or

## masking can also be helpful
print("values in y, >= 100:", y[y>=100])

add: [100  91  82  73]
subtract: [100  89  78  67]
negative: [-100  -90  -80  -70]
multiply: [  0  90 160 210]
divide: [0.         0.01111111 0.025      0.04285714]
floor division: [0 0 0 0]
exponent: [     1     90   6400 343000]
mod: [0 1 2 3]
absolute value: [0 1 2 3]
multiplication, reduced: 0
addition, accumulated: [100 190 270 340]
x < y: [ True  True  True  True]
x >= y: [False False False False]
x != y: [ True  True  True  True]
x * 2 == y / 100: [False False False False]
how many x < y: 4
values in y, >= 100: [100]


In [None]:
## Sometimes, NP operations are not aliased!
# np.sum() is not the same as sum()
# np.max() is not the same as max()
# np.min() is not the same as min()
## these operations can be done on the object, for example object.min() versus np.min(object)

## for multi-dimensional arrays, we can specify columns with axis=[column number]

## function --> ## NaN safe version --> definition
# np.sum	np.nansum	Compute sum of elements
# np.prod	np.nanprod	Compute product of elements
# np.mean	np.nanmean	Compute mean of elements
# np.std	np.nanstd	Compute standard deviation
# np.var	np.nanvar	Compute variance
# np.min	np.nanmin	Find minimum value
# np.max	np.nanmax	Find maximum value
# np.argmin	np.nanargmin	Find index of minimum value
# np.argmax	np.nanargmax	Find index of maximum value
# np.median	np.nanmedian	Compute median of elements
# np.percentile	np.nanpercentile	Compute rank-based statistics of elements
# np.any	N/A	Evaluate whether any elements are true
# np.all	N/A	Evaluate whether all elements are true

## Broadcasting

In [24]:
## broadcasting changes the shape of data to fit certain operations

## for example:
a = np.ones(3)
b = np.ones(3)[:, np.newaxis]

print("a:", a)
print("b:", b)
print("a+b:", a + b)

## rules:
# Rule 1: If the two arrays differ in their number of dimensions,
#         the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
# Rule 2: If the shape of the two arrays does not match in any dimension,
#         the array with shape equal to 1 in that dimension is stretched to match the other shape.
# Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

a: [1. 1. 1.]
b: [[1.]
 [1.]
 [1.]]
a+b: [[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]


## Fancy Indexing

In [39]:
a = np.array([17, 21, 23])
print("regular indexing:", a[0], a[2])
print("fancy indexing:", a[[0, 2]])
a[[1, 2]] = 23
print("fancy indexing, modification:", a)

regular indexing: 17 23
fancy indexing: [17 23]
fancy indexing, modification: [17 23 23]


## Sorting

In [44]:
## NP has built-in functions for sorting
# np.sort() has NlogN time efficiency (quicksort)

x = np.array([23, 17, 400, 57])
print('quicksort x:', np.sort(x))   # to sort in place: x.sort()
print('indices of sorted:', np.argsort(x))
# np.sort() can also be sued to sort each column of a n-dimensional array

print('partitioning sort:', np.partition(x, 1))   # only values to the left of the partition are sorted

quicksort x: [ 17  23  57 400]
indices of sorted: [1 0 3 2]
partitioning sort: [ 17  23 400  57]


## Structured Arrays

In [49]:
## a structured array is a multidimensional array that is composed of datatypes, organized as fields

np.dtype([('name', 'S10'), ('major', 'S10'), ('year', 'i4')])

dtype([('name', 'S10'), ('major', 'S10'), ('year', '<i4')])