## NumPy

In [1]:
import numpy as np
import pandas as pd

## Datatypes and Attributes

In [2]:
# NumPy's main datatype is ndarray (short for n-dimensional array).
# np.array builds one from a Python list.
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [3]:
# The Python type of the array object is numpy.ndarray
type(a1)

numpy.ndarray

In [4]:
# We can also create matrices (2-dimensional arrays) from nested lists.
# The first row holds floats and the second ints; the result is all floats.
matrix = np.array([[1.0, 2.0, 3.0], [4, 5, 6]])
matrix

array([[1., 2., 3.],
       [4., 5., 6.]])

In [5]:
# A matrix has the same type as a 1-dimensional array: ndarray
type(matrix)

numpy.ndarray

In [6]:
# We can inspect the shape of an ndarray: here (rows, columns)
matrix.shape

(2, 3)

In [7]:
# And the number of dimensions (axes)
matrix.ndim

2

In [8]:
# ndarrays also have a dtype: the element type shared by every value
matrix.dtype

dtype('float64')

In [9]:
# If we want to know the number of values stored in an ndarray
matrix.size

6

In [10]:
# We can also create a pandas DataFrame from a NumPy array
# (pandas is imported as pd in the imports cell at the top of the notebook).
dataframe = pd.DataFrame(matrix)
dataframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,6.0


## Creating NumPy Arrays

In [11]:
# As we saw, the simplest way to create an array is from a Python list
simple = np.array([1,2,3])
simple

array([1, 2, 3])

In [12]:
# np.ones builds an array of the requested shape with every entry set to 1
ones = np.ones((2, 3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# np.zeros does the same, but every entry is set to 0
zeros = np.zeros((2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
# Or even an array spanning a range of numbers: start 0, stop 5 (excluded), step 1
arange = np.arange(0, 5, 1)
arange

array([0, 1, 2, 3, 4])

In [15]:
# An array of random integers drawn from 0 (inclusive) up to 10 (exclusive)
randint = np.random.randint(low=0, high=10, size=(3, 5))
randint

array([[4, 2, 2, 3, 8],
       [5, 2, 6, 2, 2],
       [7, 6, 3, 1, 6]])

In [16]:
# Random floats in [0, 1) drawn from a uniform distribution
random = np.random.random(size=(3,5))
random

array([[0.8485642 , 0.07406709, 0.73684094, 0.40965807, 0.20430443],
       [0.26580883, 0.93832979, 0.06288894, 0.49054212, 0.15554833],
       [0.48625518, 0.58418192, 0.36909467, 0.86783977, 0.50309499]])

In [17]:
# np.random.rand also gives uniform floats in [0, 1), but takes the
# dimensions as separate arguments instead of a size tuple
rand = np.random.rand(3,5)
rand

array([[0.54798219, 0.68616942, 0.51119099, 0.93516819, 0.19654568],
       [0.00599738, 0.52415065, 0.90803567, 0.68619776, 0.75281401],
       [0.40642398, 0.69100624, 0.92884388, 0.8652275 , 0.02906621]])

In [18]:
# Array with 10 values drawn from the standard normal distribution (mean 0, std 1).
# size is passed as a real one-element tuple: (10) would just be the integer 10.
norm = np.random.normal(size=(10,))
norm

array([-0.4083176 , -0.54316159, -0.71556249,  0.31414142,  0.2667225 ,
        0.44369281, -0.22498166, -0.67083916, -1.86538273, -0.38533968])

In [19]:
# Random numbers are generated from a seed; with a fixed seed we always get the same numbers.
# Run this cell multiple times and you will see the value doesn't change.
np.random.seed(0)
np.random.normal()

1.764052345967664

## Viewing Arrays and Matrices

In [20]:
# We can index a NumPy array along any dimension.
# A single index on a 2-D array selects a whole row.
print(matrix[0], matrix[1])

[1. 2. 3.] [4. 5. 6.]


In [21]:
# We can also use slicing with NumPy arrays,
# even when there are multiple dimensions.
# This code returns the first two elements of the first two rows of matrix.
matrix[:2, :2]

array([[1., 2.],
       [4., 5.]])

## Manipulating Arrays

### Arithmetic

In [22]:
# We can do element-wise addition by using +
ones + ones

array([[2., 2., 2.],
       [2., 2., 2.]])

In [23]:
# Of course, element-wise subtraction works too
ones - ones

array([[0., 0., 0.],
       [0., 0., 0.]])

In [24]:
# Element-wise and scalar multiplication also work.
# x holds random integers; multiplying by the float array `ones` gives floats.
x = np.random.randint(0,10, size=(2,3))
x * ones, 3 * ones

(array([[3., 3., 7.],
        [9., 3., 5.]]),
 array([[3., 3., 3.],
        [3., 3., 3.]]))

In [25]:
# Element-wise division.
# NOTE: x is drawn from [0, 10), so it may contain 0; dividing by it then
# yields inf together with a RuntimeWarning.
ones / x

array([[0.33333333, 0.33333333, 0.14285714],
       [0.11111111, 0.33333333, 0.2       ]])

In [26]:
# Exponentiation is applied element-wise too
x ** 2

array([[ 9,  9, 49],
       [81,  9, 25]])

In [27]:
# And other operators too: modulo and the natural logarithm.
# NOTE: np.log gives -inf (with a RuntimeWarning) for any 0 in x.
x % 2, np.log(x)

(array([[1, 1, 1],
        [1, 1, 1]]),
 array([[1.09861229, 1.09861229, 1.94591015],
        [2.19722458, 1.09861229, 1.60943791]]))

### Matrix Multiplication

In [28]:
# Python has a matrix multiplication operator, @
# We need to transpose one of the matrices to satisfy the multiplication constraints:
# (2, 3) @ (3, 2) is valid, while (2, 3) @ (2, 3) is not.
x @ ones.transpose(), ones @ x.transpose()

(array([[13., 13.],
        [17., 17.]]),
 array([[13., 17.],
        [13., 17.]]))

### Aggregation

In [29]:
# Of course, NumPy offers aggregation functions, for example sum.
# NOTE: this rebinds x (previously a 2x3 array) to a new 1x5 array.
x = np.random.randint(10, size=(1,5))
x, np.sum(x)

(array([[2, 4, 7, 6, 8]]), 27)

In [30]:
# A larger array of 10,000 random floats; we only display the first 50
massive_array = np.random.random(10000)
massive_array[:50]

array([0.96366276, 0.38344152, 0.79172504, 0.52889492, 0.56804456,
       0.92559664, 0.07103606, 0.0871293 , 0.0202184 , 0.83261985,
       0.77815675, 0.87001215, 0.97861834, 0.79915856, 0.46147936,
       0.78052918, 0.11827443, 0.63992102, 0.14335329, 0.94466892,
       0.52184832, 0.41466194, 0.26455561, 0.77423369, 0.45615033,
       0.56843395, 0.0187898 , 0.6176355 , 0.61209572, 0.616934  ,
       0.94374808, 0.6818203 , 0.3595079 , 0.43703195, 0.6976312 ,
       0.06022547, 0.66676672, 0.67063787, 0.21038256, 0.1289263 ,
       0.31542835, 0.36371077, 0.57019677, 0.43860151, 0.98837384,
       0.10204481, 0.20887676, 0.16130952, 0.65310833, 0.2532916 ])

In [31]:
# Now we can compare pure Python's sum against NumPy's sum.
# We see that NumPy is way faster.
%timeit np.sum(massive_array)
%timeit sum(massive_array)

5.09 µs ± 195 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.09 ms ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [32]:
# More aggregation functions, shown together as one tuple:
# max, min, mean, standard deviation and variance
(
    np.max(massive_array),
    np.min(massive_array),
    np.mean(massive_array),
    np.std(massive_array),
    np.var(massive_array),
)

(0.9999779517807228,
 7.2449638492178e-05,
 0.4963001728694622,
 0.2897005362063954,
 0.083926400678273)