# NumPy

The structure of this notebook is inspired by w3schools: https://www.w3schools.com/python/numpy/

NumPy = Numerical Python

What is NumPy?
- A library for arrays
- Good for linear algebra
- Fast computation

Why use NumPy?
- A lot faster than lists
- Often easier to use
- Great for data science

Code can be found here:
https://github.com/numpy/numpy


## Installation

In [1]:
!pip install numpy



## Importing NumPy

In [2]:
import numpy as np

# Arrays

## Creating arrays from other data types

In [3]:
# A built-in Python list, shown for comparison with NumPy arrays.
dat = [1, 3, 4, 5]
print(type(dat), dat, sep="\n")

<class 'list'>
[1, 3, 4, 5]


In [4]:
# Build a NumPy array from a Python list.
values = [1, 3, 4, 5]
dat = np.array(values)
print(f"{type(dat)}\n{dat}")

<class 'numpy.ndarray'>
[1 3 4 5]


In [5]:
# np.array accepts a tuple just like a list.
values = (1, 3, 4, 5)
dat = np.array(values)
for item in (type(dat), dat):
    print(item)

<class 'numpy.ndarray'>
[1 3 4 5]


## Creating multi-dimensional arrays

In [6]:
# A scalar wrapped in an array has zero dimensions and an empty shape.
dat = np.array(1)
print(dat.ndim, dat.shape, sep="\n")

0
()


In [7]:
# A flat list gives a 1-D array; its shape is a one-element tuple.
dat = np.array([1, 3, 4])
n_axes, shape = dat.ndim, dat.shape
print(n_axes)
print(shape)

1
(3,)


In [8]:
# A list of equally long lists gives a 2-D array (rows, columns).
dat = np.array([
    [1, 3, 4],
    [10, 3, 4],
])
print(f"{dat.ndim}\n{dat.shape}")

2
(2, 3)


In [9]:
# One extra level of nesting adds a leading axis of length 1.
rows = [[1, 3, 4], [10, 3, 4], [1, 3, 4], [10, 3, 4]]
dat = np.array([rows])
for attribute in (dat.ndim, dat.shape):
    print(attribute)

3
(1, 4, 3)


# New arrays

In [10]:
# 3 rows x 4 columns, every entry set to the float 1.0.
dat = np.ones(shape=(3, 4))
print(dat)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [11]:
# With dtype=bool the "zeros" are False values.
shape = (3, 4)
dat = np.zeros(shape, dtype=bool)
print(dat)

[[False False False False]
 [False False False False]
 [False False False False]]


In [12]:
# np.full fills every position of the given shape with one value.
dat = np.full(shape=(2, 2), fill_value=99)
print(dat)

[[99 99]
 [99 99]]


In [13]:
# Integers from 0 up to (but not including) 6.
n_values = 6
dat = np.arange(n_values)
print(dat)

[0 1 2 3 4 5]


# Indexing

## Simple indexing

In [14]:
# 1-D array: a single index selects one element (indices start at 0).
dat = np.array([1, 2, 3, 4])
first = dat[0]
print(first)

1


In [15]:
# 2-D array: index with [row, column].
dat = np.array([[1, 3, 4], [10, 3, 5]])
for row, col in ((0, 0), (1, 2)):
    print(dat[row, col])

1
5


## Negative indexing

In [16]:
# 1-D array: negative indices count from the end (-1 is the last element).
dat = np.array([1, 2, 3, 4])
print(dat[-1], dat[-2], sep="\n")

4
3


In [17]:
# 2-D array: negative indices work independently on each axis.
dat = np.array([[1, 3, 4], [10, 3, 5]])
last_in_first_row = dat[0, -1]
first_in_last_row = dat[-1, 0]
print(last_in_first_row)
print(first_in_last_row)

4
10


## Slicing

In [18]:
# 1-D array: start:stop slices; stop is exclusive and either bound may be omitted.
dat = np.array([1, 2, 3, 4, 5, 6])
windows = (
    np.s_[1:4],
    np.s_[1:],
    np.s_[:4],
    np.s_[-3:],
    np.s_[2:-2],
    np.s_[-4:-2],
    np.s_[:],
)
for window in windows:
    print(dat[window])

[2 3 4]
[2 3 4 5 6]
[1 2 3 4]
[4 5 6]
[3 4]
[3 4]
[1 2 3 4 5 6]


In [19]:
# 2-D array: one slice (or index) per axis, separated by a comma.
row = [1, 2, 3, 4, 5, 6]
dat = np.array([row, row])
print(dat[1:4, :])  # 1:4 is clipped to the rows that exist, so only row 1 is returned
print(dat[-1, 1:4])
print(dat[:, :-3])

[[1 2 3 4 5 6]]
[2 3 4]
[[1 2 3]
 [1 2 3]]


## Steps

In [20]:
# 1-D array: a third slice value sets the step.
dat = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
for step in (1, 2, 3):
    print(dat[0:-1:step])
print(dat[::1])

[1 2 3 4 5 6 7 8]
[1 3 5 7]
[1 4 7]
[1 2 3 4 5 6 7 8 9]


# Data Types

Common types in Data Science:
- integer (int)
- float
- boolean
- unsigned integer (uint)
- strings


## Getting started

In [21]:
# Integer literals give an integer dtype; the exact width depends on the platform.
values = [1, 2, 3]
dat = np.array(values)
dat.dtype

dtype('int32')

In [22]:
# Decimal literals are stored as 64-bit floats.
values = [1.3, 2.2, 3.4]
dat = np.array(values)
dat.dtype

dtype('float64')

In [23]:
# A trailing dot makes the literals floats, so the array dtype is float as well.
dat = np.array([1.0, 2.0, 3.0])
dat.dtype

dtype('float64')

## Setting datatypes

In [24]:
# Passing the builtin `float` as dtype converts the integers to float64.
values = [1, 2, 3]
dat = np.array(values, dtype=float)
dat.dtype

dtype('float64')

In [25]:
# A NumPy scalar type picks an exact width, here a 16-bit float.
dat = np.array(
    [1, 2, 3],
    dtype=np.float16,
)
dat.dtype

dtype('float16')

In [26]:
# A dtype can also be given by name; "uint" is an unsigned integer.
values = [1, 2, 3]
dat = np.array(values, dtype="uint")
dat.dtype

dtype('uint32')

In [27]:
# NumPy >= 2.0 raises OverflowError when a negative Python int is passed straight
# into an unsigned dtype. Build a signed array first and cast it: astype() keeps
# the wrap-around, so -1 becomes the largest value of the unsigned type.
dat = np.array([-1, 2, 3]).astype("uint")
print(dat.dtype)
print(dat)

uint32
[4294967295          2          3]


In [28]:
# With dtype=str every element, including the numbers, is converted to text.
dat = np.array(['a', 2, 3], dtype=str)
print(dat.dtype, dat, sep="\n")

<U1
['a' '2' '3']


In [29]:
# 'a' cannot be parsed as a number, so this conversion fails. The error is the
# point of the example: catch it and print it so the notebook still runs from
# top to bottom.
try:
    dat = np.array(['a', 2, 3], dtype=float)
    print(dat.dtype)
    print(dat)
except ValueError as err:
    float_cast_error = f"ValueError: {err}"
    print(float_cast_error)

ValueError: could not convert string to float: 'a'

# Copy and view

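Be careful when changing data: plain assignment, slices, and `.view()` all share memory with the original array, so a change made through one name also shows up in the other. Use `.copy()` when you need an independent array.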

In [30]:
# Plain assignment does not copy: both names refer to the same array.
dat = np.array([0, 1, 2, 3])
dat2 = dat
dat2[1] = 9

for arr in (dat, dat2):
    print(arr)

[0 9 2 3]
[0 9 2 3]


In [31]:
# A slice is a view: it shares memory with the array it was taken from.
dat = np.array([0, 1, 2, 3])
dat2 = dat[1:]
print(dat, dat2, sep="\n")

# Writing through the view also changes the original (dat2[1] is dat[2]).
dat2[1] = 9
print(dat, dat2, sep="\n")

[0 1 2 3]
[1 2 3]
[0 1 9 3]
[1 9 3]


In [32]:
# .copy() gives an independent array, so the original stays untouched.
dat = np.array([0, 1, 2, 3])
dat2 = dat.copy()
dat2[1] = 9

print(f"{dat}\n{dat2}")

[0 1 2 3]
[0 9 2 3]


In [33]:
# .view() creates a new array object over the same data, so writes are shared.
dat = np.array([0, 1, 2, 3])
dat2 = dat.view()
dat2[1] = 9

for arr in (dat, dat2):
    print(arr)

[0 9 2 3]
[0 9 2 3]


In [34]:
def add_value(arr, val):
    """Return a new array holding ``arr + val``; ``arr`` itself is not modified."""
    return arr + val


dat = np.array([0, 1, 2, 3])
dat2 = add_value(dat, 3)
print(dat, dat2, sep="\n")

[0 1 2 3]
[3 4 5 6]


In [35]:
def add_value(arr, val):
    """Add ``val`` to ``arr`` in place and return that same array object."""
    arr += val  # augmented assignment writes into the caller's array
    return arr


dat = np.array([0, 1, 2, 3])
dat2 = add_value(dat, val=3)
print(f"{dat}\n{dat2}")

[3 4 5 6]
[3 4 5 6]


# Reshaping

In [36]:
dat = np.array([1, 2, 3, 4, 5, 6])

# 6 elements -> 2 rows x 3 columns
dat = dat.reshape(2, 3)
print(dat)

# ...and back to a flat array of length 6
dat = dat.reshape(6)
print(dat)

[[1 2 3]
 [4 5 6]]
[1 2 3 4 5 6]


In [37]:
# -1 lets NumPy work out that axis: 6 elements / 2 rows = 3 columns.
dat = np.array([1, 2, 3, 4, 5, 6]).reshape(2, -1)
print(dat)

[[1 2 3]
 [4 5 6]]


In [38]:
dat = np.array([1, 2, 3, 4, 5, 6])

# 6 elements cannot fill a 2x2 array, so reshape raises ValueError. Catch and
# print it so the cells after this one still run on "Restart & Run All".
try:
    dat = dat.reshape((2, 2))
except ValueError as err:
    reshape_error = f"ValueError: {err}"
    print(reshape_error)

ValueError: cannot reshape array of size 6 into shape (2,2)

In [None]:
# flatten() collapses the 2x3 array into one dimension.
dat = np.array([[1, 2, 3], [4, 5, 6]]).flatten()
print(dat)

# Iterating arrays

In [39]:
dat = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Take the row count from the array instead of hard-coding it, so the loop
# stays correct if the example data changes size.
for i in range(dat.shape[0]):
    print(f"i={i}:", dat[i])

i=0: [1 2 3]
i=1: [4 5 6]
i=2: [7 8 9]


In [40]:
dat = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Loop bounds come from the array's shape rather than a hard-coded 3.
n_rows, n_cols = dat.shape
for i in range(n_rows):
    for j in range(n_cols):
        print('i, j:', dat[i, j])

i, j: 1
i, j: 2
i, j: 3
i, j: 4
i, j: 5
i, j: 6
i, j: 7
i, j: 8
i, j: 9


In [41]:
dat = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Iterating over a 2-D array yields one row per step.
for row in dat:
    print(row)

[1 2 3]
[4 5 6]
[7 8 9]


In [42]:
dat = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# enumerate() pairs each row with its position.
for i, row in enumerate(dat):
    print(f"i={i}:", row)

i=0: [1 2 3]
i=1: [4 5 6]
i=2: [7 8 9]


In [43]:
dat = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# .flat walks the elements row by row; [::2] keeps every second one.
every_second = dat.flat[::2]
for i, value in enumerate(every_second):
    print(f"i={i}:", value)

i=0: 1
i=1: 3
i=2: 5
i=3: 7
i=4: 9


# Combine multiple arrays

In [44]:
# Join two 1-D arrays end to end.
dat1, dat2 = np.array([1, 2, 3]), np.array([4, 5, 6])
dat = np.concatenate((dat1, dat2))
print(dat)

[1 2 3 4 5 6]


In [45]:
# For 1-D inputs hstack also joins the arrays end to end.
dat1 = np.array([1, 2, 3])
dat2 = np.array([4, 5, 6])
parts = (dat1, dat2)
dat = np.hstack(parts)
print(dat)

[1 2 3 4 5 6]


In [46]:
# vstack puts the arrays on top of each other as rows.
dat1, dat2 = np.array([1, 2, 3]), np.array([4, 5, 6])
dat = np.vstack((dat1, dat2))
print(dat)

[[1 2 3]
 [4 5 6]]


In [47]:
# Stack two rows into a 2x3 block, then place that block side by side with itself.
dat1, dat2 = np.array([1, 2, 3]), np.array([4, 5, 6])
dat3 = np.vstack((dat1, dat2))
dat = np.hstack((dat3, dat3))
print(dat)

[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


In [48]:
# stack adds a new axis; with axis=0 each input becomes a row.
dat1, dat2 = np.array([1, 2, 3]), np.array([4, 5, 6])
dat = np.stack((dat1, dat2), axis=0)
print(dat)

[[1 2 3]
 [4 5 6]]


In [49]:
# With axis=1 each input becomes a column instead.
dat1 = np.array([1, 2, 3])
dat2 = np.array([4, 5, 6])
pair = (dat1, dat2)
dat = np.stack(pair, axis=1)
print(dat)

[[1 4]
 [2 5]
 [3 6]]


# Splitting arrays

In [50]:
# Unpacking a 2-D array assigns one row to each name.
dat = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
dat1, dat2 = dat

print(dat1)

[1 2 3]


In [51]:
# array_split cuts along the first axis; the pieces stay 2-D.
dat = np.array([[1, 2, 3], [4, 5, 6]])
parts = np.array_split(dat, 2)
dat1, dat2 = parts
print(*parts)
print(dat1.shape)

[[1 2 3]] [[4 5 6]]
(1, 3)


In [52]:
# Asking for more pieces than there are rows leaves the extra piece empty.
dat = np.array([[1, 2, 3], [4, 5, 6]])
pieces = np.array_split(dat, 3)
dat1, dat2, dat3 = pieces
print(*pieces)
print(dat1.shape)

[[1 2 3]] [[4 5 6]] []
(1, 3)


In [53]:
# vsplit cuts the array row-wise into equally sized parts.
dat = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
halves = np.vsplit(dat, 2)
dat1, dat2 = halves
print(*halves)
print(dat1.shape)

[[1 2 3]] [[4 5 6]]
(1, 3)


# Searching

In [54]:
# np.where with only a condition returns a tuple of index arrays, one per axis.
dat = np.array([1, 2, 3, 4, 5, 6])
above_three = dat > 3
np.where(above_three)

(array([3, 4, 5], dtype=int64),)

In [55]:
# argmax gives the position of the largest value; indexing with it gives the value.
dat = np.array([1, 2, 3, 4, 5, 6])
idx_max = np.argmax(dat)
dat[idx_max]

6

# Sorting

In [56]:
# np.sort returns the values in ascending order and leaves `dat` unchanged.
dat = np.array([2, 123, 5, 12, 4])
in_order = np.sort(dat)
print(in_order)

[  2   4   5  12 123]


In [57]:
# argsort returns the indices that would put the array in ascending order.
dat = np.array([2, 123, 5, 12, 4])
order = np.argsort(dat)
print(order)

[0 4 2 3 1]


In [58]:
# Strings are compared character by character, so here symbols and digits
# come before letters.
dat = np.array(["as", "asd", "%%%", "12", "b"])
in_order = np.sort(dat)
print(in_order)

['%%%' '12' 'as' 'asd' 'b']


In [59]:
# axis=1 sorts the values within each row.
dat = np.array([
    [2, 123, 5],
    [12, 4, 8],
])
by_row = np.sort(dat, axis=1)
print(by_row)

[[  2   5 123]
 [  4   8  12]]


In [60]:
# axis=0 sorts the values within each column.
dat = np.array([
    [2, 123, 5],
    [12, 4, 8],
])
by_column = np.sort(dat, axis=0)
print(by_column)

[[  2   4   5]
 [ 12 123   8]]


# Filtering and masking

In [61]:
# np.where returns a 1-tuple for a 1-D array; unpack it to get the index array.
dat = np.array([2, 123, 5, 12, 4])
(idxs,) = np.where(dat > 10)
print(dat[idxs])

[123  12]


In [62]:
# A boolean mask selects the elements where the condition is True.
dat = np.array([2, 123, 5, 12, 4])
mask = dat > 10
print(dat[mask])

[123  12]


# Random

In [63]:
# Seed the global generator so the two draws are the same on every run.
np.random.seed(42)
print(np.random.randint(0, 100))
print(np.random.randint(0, 100))

63
15


In [64]:
# Seed the global generator so the sampled 3x3 matrix is the same on every run.
np.random.seed(42)
print(np.random.normal(0, 1, size=(3, 3)))

[[-0.8412727  -1.67004094 -2.95747518]
 [ 1.208871    0.42530846  0.80571787]
 [-1.2465848  -0.02968269  1.01189943]]


# Computation

In [65]:
# + adds arrays element by element.
ones = np.ones(3)
dat = ones + ones
print(dat)

[2. 2. 2.]


In [66]:
# * multiplies element by element (it is not a matrix product).
idx = np.arange(5)
dat = idx * idx
print(dat)

[ 0  1  4  9 16]


In [67]:
# ** raises every element to the given power.
base = np.arange(5)
dat = base ** 2
print(dat)

[ 0  1  4  9 16]


In [68]:
# Operators combine as in ordinary arithmetic: x**2 - x, element by element.
x = np.arange(5)
dat = x ** 2 - x
print(dat)

[ 0  0  2  6 12]


In [69]:
dat = np.ones(5)
print(dat)

# Running total of the elements.
dat2 = dat.cumsum()
print(dat2)

[1. 1. 1. 1. 1.]
[1. 2. 3. 4. 5.]


In [70]:
# @ on two 1-D arrays is the inner product: 0 + 1 + 2 + 3 + 4.
left, right = np.arange(5), np.ones(5)
dat = left @ right
print(dat)

10.0


In [71]:
# np.dot of two 1-D arrays is their inner product.
dat = np.dot(
    np.arange(5),
    np.ones(5),
)
print(dat)

10.0
