### Basic NumPy examples

In [228]:
# import NumPy; np is the "standard" alias for numpy
# in conventional data analytics and ML code
import numpy as np

In [229]:
# RUN THIS CODE CELL IN ORDER TO USE THE CUSTOM STYLES
# THE ONLY PURPOSE OF THIS IS TO MAKE NUMPY ARRAYS
# VISUALLY MORE PLEASING
from IPython.core.interactiveshell import InteractiveShell

# Custom HTML formatter for NumPy arrays
# you can customize the colors if you wish!
# NOTE: this can be a bit buggy with some 
# more advanced NumPy operations, try removing
# this custom formatting code if problems arise

# NOTE: you can control the amount of decimals by changing 
# precision value below (9 is enough for the NumPy exercises)
def array_to_html(arr, precision=9):
    """Render a NumPy array as an HTML table for Jupyter's rich display.

    Parameters
    ----------
    arr : array-like
        The data to render. 0-D input is shown as a single cell, 1-D input
        as one column (one row per element), 2-D input as a regular grid.
    precision : int, default 9
        Number of decimals numeric values are rounded to before display.
        Non-numeric data (booleans, strings, objects) is shown via str()
        without rounding.

    Returns
    -------
    str or None
        The HTML markup of the table. None is returned for arrays with
        three or more dimensions; IPython treats a None result as
        "no HTML representation" and falls back to the plain-text repr.
    """
    # Local import keeps this formatter self-contained within its cell.
    from html import escape

    # atleast_1d turns scalars / 0-D arrays into a one-element vector so
    # they can be iterated like any other array.
    values = np.atleast_1d(arr)

    # A table can only show rows and columns; let IPython use its default
    # text output for higher-dimensional data instead of nesting reprs.
    if values.ndim > 2:
        return None

    # Only numeric data can be rounded (np.round raises on e.g. strings).
    if np.issubdtype(values.dtype, np.number):
        values = np.round(values, precision)
    str_arr = values.astype(str)

    # Widest cell text decides the common column width; default=0 keeps
    # empty arrays from raising in max().
    max_width = max((len(val) for val in str_arr.flat), default=0)

    cell_style = (
        "padding: 5px; border: 1px solid black; font-weight: bold; "
        f"width: {max_width}ch; text-align: center;"
    )

    # A vector is displayed as a single column: one element per table row.
    rows = str_arr.reshape(-1, 1) if str_arr.ndim == 1 else str_arr

    parts = ["<table style='border: 1px solid black; border-collapse: collapse;'>"]
    for row in rows:
        # escape() protects the markup when string arrays contain <, > or &.
        cells = "".join(
            f"<td style='{cell_style}'>{escape(val)}</td>" for val in row
        )
        parts.append(f"<tr>{cells}</tr>")
    parts.append("</table>")
    return "".join(parts)

# Register the formatter in IPython so that every np.ndarray displayed as a
# cell result is rendered through array_to_html.
# - The lambda looks up array_to_html at display time, so re-defining the
#   function later takes effect without registering it again.
# - for_type() returns the formatter that was registered before this call;
#   the trailing semicolon stops that return value from being echoed as the
#   output of this cell.
InteractiveShell.instance().display_formatter.formatters['text/html'].for_type(
    np.ndarray, lambda arr: array_to_html(arr)
);

<function __main__.<lambda>(arr)>

### Data generators

In [230]:
# generate a vector (list)
data = np.arange(0, 10)
data

0
0
1
2
3
4
5
6
7
8
9


In [231]:
# we can also define a step size in order to skip numbers
# this example uses a step of 4 (only every 4th number is kept)
# so from 0 up to (but not including) 20 => 0, 4, 8, 12, 16
data = np.arange(0, 20, 4)
data

0
0
4
8
12
16


In [232]:
# NOTE! if you print NumPy data with print()
# you will not see any special styles (because printing
# is now done by Python instead of Jupyter)
print(data)

[ 0  4  8 12 16]


### You can convert conventional Python lists or lists-of-lists into NumPy arrays easily

In [233]:
# np.array is also handy if for some reason your
# data still remains in Python format, and your AI algorithm
# provides an error indicating your data is not in NumPy -format => np.array()
# often resolves this problem (but might need more features, like np.expand_dims())
numbers = [6, 4, 8, 7, 2, 1, 3]
data = np.array(numbers)
data

0
6
4
8
7
2
1
3


In [234]:
# example 2, list of lists
day1 = [-32, -29, -30, -31, -37]
day2 = [-5, -2, 0, 1, -6]
day3 = [-10, -14, -11, -12, -8]

# list of lists (matrix)
temperatures = [day1, day2, day3]

# convert to NumPy -format
data = np.array(temperatures)
data

0,1,2,3,4
-32,-29,-30,-31,-37
-5,-2,0,1,-6
-10,-14,-11,-12,-8


### Some special data generators

In [235]:
# we can generate a collection of zeroes if we want
# this could be used to generate a set of default values
# for a real life dataset => usually 0 => "no"
data = np.zeros(8)
data

0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [236]:
# a matrix version is also possible:
data = np.zeros((5, 5))
data

0,1,2,3,4
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0


In [237]:
# we can also change the data type into integer
# astype() is also usable most of the time in pandas
# very handy when the data is in a weird format (from a file etc.)
data = np.zeros((5, 5)).astype(int)
data

0,1,2,3,4
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0


In [238]:
# we can generate a collection of ones too
# this could be used to generate a set of default values
# for a real life dataset => usually 1 => "yes"
data = np.ones((5, 5)).astype(int)
data

0,1,2,3,4
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1


### linspace => linearly spaced array

In [239]:
# without NumPy, you'd need a Python list,
# a for loop and division calculations

# linspace is often used in visualizations
# (modules like seaborn and matplotlib)
# in order to automatically accommodate the amount
# of data you wish to visualize
data = np.linspace(0, 10, 15)
data

0
0.0
0.714285714
1.428571429
2.142857143
2.857142857
3.571428571
4.285714286
5.0
5.714285714
6.428571429


### Random numbers in NumPy

In [240]:
# random number generation is often used
# in order to create imaginary test data for other modules
# like pandas and seaborn
# very practical if you need a smaller nonsense dataset
# in order to test your advanced data filtering/removal -algorithms
# before applying them into real datasets (to ensure you don't remove too much data)

# matrix version
data = np.random.rand(5, 5)
data

0,1,2,3,4
0.520476549,0.084057038,0.189120659,0.912397656,0.223770899
0.495633887,0.68228201,0.201941903,0.884550218,0.23596187
0.304143189,0.085316062,0.278886294,0.372819136,0.05135423
0.407491086,0.755848018,0.74213614,0.102875823,0.952038652
0.150630012,0.809016503,0.512847588,0.681982384,0.607298279


In [241]:
# vector version, values between 1-99 (the upper bound 100 is excluded) => 10 numbers
data = np.random.randint(1, 100, 10)
data

0
87
15
75
74
72
59
90
27
37
91


In [242]:
# generate random numbers from the standard normal distribution
# the value range is commonly between -3 and +3, but values slightly out of bounds
# are also possible (meaning extreme values => outliers)
data = np.random.randn(5, 5)
data

0,1,2,3,4
0.546445838,1.333774869,0.280767846,-0.846147143,-0.072314848
-1.527443866,-0.962988327,-1.034132595,1.594215877,0.575003852
0.71017947,1.038849372,0.011066118,-0.382562322,0.255569698
0.865248947,-1.971584784,-1.110622248,-0.256064929,-0.642432914
0.50710508,0.348456882,-1.182023123,-0.53703731,-0.702078141


In [243]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randn()
data = rng.normal(loc=0.0, scale=1.0, size=(5, 5))
data

0,1,2,3,4
0.768994477,0.606708823,-1.391531552,-0.446178027,1.298748661
1.311724259,0.436073854,-0.238346906,0.987216541,-1.708110763
-1.295438598,1.311950061,-0.729744054,0.361601627,0.514786257
0.153028408,0.827921302,-0.167442116,-1.001857206,-0.134387604
-0.814465873,0.914329678,0.098386019,-0.636234766,-0.658124063


In [244]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randint() (the upper bound 100 is excluded here as well)
data = rng.integers(0, 100, size=25).reshape(5, 5)
data

0,1,2,3,4
60,18,15,71,18
95,55,26,75,96
40,40,60,99,98
61,27,64,2,95
78,79,44,19,52


### reshape() -> convert any vector/list into a matrix!

In [245]:
# reshape() is very handy, as long as you have some kind of a vector (list)
# you can reshape it into a matrix easily (no need to remember specialized syntax)

# in this case we have 25 values (0-24) => 5 x 5 matrix (25 values)
# the values have to match, otherwise you'll get an error
data = np.arange(25).reshape(5, 5)
data

0,1,2,3,4
0,1,2,3,4
5,6,7,8,9
10,11,12,13,14
15,16,17,18,19
20,21,22,23,24


In [246]:
# sometimes in ML etc. code, you might need to check
# your current data format in order to adapt it to your algorithm
data.shape

(5, 5)

### NumPy functions and tools