### Basic NumPy examples

In [190]:
# import NumPy, np is the "standard" alias for numpy
# in conventional data analytics and ML code
import numpy as np

In [191]:
# RUN THIS CODE CELL IN ORDER TO USE THE CUSTOM STYLES
# THE ONLY PURPOSE OF THIS IS TO MAKE NUMPY ARRAYS
# VISUALLY MORE PLEASING
from IPython.core.interactiveshell import InteractiveShell

# Custom HTML formatter for NumPy arrays
# you can customize the colors if you wish!
# NOTE: this can be a bit buggy with some 
# more advanced NumPy operations, try removing
# this custom formatting code if problems arise

# NOTE: you can control the amount of decimals by changing 
# precision value below (9 is enough for the NumPy exercises)
def array_to_html(arr, precision=9):
    """Render a NumPy array as an HTML table string.

    Parameters
    ----------
    arr : array-like
        Data to render. 0-d and 1-D input becomes a single column (one value
        per row), 2-D input becomes a grid. Input with more than two
        dimensions is returned as escaped plain text inside a <pre> element,
        because it does not map onto a single table.
    precision : int, optional
        Number of decimals passed to np.round before the values are
        converted to text (default 9).

    Returns
    -------
    str
        The HTML markup. An empty array produces a table without rows.
    """
    # Local import so this block does not depend on another import line
    from html import escape

    values = np.asarray(arr)

    # Round the numbers to the specified precision and convert to string.
    # Dtypes that cannot be rounded (strings, datetimes, ...) raise a
    # TypeError in np.round and are shown unrounded instead of crashing.
    try:
        rounded = np.asarray(np.round(values, precision))
    except TypeError:
        rounded = values
    str_arr = rounded.astype(str)

    # 3-D and higher: fall back to NumPy's own text layout
    if str_arr.ndim > 2:
        return f"<pre>{escape(str(rounded))}</pre>"

    # Find the maximum width (longest string length); 0 for an empty array
    max_width = max((len(val) for val in str_arr.ravel().tolist()), default=0)

    # Consistent column width and centered text for every cell
    cell_style = (
        "padding: 5px; border: 1px solid black; font-weight: bold; "
        f"width: {max_width}ch; text-align: center;"
    )

    # A scalar or vector is rendered as a column: one single-cell row per value
    if str_arr.ndim < 2:
        grid = np.atleast_1d(str_arr)[:, None]
    else:
        grid = str_arr

    # Collect the pieces and join once instead of repeated string +=
    parts = ["<table style='border: 1px solid black; border-collapse: collapse;'>"]
    for row in grid.tolist():
        # escape() keeps text values such as "<b>" from being read as markup
        cells = "".join(
            f"<td style='{cell_style}'>{escape(val)}</td>" for val in row
        )
        parts.append(f"<tr>{cells}</tr>")
    parts.append("</table>")
    return "".join(parts)

# Register the formatter in IPython
# Look up IPython's HTML formatter of the running shell first, then
# attach the custom renderer to it for every np.ndarray value
html_formatter = InteractiveShell.instance().display_formatter.formatters['text/html']
html_formatter.for_type(np.ndarray, lambda arr: array_to_html(arr))

<function __main__.<lambda>(arr)>

### Data generators

In [192]:
# generate a vector (list)
data = np.arange(0, 10)
data

0
0
1
2
3
4
5
6
7
8
9


In [193]:
# we can also define a step size in order to skip numbers
# this example uses a step size of 4 (only every 4th number is kept)
# so from 0 -> 20 -> 0, 4, 8, 12, 16 (the end value 20 is not included)
data = np.arange(0, 20, 4)
data

0
0
4
8
12
16


In [194]:
# NOTE! if you print NumPy data with print()
# you will not see any special styles (because printing
# is now done by Python instead of Jupyter)
print(data)

[ 0  4  8 12 16]


### You can convert conventional Python lists or lists-of-lists into NumPy arrays easily

In [195]:
# np.array is also handy if for some reason your
# data still remains in Python format, and your AI algorithm
# provides an error indicating your data is not in NumPy -format => np.array()
# often resolves this problem (but might need more features, like np.expand_dims())
numbers = [6, 4, 8, 7, 2, 1, 3]
data = np.array(numbers)
data

0
6
4
8
7
2
1
3


In [196]:
# example 2, list of lists
day1 = [-32, -29, -30, -31, -37]
day2 = [-5, -2, 0, 1, -6]
day3 = [-10, -14, -11, -12, -8]

# list of lists (matrix)
temperatures = [day1, day2, day3]

# convert to NumPy -format
data = np.array(temperatures)
data

0,1,2,3,4
-32,-29,-30,-31,-37
-5,-2,0,1,-6
-10,-14,-11,-12,-8


### Some special data generators

In [197]:
# we can generate a collection of zeroes if we want
# this could be used to generate a set of default values
# for a real life dataset => usually 0 => "no"
data = np.zeros(8)
data

0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [198]:
# a matrix version also possible:
data = np.zeros((5, 5))
data

0,1,2,3,4
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0


In [199]:
# we can also change the data type into integer
# astype() is also usable most of the time in pandas
# very handy when the data is in a weird format (from a file etc.)
data = np.zeros((5, 5)).astype(int)
data

0,1,2,3,4
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0


In [200]:
# we can generate a collection of ones too
# this could be used to generate a set of default values
# for a real life dataset => usually 1 => "yes"
data = np.ones((5, 5)).astype(int)
data

0,1,2,3,4
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1


### linspace => linearly spaced array

In [201]:
# without NumPy, you'd need a Python list,
# a for loop and division calculations

# linspace is often used in visualizations
# (modules like seaborn and matplotlib)
# in order to automatically accommodate the amount
# of data you wish to visualize
data = np.linspace(0, 10, 15)
data

0
0.0
0.714285714
1.428571429
2.142857143
2.857142857
3.571428571
4.285714286
5.0
5.714285714
6.428571429


### Random numbers in NumPy

In [202]:
# random number generation is often used
# in order to create imaginary test data for other modules
# like pandas and seaborn
# very practical if you need a smaller non-sense dataset
# in order to test your advanced data filtering/removal -algorithms
# before applying them into real datasets (to ensure you don't remove too much data)

# matrix version
data = np.random.rand(5, 5)
data

0,1,2,3,4
0.636623801,0.078422563,0.058464432,0.475157045,0.387765413
0.746132808,0.865157046,0.416517948,0.494945662,0.195490201
0.543749997,0.102565897,0.850974828,0.236109448,0.601720799
0.961142697,0.259586464,0.184277179,0.698849457,0.56560275
0.376177965,0.121760655,0.234359086,0.138234643,0.029925615


In [203]:
# vector version, random integers from 1 to 99 (the upper limit 100 is excluded) => 10 numbers
data = np.random.randint(1, 100, 10)
data

0
22
40
5
85
50
8
86
60
13
51


In [204]:
# generate random numbers from the standard normal distribution
# value range is commonly between -3 and +3, but values slightly out of bounds
# are also possible (meaning extreme values => outliers)
data = np.random.randn(5, 5)
data

0,1,2,3,4
1.112149169,-0.570950366,2.63137453,2.567553518,0.70397037
0.771463026,-0.949742942,-1.540937095,0.722596535,0.173449454
-1.420320149,0.016841461,-0.245905446,-0.667663382,-2.308389901
0.515215983,-1.055199229,-0.150850006,0.040161574,0.941590661
-0.728466558,-0.420967415,0.989113736,-0.606996109,1.371331481


In [205]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randn()
data = rng.normal(loc=0.0, scale=1.0, size=(5, 5))
data

0,1,2,3,4
-0.301449513,-1.268005622,1.402905594,0.976615949,-1.198720131
1.054003976,-0.459961881,1.065994446,-2.266943336,-0.075566484
1.622354366,-0.100318673,-0.254209922,0.017134558,0.0466379
0.142968533,0.576545186,0.564555522,0.733514945,-0.376949022
0.625495392,0.925084253,0.51513767,-0.850622281,3.014110772


In [206]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randint()
data = rng.integers(0, 100, size=25).reshape(5, 5)
data

0,1,2,3,4
70,94,6,62,52
19,94,5,34,41
42,96,90,34,58
19,6,71,49,95
98,92,20,71,82
