### Basic NumPy examples

In [409]:
# import NumPy, np is the "standard" alias for numpy
# in conventional data analytics and ML code
import numpy as np

In [410]:
# RUN THIS CODE CELL IN ORDER TO USE THE CUSTOM STYLES
# THE ONLY PURPOSE OF THIS IS TO MAKE NUMPY ARRAYS
# VISUALLY MORE PLEASING
from IPython.core.interactiveshell import InteractiveShell

# Custom HTML formatter for NumPy arrays
# you can customize the colors if you wish!
# NOTE: this can be a bit buggy with some 
# more advanced NumPy operations, try removing
# this custom formatting code if problems arise

# NOTE: you can control the amount of decimals by changing 
# precision value below (9 is enough for the NumPy exercises)
def array_to_html(arr, precision=9):
    """Render a NumPy array as an HTML table string.

    1D arrays (and 0-d scalars) are drawn as a single column with one value
    per table row; other arrays are drawn row by row, one cell per value.
    NOTE: arrays with more than two dimensions get no special layout; each
    cell then holds the string form of a whole sub-array.

    Args:
        arr: NumPy array (or array-like accepted by ``np.round``) to render.
        precision: number of decimals the values are rounded to before
            they are converted to text.

    Returns:
        str: the ``<table>`` markup. An empty array gives a table with no cells.
    """
    # Round the numbers to the specified precision and convert to string.
    # atleast_1d turns a 0-d result into a one-element vector so the loops
    # below can always iterate over it.
    str_arr = np.atleast_1d(np.round(arr, precision).astype(str))

    # Find the maximum width (longest string length); default=0 keeps an
    # empty array from raising ValueError in max().
    max_width = max((len(val) for val in str_arr.flatten()), default=0)

    # One shared style so vector and matrix cells cannot drift apart.
    cell_style = (
        "padding: 5px; border: 1px solid black; font-weight: bold; "
        f"width: {max_width}ch; text-align: center;"
    )

    if str_arr.ndim == 1:
        # Handle 1D array (vector): every value gets its own row
        rows = [f"<tr><td style='{cell_style}'>{val}</td></tr>" for val in str_arr]
    else:
        # Handle 2D array (matrix): one <tr> per row, one <td> per value
        rows = [
            "<tr>"
            + "".join(f"<td style='{cell_style}'>{val}</td>" for val in row)
            + "</tr>"
            for row in str_arr
        ]

    # Create the HTML table with consistent column width and centered text
    return (
        "<table style='border: 1px solid black; border-collapse: collapse;'>"
        + "".join(rows)
        + "</table>"
    )

# Hook the custom renderer into IPython's 'text/html' display formatter
# so that np.ndarray values are shown through array_to_html
_html_formatter = InteractiveShell.instance().display_formatter.formatters['text/html']
_html_formatter.for_type(np.ndarray, lambda arr: array_to_html(arr))

<function __main__.<lambda>(arr)>

### Data generators

In [411]:
# generate a vector (list)
data = np.arange(0, 10)
data

0
0
1
2
3
4
5
6
7
8
9


In [412]:
# we can also define a step size in order to skip numbers
# this example uses a step of 4, so only every 4th number is kept
# (the end value 20 itself is excluded): 0 -> 20 => 0, 4, 8, 12, 16
data = np.arange(0, 20, 4)
data

0
0
4
8
12
16


In [413]:
# NOTE! if you print NumPy data with print()
# you will not see any special styles (because printing
# is now done by Python instead of Jupyter)
print(data)

[ 0  4  8 12 16]


### You can convert conventional Python lists or lists-of-lists into NumPy arrays easily

In [414]:
# np.array is also handy if for some reason your
# data still remains in Python format, and your AI algorithm
# provides an error indicating your data is not in NumPy -format => np.array()
# often resolves this problem (but might need more features, like np.expand_dims())
numbers = [6, 4, 8, 7, 2, 1, 3]
data = np.array(numbers)
data

0
6
4
8
7
2
1
3


In [415]:
# example 2, list of lists
day1 = [-32, -29, -30, -31, -37]
day2 = [-5, -2, 0, 1, -6]
day3 = [-10, -14, -11, -12, -8]

# list of lists (matrix)
temperatures = [day1, day2, day3]

# convert to NumPy -format
data = np.array(temperatures)
data

0,1,2,3,4
-32,-29,-30,-31,-37
-5,-2,0,1,-6
-10,-14,-11,-12,-8


### Some special data generators

In [416]:
# we can generate a collection of zeroes if we want
# this could be used to generate a set of default values
# for a real life dataset => usually 0 => "no"
data = np.zeros(8)
data

0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [417]:
# a matrix version also possible:
data = np.zeros((5, 5))
data

0,1,2,3,4
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0


In [418]:
# we can also change the data type into integer
# astype() is also usable most of the time in pandas
# very handy when the data is in a weird format (from a file etc.)
data = np.zeros((5, 5)).astype(int)
data

0,1,2,3,4
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0


In [419]:
# we can generate a collection of ones too
# this could be used to generate a set of default values
# for a real life dataset => usually 1 => "yes"
data = np.ones((5, 5)).astype(int)
data

0,1,2,3,4
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1
1,1,1,1,1


### linspace => linearly spaced array

In [420]:
# without NumPy, you'd need a Python list,
# a for loop and division calculations

# linspace is often used in visualizations
# (modules like seaborn and matplotlib)
# in order to automatically accommodate the amount
# of data you wish to visualize
data = np.linspace(0, 10, 15)
data

0
0.0
0.714285714
1.428571429
2.142857143
2.857142857
3.571428571
4.285714286
5.0
5.714285714
6.428571429


### Random numbers in NumPy

In [421]:
# random number generation is often used
# in order to create imaginary test data for other modules
# like pandas and seaborn
# very practical if you need a smaller non-sense dataset
# in order to test your advanced data filtering/removal -algorithms
# before applying them into real datasets (to ensure you don't remove too much data)

# matrix version
data = np.random.rand(5, 5)
data

0,1,2,3,4
0.348443883,0.545636513,0.706163721,0.57258966,0.61028336
0.891257909,0.955746538,0.999097309,0.851267723,0.10625069
0.229435053,0.613950942,0.345547795,0.256906621,0.306439199
0.120267113,0.568585791,0.416749018,0.770162734,0.825306577
0.542007512,0.018112219,0.761828347,0.682559864,0.235650889


In [422]:
# vector version, random integers between 1-99 (the upper bound 100 is excluded) => 10 numbers
data = np.random.randint(1, 100, 10)
data

0
95
25
43
92
81
64
71
68
54
38


In [423]:
# generate random numbers from the standard normal distribution
# value range is commonly between -3 and +3, but values slightly out of bounds
# are also possible (meaning extreme values => outliers)
data = np.random.randn(5, 5)
data

0,1,2,3,4
1.194372783,0.030888978,-0.879717138,-0.346908041,-0.11301136
-2.025907601,-0.313797267,2.614072992,0.835039121,0.973465919
0.401678766,-0.839458874,-0.022011674,0.086089759,0.24729771
-0.596728516,-0.659026011,-0.806833576,0.473255614,0.873493836
-0.365823655,0.126319977,-1.178176133,-0.557663691,0.186016773


In [424]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randn()
data = rng.normal(loc=0.0, scale=1.0, size=(5, 5))
data

0,1,2,3,4
-1.818233149,-1.653177544,-0.352287075,0.927652738,-0.497337654
0.585614595,-0.251678789,0.05557318,1.712047389,-0.381877409
-0.353665494,0.9840678,0.239799405,-0.830991413,-0.845355235
0.072672004,0.187263222,0.591713504,-0.435696672,1.85429884
1.244980928,-0.855844619,-0.216980967,0.71210852,0.712355974


In [425]:
# in future NumPy -versions the previous number examples
# might be removed, the new recommended way is to 
# use NumPy's random number generator
rng = np.random.default_rng()

# example to replace randint()
data = rng.integers(0, 100, size=25).reshape(5, 5)
data

0,1,2,3,4
22,7,35,22,72
82,62,66,77,27
62,21,40,70,56
7,72,74,42,88
33,29,17,10,57


### reshape() -> convert any vector/list into a matrix!

In [426]:
# reshape() is very handy, as long as you have some kind of a vector (list)
# you can reshape it into a matrix easily (no need to remember specialized syntax)

# in this case we have 25 values (0-24) => 5 x 5 matrix (25 values)
# the values have to match, otherwise you'll get an error
data = np.arange(25).reshape(5, 5)
data

0,1,2,3,4
0,1,2,3,4
5,6,7,8,9
10,11,12,13,14
15,16,17,18,19
20,21,22,23,24


In [427]:
# sometimes in ML etc. code, you might need to check
# your current data format in order to adapt it to your algorithm
data.shape

(5, 5)

In [428]:
# sometimes you also need to check what is the datatype
# a common bug => data looks like a number, but it's a text/object instead
data.dtype

dtype('int64')

### NumPy functions and tools

In [429]:
# some example data, random integers between 0-99 (the upper bound 100 is excluded), 25 numbers => 5 x 5 matrix
data = np.random.randint(0, 100, 25).reshape(5, 5)
data

0,1,2,3,4
93,79,42,45,94
3,40,87,81,75
46,80,97,76,35
12,49,36,7,27
10,41,26,50,26


In [430]:
# getting the largest value in the dataset
max_value = data.max()
max_value

np.int32(97)

In [431]:
# getting the smallest value in the dataset
min_value = data.min()
min_value

np.int32(3)

In [432]:
# if you want to remove the np.int32() wrapper
# from the print => just print directly
print(max_value)
print(min_value)

97
3


In [433]:
# argmax gives only a position as if the data was a vector (list)
# not needed commonly, except some specialized ML cases, see below
data.argmax()

np.int64(12)

In [434]:
# made-up (but realistic) scenario:
# a model has analysed a piece of hotel feedback and scored
# whether its tone is negative, neutral or positive

# index meaning: 0 => negative, 1 => neutral, 2 => positive

# AI code usually keeps the human-readable labels around,
# because an output of "Neutral" is easier to understand
# than a bare 1
categories = ["Negative", "Neutral", "Positive"]

# pretend output of a hotel feedback classification model
predictions = [0.37, 0.53, 0.97]

# wrap the scores in a NumPy array
data = np.array(predictions)

# position of the largest score in the array
highest_index = np.argmax(data)

# look up the matching label in the categories and show it
result = categories[highest_index]
print(result)

Positive
