In [77]:
import numpy as np

arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([[1, 2, 3], [4, 5, 6]])

arr2

array([[1, 2, 3],
       [4, 5, 6]])

In [78]:

# Special arrays

zeros = np.zeros((3, 4))        # 3x4 array of zeros
ones = np.ones((2, 3))          # 2x3 array of ones
empty = np.empty((2, 2))        # Uninitialized array
full = np.full((3, 3), 7)       # 3x3 array filled with 7

# zeros

In [79]:
#  Sequences
range_arr = np.arange(0, 10, 2)   # Range of 0 - 10, step=2 [0, 2, 4, 6, 8]
lin_arr = np.linspace(0, 10, 5)    # generate 5 values with equal intervals [0., 0.25, 0.5, 0.75, 1.]
identity = np.eye(3)              # 3x3 identity matrix # Identity matrix
random_arr = np.random.random((3, 5))  # Random array values 0-1 on a 3 by 5 matrix
randint_arr = np.random.randint(0, 10, (3, 3))  # Random integers from 0-10, on 3x3 matrix


randint_arr

array([[0, 1, 1],
       [7, 4, 7],
       [2, 6, 9]], dtype=int32)

In [80]:
# Properties

arr = np.array([[1, 2, 3], [4, 5, 6]])

print(arr)
print(arr.shape)        # Shape: (2, 3) - dimensions
print(arr.ndim)  # Number of dimensions - 2
print(arr.size)    # Number of elements - 6
print(arr.dtype)    # Data type - int64 (default)
print(arr.itemsize) # Item size - 8 bytes

[[1 2 3]
 [4 5 6]]
(2, 3)
2
6
int64
8


In [81]:
# Basic Operations

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Element-wise operations
print(a + b)        # Addition:  [5, 7, 9]
print(a * b)  # Multiplication:  [4, 10, 18]
print(b / a)        # Division:  [4., 2.5, 2.]
print(a ** 2)          # Power:  [1, 4, 9]
print(np.sqrt(a)) # Square root:  [1., 1.414, 1.732]

# With scalars
print(a + 10)     # Add scalar: [11, 12, 13]

[5 7 9]
[ 4 10 18]
[4.  2.5 2. ]
[1 4 9]
[1.         1.41421356 1.73205081]
[11 12 13]


In [82]:
# Explicit data types
arr_int32 = np.array([1, 2, 3], dtype=np.int32)
arr_float64 = np.array([1, 2, 3], dtype=np.float64)
arr_complex = np.array([1, 2, 3], dtype=np.complex128)
arr_bool = np.array([1, 0, 1], dtype=bool)

# Changing data type
arr_float = arr_int32.astype(np.float64)


In [83]:
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix multiplication (dot product)
print(np.dot(A, B))           # Traditional way
print(A @ B)                  # or

# Element-wise multiplication
print(A * B)

# Transpose
print(A.T)          # Transpose of A

[[19 22]
 [43 50]]
[[19 22]
 [43 50]]
[[ 5 12]
 [21 32]]
[[1 3]
 [2 4]]


In [84]:
arr = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 10, 11, 12],
                 [91, 10, 101, 12],
                 [51, 61, 7, 18]])

# Single element
print(arr[0, 3])        # 4

# Slicing
print(arr[0:3, 1:4])    # [[ 2  3  4], [ 6  7  8], [10 11 12]] -> row 0 - 3, slice 1:4 from each column

# Row and column access
print(arr[0, :])      # First row: [1, 2, 3, 4]
print(arr[:, 1])        # Second column: [ 2  6 10 10 61]

# Step slicing
print(arr[::2, ::2])    # [[1, 3], [9, 11], [51  7]] - 

# Boolean indexing
print(arr[arr > 5])     # [6, 7, 8, 9, 10, 11, 12]

# Fancy indexing
print(arr[[0, 2], 1:3])   # Rows 0 and 2


4
[[ 2  3  4]
 [ 6  7  8]
 [10 11 12]]
[1 2 3 4]
[ 2  6 10 10 61]
[[ 1  3]
 [ 9 11]
 [51  7]]
[  6   7   8   9  10  11  12  91  10 101  12  51  61   7  18]
[[ 2  3]
 [10 11]]


In [85]:
arr = np.arange(12)

print(arr)
# Reshape
reshaped = arr.reshape(3, 4)
print(reshaped)

# Flatten
flattened = reshaped.flatten()
print(flattened)

# Concatenation
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

print(np.vstack((a, b)))        # Vertical concatenation

print(np.hstack((a, b)))        # Horizontal concatenation

# Split
arr = np.arange(9).reshape(3, 3)

print(arr)
print(np.vsplit(arr, 3))        # Split vertically

print(np.hsplit(arr, 3))        # Split horizontally


[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]
[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]
[array([[0],
       [3],
       [6]]), array([[1],
       [4],
       [7]]), array([[2],
       [5],
       [8]])]


In [86]:
arr = np.array([1, 2, 3, 4])



# Aggregate functions
print(np.sum(arr))              # Sum
print(np.mean(arr))         # Mean
print(np.std(arr))              # Standard deviation  
print(np.max(arr))          # Max 
print(np.min(arr))          # Min
print(np.prod(arr))         # Product

10
2.5
1.118033988749895
4
1
24


In [87]:
# Random numbers
print(np.random.rand())          # Random float [0,1
print(np.random.rand(2, 3))            # Random float [0,1 in 2x3 matrix
print(np.random.randint(0, 10, 5))  # random numbers from 0 - 10, 5 arrays 


# Shuffling
arr = np.arange(10)
np.random.shuffle(arr)
print(arr)

0.8106422365698773
[[0.58856452 0.36479757 0.96775065]
 [0.91032965 0.09695524 0.16502836]]
[9 0 0 8 7]
[6 9 2 3 7 0 1 5 8 4]


In [88]:
# Operations between arrays of different shapes
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])

# b is broadcast to match a's shape
print(a + b)  # [[11, 22, 33], [14, 25, 36]]


[[11 22 33]
 [14 25 36]]


In [89]:
# python loop
result = []
for i in range(1000):
    result.append(i * 2)

# print(result)


# FUsing NumPy vectorization - faster
result = np.arange(1000) * 2

# print(result)

In [90]:
# Use float32 instead of float64 if precision allows
arr = np.array([1.0, 2.0, 3.0], dtype=np.float32)

print(arr.dtype)

float32


In [91]:
# EXAMPLE 1
m = np.array([[1, 2, 3],
              [4, 5, 6]])

print(m.sum())          # 21
print(m.mean())         # average of all
print(m.sum(axis=0))    # column sums
print(m.sum(axis=1))    # row sums
print(m.min(), m.max())

21
3.5
[5 7 9]
[ 6 15]
1 6


In [92]:
x = np.array([50, 10, 30, 20])
print(np.sort(x))          # sorted values
print(np.argsort(x))       # indices that would sort x
print(x[np.argsort(x)])     # sorted using indices


data = np.array([2, 2, 5, 1, 5, 5, 2, 1])
vals, counts = np.unique(data, return_counts=True)
print(vals)    # unique values
print(counts)  # their frequencies


scores = np.array([45, 72, -90, 200, 66, 38, 120, -5])        # 

# clamp to 0..100
fixed = np.clip(scores, 0, 100)  # shouldnt be more than 0- 100, round off negative numbers to zero and number above 100 to 100
print(fixed)

# set anything < 50 to 50 (like before)
print(np.where(scores < 50, 50, scores))

[10 20 30 50]
[1 3 2 0]
[10 20 30 50]
[1 2 5]
[2 3 3]
[ 45  72   0 100  66  38 100   0]
[ 50  72  50 200  66  50 120  50]


In [93]:
x = np.array([10, 12, 9, 11, 50])  

print(x.mean())     # average
print(np.median(x)) # middle value 
print(x.std())      # spread
print(x.min(), x.max())

# Cumulative sum
x = np.array([2, 5, 1, 9, 3])
print(np.cumulative_sum(x))         # [ 2  7  8 17 20]

# Differences between neighbors
y = np.array([10, 13, 9, 20])
print(np.diff(y))                   # [ 3 -4 11]


18.4
11.0
15.831613941730641
9 50
[ 2  7  8 17 20]
[ 3 -4 11]


In [94]:
print("==============Example1=================")
# Create a 4×5 matrix of random integers 1–20 print the max of each column
mat = np.random.randint(1, 20, (4, 5))
print(mat)
print(mat.max(axis=0))

# produce a frequency table using unique
x = np.array([2, 5, 1, 8, 3, 8, 8])

uniq, val = np.unique(x, return_counts=True)
print(uniq)
print(val)

# sort each row from smallest to biggest.
M = np.array([[3, 1, 7],[9, 7, 6]])
print(np.sort(M, axis=1))
print(np.sort(M, axis=0))  # sort each column


# convert to pass/fail (pass ≥ 50) as True/False
scores = np.array([35, 55, 78, 49, 100])

Passed = [scores >= 50]
print(Passed)


# 3) Missing values (NaN)
a = np.array([1, 2, np.nan, 4, np.nan, 6], dtype=float)

mean_no_nan = np.nanmean(a)   # ignores NaN
print(mean_no_nan)              # mean ignoring NaN

a_filled = np.where(np.isnan(a), mean_no_nan, a)  # replace NaN with mean
print(a_filled)                    

[[17  4  4  3 13]
 [ 9 17  8  9 13]
 [10 12  9  2  5]
 [ 5  6 16  8  5]]
[17 17 16  9 13]
[1 2 3 5 8]
[1 1 1 1 3]
[[1 3 7]
 [6 7 9]]
[[3 1 6]
 [9 7 7]]
[array([False,  True,  True, False,  True])]
3.25
[1.   2.   3.25 4.   3.25 6.  ]


In [95]:
print("==============Example2=================")

# Create a 6×6 random matrix (ints 1–9
# replace every value < 4 with 0
# count how many zeros you now have
m_arr = np.random.randint(1, 9, (6, 6))
m_arr[m_arr < 4] = 0
print(m_arr)

zeros = np.count_nonzero(m_arr == 0)        # count how many zeros you now have
print(zeros)


# Create A shape (3×4) and B shape (4×2).
A = np.random.randint(1, 11, (3, 4))
B = np.random.randint(1, 9, (4, 2))

print((A @ B) + 10)

# Reshape it to (2×3), then swap rows to get
x = np.array([1, 2, 3, 4, 5, 6])
shape_x = x.reshape(2, 3)
swap = shape_x[::-1]                # then swap rows to get
print(swap)

[[6 7 6 0 0 0]
 [0 8 7 6 5 0]
 [6 0 7 0 0 5]
 [8 0 0 5 0 7]
 [0 0 7 5 5 7]
 [7 0 5 6 0 4]]
15
[[172 102]
 [278 151]
 [191  86]]
[[4 5 6]
 [1 2 3]]


In [96]:
mat = np.random.randint(1, 10, (4, 4))
np.savetxt("mat.csv", mat, delimiter=",", fmt="%d")

print("Saved mat.csv")


loaded = np.loadtxt("mat.csv", delimiter=",")
print(loaded)
print(loaded.dtype)  # usually float, even if it looks like ints

loaded_int = loaded.astype(int)
print(loaded_int)

Saved mat.csv
[[5. 6. 1. 9.]
 [7. 5. 7. 2.]
 [6. 8. 3. 7.]
 [8. 1. 2. 4.]]
float64
[[5 6 1 9]
 [7 5 7 2]
 [6 8 3 7]
 [8 1 2 4]]


In [97]:
np.savetxt(
    "mat_with_header.csv",
    mat,
    delimiter=",",
    fmt="%d",
    header="c1,c2,c3,c4",
    comments=""   # prevents '#' from being added
)

loaded2 = np.loadtxt("mat_with_header.csv", delimiter=",", skiprows=1)
print(loaded2)

a = np.array([1, 2, np.nan, 4, np.nan, 6], dtype=float)

np.savetxt("missing.csv", a, delimiter=",")
b = np.loadtxt("missing.csv", delimiter=",")

print("mean ignoring NaN:", np.nanmean(b))
b_filled = np.where(np.isnan(b), np.nanmean(b), b)
print("filled:", b_filled)


[[5. 6. 1. 9.]
 [7. 5. 7. 2.]
 [6. 8. 3. 7.]
 [8. 1. 2. 4.]]
mean ignoring NaN: 3.25
filled: [1.   2.   3.25 4.   3.25 6.  ]


In [98]:
print("============Example===================")
# Create a 5×3 matrix of ints 0–20 and save as scores.csv with header A,B,C.
mat1 = np.random.randint(0, 20, (5, 3))

print(mat1)

# Load it back, and compute:
np.savetxt(
    "mat_new.csv",
    mat1, delimiter=",",
    fmt="%d",
    header="A,B,C",
    comments=""          # important: prevents "# " before header
)

load_mat = np.loadtxt("mat_new.csv", delimiter=",", skiprows=1)
# mean of each column
print(load_mat.mean(axis=0))

count_mat = np.count_nonzero(load_mat < 10)
# number of values in the whole matrix that are < 10
print(count_mat)

load_mat[load_mat < 10] = 10

np.savetxt("scores_clean.csv", load_mat, delimiter=",", fmt="%d")

[[15  4 10]
 [15  4  3]
 [ 2 15  6]
 [ 3  3  3]
 [10  3 19]]
[9.  5.8 8.2]
9
