# NumPy Basics for Scientists

A hands-on introduction to numerical computing with NumPy arrays: core concepts and common operations.

In [2]:
import numpy as np
import matplotlib.pyplot as plt

# Compact array display: three decimal places, and small values are shown
# in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True, precision=3)

## Creating Arrays

In [3]:
# From lists: a flat list gives a 1-D array, nested lists give a 2-D array
data_1d = np.array([1, 2, 3, 4, 5])
data_2d = np.array([[1, 2, 3], [4, 5, 6]])

print("1D array:", data_1d)
print("2D array:")
print(data_2d)
print(f"Shape: {data_2d.shape}, Dimensions: {data_2d.ndim}")

# Special arrays (the shape is passed as a tuple)
zeros = np.zeros((3, 4))
ones = np.ones((2, 3))   # built for reference only; not printed in this cell
identity = np.eye(3)     # 3x3 identity matrix; not printed in this cell
# Draw the random example from a locally seeded Generator so the printed
# values are identical on every run and the global np.random state (used by
# other cells) is left untouched.
random_data = np.random.default_rng(0).random((2, 3))  # uniform in [0, 1)

print("\nZeros array:")
print(zeros)
print("\nRandom array:")
print(random_data)

# Ranges
sequence = np.arange(0, 10, 2)  # start, stop (exclusive), step
linspace_data = np.linspace(0, 1, 5)  # start, stop (inclusive), num_points

print(f"\nSequence: {sequence}")
print(f"Linspace: {linspace_data}")

1D array: [1 2 3 4 5]
2D array:
[[1 2 3]
 [4 5 6]]
Shape: (2, 3), Dimensions: 2

Zeros array:
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Random array:
[[0.911 0.587 0.365]
 [0.924 0.104 0.213]]

Sequence: [0 2 4 6 8]
Linspace: [0.   0.25 0.5  0.75 1.  ]


## Array Indexing and Slicing

In [None]:
# Create sample data
matrix = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])

print("Original matrix:")
print(matrix)

# Basic indexing
print(f"\nElement at [1,2]: {matrix[1, 2]}")
print(f"First row: {matrix[0, :]}")
print(f"Last column: {matrix[:, -1]}")

# Slicing
print("\nSubmatrix [0:2, 1:3]:")
print(matrix[0:2, 1:3])

# Boolean indexing
mask = matrix > 6
print(f"\nElements > 6: {matrix[mask]}")
print("Boolean mask:")
print(mask)

# Fancy indexing
rows = [0, 2]
cols = [1, 3]
print(f"\nSelected elements: {matrix[rows, cols]}")
# NOTE: matrix[rows, cols] pairs the two index lists element by element, so
# the result is the 1-D array [matrix[0, 1], matrix[2, 3]] == [2, 12]. It is
# NOT the 2x2 submatrix made of rows 0, 2 and columns 1, 3.
#
# How to get a non-contiguous submatrix:
#   * Slices can only express regularly spaced selections (via a step),
#     e.g. matrix[0::2, 1::2] happens to pick rows 0, 2 and columns 1, 3 here.
#   * For arbitrary row/column lists, use np.ix_(rows, cols). It returns a
#     (2, 1) row-index array and a (1, 2) column-index array that broadcast
#     against each other to cover every row-column combination.
#
# Summary:
#   matrix[rows, cols]         -> pairwise selection  -> [2, 12]
#   matrix[np.ix_(rows, cols)] -> submatrix           -> [[ 2,  4],
#                                                         [10, 12]]
mesh = np.ix_(rows, cols)
print(mesh)
submatrix = matrix[mesh]
submatrix  # last expression of the cell: displayed as the cell's Out value

Original matrix:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Element at [1,2]: 7
First row: [1 2 3 4]
Last column: [ 4  8 12]

Submatrix [0:2, 1:3]:
[[2 3]
 [6 7]]

Elements > 6: [ 7  8  9 10 11 12]
Boolean mask:
[[False False False False]
 [False False  True  True]
 [ True  True  True  True]]

Selected elements: [ 2 12]
(array([[0],
       [2]]), array([[1, 3]]))


array([[ 2,  4],
       [10, 12]])

## Array Operations

In [None]:
# Element-wise operations: arithmetic operators act on matching elements
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])

print(f"a = {a}")
print(f"b = {b}")
print(f"a + b = {a + b}")
print(f"a * b = {a * b}")
print(f"a / b = {a / b}")
print(f"a ** 2 = {a ** 2}")


# Define the matrix used by the rest of this cell *before* its first use.
# Otherwise the scalar examples below would silently pick up whatever
# `matrix` an earlier cell left in the kernel, and the printed result would
# depend on execution order.
matrix = np.array([[1, 2, 3],
                   [4, 5, 6]])

# Scalar operations: the scalar is applied to every element
print("\nScalar operations:")
print(f"Matrix * 2:\n{matrix * 2}")
print(f"Matrix + 100:\n{matrix + 100}")

# Broadcasting, shown twice against the same (2, 3) matrix:
#   1. a (3,) vector is added to every row;
#   2. a (2, 1) column vector is added to every column.
# After the loop `vector` is bound to the column vector.
for vector in (np.array([10, 20, 30]), np.array([[10], [20]])):
    print("\nBroadcasting:")
    print("Matrix:")
    print(matrix)
    print(f"Vector: {vector}")
    print("Matrix + Vector:")
    print(matrix + vector)

a = [1 2 3 4]
b = [5 6 7 8]
a + b = [ 6  8 10 12]
a * b = [ 5 12 21 32]
a / b = [0.2   0.333 0.429 0.5  ]
a ** 2 = [ 1  4  9 16]

Scalar operations:
Matrix * 2:
[[ 2  4  6]
 [ 8 10 12]]
Matrix + 100:
[[101 102 103]
 [104 105 106]]

Broadcasting:
Matrix:
[[1 2 3]
 [4 5 6]]
Vector: [10 20 30]
Matrix + Vector:
[[11 22 33]
 [14 25 36]]

Broadcasting:
Matrix:
[[1 2 3]
 [4 5 6]]
Vector: [[10]
 [20]]
Matrix + Vector:
[[11 12 13]
 [24 25 26]]


## Mathematical Functions

In [11]:
# Trigonometric functions (angles are in radians)
angles = np.array([0, np.pi/4, np.pi/2, np.pi])
print(f"Angles: {angles}")
print(f"Sine: {np.sin(angles)}")
print(f"Cosine: {np.cos(angles)}")

# Exponential and logarithmic (np.log is the natural logarithm)
x = np.array([1, 2, 3, 4])
print(f"\nx = {x}")
print(f"exp(x) = {np.exp(x)}")
print(f"log(x) = {np.log(x)}")
print(f"log10(x) = {np.log10(x)}")
print(f"sqrt(x) = {np.sqrt(x)}")

# Statistical functions
# Sample from a locally seeded Generator so the statistics below are the same
# on every run; the global np.random state used by other cells is not touched.
data = np.random.default_rng(0).normal(0, 1, 1000)  # mean 0, std 1, 1000 draws
print("\nStatistics on 1000 random numbers:")
print(f"Mean: {np.mean(data):.3f}")
print(f"Std: {np.std(data):.3f}")
print(f"Min: {np.min(data):.3f}")
print(f"Max: {np.max(data):.3f}")
print(f"Median: {np.median(data):.3f}")

Angles: [0.    0.785 1.571 3.142]
Sine: [0.    0.707 1.    0.   ]
Cosine: [ 1.     0.707  0.    -1.   ]

x = [1 2 3 4]
exp(x) = [ 2.718  7.389 20.086 54.598]
log(x) = [0.    0.693 1.099 1.386]
log10(x) = [0.    0.301 0.477 0.602]
sqrt(x) = [1.    1.414 1.732 2.   ]

Statistics on 1000 random numbers:
Mean: 0.049
Std: 0.981
Min: -3.886
Max: 3.177
Median: 0.025


## Array Manipulation

In [12]:
# --- Build every array first, then report the results in order ---

# Reshaping: 12 consecutive integers viewed as 3 rows x 4 columns, then
# flattened back into a 1-D copy
original = np.arange(12)
reshaped = original.reshape((3, 4))
flattened = reshaped.flatten()

# Transposing: a (2, 3) matrix whose .T attribute is printed below
matrix = np.array([[1, 2, 3], [4, 5, 6]])

# Concatenation: two (2, 2) blocks joined side by side and top to bottom
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
horizontal = np.hstack((a, b))  # equivalent: np.concatenate([a, b], axis=1)
vertical = np.vstack((a, b))    # equivalent: np.concatenate([a, b], axis=0)

# Splitting: eight elements cut into four equal pieces
array_to_split = np.arange(8)
split_arrays = np.split(array_to_split, 4)

# --- Report ---
print(f"Original (12,): {original}")
print(f"Reshaped (3,4):\n{reshaped}")
print(f"Flattened: {flattened}")

print(f"\nOriginal matrix (2,3):\n{matrix}")
print(f"Transposed (3,2):\n{matrix.T}")

print(f"\nHorizontal stack:\n{horizontal}")
print(f"Vertical stack:\n{vertical}")

print(f"\nOriginal: {array_to_split}")
print(f"Split into 4: {split_arrays}")

Original (12,): [ 0  1  2  3  4  5  6  7  8  9 10 11]
Reshaped (3,4):
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Flattened: [ 0  1  2  3  4  5  6  7  8  9 10 11]

Original matrix (2,3):
[[1 2 3]
 [4 5 6]]
Transposed (3,2):
[[1 4]
 [2 5]
 [3 6]]

Horizontal stack:
[[1 2 5 6]
 [3 4 7 8]]
Vertical stack:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]

Original: [0 1 2 3 4 5 6 7]
Split into 4: [array([0, 1]), array([2, 3]), array([4, 5]), array([6, 7])]


## Linear Algebra

In [13]:
# --- Compute ---

# Two small square matrices to work with
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix product, written with the @ operator (for 2-D arrays this is the
# same result as np.dot(A, B))
dot_product = A @ B

# Determinant, inverse and eigen-decomposition of A
det_A = np.linalg.det(A)
inv_A = np.linalg.inv(A)
eigenvals, eigenvecs = np.linalg.eig(A)

# Solve the linear system Ax = b for x
b = np.array([1, 2])
x = np.linalg.solve(A, b)

# --- Report ---
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)

print(f"\nA @ B (matrix multiplication):\n{dot_product}")

print(f"\nDeterminant of A: {det_A}")
print(f"Inverse of A:\n{inv_A}")
print(f"Eigenvalues: {eigenvals}")

# Multiplying A by the solution should give back b
print(f"\nSolution to Ax = b: {x}")
print(f"Verification Ax = {A @ x}")

Matrix A:
[[1 2]
 [3 4]]
Matrix B:
[[5 6]
 [7 8]]

A @ B (matrix multiplication):
[[19 22]
 [43 50]]

Determinant of A: -2.0000000000000004
Inverse of A:
[[-2.   1. ]
 [ 1.5 -0.5]]
Eigenvalues: [-0.372  5.372]

Solution to Ax = b: [0.  0.5]
Verification Ax = [1. 2.]


## Random Number Generation

In [14]:
# Fix the global (legacy) NumPy random state so every draw below repeats
# exactly from run to run
np.random.seed(seed=42)

# Five draws from each of several distributions
uniform = np.random.uniform(low=0, high=1, size=5)
normal = np.random.normal(loc=0, scale=1, size=5)
integers = np.random.randint(low=1, high=10, size=5)  # upper bound excluded
choice = np.random.choice(['A', 'B', 'C'], size=5)    # sampled with replacement

print(f"Uniform [0,1]: {uniform}")
print(f"Normal (0,1): {normal}")
print(f"Random integers [1,10): {integers}")
print(f"Random choice: {choice}")

# Random sampling: ten distinct values out of 0..99
population = np.arange(100)
sample = np.random.choice(population, size=10, replace=False)
print(f"\nSample from 0-99: {sample}")

# Shuffle: the array is permuted in place
deck = np.arange(1, 53)  # deck of cards
np.random.shuffle(deck)
print(f"Shuffled deck (first 10): {deck[:10]}")

Uniform [0,1]: [0.375 0.951 0.732 0.599 0.156]
Normal (0,1): [ 0.279  1.011 -0.581 -0.525 -0.571]
Random integers [1,10): [6 9 1 3 7]
Random choice: ['A' 'C' 'A' 'C' 'C']

Sample from 0-99: [45 73 27 19 42 21 29 81 71 18]
Shuffled deck (first 10): [41 14 19 16 44 40 46 33 17 24]


## Example: Analyzing (Simulated) Experimental Data

In [15]:
# Simulate experimental data: a sine wave of amplitude 2 (one cycle per unit
# of time) plus Gaussian noise scaled by 0.5. Seeding first makes the noise
# repeatable.
# NOTE: the name `time` is also used by the `import time` in a later cell;
# once that import runs, `time` refers to the module instead of this array.
np.random.seed(123)
n_samples = 100
time = np.linspace(0, 10, n_samples)
signal = 2 * np.sin(2 * np.pi * time) + 0.5 * np.random.normal(0, 1, n_samples)

print(f"Time range: {time[0]:.1f} to {time[-1]:.1f}")
print("Signal statistics:")
print(f"  Mean: {np.mean(signal):.3f}")
print(f"  Std: {np.std(signal):.3f}")
print(f"  Min: {np.min(signal):.3f}")
print(f"  Max: {np.max(signal):.3f}")

# Data analysis
# Find peaks (simple approach): an interior sample is a peak when it is
# strictly greater than both neighbours and above the threshold. This is the
# vectorized form of looping over i = 1 .. len(signal) - 2.
peak_threshold = 1
interior = signal[1:-1]
is_peak = (
    (interior > signal[:-2])      # greater than the left neighbour
    & (interior > signal[2:])     # greater than the right neighbour
    & (interior > peak_threshold)
)
# +1 because interior[k] corresponds to signal[k + 1]
peak_indices = np.flatnonzero(is_peak) + 1

peak_times = time[peak_indices]
peak_values = signal[peak_indices]

print(f"\nFound {len(peak_indices)} peaks")
print(f"Peak times: {peak_times}")
print(f"Peak values: {peak_values}")

# Moving average (smoothing): convolve with a uniform kernel. mode='valid'
# keeps only positions where the window fully overlaps the signal, so the
# result is window_size - 1 samples shorter than the input.
window_size = 5
smoothed = np.convolve(signal, np.ones(window_size)/window_size, mode='valid')
# Time stamps aligned with the centre of each averaging window
smoothed_time = time[window_size//2:len(smoothed)+window_size//2]

print(f"\nSmoothed signal length: {len(smoothed)}")
print(f"Smoothed signal std: {np.std(smoothed):.3f} (vs original: {np.std(signal):.3f})")

Time range: 0.0 to 10.0
Signal statistics:
  Mean: 0.014
  Std: 1.452
  Min: -2.694
  Max: 2.872

Found 14 peaks
Peak times: [0.202 1.212 2.121 2.323 3.333 4.343 5.253 6.263 6.465 7.273 8.081 8.283
 9.091 9.293]
Peak values: [2.051 2.689 2.126 2.38  1.382 1.952 2.872 2.151 1.149 2.561 1.6   2.788
 1.656 2.018]

Smoothed signal length: 96
Smoothed signal std: 0.907 (vs original: 1.452)


## Performance Tips

In [17]:
# Vectorized operations vs loops
import time

# One million uniform samples in [0, 1); used as input for the timing
# comparison in the next cell
n_elements = 1_000_000
large_array = np.random.random(n_elements)
print(large_array.shape)

(1000000,)


In [18]:

# Use a monotonic, high-resolution timer for short intervals. time.time() is
# a wall clock whose resolution can be coarse enough to report 0.0 for the
# vectorized call, which would make the speedup division below fail.
# It is imported here by name so this cell does not rely on the global name
# `time`, which an earlier cell also binds to the sample-time array.
from time import perf_counter

# Vectorized operation: one NumPy call over the whole array
start = perf_counter()
result_vectorized = np.sqrt(large_array)
time_vectorized = perf_counter() - start

# Loop operation (don't run this on large arrays!)
# The loop variable is named `value` so it does not overwrite `x`, which the
# linear-algebra cell uses for its solution vector.
small_array = np.random.random(1000)
start = perf_counter()
result_loop = []
for value in small_array:
    result_loop.append(value**0.5)
result_loop = np.array(result_loop)
time_loop = perf_counter() - start

# Scale the loop timing up to the size of the large array before comparing
# (1,000,000 / 1,000 = 1000 with the sizes used here).
size_ratio = large_array.size / small_array.size

print(f"Vectorized (1M elements): {time_vectorized:.6f} seconds")
print(f"Loop (1K elements): {time_loop:.6f} seconds")
print(f"Estimated speedup: ~{time_loop * size_ratio / time_vectorized:.0f}x")

# Memory efficiency
print("\nArray memory usage:")
print(f"1M float64 elements: {large_array.nbytes / 1024 / 1024:.1f} MB")
print(f"Data type: {large_array.dtype}")

# In-place operations save memory
array_copy = large_array.copy()
array_copy *= 2  # in-place: reuses array_copy's existing buffer
# vs: new_array = large_array * 2  # creates new array

Vectorized (1M elements): 0.006413 seconds
Loop (1K elements): 0.002295 seconds
Estimated speedup: ~358x

Array memory usage:
1M float64 elements: 7.6 MB
Data type: float64
