# NumPy Basics Part 2 - Advanced Numerical Computing

Advanced NumPy and SciPy techniques for scientific computing and data analysis.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg, optimize, signal, interpolate
import time

# Seed NumPy's legacy global RNG so the np.random.* calls in the cells below
# produce the same numbers on every run.
np.random.seed(42)
# Show floats with 4 decimals and without scientific notation so printed
# arrays stay compact.
np.set_printoptions(precision=4, suppress=True)

## Advanced Array Operations and Broadcasting

In [None]:
# Advanced indexing with multiple conditions.
# data_2d is a 10x8 array of standard-normal draws from the global RNG.
data_2d = np.random.randn(10, 8)
print("Original 2D array shape:", data_2d.shape)
print("First few rows:")
print(data_2d[:3])

# Boolean indexing with multiple conditions.
# Element-wise masks must be combined with `&` (not `and`); the parentheses
# are required because `&` binds tighter than the comparisons.
condition = (data_2d > 0.5) & (data_2d < 1.5)
# Indexing with a boolean mask returns the selected elements as a 1-D array.
values_in_range = data_2d[condition]
print(f"\nValues between 0.5 and 1.5: {len(values_in_range)} values")
print(f"Sample values: {values_in_range[:5]}")

# Advanced fancy indexing.
# np.ix_ builds an open mesh from the two index lists, so the result is the
# 3x3 submatrix at the cross product of the chosen rows and columns (rather
# than the three paired elements data_2d[row_indices, col_indices] would give).
row_indices = [1, 3, 7]
col_indices = [2, 5, 6]
selected_elements = data_2d[np.ix_(row_indices, col_indices)]
print(f"\nSelected submatrix shape: {selected_elements.shape}")
print(selected_elements)

# Broadcasting examples
print("\n=== Broadcasting Examples ===")

# Example 1: Normalize each row by its maximum.
# keepdims=True leaves row_max with shape (10, 1), which broadcasts across the
# 8 columns of data_2d in the division below.
# NOTE(review): this only maps the largest entry to 1 when the row maximum is
# positive; a zero or negative maximum is not handled here.
row_max = data_2d.max(axis=1, keepdims=True)
normalized_rows = data_2d / row_max
print(f"Original max per row: {data_2d.max(axis=1)[:3]}")
print(f"Normalized max per row: {normalized_rows.max(axis=1)[:3]}")

# Example 2: Center each column (subtract column mean).
# col_means has shape (8,), which broadcasts against the trailing axis of the
# (10, 8) array, so no keepdims is needed for a column-wise operation.
col_means = data_2d.mean(axis=0)
centered_data = data_2d - col_means
print(f"\nOriginal column means: {data_2d.mean(axis=0)[:4]}")
print(f"Centered column means: {centered_data.mean(axis=0)[:4]}")

# Example 3: pairwise Euclidean distances computed purely by broadcasting
points = np.random.rand(5, 2)  # five random points in the unit square
print(f"\nPoints shape: {points.shape}")
print("Points:", points, sep="\n")

# A (5, 1, 2) view minus a (1, 5, 2) view broadcasts to every ordered pair of
# points, giving a (5, 5, 2) array of coordinate differences.
diff = points[:, None, :] - points[None, :, :]
# Square, sum over the coordinate axis, and take the root to get distances.
distances = np.sqrt((diff**2).sum(axis=2))
print(f"\nDistance matrix shape: {distances.shape}")
print("Distance matrix:", distances, sep="\n")

# Sanity check: every point is at distance zero from itself
print(f"Diagonal elements (should be 0): {np.diag(distances)}")

## Advanced Linear Algebra and Decompositions

In [None]:
# Create sample data matrix (e.g., gene expression data).
# Rows are genes and columns are samples; each entry is an exponential draw
# (scale 2) plus a small Gaussian perturbation (std 0.1).
n_genes, n_samples = 100, 20
expression_data = np.random.exponential(2, (n_genes, n_samples)) + np.random.normal(0, 0.1, (n_genes, n_samples))

print(f"Expression data shape: {expression_data.shape}")
print(f"Data range: {expression_data.min():.3f} to {expression_data.max():.3f}")

# Singular Value Decomposition (SVD).
# full_matrices=False requests the reduced factorisation, so for this 100x20
# input U is 100x20, s has 20 entries and Vt is 20x20.
print("\n=== Singular Value Decomposition ===")
U, s, Vt = np.linalg.svd(expression_data, full_matrices=False)

print(f"U shape (genes × components): {U.shape}")
print(f"Singular values shape: {s.shape}")
print(f"Vt shape (components × samples): {Vt.shape}")
print(f"First 5 singular values: {s[:5]}")

# Reconstruct data using different numbers of components
def reconstruct_svd(U, s, Vt, n_components):
    """Return the rank-``n_components`` approximation built from an SVD.

    Parameters
    ----------
    U : ndarray, shape (m, k)
        Left singular vectors (one per column).
    s : ndarray, shape (k,)
        Singular values, in the same order as the columns of ``U``.
    Vt : ndarray, shape (k, n)
        Right singular vectors (one per row).
    n_components : int
        Number of leading components to keep.

    Returns
    -------
    ndarray, shape (m, n)
        ``U[:, :r] @ diag(s[:r]) @ Vt[:r, :]`` with ``r = n_components``.
    """
    # Scaling the kept columns of U by the singular values via broadcasting is
    # the same product as U @ diag(s), without allocating a dense r-by-r
    # diagonal matrix and multiplying through its zeros.
    scaled_left = U[:, :n_components] * s[:n_components]
    return scaled_left @ Vt[:n_components, :]

# Calculate reconstruction error for different numbers of components.
# The error is the Frobenius norm of the residual between the data and its
# truncated-SVD approximation; the last entry (20) keeps every component of
# the 100x20 data, so its error should be at floating-point noise level.
components_range = [1, 5, 10, 15, 20]
reconstruction_errors = []

for n_comp in components_range:
    reconstructed = reconstruct_svd(U, s, Vt, n_comp)
    error = np.linalg.norm(expression_data - reconstructed, 'fro')
    reconstruction_errors.append(error)
    print(f"Components: {n_comp:2d}, Reconstruction error: {error:.3f}")

# Explained variance ratio.
# NOTE(review): U, s, Vt come from the SVD of the *uncentered* expression_data,
# so these ratios are shares of the total squared magnitude (sum of s**2),
# not of variance about the mean; the first component mostly reflects the
# non-zero mean level of the data.
explained_variance_ratio = s**2 / np.sum(s**2)
cumulative_variance = np.cumsum(explained_variance_ratio)

# Index 4 / 9 are the cumulative shares after 5 / 10 components.
print(f"\nFirst 5 components explain {cumulative_variance[4]:.1%} of variance")
print(f"First 10 components explain {cumulative_variance[9]:.1%} of variance")

# Principal Component Analysis using SVD
print("\n=== Principal Component Analysis ===")

# Center the data: subtract each gene's (row's) mean across samples, so the
# columns (samples) are the observations and the rows (genes) the variables.
centered_data = expression_data - np.mean(expression_data, axis=1, keepdims=True)

# PCA via SVD: the columns of U_pca are the principal axes in gene space and
# the rows of Vt_pca are unit-norm sample coordinates along those axes.
U_pca, s_pca, Vt_pca = np.linalg.svd(centered_data, full_matrices=False)

# Principal component scores (projection onto PC space).
# Projecting the centered samples onto the principal axes gives
# centered_data.T @ U_pca == Vt_pca.T * s_pca, so the right singular vectors
# must be scaled by the singular values; Vt_pca.T alone is unit-norm and
# carries no information about how much each component varies.
pc_scores = Vt_pca.T * s_pca  # Each column is a PC, each row is a sample
print(f"PC scores shape: {pc_scores.shape}")
print(f"First 3 samples, first 3 PCs:")
print(pc_scores[:3, :3])

# Eigenvalue decomposition of covariance matrix
print("\n=== Eigenvalue Decomposition ===")

# Create covariance matrix.
# np.cov treats rows as variables and columns as observations, so this is the
# genes-by-genes covariance, normalised by (number of columns - 1).
cov_matrix = np.cov(centered_data)
# eigh is the solver for symmetric matrices; it returns eigenvalues in
# ascending order.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Sort by eigenvalues (descending) to match the SVD ordering
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# Take the observation count from the data itself rather than from the
# notebook-level name `n_samples`, which a later cell rebinds to 10000 and
# would silently break this check if the cell were re-run afterwards.
n_observations = centered_data.shape[1]

print(f"Covariance matrix shape: {cov_matrix.shape}")
print(f"Top 5 eigenvalues: {eigenvalues[:5]}")
# Covariance eigenvalues equal squared singular values / (observations - 1).
print(f"Eigenvalue/SVD relationship check: {np.allclose(eigenvalues[:10], s_pca[:10]**2/(n_observations-1))}")

# QR Decomposition
print("\n=== QR Decomposition ===")
sample_matrix = np.random.randn(8, 6)
# np.linalg.qr defaults to the reduced factorisation: for an 8x6 input Q is
# 8x6 and R is 6x6.
Q, R = np.linalg.qr(sample_matrix)

print(f"Original matrix shape: {sample_matrix.shape}")
print(f"Q shape: {Q.shape}, R shape: {R.shape}")
# A reduced (tall) Q has orthonormal *columns*, i.e. Q.T @ Q is the identity of
# size Q.shape[1].  Q @ Q.T is only a rank-6 projector in 8 dimensions, so
# comparing it with eye(8) always reports False.
q_is_orthonormal = np.allclose(Q.T @ Q, np.eye(Q.shape[1]))
print(f"Q has orthonormal columns: {q_is_orthonormal}")
print(f"R is upper triangular: {np.allclose(R, np.triu(R))}")
print(f"Reconstruction check: {np.allclose(sample_matrix, Q @ R)}")

# Cholesky Decomposition (for positive definite matrices)
print("\n=== Cholesky Decomposition ===")
# A @ A.T is symmetric positive semi-definite; adding the identity shifts every
# eigenvalue up by one, which makes the matrix strictly positive definite.
A = np.random.randn(5, 5)
pos_def_matrix = A @ A.T + np.eye(5)

try:
    L = np.linalg.cholesky(pos_def_matrix)
except np.linalg.LinAlgError:
    # Raised by NumPy when the factorisation does not exist.
    print("Matrix is not positive definite")
else:
    # L is the lower-triangular factor with pos_def_matrix == L @ L.T.
    print(f"Cholesky factor shape: {L.shape}")
    print(f"L is lower triangular: {np.allclose(L, np.tril(L))}")
    print(f"Reconstruction check: {np.allclose(pos_def_matrix, L @ L.T)}")

## Signal Processing and Fourier Analysis

In [None]:
# Generate synthetic signal with multiple components
fs = 1000  # Sampling frequency (Hz)
# endpoint=False makes the sample spacing exactly 1/fs over the 2 seconds.
t = np.linspace(0, 2, 2*fs, endpoint=False)  # 2 seconds

# Create composite signal: three sinusoids with amplitudes 1.0, 0.5 and 0.3
freq1, freq2, freq3 = 50, 120, 200  # Frequencies in Hz
signal_clean = (np.sin(2*np.pi*freq1*t) +
               0.5*np.sin(2*np.pi*freq2*t) +
               0.3*np.sin(2*np.pi*freq3*t))

# Add white Gaussian noise (standard deviation 0.2)
noise = 0.2 * np.random.randn(len(t))
signal_noisy = signal_clean + noise

print(f"Signal length: {len(signal_noisy)} samples")
print(f"Sampling frequency: {fs} Hz")
print(f"Duration: {len(signal_noisy)/fs} seconds")

# Fast Fourier Transform (FFT)
print("\n=== Fourier Analysis ===")

# Compute FFT and the frequency (in Hz) that belongs to each output bin
fft_result = np.fft.fft(signal_noisy)
frequencies = np.fft.fftfreq(len(signal_noisy), 1/fs)

# Take only positive frequencies: the spectrum of a real signal is symmetric,
# and the DC bin (frequency 0) is not of interest here.
positive_freq_mask = frequencies > 0
frequencies_pos = frequencies[positive_freq_mask]
magnitude_pos = np.abs(fft_result[positive_freq_mask])
power_spectrum = magnitude_pos**2

# Find peaks in frequency domain.
# Power scales with amplitude squared, so the 0.3-amplitude tone at freq3
# carries only about 0.3**2 = 0.09 of the strongest peak's power.  A cut at
# 0.1 * max therefore discards it; 0.05 * max keeps all three tones while
# staying far above the noise floor.
peak_threshold = 0.05 * np.max(power_spectrum)
peak_indices = np.where(power_spectrum > peak_threshold)[0]
peak_frequencies = frequencies_pos[peak_indices]
detected_frequencies = peak_frequencies[peak_frequencies < 300]

print(f"Detected frequencies: {detected_frequencies}")
print(f"Expected frequencies: {[freq1, freq2, freq3]}")

# Power Spectral Density using Welch's method
from scipy.signal import welch

# nperseg=512 sets the length of each segment; the printed resolution is the
# spacing between consecutive entries of the returned frequency grid.
freqs_welch, psd_welch = welch(signal_noisy, fs, nperseg=512)
print(f"\nWelch PSD - Frequency resolution: {freqs_welch[1] - freqs_welch[0]:.2f} Hz")

# Filtering
print("\n=== Digital Filtering ===")

# Low-pass filter to remove high-frequency noise
from scipy.signal import butter, filtfilt

# Design Butterworth filter.
# The cutoff is expressed as a fraction of the Nyquist frequency (fs / 2),
# which is the normalisation passed to butter() here.
# Note that a 150 Hz cutoff also attenuates the 200 Hz signal component, not
# only the noise.
cutoff_freq = 150  # Hz
nyquist_freq = fs / 2
normalized_cutoff = cutoff_freq / nyquist_freq

# 4th-order low-pass design; filtfilt applies the filter forward and backward.
b, a = butter(4, normalized_cutoff, btype='low')
signal_filtered = filtfilt(b, a, signal_noisy)

# RMS (root mean square) before and after filtering
print(f"Filter cutoff frequency: {cutoff_freq} Hz")
print(f"Original signal RMS: {np.sqrt(np.mean(signal_noisy**2)):.3f}")
print(f"Filtered signal RMS: {np.sqrt(np.mean(signal_filtered**2)):.3f}")

# Windowing for spectral analysis
print("\n=== Windowing Functions ===")

# Different window functions, all of the same length so they can be applied
# to the same signal segment.
window_length = 512
windows = {
    'Rectangular': np.ones(window_length),
    'Hanning': np.hanning(window_length),
    'Hamming': np.hamming(window_length),
    'Blackman': np.blackman(window_length)
}

# Apply windows to signal segment (the first 512 samples of the noisy signal)
signal_segment = signal_noisy[:window_length]
windowed_ffts = {}  # window name -> complex FFT of the windowed segment

for window_name, window in windows.items():
    windowed_signal = signal_segment * window
    windowed_fft = np.fft.fft(windowed_signal)
    windowed_ffts[window_name] = windowed_fft

    # Total spectral energy (sum of squared FFT magnitudes over all bins).
    # This is the overall energy of the windowed segment, not a leakage
    # measure: tapered windows lower it simply because they scale the samples.
    magnitude = np.abs(windowed_fft)
    total_energy = np.sum(magnitude**2)
    print(f"{window_name:12s} - Total spectral energy: {total_energy:.0f}")

# Cross-correlation analysis
print("\n=== Cross-correlation ===")

# Create two related signals: the same 50 Hz sine, the second one advanced in
# phase by pi/4.  Only the first 1000 samples (1 second) of t are used.
signal1 = np.sin(2*np.pi*50*t[:1000])
signal2 = np.sin(2*np.pi*50*t[:1000] + np.pi/4)  # Phase-shifted

# Add independent noise realisations (same standard deviation, 0.1) to each
signal1_noisy = signal1 + 0.1*np.random.randn(len(signal1))
signal2_noisy = signal2 + 0.1*np.random.randn(len(signal2))

# Compute cross-correlation.
# mode='full' returns one value per possible overlap, so the matching lag axis
# runs from -(len(signal2) - 1) to len(signal1) - 1.
cross_corr = np.correlate(signal1_noisy, signal2_noisy, mode='full')
lags = np.arange(-len(signal2_noisy)+1, len(signal1_noisy))

# Find peak correlation and corresponding lag (largest absolute value, so a
# strong negative correlation would also be picked up)
max_corr_idx = np.argmax(np.abs(cross_corr))
max_corr_lag = lags[max_corr_idx]
max_corr_value = cross_corr[max_corr_idx]

# Convert the lag (in samples) to a phase: one sample at 50 Hz and fs = 1000 Hz
# is 2*pi*50/fs ~= 0.314 rad.  Lags are integers, so the estimate can only
# land on multiples of that step and will not match pi/4 ~= 0.785 exactly.
print(f"Maximum correlation: {max_corr_value:.3f} at lag: {max_corr_lag}")
print(f"Expected phase shift: π/4 radians = {np.pi/4:.3f}")
print(f"Calculated phase shift: {max_corr_lag * 2*np.pi*50/fs:.3f} radians")

# Spectrogram (time-frequency analysis)
print("\n=== Time-Frequency Analysis ===")

# Create chirp signal (frequency changes over time).
# endpoint=False yields fs samples spaced exactly 1/fs apart over one second,
# which matches the sampling rate handed to the spectrogram below (and the
# way `t` is built for the main signal).  Including the endpoint would give a
# spacing of 1/(fs - 1) and a slightly mis-scaled frequency axis.
t_chirp = np.linspace(0, 1, fs, endpoint=False)
f0, f1 = 50, 200  # Start and end frequencies
# Sweep from f0 at t=0 to f1 at t=1 second
chirp_signal = signal.chirp(t_chirp, f0, 1, f1)

# Compute spectrogram: Sxx has one row per frequency bin and one column per
# time segment of 256 samples.
frequencies_spec, times_spec, Sxx = signal.spectrogram(chirp_signal, fs, nperseg=256)

print(f"Spectrogram shape: {Sxx.shape}")
print(f"Time resolution: {times_spec[1] - times_spec[0]:.4f} seconds")
print(f"Frequency resolution: {frequencies_spec[1] - frequencies_spec[0]:.2f} Hz")
print(f"Frequency range: {frequencies_spec[0]:.1f} to {frequencies_spec[-1]:.1f} Hz")

## Advanced Statistical Operations and Random Sampling

In [None]:
# Advanced random number generation
print("=== Advanced Random Sampling ===")

# Set up random number generator for reproducibility.
# A dedicated RandomState keeps this section's stream independent of the
# global np.random state used elsewhere in the notebook.
rng = np.random.RandomState(42)

# Generate samples from various distributions.
# NOTE: this rebinds the notebook-level name `n_samples`, which the linear
# algebra cell set to 20 (the number of expression samples).
n_samples = 10000

# Parametric distributions
samples_normal = rng.normal(loc=100, scale=15, size=n_samples)
samples_lognormal = rng.lognormal(mean=2, sigma=0.5, size=n_samples)
samples_gamma = rng.gamma(shape=2, scale=2, size=n_samples)
samples_beta = rng.beta(a=2, b=5, size=n_samples)
samples_poisson = rng.poisson(lam=3, size=n_samples)

# Label -> samples; the Poisson counts are cast to float so every entry has
# the same dtype in the statistics loop.
distributions = {
    'Normal(100, 15)': samples_normal,
    'LogNormal(2, 0.5)': samples_lognormal,
    'Gamma(2, 2)': samples_gamma,
    'Beta(2, 5)': samples_beta,
    'Poisson(3)': samples_poisson.astype(float)
}

# Calculate statistics for each distribution.
# Skewness and kurtosis are the 3rd and 4th moments of the standardised
# samples; np.std uses its default (population, ddof=0) normalisation.
for dist_name, samples in distributions.items():
    mean_val = np.mean(samples)
    std_val = np.std(samples)
    skewness = np.mean(((samples - mean_val) / std_val) ** 3)
    kurtosis = np.mean(((samples - mean_val) / std_val) ** 4) - 3  # Excess kurtosis

    print(f"{dist_name:15s}: μ={mean_val:6.2f}, σ={std_val:6.2f}, skew={skewness:6.2f}, kurt={kurtosis:6.2f}")

# Bootstrap sampling for confidence intervals
print("\n=== Bootstrap Confidence Intervals ===")

# Original sample: 100 draws from N(50, 10**2)
original_sample = rng.normal(50, 10, 100)
original_mean = np.mean(original_sample)

# Bootstrap resampling: draw n_bootstrap resamples of the same size as the
# original, with replacement, and record the mean of each.
n_bootstrap = 10000
bootstrap_means = []

for _ in range(n_bootstrap):
    bootstrap_sample = rng.choice(original_sample, size=len(original_sample), replace=True)
    bootstrap_means.append(np.mean(bootstrap_sample))

bootstrap_means = np.array(bootstrap_means)

# Calculate confidence intervals with the percentile method: a conf_level%
# interval leaves alpha/2 percent of the bootstrap means in each tail.
confidence_levels = [90, 95, 99]
for conf_level in confidence_levels:
    alpha = 100 - conf_level
    lower_percentile = alpha / 2
    upper_percentile = 100 - alpha / 2

    ci_lower = np.percentile(bootstrap_means, lower_percentile)
    ci_upper = np.percentile(bootstrap_means, upper_percentile)

    print(f"{conf_level}% CI: [{ci_lower:.3f}, {ci_upper:.3f}] (original mean: {original_mean:.3f})")

# Permutation testing
print("\n=== Permutation Testing ===")

# Create two groups with different means (same spread, 50 observations each)
group1 = rng.normal(100, 15, 50)
group2 = rng.normal(105, 15, 50)  # Slightly higher mean

observed_diff = np.mean(group1) - np.mean(group2)
print(f"Observed difference in means: {observed_diff:.3f}")

# Permutation test: pool the data, repeatedly shuffle it and split it back
# into two groups of the original sizes, recording the mean difference each
# time.  This builds the distribution of the statistic under "no group effect".
combined_data = np.concatenate([group1, group2])
n_permutations = 10000
permuted_diffs = []

for _ in range(n_permutations):
    # Randomly shuffle and split (rng.permutation returns a shuffled copy)
    shuffled = rng.permutation(combined_data)
    perm_group1 = shuffled[:len(group1)]
    perm_group2 = shuffled[len(group1):]

    perm_diff = np.mean(perm_group1) - np.mean(perm_group2)
    permuted_diffs.append(perm_diff)

permuted_diffs = np.array(permuted_diffs)

# Calculate p-value (two-tailed test): the fraction of permutations whose
# absolute difference is at least as large as the observed one.
# NOTE(review): this can evaluate to exactly 0; the common (count + 1) / (n + 1)
# correction is not applied here.
p_value = np.mean(np.abs(permuted_diffs) >= np.abs(observed_diff))
print(f"Permutation test p-value: {p_value:.4f}")

# Monte Carlo integration
print("\n=== Monte Carlo Integration ===")

# Estimate π using Monte Carlo method
def estimate_pi(n_points):
    """Estimate pi from ``n_points`` uniform random points in the unit square.

    The fraction of points that land inside the quarter disc of radius 1
    approximates pi / 4, so four times that fraction approximates pi.  Points
    are drawn from the notebook-level ``rng``.
    """
    # Draw the x coordinates first, then the y coordinates.
    xs = rng.uniform(0, 1, n_points)
    ys = rng.uniform(0, 1, n_points)

    # Boolean mask of the points whose squared radius does not exceed 1; its
    # mean is the hit fraction.
    hits = xs**2 + ys**2 <= 1
    return 4 * np.mean(hits)

# Test with different sample sizes.
# Each size is ten times the previous one; the printed absolute error shows
# how the estimate tightens as more points are drawn.
sample_sizes = [1000, 10000, 100000, 1000000]
for n in sample_sizes:
    pi_est = estimate_pi(n)
    error = abs(pi_est - np.pi)
    print(f"n={n:7d}: π estimate = {pi_est:.6f}, error = {error:.6f}")

# Markov Chain Monte Carlo (simple random walk)
print("\n=== Simple MCMC Sampling ===")

def log_posterior(x, mu=0, sigma=1):
    """Return the log-density of a normal(mu, sigma) at x, up to a constant.

    Only the quadratic term -0.5 * z**2 is computed; the normalising constant
    is omitted.
    """
    standardized = (x - mu) / sigma
    return -0.5 * standardized**2

def mcmc_sample(n_samples, initial_value=0, step_size=1):
    """Run a random-walk Metropolis sampler against ``log_posterior``.

    Parameters
    ----------
    n_samples : int
        Number of iterations; one state is recorded per iteration.
    initial_value : float
        Starting state of the chain.
    step_size : float
        Standard deviation of the Gaussian proposal step.

    Returns
    -------
    (ndarray, float)
        The recorded chain and the fraction of proposals that were accepted.
    """
    chain = []
    state = initial_value
    state_log_prob = log_posterior(state)
    accepted = 0

    for _ in range(n_samples):
        # Symmetric Gaussian proposal centred on the current state.
        candidate = state + rng.normal(0, step_size)
        candidate_log_prob = log_posterior(candidate)

        # Uphill moves are always accepted; downhill moves are accepted with
        # probability exp(log ratio).  The `or` short-circuits, so the uniform
        # draw is only consumed for downhill proposals.
        log_ratio = candidate_log_prob - state_log_prob
        if log_ratio > 0 or rng.random() < np.exp(log_ratio):
            state, state_log_prob = candidate, candidate_log_prob
            accepted += 1

        # Rejected proposals repeat the current state in the chain.
        chain.append(state)

    return np.array(chain), accepted / n_samples

# Run MCMC: 10,000 iterations from the default start (0) with proposal std 1.5.
mcmc_samples, acceptance_rate = mcmc_sample(10000, step_size=1.5)

# log_posterior defaults to mu=0, sigma=1, hence the expected mean and std.
print(f"MCMC acceptance rate: {acceptance_rate:.3f}")
print(f"Sample mean: {np.mean(mcmc_samples):.3f} (expected: 0.000)")
print(f"Sample std: {np.std(mcmc_samples):.3f} (expected: 1.000)")

# Check convergence (autocorrelation)
def autocorrelation(x, max_lag=100):
    """Return the normalised autocorrelation of ``x`` for lags 0..max_lag.

    The series is mean-centred, correlated with itself, and divided by the
    lag-0 value so the first entry is 1.  If ``x`` is shorter than
    ``max_lag + 1`` the result simply contains fewer lags.
    """
    deviations = x - np.mean(x)
    full_corr = np.correlate(deviations, deviations, mode='full')
    # In 'full' mode the zero-lag term sits at index len(x) - 1; everything
    # after it corresponds to positive lags.
    zero_lag = len(x) - 1
    positive_lags = full_corr[zero_lag:zero_lag + max_lag + 1]
    return positive_lags / positive_lags[0]

# Calculate effective sample size
autocorr = autocorrelation(mcmc_samples)
# Find first lag where autocorrelation drops below threshold.
# That lag is used as a rough autocorrelation time; the chain length divided
# by (2 * lag + 1) then serves as a heuristic effective sample size.
threshold = 0.1
effective_lags = np.where(autocorr < threshold)[0]
if len(effective_lags) > 0:
    autocorr_time = effective_lags[0]
    effective_n = len(mcmc_samples) / (2 * autocorr_time + 1)
    print(f"Autocorrelation time: {autocorr_time}")
    print(f"Effective sample size: {effective_n:.0f}")
else:
    # The autocorrelation never fell below the threshold within the lags that
    # autocorrelation() computes (100 by default).
    print("High autocorrelation - need longer chain")

## Optimization and Root Finding

In [None]:
from scipy.optimize import minimize, root, curve_fit, differential_evolution

# Function optimization
print("=== Function Optimization ===")

# Define test functions
def rosenbrock(x):
    """Evaluate the Rosenbrock test function for an array ``x``.

    Sums 100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2 over consecutive pairs;
    the value is 0 when every coordinate equals 1.
    """
    current, following = x[:-1], x[1:]
    valley_term = 100.0 * (following - current**2)**2
    offset_term = (1 - current)**2
    return np.sum(valley_term + offset_term)

def himmelblau(x):
    """Evaluate Himmelblau's function at the 2-D point ``x``.

    The function is a sum of two squares, so it is non-negative and equals 0
    exactly where both squared terms vanish (for example at (3, 2)).
    """
    first, second = x[0], x[1]
    left_term = (first**2 + second - 11)**2
    right_term = (first + second**2 - 7)**2
    return left_term + right_term

# Optimize Rosenbrock function from the origin with BFGS.  No gradient is
# supplied, so the solver has to approximate it numerically.
print("Rosenbrock function optimization:")
x0 = np.array([0.0, 0.0])  # Starting point
result_rosenbrock = minimize(rosenbrock, x0, method='BFGS')

print(f"Success: {result_rosenbrock.success}")
print(f"Minimum found at: {result_rosenbrock.x}")
print(f"Function value: {result_rosenbrock.fun:.8f}")
print(f"Number of iterations: {result_rosenbrock.nit}")

# Compare different optimization methods.
# All four local methods start from the same point, so the printed minima
# show which of Himmelblau's minima each one converges to from (0, 0).
print("\nComparison of optimization methods for Himmelblau function:")
methods = ['BFGS', 'CG', 'Powell', 'Nelder-Mead']
x0 = np.array([0.0, 0.0])

for method in methods:
    result = minimize(himmelblau, x0, method=method)
    print(f"{method:12s}: x={result.x}, f={result.fun:.6f}, nit={result.nit:3d}")

# Global optimization with differential evolution.
# The search is restricted to the box given by `bounds`; seed=42 makes the
# stochastic search reproducible.
print("\nGlobal optimization (Differential Evolution):")
bounds = [(-5, 5), (-5, 5)]  # Search bounds
result_global = differential_evolution(himmelblau, bounds, seed=42)
print(f"Global minimum: x={result_global.x}, f={result_global.fun:.6f}")

# Constrained optimization
print("\n=== Constrained Optimization ===")

def objective(x):
    """Return the squared distance of the 2-D point ``x`` from the origin."""
    first, second = x[0], x[1]
    return first**2 + second**2

def constraint1(x):
    """Equality-constraint residual: zero exactly when x[0] + x[1] equals 1."""
    coordinate_sum = x[0] + x[1]
    return coordinate_sum - 1

def constraint2(x):
    """Inequality-constraint value: non-negative exactly when x[0] >= 0."""
    first_coordinate = x[0]
    return first_coordinate

# Define constraints in the dict format scipy.optimize.minimize expects:
# 'eq' functions must evaluate to zero at the solution, 'ineq' functions must
# be non-negative.
constraints = [
    {'type': 'eq', 'fun': constraint1},
    {'type': 'ineq', 'fun': constraint2}
]

# SLSQP is used because it supports both equality and inequality constraints.
# The starting point (0.5, 0.5) already satisfies both of them.
x0 = np.array([0.5, 0.5])
result_constrained = minimize(objective, x0, method='SLSQP', constraints=constraints)

# Report the solution and re-evaluate both constraints at it as a sanity check
print(f"Constrained minimum: x={result_constrained.x}")
print(f"Objective value: {result_constrained.fun:.6f}")
print(f"Constraint 1 (should be ~0): {constraint1(result_constrained.x):.6f}")
print(f"Constraint 2 (should be ≥0): {constraint2(result_constrained.x):.6f}")

# Root finding
print("\n=== Root Finding ===")

def equations(vars):
    """Return the residuals of a circle/parabola system at the point ``vars``.

    ``vars`` unpacks into (x, y).  The first residual is zero on the unit
    circle x**2 + y**2 = 1, the second on the parabola x = y**2; a root of the
    system makes both zero.
    """
    x, y = vars
    y_squared = y**2
    circle_residual = x**2 + y_squared - 1
    parabola_residual = x - y_squared
    return [circle_residual, parabola_residual]

# Find roots of the circle/parabola system with the 'hybr' solver, starting
# from (0.5, 0.5).
initial_guess = [0.5, 0.5]
solution = root(equations, initial_guess, method='hybr')

# Re-evaluating the equations at the reported root should give residuals ~0
print(f"Root found: x={solution.x}")
print(f"Function value at root: {equations(solution.x)}")
print(f"Success: {solution.success}")

# Curve fitting
print("\n=== Curve Fitting ===")

# Generate synthetic data
def model_function(x, a, b, c):
    """Exponential decay model: ``a * exp(-b * x) + c``.

    ``a`` is the amplitude at x = 0 above the offset, ``b`` the decay rate and
    ``c`` the constant offset.
    """
    decay = np.exp(-b * x)
    return a * decay + c

# True parameters (a, b, c) of the exponential decay model used to generate
# the synthetic data: 50 evenly spaced x values on [0, 5].
true_params = [10.0, 0.5, 2.0]
x_data = np.linspace(0, 5, 50)
y_true = model_function(x_data, *true_params)
y_data = y_true + 0.5 * np.random.randn(len(x_data))  # Add noise

# Fit curve.  The starting guess is deliberately different from the true
# parameters; curve_fit returns the fitted values and their covariance matrix.
initial_guess = [8.0, 0.3, 1.5]  # Starting guess
fitted_params, covariance = curve_fit(model_function, x_data, y_data, p0=initial_guess)

# Calculate parameter uncertainties: one standard deviation per parameter,
# taken from the diagonal of the covariance matrix.
param_errors = np.sqrt(np.diag(covariance))

print("Parameter fitting results:")
param_names = ['a', 'b', 'c']
# NOTE: the enumerate index `i` is not used inside the loop body.
for i, (name, true_val, fitted_val, error) in enumerate(zip(param_names, true_params, fitted_params, param_errors)):
    print(f"{name}: true={true_val:.3f}, fitted={fitted_val:.3f}±{error:.3f}")

# Calculate R-squared: 1 - (residual sum of squares / total sum of squares),
# both measured against the noisy observations y_data.
y_fitted = model_function(x_data, *fitted_params)
ss_res = np.sum((y_data - y_fitted)**2)
ss_tot = np.sum((y_data - np.mean(y_data))**2)
r_squared = 1 - (ss_res / ss_tot)
print(f"R-squared: {r_squared:.4f}")

# Robust fitting (less sensitive to outliers)
print("\n=== Robust Curve Fitting ===")

# Add outliers to data.
# Work on a copy so the clean y_data stays available, then shift three
# observations by +3, -4 and +2 (large compared with the 0.5 noise std).
y_data_outliers = y_data.copy()
outlier_indices = [10, 25, 40]
y_data_outliers[outlier_indices] += np.array([3, -4, 2])  # Add outliers

def robust_objective(params, x, y):
    """Return the summed Huber loss of ``model_function`` against ``y``.

    Residuals with magnitude up to ``delta`` (fixed at 1.0) are penalised
    quadratically; larger residuals are penalised linearly, which limits the
    influence of outliers on the total.
    """
    delta = 1.0
    residuals = y - model_function(x, *params)
    magnitude = np.abs(residuals)

    # Both branches are evaluated element-wise; np.where then picks the
    # quadratic value for small residuals and the linear one otherwise.
    quadratic_part = 0.5 * residuals**2
    linear_part = delta * magnitude - 0.5 * delta**2
    per_point_loss = np.where(magnitude <= delta, quadratic_part, linear_part)
    return np.sum(per_point_loss)

# Fit with and without robust method, both on the data containing outliers.
# curve_fit minimises squared residuals; the second fit minimises the Huber
# objective with scipy's general-purpose minimize (no method is specified, so
# the solver is scipy's default choice).
normal_fit, _ = curve_fit(model_function, x_data, y_data_outliers, p0=initial_guess)
robust_result = minimize(robust_objective, initial_guess, args=(x_data, y_data_outliers))
robust_fit = robust_result.x

print("Comparison with outliers present:")
# NOTE: the enumerate index `i` is not used inside the loop body.
for i, (name, true_val, normal_val, robust_val) in enumerate(zip(param_names, true_params, normal_fit, robust_fit)):
    print(f"{name}: true={true_val:.3f}, normal={normal_val:.3f}, robust={robust_val:.3f}")

# Calculate RMSE for both methods against the noise-free curve y_true, so the
# comparison measures how well each fit recovers the underlying function
# rather than how closely it follows the contaminated observations.
y_normal = model_function(x_data, *normal_fit)
y_robust = model_function(x_data, *robust_fit)
rmse_normal = np.sqrt(np.mean((y_true - y_normal)**2))
rmse_robust = np.sqrt(np.mean((y_true - y_robust)**2))

print(f"\nRMSE vs true function:")
print(f"Normal fit: {rmse_normal:.4f}")
print(f"Robust fit: {rmse_robust:.4f}")

## Memory Optimization and Performance

In [None]:
import sys
from scipy.sparse import csr_matrix, coo_matrix

# Memory-efficient data types
print("=== Memory Optimization ===")

# Compare memory usage of different data types.
# The same number of elements is allocated for every dtype, so the printed
# sizes differ only by the per-element width.
n = 1000000
data_types = {
    'float64': np.float64,
    'float32': np.float32,
    'float16': np.float16,
    'int64': np.int64,
    'int32': np.int32,
    'int16': np.int16,
    'int8': np.int8
}

print(f"Memory usage for {n:,} elements:")
for dtype_name, dtype in data_types.items():
    array = np.ones(n, dtype=dtype)
    # nbytes is the size of the array's data buffer in bytes; 1024**2
    # converts it to MiB.
    memory_mb = array.nbytes / (1024**2)
    print(f"{dtype_name:8s}: {memory_mb:6.1f} MB")

# Sparse matrices for memory efficiency
print("\n=== Sparse Matrices ===")

# Create a large sparse matrix
matrix_size = 10000
density = 0.001  # 0.1% non-zero elements

# Generate random sparse matrix: random coordinates with random values
n_nonzero = int(matrix_size**2 * density)
rows = np.random.randint(0, matrix_size, n_nonzero)
cols = np.random.randint(0, matrix_size, n_nonzero)
data = np.random.randn(n_nonzero)

# Create sparse matrix in COO (coordinate) format, which is convenient for
# construction, then convert to CSR format for efficient operations.
sparse_matrix = coo_matrix((data, (rows, cols)), shape=(matrix_size, matrix_size))
sparse_csr = sparse_matrix.tocsr()

# Compare with dense storage.
# The dense footprint is computed from the shape and element size instead of
# calling toarray(): materialising a 10000 x 10000 float64 array would
# allocate about 800 MB just to read its .nbytes, which defeats the point of
# a memory-saving demo and can exhaust RAM on small machines.
dense_nbytes = matrix_size * matrix_size * sparse_csr.dtype.itemsize
# CSR stores three arrays: the values, their column indices, and row pointers.
sparse_nbytes = (sparse_csr.data.nbytes
                 + sparse_csr.indices.nbytes
                 + sparse_csr.indptr.nbytes)

print(f"Matrix size: {matrix_size} × {matrix_size}")
print(f"Density: {density:.1%}")
# Random coordinates can repeat; duplicates are summed during the CSR
# conversion, so the stored count can be slightly below n_nonzero.
print(f"Non-zero elements: {sparse_csr.nnz:,}")
print(f"Dense matrix memory: {dense_nbytes / (1024**2):.1f} MB")
print(f"Sparse matrix memory: {sparse_nbytes / (1024**2):.1f} MB")
print(f"Memory savings: {100 * (1 - sparse_nbytes / dense_nbytes):.1f}%")

# Memory views and advanced indexing
print("\n=== Memory Views and Copy vs View ===")

# Create large array (1000 x 1000 float64)
large_array = np.random.randn(1000, 1000)
print(f"Original array memory: {large_array.nbytes / (1024**2):.1f} MB")

# Different ways to access subarrays.
# Basic slicing returns a view onto the original buffer, .copy() forces an
# independent buffer, and indexing with integer lists (fancy indexing) always
# produces a new array.
subarray_view = large_array[100:200, 200:300]  # Creates a view (no copy)
subarray_copy = large_array[100:200, 200:300].copy()  # Creates a copy
subarray_fancy = large_array[[100, 150, 199], :][:, [200, 250, 299]]  # Fancy indexing (creates copy)

# np.shares_memory reports whether two arrays overlap in memory
print(f"Subarray view shares memory: {np.shares_memory(large_array, subarray_view)}")
print(f"Subarray copy shares memory: {np.shares_memory(large_array, subarray_copy)}")
print(f"Fancy indexed shares memory: {np.shares_memory(large_array, subarray_fancy)}")

# Memory mapping for large files
print("\n=== Memory Mapping ===")

# Create a large array and save to disk.
# The file is written to the current working directory and is removed again
# by the clean-up step at the end of this cell.
large_data = np.random.randn(5000, 1000).astype(np.float32)
filename = 'large_data.npy'
np.save(filename, large_data)

# Load using memory mapping: the array is backed by the file on disk instead
# of being read into memory up front.
mmap_array = np.load(filename, mmap_mode='r')  # Read-only memory map

print(f"Original array size: {large_data.nbytes / (1024**2):.1f} MB")
print(f"Memory mapped array uses minimal RAM")
# Indexing works like a normal array; only the touched data is read.
print(f"Can access data: {mmap_array[0, 0]:.3f}")

# Performance comparison: in-place vs copy operations
print("\n=== Performance Optimization ===")

# Create test arrays; `c` is the preallocated output buffer
size = 1000000
a = np.random.randn(size)
b = np.random.randn(size)
c = np.zeros(size)

# Timing uses time.perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short intervals.  time.time() can have a resolution of
# several milliseconds on some platforms, which may yield an elapsed time of
# exactly 0.0 and a ZeroDivisionError in the speedup ratio below.

# Method 1: Copy operation (allocates a temporary for b * 2.0 and a result)
start_time = time.perf_counter()
result1 = a + b * 2.0
copy_time = time.perf_counter() - start_time

# Method 2: In-place operation (writes every intermediate into `c`)
start_time = time.perf_counter()
np.multiply(b, 2.0, out=c)  # b * 2.0 -> c
np.add(a, c, out=c)         # a + c -> c
inplace_time = time.perf_counter() - start_time

print(f"Copy operation time: {copy_time:.4f} seconds")
print(f"In-place operation time: {inplace_time:.4f} seconds")
print(f"Speedup: {copy_time / inplace_time:.1f}x")
print(f"Results are equal: {np.allclose(result1, c)}")

# Vectorization vs loops
print("\n=== Vectorization Performance ===")

def compute_distances_loop(points):
    """Return the pairwise Euclidean distance matrix using explicit loops.

    This is the deliberately slow reference implementation: every entry
    (row, col) is computed on its own from the two corresponding points.
    """
    count = len(points)
    result = np.zeros((count, count))
    for row, origin in enumerate(points):
        for col, target in enumerate(points):
            offset = origin - target
            result[row, col] = np.sqrt(np.sum(offset**2))
    return result

def compute_distances_vectorized(points):
    """Return the pairwise Euclidean distance matrix using broadcasting.

    An (n, 1, d) view minus a (1, n, d) view yields all n * n coordinate
    differences at once; squaring, summing over the last axis and taking the
    square root gives the (n, n) distance matrix.
    """
    as_rows = points[:, None, :]
    as_cols = points[None, :, :]
    squared_offsets = (as_rows - as_cols)**2
    return np.sqrt(squared_offsets.sum(axis=2))

# Test with small dataset for timing (100 points in 3-D -> 10,000 pairs)
test_points = np.random.randn(100, 3)

# time.perf_counter() is used for both measurements: the vectorized call can
# finish in well under a millisecond, and a coarse clock such as time.time()
# may then report 0.0 elapsed and crash the speedup division below.

# Loop version
start_time = time.perf_counter()
dist_loop = compute_distances_loop(test_points)
loop_time = time.perf_counter() - start_time

# Vectorized version
start_time = time.perf_counter()
dist_vectorized = compute_distances_vectorized(test_points)
vectorized_time = time.perf_counter() - start_time

print(f"Loop-based computation: {loop_time:.4f} seconds")
print(f"Vectorized computation: {vectorized_time:.4f} seconds")
print(f"Speedup: {loop_time / vectorized_time:.1f}x")
# Both implementations must agree up to floating-point rounding
print(f"Results are equal: {np.allclose(dist_loop, dist_vectorized)}")

# NumPy vs pure Python performance
print("\n=== NumPy vs Pure Python ===")

def python_sum_squares(arr):
    """Sum the squares of the items of ``arr`` using only built-ins.

    Every element is squared one at a time in the interpreter and the values
    are accumulated by the built-in ``sum``.
    """
    return sum(map(lambda value: value**2, arr))

def numpy_sum_squares(arr):
    """Sum the squares of ``arr`` with vectorized NumPy operations."""
    squared = arr**2
    return np.sum(squared)

# Test data: the same 100,000 values as a NumPy array and as a plain list
test_data = np.random.randn(100000)
python_list = test_data.tolist()

# time.perf_counter() is used because the NumPy call typically finishes in a
# fraction of a millisecond; a coarse clock such as time.time() can report
# 0.0 elapsed and make the speedup division raise ZeroDivisionError.

# Python version
start_time = time.perf_counter()
result_python = python_sum_squares(python_list)
python_time = time.perf_counter() - start_time

# NumPy version
start_time = time.perf_counter()
result_numpy = numpy_sum_squares(test_data)
numpy_time = time.perf_counter() - start_time

print(f"Pure Python: {python_time:.4f} seconds")
print(f"NumPy: {numpy_time:.4f} seconds")
print(f"NumPy speedup: {python_time / numpy_time:.1f}x")
# The two sums add the same terms in a different order, so they can differ by
# accumulated rounding error.  With a total on the order of 1e5, an absolute
# tolerance of 1e-10 is below that rounding noise and can report a spurious
# mismatch; a relative tolerance is the appropriate comparison.
results_match = bool(np.isclose(result_python, result_numpy, rtol=1e-9, atol=0.0))
print(f"Results match: {results_match}")

# Clean up
import os

# Drop the memory map before deleting its backing file.  While `mmap_array`
# is alive the file stays mapped, and some platforms (notably Windows) refuse
# to remove a file that is still mapped.
del mmap_array

if os.path.exists(filename):
    os.remove(filename)

# Closing recap of the techniques demonstrated in this section, printed one
# takeaway per line.
print("\n=== Memory and Performance Summary ===")
for takeaway in (
    "1. Use appropriate data types (float32 vs float64, int32 vs int64)",
    "2. Leverage sparse matrices for sparse data",
    "3. Understand views vs copies",
    "4. Use memory mapping for large datasets",
    "5. Prefer in-place operations when possible",
    "6. Vectorize operations instead of loops",
    "7. NumPy is typically 10-100x faster than pure Python",
):
    print(takeaway)