Oceanography python bootcamp, Winter 2025
# Week 3 notebook

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

In [None]:
import week3_magic as magic

## Numpy array

### Specifying data type

In [None]:
# every input is a Python int, so numpy infers an integer dtype
X = np.array([1, 2, 3])
X.dtype

In [None]:
# request floating-point storage explicitly through the dtype argument
Y = np.array([1, 2, 3], dtype=np.float64)
Y.dtype

### Basic and advanced indexing

In [None]:
# odd numbers 1, 3, ..., 19; np.arange builds the array directly,
# without going through an intermediate Python list
x = np.arange(1, 20, 2)

In [None]:
# Basic indexing using start:stop:step
# (start at index 1, no stop given so run to the end, take every 5th element)
y = x[1::5]
print(y)

In [None]:
# assign using slice: the right-hand side supplies one value
# for each position selected by the slice

x[1::5] = [-1, -2]
print(x)

In [None]:
# reset x to the odd numbers 1, 3, ..., 19 before the next examples
x = np.arange(1, 20, 2)

In [None]:
# advanced indexing by boolean mask: keeps the elements of x
# for which the condition x % 5 == 3 is True
x[x % 5 == 3]

In [None]:
# NOTE that the mask used for indexing, x % 5 == 3, is itself a boolean array
x % 5 == 3

In [None]:
# advanced indexing using an integer array
# (indices may repeat and may appear in any order)
x[[2, 5, 2]]

In [None]:
# warning: memory sharing between original and sliced array
# (basic slicing returns a view, so writing through y also changes x)

x = np.arange(1, 20, 2)
y = x[::3]
y[2:4] = [-10, -20]

print(x)

In [None]:
# check potential memory sharing
# (a quick bounds check: True means the two arrays *might* overlap in memory)
np.may_share_memory(x, y)

In [None]:
# avoid shared memory by copying
# (.copy() allocates new storage, so z no longer overlaps with x)
z = y.copy()
np.may_share_memory(x, z)

In [None]:
# in contrast, advanced indexing creates a copy
# (writing to y leaves x untouched)

x = np.arange(1, 20, 2)
y = x[[0, 3, 6, 9]]
y[2:4] = [-10, -20]

print(x)

In [None]:
# boolean-mask indexing: reading through a mask copies,
# assigning through a mask writes in place
x = np.array([1, 4, 9, 16, 25, 36])
is_multiple_of_4 = x % 4 == 0

y = x[is_multiple_of_4]
y[:2] = 5

x[is_multiple_of_4] = [-1, -4, -9]
x = x[:3]

print(x, y)

### Numpy creation functions

In [None]:
# 10 evenly spaced values from 1 to 10 (both end points included)
np.linspace(1, 10, 10)

In [None]:
# 5 values from 1 to 10 with a constant ratio between neighbours
np.geomspace(1, 10, 5)

In [None]:
# values from 1 up to (but not including) 5, in steps of 0.5
np.arange(1, 5, 0.5)

In [None]:
# array of 5 zeros (floating point by default)
np.zeros(5)

In [None]:
# array of 5 elements, every one equal to 10
np.full(5, 10)

In [None]:
# join arrays end to end: 0, 1, 2, 3, 4 followed by three 5s
np.concatenate([np.arange(5), np.full(3, 5)])

### Numpy arithmetic

In [None]:
# between an array and a scalar (the scalar is applied to every element)
np.arange(5) * 5

In [None]:
# between two numpy arrays (element by element; here both have 5 elements)
np.arange(5) + np.geomspace(1, 100, 5)

In [None]:
# logical operation: element-wise AND of two boolean arrays

x = np.arange(-5, 5)
np.logical_and(x > 0, x % 2 == 0)

In [None]:
# alternative form for logical operation
# Note that you need the parentheses: & binds more tightly than > and ==,
# so without them the expression would be grouped incorrectly
(x > 0) & (x % 2==0)

### numpy mapping

In [None]:
# exponential (e**x, applied element by element)
np.exp(np.arange(5))

In [None]:
# natural logarithm (applied element by element)
np.log(np.geomspace(1, 10, 5))

In [None]:
# trigonometric (sine at multiples of pi/4, from 0 to 2*pi)
np.sin(np.pi * np.arange(0, 2.05, 0.25))

In [None]:
# rounding (to 2 decimal places)
np.round(np.e * np.arange(10), 2)

### numpy reduction

In [None]:
# maximum value, and the index at which it occurs
x = np.array([1, 3, 4, 0, -6, -8])

print(x.max(), x.argmax())

In [None]:
# arithmetic mean of all elements
x = np.array([2, 6, 4, 3])
x.mean()

### numpy miscellaneous

In [None]:
# sorting (np.sort returns a sorted copy; x itself is left unchanged)
x = np.array([1, -1, 2, -2, 3, -3])
np.sort(x)

In [None]:
# index in the original array of each sorted value,
# i.e. x[np.argsort(x)] gives the same values as np.sort(x)
np.argsort(x)

In [None]:
# one-argument form of np.where:
# returns a tuple of index arrays (one per dimension) where the condition is True
np.where(x > 0)

In [None]:
# three-argument form of np.where:
# element-wise choice, 1 where x > 0 and -1 everywhere else
np.where(x > 0, 1, -1)

----

_**Code writing #2.**_ numpy functions

1. Use numpy to calculate the arithmetic mean of the geometric sequence [1, 2, …, 2<sup>10</sup>]
1. Use numpy to find the maximum of sin(n), where n = 1, …, 20. Find also the value of n where the maximum is attained
1. Use numpy to find the values of n for which cos(n) is positive, where n = 1, …, 20


In [None]:
# part 1

In [None]:
# part 2

In [None]:
# part 3

## Matplotlib basics

### Simple matplotlib plot

In [None]:
# set data: 100 evenly spaced points on [0, 1] and their square roots

x = np.linspace(0, 1, num=100)
y = np.sqrt(x)

In [None]:
# a very simple plot, using the object-oriented interface:
# create a figure, add one axes to it, then draw on that axes

fig = plt.figure()
ax = fig.add_subplot()

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# use the alternative global interface
# (pyplot draws on the current axes, creating figure and axes if none exist yet)
plt.plot(x, y)
plt.show()

### Adding plot elements

In [None]:
# set title, as well as x, and y labels
# change font size of title, labels, and ticks

# default figure size here; the size is only customized in a later cell
fig = plt.figure()
ax = fig.add_subplot()

#### << START NEW CONTENT ####
ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

# minor ticks every 0.1 on both axes; labelsize sets the tick label font size
ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)
#### >> END NEW CONTENT ####

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# add grid

fig = plt.figure()
ax = fig.add_subplot()

ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)

#### << START NEW CONTENT ####
# solid lines at the major ticks, dotted lines at the minor ticks;
# called on ax so the whole cell uses the same object-oriented interface
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")
#### >> END NEW CONTENT ####

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# change figure size (figsize is (width, height) in inches)

fig = plt.figure(figsize=(8,4)) #### NEW CONTENT THIS LINE ####
ax = fig.add_subplot()

ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)

# grid drawn through the axes object, like every other call in this cell
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# set the aspect ratio

fig = plt.figure()
ax = fig.add_subplot()

ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)

#### << START NEW CONTENT ####
# "equal": one unit on the x axis is drawn as long as one unit on the y axis
ax.set_aspect("equal")
#### >> END NEW CONTENT ####

# grid drawn through the axes object, like every other call in this cell
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# setting axes limits

fig = plt.figure()
ax = fig.add_subplot()

ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

#### << START NEW CONTENT ####
# show exactly the range 0 to 1 on both axes
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
#### >> END NEW CONTENT ####

ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)

ax.set_aspect("equal")

# grid drawn through the axes object, like every other call in this cell
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")

ax.plot(x, y)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# plot with both the x and y axes in log scale (log-log plot)

fig = plt.figure()
ax = fig.add_subplot()

#### << START NEW CONTENT ####
ax.set_xscale("log")
ax.set_yscale("log")
#### >> END NEW CONTENT ####

ax.set_title("The square root function", loc="left", fontsize=20)
# the axes show x and y themselves on logarithmically spaced scales,
# not their logarithms, so the labels name the plotted quantities
ax.set_xlabel("x (log scale)", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$ (log scale)", fontsize=16)

# minor ticks at 0.1, 0.2, ..., 1.0; zero is left out because it has
# no position on a log axis
ax.set_xticks(0.1 * np.arange(1, 11), minor=True)
ax.set_yticks(0.1 * np.arange(1, 11), minor=True)
ax.tick_params("both", labelsize=14)

# grid drawn through the axes object, like every other call in this cell
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")

# skip the first data point: with the data defined earlier it is
# x = 0, y = 0, which cannot be placed on log axes
ax.plot(x[1:], y[1:])

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# subscript and superscript
# (text between $...$ is rendered as math: ^{...} is a superscript,
# _{...} a subscript; \$ outside math prints a literal dollar sign)

fig = plt.figure(figsize=(10, 1))
ax = fig.add_subplot()

ax.set_title('Text $^{superscript}_\\mathregular{subscript}$ using \\$', fontsize=20)
ax.set_xlabel("proportion CO$_2$", fontsize=16)

plt.show()

----

_**Code writing #3**_: Try your best to reproduce the plot below

![plot of sinusoidal function](https://github.com/wingho-uw/python-bootcamp-wk3/blob/main/img/sin_plot.svg?raw=true)

### Figure export

In [None]:
# make sure svg export retains text
# ('none' writes text as text elements instead of converting glyphs to paths)

plt.rcParams['svg.fonttype'] = 'none'

In [None]:
# set data: 100 evenly spaced points on [0, 1] and their square roots

x = np.linspace(0, 1, num=100)
y = np.sqrt(x)

In [None]:
# Export the square root plot

fig = plt.figure()
ax = fig.add_subplot()

ax.set_title("The square root function", loc="left", fontsize=20)
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y = $\\sqrt{x}$", fontsize=16)

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

ax.set_xticks(0.1 * np.arange(0, 11), minor=True)
ax.set_yticks(0.1 * np.arange(0, 11), minor=True)
ax.tick_params("both", labelsize=14)

ax.set_aspect("equal")

# grid drawn through the axes object, like every other call in this cell
ax.grid(which="major", axis="both")
ax.grid(which="minor", axis="both", ls=":")

ax.plot(x, y)

#### << START NEW CONTENT ####
# the output format is taken from the file extension (.svg here);
# the file is written to the current working directory
fig.savefig("square_root.svg", dpi=300)
#### >> END NEW CONTENT ####

# plt.show() takes no figure argument; it displays all open figures
plt.show()

### Plot types

In [None]:
# line plot with markers

x_data = np.linspace(0, 2, 11)
y_data = np.sqrt(x_data)

fig = plt.figure()
ax = fig.add_subplot()

# c: color, marker="s": square markers, lw: line width, ls="--": dashed line
ax.plot(x_data, y_data, c="olive", marker="s", lw=2, ls="--")

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# scatter plot

x_data = magic.normal_2a.copy()
y_data = magic.normal_1a.copy()

fig = plt.figure()
ax = fig.add_subplot()

# s: marker size, c: marker color, marker="x": cross-shaped markers
ax.scatter(x_data, y_data, s=10, c="green", marker="x")

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# error bar plot

x_data = np.arange(0, 5.1, 0.5)
y_data = np.sqrt(x_data)
# asymmetric y errors: a 2 x N array, one row per direction
y_err = np.array([
    [0.4, 0.3, 0.3, 0.2, 0.4, 0.4, 0.3, 0.2, 0.2, 0.3, 0.3], # lower
    [0.1, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.2], # upper
])
# a single number gives the same symmetric x error for every point
x_err = 0.1

fig = plt.figure()
ax = fig.add_subplot()

# drawn through the axes object, like the other plot-type examples
ax.errorbar(
    x_data, y_data, yerr=y_err, xerr=x_err,
    color="blue", marker="o", lw=1, ls="--",
    ecolor="black", elinewidth=2, capsize=5.0
)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# fill-between plot

x_data = np.linspace(0, 10, 101)
y_data = 3 + 0.5 * x_data

# half-width of the band: narrowest at x = 5, widening towards both ends
y_err = 0.3 + 0.02 * (x_data - 5)**2

fig = plt.figure()
ax = fig.add_subplot()

# shade the region between the lower curve and the upper curve
ax.fill_between(x_data, y_data - y_err, y_data + y_err,
                color="blue", edgecolor="red", ls="--", lw=2, alpha=0.2)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# bar plot with error bars
# different color for each bar

x_data = np.arange(0, 5)
y_data = np.array([6, 5, 9, 4, 7])

y_err = np.array([0.7, 0.4, 0.3, 0.5, 0.2])

fig = plt.figure()
ax = fig.add_subplot()

# tick_label and color each take one entry per bar
ax.bar(
    x_data, y_data, align="center", width=0.8,
    tick_label=["apple", "banana", "lemon", "grape", "orange"],
    color=["tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple"],
    yerr=y_err, ecolor="black", capsize=5
)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

### Multiple data series; plot annotations

In [None]:
# multiple data series

# np.arange already returns an array, so no extra np.array() wrapper is needed
x_fit = np.arange(0, 11)
y_fit = 2.0 + 0.8 * x_fit

x_samples = magic.uniform_10.copy()
y_samples = magic.linear.copy()

fig = plt.figure()
ax = fig.add_subplot()

ax.set_ylim(0, 11)

# the label of each series is the text shown for it in the legend
ax.plot(x_fit, y_fit, ls="--", c="blue", label="best fit")
ax.scatter(x_samples, y_samples, marker="x", c="orange", label="data")

# loc=2 places the legend in the upper left corner
ax.legend(loc=2)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# Plot annotations: lines

xdata = magic.normal_2a.copy()
ydata = magic.normal_1a.copy()

fig = plt.figure()
ax = fig.add_subplot()

ax.set_aspect(1)
ax.set_xlim(-6, 6)
ax.set_ylim(-3, 3)

# lines spanning the whole axes: horizontal at y = 0 (the default),
# vertical at x = 2, and an infinite line through two given points
ax.axhline(color="gray", ls="-.")
ax.axvline(2, color="gray", ls=":")
ax.axline((-4, -2), (-2, 1), color="purple", lw=1)

# a horizontal segment at y = -1, from x = -4 to x = 0 (data coordinates)
ax.hlines(-1, -4, 0, color="red", lw=3)

ax.scatter(xdata, ydata)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# Plot annotations: shades

# load the samples here so this cell also runs on its own
xdata = magic.normal_2a.copy()
ydata = magic.normal_1a.copy()

fig = plt.figure()
ax = fig.add_subplot()

ax.set_aspect(1)
ax.set_xlim(-6, 6)
ax.set_ylim(-3, 3)

# shaded bands spanning the whole axes:
# horizontal band for 0 <= y <= 1, vertical band for -2 <= x <= -1
ax.axhspan(0, 1, color="yellow", edgecolor=None, alpha=0.5)
ax.axvspan(-2, -1, color="palegreen", edgecolor=None, alpha=0.5)

ax.scatter(xdata, ydata)

# plt.show() takes no figure argument; it displays all open figures
plt.show()

In [None]:
# Plot annotations: text and arrow
x1_data = np.linspace(0, 2, 101)
y1_data = np.sqrt(x1_data)

fig = plt.figure()
ax = fig.add_subplot()

# mark the point (1, 1) with a black dot
ax.scatter(1, 1, c="k")

# supported math fonts are 'dejavusans', 'dejavuserif',
# 'cm', 'stix', 'stixsans'
# free-standing text placed at data coordinates (1, 0.2)
ax.text(
    1, 0.2, "$f(x) = \\sqrt{x}$", fontsize=16,
)

# for arrow style, see documentation on FancyArrowPatch
# xy is the point being annotated (where the arrow points);
# xytext is where the label text is placed
ax.annotate(
    "(1.0, 1.0)", xy=(1,1), xytext=(1.25, 0.8),
    arrowprops={"arrowstyle": "->", "connectionstyle": "Angle3"},
    fontsize=14
)

ax.plot(x1_data, y1_data)
plt.show()

In [None]:
# two different y scales on the same plot

x1_data = np.linspace(0, 10, 101)

y1_data = x1_data**2 * np.exp(x1_data)
y2_data = 3 * x1_data

fig = plt.figure()
ax1 = fig.add_subplot()

# new axes object that share the x scale
ax2 = ax1.twinx()

# left y axis (ax1): log scale, everything drawn in red
ax1.set_yscale("log")
ax1.set_xlabel("independent variable")
ax1.set_ylabel("log scale", color="red")
ax1.tick_params(axis="y", colors="red")
ax1.plot(x1_data, y1_data, c="red", ls="-.", label="$y = x^2 e^x$")

# right y axis (ax2): linear scale, everything drawn in blue
ax2.set_ylabel("linear scale", color="blue")
ax2.tick_params(axis="y", colors="blue")
ax2.plot(x1_data, y2_data, color="blue", label="$y = 3x$")

# text-free arrows pointing towards the y axis each curve is read against:
# red arrow to the left (log axis), blue arrow to the right (linear axis)
ax2.annotate("", xy=(3, 18), xytext=(3.6, 18),
    color="red", arrowprops={"arrowstyle": "->", "color": "red"}
)
ax2.annotate("", xy=(5, 12), xytext=(4.4, 12),
    color="blue", arrowprops={"arrowstyle": "->", "color": "blue"}
)

# each axes only knows its own lines, so collect the legend entries
# from both and show them in a single legend
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2)

plt.show()

----

_**Code writing #4.**_ Plotting a probability distribution

In [None]:
# load the probability mass function
# (.copy() presumably gives a working copy, leaving magic.pmf_values unchanged)
pmf_data = magic.pmf_values.copy()

In [None]:
# Your code to calculate the cumulative distribution function

In [None]:
# Your code to plot both the pmf and the cdf
# Hint: one step at a time, first make sure you can plot just the pmf

### Insets and multiple axes

In [None]:
# Plot with an inset

x = np.linspace(-1, 3, 101)
y = magic.twopeaks(x)

# the narrow range 1.4 to 1.6, sampled more finely for the inset
x2 = np.linspace(1.4, 1.6, 101)
y2 = magic.twopeaks(x2)

fig = plt.figure()

# add_axes takes [left, bottom, width, height] as fractions of the figure
# main axes
ax = fig.add_axes([0.1, 0.15, 0.8, 0.7])
ax.plot(x, y)

# inset axes, placed inside the area covered by the main axes
ax2 = fig.add_axes([0.55, 0.5, 0.3, 0.3])
ax2.plot(x2, y2)

plt.show()

In [None]:
# multiple plots on the same figure using .subplots()

# loading data: one shared x sample and six different y samples
x_data = magic.random.copy()
y00_data = magic.uncorr.copy()
y10_data = magic.nonlinear.copy()
y01_data = magic.positive.copy()
y11_data = magic.negative.copy()
y02_data = magic.Positive.copy()
y12_data = magic.Negative.copy()

# create figure and axes (axes is a 2 x 3 array)
fig, axes = plt.subplots(
    nrows=2, ncols=3, figsize=(9, 6),
    sharex=True,  # share x-axes across all graphs
    sharey=True,  # share y-axes across all graphs
)

# NOTE: another useful sharex, sharey setting is:
# sharex='col', # share x-axes within columns
# sharey='row', # share y-axes within rows

# figure level title
fig.suptitle("Correlations")

# the axes are shared, so limits and ticks set on one subplot apply to all
top_left = axes[0, 0]
top_left.set_xlim(-3, 3)
top_left.set_ylim(-3, 3)
top_left.set_xticks(np.arange(-3, 3.1))
top_left.set_yticks(np.arange(-3, 3.1))

# (row, column) of each panel with its title and y data, in drawing order
panels = [
    ((0, 0), "uncorrelated", y00_data),
    ((1, 0), "non-linear", y10_data),
    ((0, 1), "weakly positive", y01_data),
    ((1, 1), "weakly negative", y11_data),
    ((0, 2), "strongly positive", y02_data),
    ((1, 2), "strongly negative", y12_data),
]
for (row, col), panel_title, y_values in panels:
    panel = axes[row, col]
    panel.set_title(panel_title)
    panel.scatter(x_data, y_values)

plt.show()

In [None]:
# multiple plots on the same figure using GridSpec

#### Defining / loading data to plot

x = np.linspace(0, 1, 101)
sqrtx = np.sqrt(x)
sinx = np.sin(4 * np.pi * x)

x_value = np.arange(1, 4)
x_label = ["apple", "lemon", "orange"]
x_color = ["red", "gold", "orange"]
y_count = np.array([5, 7, 4])

x_sample = magic.normal_2a.copy()
y_sample = magic.normal_2b.copy()

#### Plotting

# create figure and grid (3 rows x 3 columns of equally sized cells)
fig = plt.figure(figsize=(9, 9))
gs = mpl.gridspec.GridSpec(3, 3)

# title for the entire figure
fig.suptitle("Subplots using GridSpec", fontsize=18)

# create axes and hang them on the grid
# (gs is indexed like a 2D array: gs[rows, columns], slices span several cells)
ax0 = fig.add_subplot(gs[0,0]) # top left, 1-by-1 space
ax1 = fig.add_subplot(gs[0,1:]) # top right, 1-by-2 space
ax2 = fig.add_subplot(gs[1:,:2]) # bottom left, 2-by-2 space
ax3 = fig.add_subplot(gs[1:,2]) # bottom right, 2-by-1 space

# top left plot
ax0.bar(x_value, y_count, tick_label=x_label,  color=x_color)

# top right plot: shaded area under the square root curve, plus the curve itself
ax1.fill_between(x, 0, sqrtx, color="lime", edgecolor=None, alpha=0.5)
ax1.plot(x, sqrtx, c='g')

# bottom left plot
ax2.set_xlim(-6, 6)
ax2.set_ylim(-6, 6)
ax2.scatter(x_sample, y_sample, color="black", marker="x")

# bottom right plot: sine curve drawn sideways (sin values on the horizontal axis)
ax3.grid(which="both")
ax3.set_xticks(np.arange(-1, 1.1, 0.5))
ax3.set_yticks(np.arange(0, 1, 0.1), minor=True)
ax3.plot(sinx, x, c="b")

plt.show()

----

_**Code writing #5.**_ 2D-sample and histogram on the same figure


You are given a set of 2D samples (coordinates specified in `magic.samples_x` and `magic.samples_y`) and their bin counts (counts stored in `magic.x_counts` and `magic.y_counts`, bin edges stored in `magic.bins`). You would like to, in a single figure, plot the scatter plot of the samples at the bottom left, together with the histograms of the x and y coordinates on the top and right edges, respectively.

An example of the expected output, for the case when the samples are normal (which is **different** from your data!), is provided below for reference. (In addition to the difference in data, your plot may also differ in details, e.g., you may or may not eliminate the empty axes at the top right.)

![scatter plot and x, y coordinate histograms on the same figure](https://github.com/wingho-uw/python-bootcamp-wk3/blob/main/img/scatter_and_histogram.svg?raw=true)

In [None]:
# starter code: loading the data
# (.copy() presumably gives working copies, leaving the arrays in magic unchanged)

samples_x = magic.samples_x.copy()
samples_y = magic.samples_y.copy()

# note that bins has one more element than counts
# (bins holds the bin edges, and N bins have N + 1 edges)
bins = magic.bins.copy()
x_counts = magic.x_counts.copy()
y_counts = magic.y_counts.copy()

In [None]:
# create your figure here