# Supplementary #1: Static Data Visualization

In [1]:
# connect to Google Colab

In [None]:
# Mount Google Drive at /content/drive using the google.colab helper.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, and no later cell visible in this notebook reads from
# /content/drive -- confirm whether the mount is actually needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:

# import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

## Line
1. Straight line
2. Parabola
3. Sine
---

#### Straight Line
- y = mx + c; where m = slope, c = intercept

In [None]:
# Straight line y = m*x with slope m = 1
x = np.array([1, 2, 3, 4])  # X-axis points
slope = 1
y = slope * x  # Y-axis points
print('(x,y): ', x, y)

# Draw on an explicit Axes rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Line graph example")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.grid()
plt.show()  # display

Update the slope to 3

In [None]:
# Same x values as before, now with slope m = 3
y = 3 * x  # Y-axis points
print('(x,y): ', x, y)

fig, ax = plt.subplots()
ax.plot(x, y)
ax.set(title="Line graph with updated slope", xlabel="X axis", ylabel="Y axis")
ax.grid()
plt.show()  # display

Two lines in the same plot

In [None]:
# Both lines share one Axes so their slopes can be compared directly
fig, ax = plt.subplots()

# First line: slope 1, default color
y = 1 * x
ax.plot(x, y)
print('(x,y) of plot#1: ', x, y)

# Second line: slope 3, drawn in red
y = 3 * x
ax.plot(x, y, color='red')
print('(x,y) of plot#2: ', x, y)

ax.grid()
plt.show()  # display

Negative slope

In [None]:
# Negative slope (m = -1): y falls as x grows
y = -x
print('(x,y): ', x, y)

fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Line graph")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.grid()
plt.show()  # display

Positive vs Negative slopes

In [None]:
# Overlay a rising (m = 1) and a falling (m = -1) line on one Axes
fig, ax = plt.subplots()

y = x
ax.plot(x, y)
print('(x,y) of plot#1: ', x, y)

y = -x
ax.plot(x, y, color='red', linestyle='-.')  # dash-dot style marks the negative slope
print('(x,y) of plot#2: ', x, y)

ax.set(title="Line graph", xlabel="X axis", ylabel="Y axis")
ax.grid()
plt.show()  # display

### Parabola

- y = x^2

In [None]:
# Integer points 1..10 and their squares
x = np.arange(1, 11)
y = x ** 2
print('(x,y): ', x, y)

# Plot y = x^2 on an explicit Axes
fig, ax = plt.subplots()
ax.plot(x, y, color="pink")
ax.set_title("Parabola graph")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.grid()
plt.show()

### Sine
Reference: [numpy.sin](https://numpy.org/doc/stable/reference/generated/numpy.sin.html)

In [None]:
# Sine curve sampled at 201 evenly spaced angles in [-pi, pi]
x = np.linspace(-np.pi, np.pi, num=201)
y = np.sin(x)

fig, ax = plt.subplots()
ax.plot(x, y)  # np.sin(x) could be passed inline instead of y
ax.set_xlabel('Angle [rad]')
ax.set_ylabel('sin(x)')
ax.axis('tight')  # fit the axis limits to the data
ax.grid()
plt.show()

### Using Pandas

Plot a straight line directly from a pandas `Series`.

In [None]:
# Using pandas: Series.plot draws the values against the index, so evenly
# spaced values produce a straight line.
# The data is built here instead of reusing `x`, which at this point in the
# notebook holds the sine cell's sample angles rather than the line data.
pd_line = pd.Series(np.arange(1, 5))
pd_line.plot(grid=True)

Line plots are well suited to:
- Time series
- Trend analysis

## Bar
1. Bar graph
2. Histogram
---

#### Bar Graph
- Categorical data
- Represent value on the y-axis
- Represent category on the x-axis

In [None]:
# general Bar: yearly enrollment counts, reused by the following bar-chart cells
years = range(2017, 2023)
boys = [35, 62, 91, 84, 65, 86]
girls = [42, 83, 79, 72, 60, 81]

# Vertical bars: one bar per year, height = number of boys
plt.bar(years, boys)
plt.xlabel('Year')
plt.ylabel('Number of Students')
plt.grid()
plt.title("Enrolled Students (Boy)")
plt.show()  # render the figure instead of echoing the title's Text repr

From vertical to horizontal

In [None]:
# From vertical to horizontal: barh puts the categories (years) on the
# y-axis and the values on the x-axis, so the axis labels swap as well
plt.barh(years, boys)
plt.ylabel('Year')
plt.xlabel('Number of Students')
plt.grid()
plt.title("Enrolled Students (Boy)")  # only the boys series is plotted
plt.show()  # render the figure instead of echoing the title's Text repr

Stacked bar graph

In [None]:
# stacked: the girls bars start where the boys bars end (bottom=boys)
plt.bar(years, boys, label='Boys')
plt.bar(years, girls, bottom=boys, label='Girls')
plt.xlabel('Year')
plt.ylabel('Number of Students')
plt.grid()
plt.title("Enrolled Students (Stacked)")
plt.legend()  # without a legend the two stacked series cannot be told apart
plt.show()

Compare bars side-by-side

In [None]:
# Compare side by side
years = range(2017, 2023)
boys = [35, 62, 91, 84, 65, 86]
girls = [42, 83, 79, 72, 60, 81]

# One integer slot per year; each group's two bars sit either side of it
x_axis = np.arange(len(years))
bar_width = 0.4            # width of every bar
half_width = bar_width / 2  # shift of 0.2 units keeps the paired bars from overlapping

plt.bar(x_axis - half_width, girls, width=bar_width, label='Girls')
plt.bar(x_axis + half_width, boys, width=bar_width, label='Boys')
plt.xticks(x_axis, years)  # label each slot with its year

plt.title("Number of Students in each group")
plt.xlabel("Year")
plt.ylabel("Number of Students")
plt.legend()
plt.show()

#### Histogram
- Used to show the frequency distribution of data over a range of values
- Each bar shows how many data points fall within one range (bin)

In [None]:
# Draw 250 samples from a normal distribution (mean 170, standard deviation 10)
x = np.random.normal(loc=170, scale=10, size=250)
print(x)

Call the function `hist()`

In [None]:
# Histogram of the samples with matplotlib's default binning
fig, ax = plt.subplots()
ax.hist(x)
plt.show()

#### Using Pandas

Plot a bar graph and a histogram directly from pandas objects.

In [None]:
# Using pandas: collect the enrollment series into one DataFrame,
# one column per series (years, boys, girls)
pd_bar_data = dict(years=years, boys=boys, girls=girls)
pd_bar = pd.DataFrame(pd_bar_data)
pd_bar


In [None]:
# Vertical bar chart of boys per year via the .plot.bar accessor
pd_bar.plot.bar(x='years', y='boys', grid=True,
                xlabel='Year', ylabel='Number of Students')

In [None]:
# Horizontal bar chart of boys per year via the .plot.barh accessor
pd_bar.plot.barh(x='years', y='boys', grid=True,
                 xlabel='Number of Students', ylabel='Year')

In [None]:
# Histogram: wrap fresh normal samples in a single-column DataFrame
x = np.random.normal(loc=170, scale=10, size=250)
pd_hist = pd.DataFrame(x)

# DataFrame.hist draws one histogram per column; grid lines are turned off
pd_hist.hist(grid=False)

## Pie Chart

- Used to show how a whole is divided among categories (each slice is one category's share)
- Considered as an alternative to the stacked bar chart (only for one bar)

In [None]:
# Slice sizes and their labels
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

fig, ax = plt.subplots()
ax.pie(y, labels=mylabels)  # default start angle = 0
plt.show()

Set starting angle

In [None]:
# Same slices as before, with the first slice starting at 90 degrees
y = np.array([35, 25, 25, 15])
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

fig, ax = plt.subplots()
ax.pie(y, labels=mylabels, startangle=90)  # set angle = 90
plt.show()

Pie chart via Pandas

In [None]:
# Build the pie data as a DataFrame indexed by label; the index supplies the
# slice labels. The column names previously were swapped ('Data' held the
# label strings and 'Labels' held the numbers); each column is now named
# for what it contains.
pd_pie = pd.DataFrame({'Labels': mylabels, 'Data': y},
                      index=mylabels)

# Plot the numeric column as the slice sizes
pd_pie.plot.pie(y='Data')
# pd_pie.plot(kind='pie', y='Data') # this method also works

## Scatter Plot
- To display the relationship between two variables and observe the nature of the relationship
- To display what happens to one variable when another variable is changed

In [None]:
# Two independent normal samples (mean 170, standard deviation 10), 250 points each
x = np.random.normal(loc=170, scale=10, size=250)
y = np.random.normal(loc=170, scale=10, size=250)

fig, ax = plt.subplots()
ax.scatter(x, y)
plt.show()


Scatter plot via Pandas

In [None]:
# The dict keys already give the column order x, y
pd_scatter = pd.DataFrame({'x': x, 'y': y})
pd_scatter.plot(kind='scatter', x='x', y='y')

Compare two scatter plots

In [None]:
# Compare two point sets on the same Axes
fig, ax = plt.subplots()

# Set 1: normal samples, default color
x1 = np.random.normal(loc=170, scale=10, size=250)
y1 = np.random.normal(loc=170, scale=10, size=250)
ax.scatter(x1, y1)

# Set 2: another draw from the same distribution, shown in red
x2 = np.random.normal(loc=170, scale=10, size=250)
y2 = np.random.normal(loc=170, scale=10, size=250)
ax.scatter(x2, y2, color='red')

plt.show()

Update color

Set cmap='viridis'

In [None]:
# Normal samples plus one color value per point (0..249)
x = np.random.normal(loc=170, scale=10, size=250)
y = np.random.normal(loc=170, scale=10, size=250)
colors = np.arange(x.size)

# Nice color maps: viridis, plasma, gray, binary, seismic, gnuplot
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='viridis')
fig.colorbar(points, ax=ax)  # colorbar for the scatter's color mapping
plt.show()

Set cmap='plasma'

In [None]:
# Same points, colored with the 'plasma' colormap
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='plasma')
fig.colorbar(points, ax=ax)
plt.show()

Set cmap='gray'

In [None]:
# Same points, colored with the 'gray' colormap
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='gray')
fig.colorbar(points, ax=ax)
plt.show()

Set cmap='binary'

In [None]:
# Same points, colored with the 'binary' colormap
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='binary')
fig.colorbar(points, ax=ax)
plt.show()

Set cmap='seismic'

In [None]:
# Same points, colored with the 'seismic' colormap
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='seismic')
fig.colorbar(points, ax=ax)
plt.show()

Set cmap='gnuplot'

In [None]:
# Same points, colored with the 'gnuplot' colormap
fig, ax = plt.subplots()
points = ax.scatter(x, y, c=colors, cmap='gnuplot')
fig.colorbar(points, ax=ax)
plt.show()

## Heatmap
- To see changes in behavior or gradual changes in data
- Different colors are used to represent different values

In [None]:
# 20x20 grid of uniform random values in [0, 1), reused by the next heatmap cells
data = np.random.random(size=(20, 20))

# imshow maps each cell's value to a color
fig, ax = plt.subplots()
ax.imshow(data)
ax.set_title("Heatmap Example")
plt.show()

Update color and add colorbar

In [None]:
# Same grid with the 'plasma' colormap and a colorbar showing the value scale
fig, ax = plt.subplots()
image = ax.imshow(data, cmap='plasma')
fig.colorbar(image, ax=ax)
ax.set_title("Heatmap")
plt.show()

In [None]:
# Same grid with the 'autumn' colormap and a colorbar showing the value scale
fig, ax = plt.subplots()
image = ax.imshow(data, cmap='autumn')
fig.colorbar(image, ax=ax)
ax.set_title("Heatmap")
plt.show()

Subplot

In [None]:
# Create a figure with two stacked subplots; change nrows to get more or fewer.
# With more than one subplot, `ax` is an array of Axes rather than a single Axes.
fig, ax = plt.subplots(nrows=2)
print(fig, ax, sep='\n')

Add annotation

In [None]:
# with annotation: heatmap of random integers with each cell's value printed on it

# 5x5 grid of random integers in [0, 10)
rand_num = np.random.randint(10, size=(5,5))
print(rand_num)
data = rand_num

# Draw everything on one explicit Axes so the image, the text labels and the
# colorbar are tied to the same figure.
fig, ax = plt.subplots()
image = ax.imshow(data)

# Loop over the data dimensions (taken from data.shape, so the grid size can
# change) and write each value at the center of its cell.
# imshow places column j on the x-axis and row i on the y-axis.
n_rows, n_cols = data.shape
for i in range(n_rows):
    for j in range(n_cols):
        # ha is horizontal alignment, va is vertical alignment
        ax.text(j, i, data[i, j], ha="center", va="center", color="w")

fig.colorbar(image, ax=ax)
ax.set_title("Heatmap")
plt.show()

Heatmap via Pandas (drawn with seaborn)

In [None]:
# 5x5 DataFrame of uniform random values in [0, 1)
pd_heat = pd.DataFrame(np.random.random((5,5)))

# use seaborn for visualization; `sns` is imported with the other libraries
# in the import cell at the top of the notebook.
# annot=True writes each cell's value on the heatmap; linewidths separates the cells.
sns.heatmap(pd_heat, cmap ='RdYlGn', linewidths = 0.30, annot = True)