# Grafické knihovny - matplotlib, ggplot, plotly

- matplotlib: `pip install matplotlib`
- ggplot: `pip install plotnine`
- plotly: `pip install plotly`

### Poznámky

- `plotnine` není kompletní a některé věci, které znáte z `ggplot` v R, v `plotnine` prostě nefungují. Aktuální verze je 0.8 a je tomu tak už delší dobu.
- `plotnine` (resp. `ggplot`) si dobře rozumí s `pandas` DataFrames, takže některé příklady, které tu ukazuji, jsou poněkud krkolomné a s využitím DataFrames budou nejspíš trochu snazší

## Line plot a obecná nastavení

In [None]:
# Complete example built on numpy:
#   1. create the figure and its grid of axes,
#   2. fill the axes with data step by step,
#   3. color the lines using a chosen colormap,
#   4. add a legend,
#   5. save the result at a specific pixel resolution.

from waves import wave
import matplotlib.pyplot as plt
import numpy as np

# Desired image size in pixels. matplotlib sizes figures in inches, so the
# pixel size is divided by the DPI that is later used when saving.
size = (1920, 1920)
mydpi = 100
figsize = [s / mydpi for s in size]

# 3 x 2 grid of axes: one row per wave kind, left column for the individual
# terms and right column for their running sum.
fig, axs = plt.subplots(3, 2, figsize=figsize)

a = 0.0
b = 10.0
N = 10
f0 = 1
x = np.linspace(a, b, 1000)

# N + 1 colors sampled evenly across the "hot" colormap.
cmap = plt.get_cmap("hot")
colors = cmap(np.linspace(0, 1, N + 1))

# NOTE(review): with sum=False, `wave` is assumed to return the separate terms
# indexable along the first axis - confirm against waves.py.
square = wave(x, a=a, b=b, f0=f0, N=N, sum=False, kind="square")
sawtooth = wave(x, a=a, b=b, f0=f0, N=N, sum=False, kind="sawtooth")
triangle = wave(x, a=a, b=b, f0=f0, N=N, sum=False, kind="triangle")

all_data = [square, sawtooth, triangle]

for row, terms in zip(axs, all_data):
    term_ax, sum_ax = row
    # Only every second index is drawn to keep the plots readable.
    for i in range(0, N, 2):
        term_ax.plot(x, terms[i], label=f"N = {i + 1}", color=colors[i])
        # Sum of the terms 0..i along the first axis.
        sum_ax.plot(x, np.sum(terms[:i + 1], 0), color=colors[i])

for ax in axs.flat:
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    # Keep tick labels and axis labels only on the outer edge of the grid.
    ax.label_outer()

axs[0, 0].legend()

fig.suptitle("Wave generation")
# The figure size is fixed when the figure is created; savefig only takes the
# DPI (19.2 in * 100 dpi = 1920 px). It does not accept a `figsize` keyword.
fig.savefig("waves.png", dpi=mydpi)
plt.show()

## Scatterplot

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mc
import numpy as np

# The data is handed over as a dictionary (the `data=` argument of the
# plotting call); the plot then refers to the individual arrays by their keys.
data = {
    'a': np.arange(50),
    'c': np.random.randint(0, 50, 50),
    # Non-negative values scaled up so they are usable as marker sizes.
    'd': np.abs(np.random.randn(50)) * 100,
}

# 'b' follows 'a' with added Gaussian noise.
data['b'] = data['a'] + 10 * np.random.randn(50)

# Create the figure through pyplot so that it is the one actually shown;
# a bare `plt.Figure()` is not registered with pyplot and would stay unused.
fig, ax = plt.subplots()

# Custom colormap built from a list of colors given as (r, g, b, alpha),
# every component between 0 and 1.
cmap = mc.LinearSegmentedColormap.from_list(
    "mapa",
    [(1.0, 0.0, 0.0, 0.3), (0.0, 1.0, 0.0, 0.6), (0.0, 0.0, 1.0, 0.9)],
)

# c = array of values mapped to colors (not the same thing as `color`)
# s = array of marker sizes
sc = ax.scatter('a', 'b', c='c', s='d', data=data, cmap=cmap)

ax.set_xlabel('entry a')
ax.set_ylabel('entry b')
fig.colorbar(sc, ax=ax)
plt.show()

In [None]:
from plotnine import *

# The individual plot components are built first and then added together in
# the same order as layers of one ggplot object.
point_layer = geom_point(size=data['d'] * 0.1, mapping=aes(color=data['c']))
plot_labels = labs(color="", x="entry a", y="entry b", title="Scatterplot")
color_scale = scale_color_gradientn(colors=["#FF0000", "#00FF00", "#0000FF"])

scatter_plot = (
    ggplot()
    + aes(x=data['a'], y=data['b'])
    + point_layer
    + plot_labels
    + theme_bw()
    + color_scale
)

# `p` holds the result of draw(), not the ggplot object itself.
p = scatter_plot.draw()

## Category plot

In [None]:
# Categorical data: string labels on the x axis, one number per label.
names = ['group_a', 'group_b', 'group_c']
values = [1, 10, 100]

# One wide figure with three axes side by side (1 row, 3 columns).
fig = plt.figure(figsize=(9, 3))
ax1, ax2, ax3 = (fig.add_subplot(1, 3, position) for position in (1, 2, 3))

# The same data drawn as bars, as points and as a line.
ax1.bar(names, values)
ax2.scatter(names, values)
ax3.plot(names, values)

ax1.set_ylabel("hodnoty")
ax2.set_xlabel("kategorie")

fig.suptitle('Categorical Plotting')
plt.show()

- multiplot se v R obvykle řeší použitím dalších balíčků (např. patchwork). To zatím v plotnine chybí a můžeme tedy dělat pouze samostatné grafy

In [None]:
from plotnine import *
# Shared starting point of all three plots.
base = ggplot() + aes(x=names, y=values)

# One separate plot per geom (columns, points, path); each geom gets the same
# mapping and is put into a single group.
ps = [
    base + geom(aes(x=names, y=values), group=1)
    for geom in (geom_col, geom_point, geom_path)
]

# Apply the black-and-white theme and draw every plot on its own.
for p in ps:
    themed = p + theme_bw()
    themed.draw()

## Barplot a piechart (a vlastně taky category plot)

In [None]:
# Parties and the colors used for them, in matching order.
strany = ["KSČM", "ANO", "TOP09", "Piráti", "ČSSD", "Zelení"]
barvy = ["red", "blue", "purple", "black", "orange", "green"]

# Random vote counts, one per party.
hlasy = np.random.randint(0, 150, len(strany))

# Pull out only the pie slice of the party with the most votes.
winner = np.argmax(hlasy)
explode = [0.2 if position == winner else 0 for position in range(len(strany))]

fig, axs = plt.subplots(1, 2, figsize=(16, 6))
bar_ax, pie_ax = axs

# Left: bar chart.
bar_ax.bar(strany, hlasy, color=barvy)
bar_ax.set_ylabel("hlasy (tis.)")

# Right: pie chart of the same data.
pie_ax.pie(hlasy, labels=strany, shadow=True, explode=explode, colors=barvy)

fig.suptitle("Parlamentní volby 2021")

plt.show()

- piechart v plotnine není

In [None]:
from plotnine.data import *

import pandas as pd

# "strany" is stored as a categorical whose category order is the order of the
# `strany` list itself.
party_column = pd.Categorical(strany, categories=strany)
df = pd.DataFrame({
    "strany": party_column,
    "hlasy": hlasy,
    "barvy": barvy,
})

# Party name -> fill color, used by the manual fill scale below.
colormap = dict(zip(strany, barvy))

bar_chart = (
    ggplot(df)
    + aes("strany", "hlasy", fill="strany")
    + geom_col(show_legend=False)
    + labs(y="hlasy (tis.)")
    # coord_flip() could be added here as one more layer (left disabled).
    + scale_fill_manual(values=colormap)
    + theme_classic()
)

# `p` holds the result of draw(), not the ggplot object itself.
p = bar_chart.draw()

## Boxplot - statistický přehled o souboru dat

In [None]:
def generate_random_set():
    """Return a 1-D array of 95 sample values for the boxplot examples.

    The parts are concatenated in this order:

    * 50 random values in [0, 100),
    * 25 values equal to 50,
    * 10 random high values in [100, 200),
    * 10 random low values in (-100, 0].

    The random parts are drawn from numpy's global random state.
    """
    bulk = 100 * np.random.rand(50)
    plateau = np.full(25, 50.0)
    high_outliers = 100 + 100 * np.random.rand(10)
    low_outliers = -100 * np.random.rand(10)
    return np.concatenate((bulk, plateau, high_outliers, low_outliers))

# Three independently generated sample sets, keyed by dataset name.
data = {
    set_name: generate_random_set()
    for set_name in ("dataset1", "dataset2", "dataset3")
}



In [None]:
fig, ax = plt.subplots()

# One box per dataset; the dictionary keys become the labels.
set_labels = list(data.keys())
samples = list(data.values())

ax.boxplot(
    samples,
    vert=True,
    notch=True,
    showfliers=True,
    meanline=True,
    showmeans=True,
    labels=set_labels,
)
ax.set_title('Boxplot')

plt.show()

In [None]:
from plotnine import *

import pandas as pd

# plotnine expects a substantially different data layout: the values of all
# datasets sit in one column and a second column says which dataset each row
# belongs to. The author found no way to draw this other than through the
# DataFrame built below.
frames = [
    pd.DataFrame({"value": samples, "set": set_name})
    for set_name, samples in data.items()
]
df = pd.concat(frames)

box_plot = (
    ggplot(df)
    + aes("set", "value")
    + geom_boxplot(notch=True)
    # coord_flip() could be added here as one more layer (left disabled).
    + theme_matplotlib()
)

# `p` holds the result of draw(), not the ggplot object itself.
p = box_plot.draw()

## 2D pole hodnot - imshow

In [None]:
# prakticky vykresleni obrazku po jednotlivych pixelech
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-np.pi, np.pi, 101)

# meshgrid produces 2-D arrays of the independent variables, which is handy
# for evaluating functions of two variables.
XX, YY = np.meshgrid(x, x)
RR = np.sqrt(XX**2 + YY**2)

# z = sin(3 R) / R. np.sinc(t) is sin(pi t) / (pi t) and equals 1 at t = 0,
# so this form gives the correct limit value 3 at R = 0 and never divides by
# zero (np.where would still evaluate the 0/0 branch and emit a warning).
z = 3.0 * np.sinc(3.0 * RR / np.pi)

cmap = plt.get_cmap("hot")

# Create the figure through pyplot so that it is the one actually shown;
# a bare `plt.Figure()` is not registered with pyplot and would stay unused.
fig, ax = plt.subplots()
ax.imshow(z, cmap=cmap)

plt.show()

- toto se mi v plotnine nepovedlo