# Wstęp do analizy danych i uczenia maszynowego
## 1. Wykresy w matplotlib i seaborn

In [None]:
# Import libraries: standard library first, then third-party packages
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# sns.set() is only an alias of set_theme(); use the preferred name.
sns.set_theme(style="darkgrid")

# Silence every warning so library notices do not clutter the cell outputs.
# NOTE(review): this is a blanket filter, so it also hides warnings raised by the notebook's own code.
warnings.filterwarnings("ignore")

In [None]:
# Fetch seaborn's "diamonds" example dataset and preview its first five rows
diamonds = sns.load_dataset(name="diamonds")
diamonds.head(n=5)

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
diamonds.info()

Dokumentacje matplotlib i seaborn:
- https://matplotlib.org/stable/index.html
- https://seaborn.pydata.org/

### Środowisko subplots

In [None]:
# Implicit pyplot interface: open a figure and let seaborn draw on the current axes
plt.figure(figsize=(6, 5))
sns.histplot(x='carat', data=diamonds, kde=True, bins=30)
plt.title("Rozkład masy diamentów")
plt.show()

In [None]:
# Explicit interface: create the Axes first, then pass it to seaborn
fig, ax = plt.subplots(figsize=(6, 5))
sns.histplot(x='carat', data=diamonds, kde=True, bins=30, ax=ax)
# Set the title and both axis labels in a single call
ax.set(
    title="Rozkład masy diamentów",
    xlabel="Masa (carat)",
    ylabel="Liczba diamentów",
)
plt.show()

In [None]:
# One carat histogram per cut, laid out on a two-column grid.
cut_levels = list(diamonds['cut'].unique())
n_cols = 2
# Ceiling division gives enough rows for every cut; keep at least one row
# so plt.subplots never receives a zero-sized grid.
n_rows = max(1, -(-len(cut_levels) // n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=(12, 10), squeeze=False)
panels = ax.ravel()  # flat, row-major view of the 2-D Axes array

for panel, cut in zip(panels, cut_levels):
    subset = diamonds[diamonds['cut'] == cut]
    sns.histplot(data=subset, x='carat', bins=30, kde=True, ax=panel)
    panel.set(
        title=f"Rozkład masy diamentów o szlifie: {cut}",
        xlabel="Masa (carat)",
        ylabel="Liczba diamentów",
    )

# When the number of cuts does not fill the grid, hide the leftover axes
# so that no empty frame is drawn.
for panel in panels[len(cut_levels):]:
    panel.set_visible(False)

plt.tight_layout()
plt.show()

### Wykresy

In [None]:
# Box plot: distribution of price within each cut
fig, ax = plt.subplots(figsize=(6, 5))
sns.boxplot(x='cut', y='price', data=diamonds, ax=ax)
ax.set(
    title="Box plot ceny diamentów w zależności od szlifu",
    xlabel="Szlif",
    ylabel="Cena",
)
plt.show()

In [None]:
# Box plot of price per cut, further split by colour through the hue channel
fig, ax = plt.subplots(figsize=(14, 5))
sns.boxplot(x='cut', y='price', hue='color', data=diamonds, ax=ax)
ax.set_title("Box plot ceny diamentów w zależności od szlifu", fontsize=16)
ax.set(xlabel="Szlif", ylabel="Cena")
# Pin the legend's upper-left corner to the axes' upper-right corner,
# which places the legend outside the plotting area.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), title='Kolor')
plt.show()

In [None]:
# Violin plot of the 'depth' column on its own (no grouping variable)
fig, ax = plt.subplots(figsize=(6, 5))
sns.violinplot(y='depth', data=diamonds, ax=ax)
ax.set(
    title="Violin plot głębokości diamentów",
    ylabel="Głębokość (%)",
)
plt.show()

In [None]:
# Scatter plot of carat against price, with points coloured by cut
fig, ax = plt.subplots(figsize=(6, 5))
sns.scatterplot(x='carat', y='price', hue='cut', data=diamonds, ax=ax)
ax.set(
    title="Scatter plot: Masa vs Cena diamentów",
    xlabel="Masa (carat)",
    ylabel="Cena",
)
plt.show()

In [None]:
# Carat-vs-price scatter plot per colour on a two-column grid,
# with a single figure-level legend for the cut hue.
color_levels = sorted(diamonds['color'].unique())
n_cols = 2
# Ceiling division gives enough rows for every colour; keep at least one row
# so plt.subplots never receives a zero-sized grid.
n_rows = max(1, -(-len(color_levels) // n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=(8, 16), squeeze=False)
panels = ax.ravel()  # flat, row-major view of the 2-D Axes array

for panel, color in zip(panels, color_levels):
    subset = diamonds[diamonds['color'] == color]
    sns.scatterplot(data=subset, x='carat', y='price', hue='cut', ax=panel)
    panel.set(title=f"Kolor: {color}", xlabel="Masa (carat)", ylabel="Cena")
    # Drop the per-panel legend; one shared legend is added to the figure below.
    # get_legend() returns None when no legend was drawn, so guard the removal.
    panel_legend = panel.get_legend()
    if panel_legend is not None:
        panel_legend.remove()

# When the number of colours does not fill the grid, hide the leftover axes
# so that no empty frame is drawn.
for panel in panels[len(color_levels):]:
    panel.set_visible(False)

fig.suptitle("Scatter plot: Masa vs Cena diamentów w zależności od koloru", fontsize=16, y=1.02)
# The labelled artists stay on the axes after the legend is removed,
# so the first panel still provides handles for the shared legend.
handles, labels = panels[0].get_legend_handles_labels()
fig.legend(handles, labels, title='Szlif', ncol=5, loc='upper center', bbox_to_anchor=(0.5, 1.01))
fig.tight_layout()
plt.show()