In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Report which seaborn release produced the outputs below
print(f"Seaborn version: {sns.__version__}")

# Use the white-grid axes style for every figure created after this cell
sns.set_style(style="whitegrid")

---

## Loading Sample Datasets

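Seaborn provides example datasets for practice. `sns.load_dataset()` downloads them from seaborn's online data repository on first use and caches them locally, so an internet connection is needed the first time each dataset is loaded.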

In [None]:
# Fetch the 'tips' example dataset and preview its first five rows
tips = sns.load_dataset(name='tips')
print("Tips dataset:")
tips.head(n=5)

In [None]:
# Fetch the 'iris' example dataset and preview its first five rows
iris = sns.load_dataset(name='iris')
print("Iris dataset:")
iris.head(n=5)

In [None]:
# Fetch the 'titanic' example dataset and preview its first five rows
titanic = sns.load_dataset(name='titanic')
print("Titanic dataset:")
titanic.head(n=5)

---

## 1. Relational Plots

Show relationships between variables using:
- `scatterplot()` - for scatter plots
- `lineplot()` - for line plots
- `relplot()` - figure-level function for both

### 1.1 Scatter Plots

In [None]:
# Basic scatter plot: one marker per row of `tips`
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(x='total_bill', y='tip', data=tips, ax=ax)
ax.set_title('Total Bill vs Tip')
plt.show()

In [None]:
# Same scatter plot, with marker color determined by the `time` column (hue)
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(x='total_bill', y='tip', hue='time', data=tips, ax=ax)
ax.set_title('Total Bill vs Tip (by Time)')
plt.show()

In [None]:
# Encode three extra columns at once: color (hue), marker shape (style)
# and marker area (size, scaled into the 50-200 range via `sizes`)
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='time',
    style='smoker',
    size='size',
    sizes=(50, 200),
    ax=ax,
)
ax.set_title('Multi-dimensional Scatter Plot')
plt.show()

In [None]:
# Using relplot for faceted plots: one column of axes per `time` value,
# with points colored by `smoker`
g = sns.relplot(data=tips, x='total_bill', y='tip',
                hue='smoker', col='time',
                kind='scatter', height=4, aspect=1.2)
# Access the underlying matplotlib Figure through `figure`; the older `fig`
# alias is documented as deprecated. y=1.02 nudges the title slightly above
# the top edge of the figure.
g.figure.suptitle('Tips by Time of Day', y=1.02)
plt.show()

### 1.2 Line Plots

In [None]:
# Build a synthetic daily series: a seeded cumulative sum of normal draws
np.random.seed(42)
n_days = 100
dates = pd.date_range('2024-01-01', periods=n_days, freq='D')
ts_data = pd.DataFrame({
    'date': dates,
    'value': np.random.randn(n_days).cumsum(),
    'category': np.tile(['A', 'B'], n_days // 2),
})

fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(x='date', y='value', data=ts_data, ax=ax)
ax.set_title('Time Series Plot')
# Tilt the date tick labels so neighbouring labels do not collide
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Line plot with confidence interval: the 'fmri' example data is plotted
# as one line per `event` value
fmri = sns.load_dataset('fmri')

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(x='timepoint', y='signal', hue='event', data=fmri, ax=ax)
ax.set_title('FMRI Signal (with confidence interval)')
plt.show()

---

## 2. Distribution Plots

Visualize the distribution of data:
- `histplot()` - Histogram
- `kdeplot()` - Kernel Density Estimate
- `displot()` - Figure-level distribution plot

### 2.1 Histograms

In [None]:
# Histogram of `total_bill` using 20 bins
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(x='total_bill', bins=20, data=tips, ax=ax)
ax.set_title('Distribution of Total Bill')
plt.show()

In [None]:
# Histogram with a kernel density estimate curve overlaid (kde=True)
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(x='total_bill', bins=20, kde=True, data=tips, ax=ax)
ax.set_title('Distribution with KDE')
plt.show()

In [None]:
# Histogram split by `time`, with the per-category bars stacked on each other
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(x='total_bill', hue='time', multiple='stack', data=tips, ax=ax)
ax.set_title('Stacked Histogram by Time')
plt.show()

### 2.2 KDE Plots

In [None]:
# One filled, semi-transparent density curve per `time` value
fig, ax = plt.subplots(figsize=(8, 5))
sns.kdeplot(x='total_bill', hue='time', fill=True, alpha=0.5,
            data=tips, ax=ax)
ax.set_title('KDE by Time of Day')
plt.show()

In [None]:
# Bivariate KDE: supplying both x and y yields filled density contours
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(x='total_bill', y='tip', fill=True, cmap='Blues',
            data=tips, ax=ax)
ax.set_title('2D KDE: Total Bill vs Tip')
plt.show()

### 2.3 Joint Plots

In [None]:
# Joint plot - scatter + marginal distributions
g = sns.jointplot(data=tips, x='total_bill', y='tip', kind='scatter')
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Joint Plot', y=1.02)
plt.show()

In [None]:
# Joint plot with regression line
g = sns.jointplot(data=tips, x='total_bill', y='tip', kind='reg')
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Joint Plot with Regression', y=1.02)
plt.show()

In [None]:
# Hexbin joint plot
g = sns.jointplot(data=tips, x='total_bill', y='tip', kind='hex')
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Hexbin Joint Plot', y=1.02)
plt.show()

### 2.4 Pair Plots

In [None]:
# Pair plot - all pairwise relationships between the iris columns,
# colored by `species`
g = sns.pairplot(iris, hue='species', height=2.5)
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Iris Dataset Pair Plot', y=1.02)
plt.show()

---

## 3. Categorical Plots

Visualize categorical data:
- **Scatter**: `stripplot()`, `swarmplot()`
- **Distribution**: `boxplot()`, `violinplot()`, `boxenplot()`
- **Estimate**: `barplot()`, `countplot()`, `pointplot()`

### 3.1 Strip and Swarm Plots

In [None]:
# Strip plot (jittered scatter) of bill amounts per day, colored by `sex`
fig, ax = plt.subplots(figsize=(8, 5))
sns.stripplot(x='day', y='total_bill', hue='sex', data=tips, ax=ax)
ax.set_title('Strip Plot')
plt.show()

In [None]:
# Swarm plot (non-overlapping points) of bill amounts per day, colored by `sex`
fig, ax = plt.subplots(figsize=(8, 5))
sns.swarmplot(x='day', y='total_bill', hue='sex', data=tips, ax=ax)
ax.set_title('Swarm Plot')
plt.show()

### 3.2 Box and Violin Plots

In [None]:
# Box plot of bill amounts per day, with one box per `sex` within each day
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(x='day', y='total_bill', hue='sex', data=tips, ax=ax)
ax.set_title('Box Plot')
plt.show()

In [None]:
# Violin plot (box + KDE); split=True requests a split violin for the
# two `sex` hue levels
fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(x='day', y='total_bill', hue='sex', split=True,
               data=tips, ax=ax)
ax.set_title('Split Violin Plot')
plt.show()

In [None]:
# Layer two plots on one Axes: light-gray violins without inner marks
# (inner=None) as the backdrop, then small black swarm points on top
fig, ax = plt.subplots(figsize=(10, 5))
shared_kws = dict(data=tips, x='day', y='total_bill', ax=ax)
sns.violinplot(inner=None, color='lightgray', **shared_kws)
sns.swarmplot(color='black', size=3, **shared_kws)
ax.set_title('Violin + Swarm Plot')
plt.show()

### 3.3 Bar and Count Plots

In [None]:
# Bar plot: bar height is the mean `total_bill` per day/sex group,
# drawn with a confidence-interval error bar
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='day', y='total_bill', hue='sex', data=tips, ax=ax)
ax.set_title('Bar Plot (Mean with CI)')
plt.show()

In [None]:
# Count plot: number of rows (frequency) per day, broken down by `sex`
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='day', hue='sex', data=tips, ax=ax)
ax.set_title('Count Plot')
plt.show()

In [None]:
# Point plot: group means joined by lines; each `sex` level gets its own
# marker shape and line style
hue_markers = ['o', 's']
hue_linestyles = ['-', '--']

fig, ax = plt.subplots(figsize=(8, 5))
sns.pointplot(x='day', y='total_bill', hue='sex',
              markers=hue_markers, linestyles=hue_linestyles,
              data=tips, ax=ax)
ax.set_title('Point Plot')
plt.show()

### 3.4 Catplot - Figure-level Categorical

In [None]:
# Using catplot for faceted categorical plots: one column of box plots per
# `time` value, boxes colored by `sex`
g = sns.catplot(data=tips, x='day', y='total_bill',
                hue='sex', col='time',
                kind='box', height=5, aspect=0.8)
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Faceted Box Plots', y=1.02)
plt.show()

---

## 4. Heatmaps

In [None]:
# Correlation heatmap over the three numeric columns of `tips`
numeric_cols = ['total_bill', 'tip', 'size']
tips_numeric = tips[numeric_cols]
correlation = tips_numeric.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    correlation,
    annot=True,        # write each coefficient inside its cell
    cmap='coolwarm',
    center=0,          # centre the diverging colormap on zero
    square=True,
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Pivot table heatmap: mean tip for every (day, time) combination.
# `observed=False` is passed explicitly: when the grouping columns are
# categorical (presumably the case for `day`/`time` as seaborn loads them),
# recent pandas versions warn that the implicit default will change. Stating
# it keeps the current result and avoids the FutureWarning.
pivot = tips.pivot_table(values='tip', index='day', columns='time',
                         aggfunc='mean', observed=False)

plt.figure(figsize=(8, 5))
# fmt='.2f' prints the annotated means with two decimals
sns.heatmap(pivot, annot=True, fmt='.2f', cmap='YlGnBu')
plt.title('Average Tip by Day and Time')
plt.show()

---

## 5. Regression Plots

In [None]:
# Scatter of tip against total bill with a fitted regression line
fig, ax = plt.subplots(figsize=(8, 5))
sns.regplot(x='total_bill', y='tip', data=tips, ax=ax)
ax.set_title('Regression Plot')
plt.show()

In [None]:
# Regression with an explicit 95% confidence band; the scatter points are
# made semi-transparent through scatter_kws
fig, ax = plt.subplots(figsize=(8, 5))
sns.regplot(x='total_bill', y='tip', data=tips,
            ci=95, scatter_kws=dict(alpha=0.5), ax=ax)
ax.set_title('Regression with 95% CI')
plt.show()

In [None]:
# lmplot - faceted regression: one column of axes per `time` value, with a
# separate fit per `smoker` level
g = sns.lmplot(data=tips, x='total_bill', y='tip',
               hue='smoker', col='time',
               height=4, aspect=1.2)
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Faceted Regression Plots', y=1.02)
plt.show()

---

## 6. Styling and Themes

In [None]:
# Available styles
styles = ['darkgrid', 'whitegrid', 'dark', 'white', 'ticks']

# Create an empty figure first and add each subplot *inside* its style
# context. sns.axes_style() used as a context manager only affects Axes
# created while the context is active; creating all Axes up front with
# plt.subplots() would render every panel in the same (global) style.
fig = plt.figure(figsize=(20, 4))
axes = []

for position, style in enumerate(styles, start=1):
    with sns.axes_style(style):
        ax = fig.add_subplot(1, len(styles), position)
        ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
        ax.set_title(style)
    axes.append(ax)

plt.tight_layout()
plt.show()

In [None]:
# Show each named palette as a row of unit-height bars, one bar per color
palettes = ['deep', 'muted', 'pastel', 'bright', 'dark', 'colorblind']

fig, axes = plt.subplots(2, 3, figsize=(15, 8))

for ax, palette in zip(axes.ravel(), palettes):
    colors = sns.color_palette(palette)
    n_colors = len(colors)
    # Draw all bars for this palette in one call: bar i gets colors[i]
    ax.bar(range(n_colors), 1, color=colors)
    ax.set_title(palette)
    ax.set_xlim(-0.5, n_colors - 0.5)

fig.tight_layout()
plt.show()

---

## 📝 Practice Problems

### Problem 1: Iris Visualization
Using the iris dataset:
1. Create a scatter plot of sepal_length vs sepal_width, colored by species
2. Create box plots of petal_length by species
3. Create a violin plot of petal_width by species

In [None]:
# Your solution here

### Problem 2: Tips Analysis
Using the tips dataset:
1. Create a catplot showing average tip by day, with separate columns for smoker/non-smoker
2. Create a heatmap showing average total_bill by day and time

In [None]:
# Your solution here

### Problem 3: Distribution Comparison
Using the tips dataset:
1. Create overlapping KDE plots of total_bill for Lunch vs Dinner
2. Create a joint plot of total_bill vs tip with a regression line

In [None]:
# Your solution here

---

## ✅ Solutions

### Solution 1: Iris Visualization

In [None]:
# Solution 1: three iris views side by side on a single row of axes
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
scatter_ax, box_ax, violin_ax = axes

# 1. Scatter plot of the sepal measurements, colored by species
sns.scatterplot(x='sepal_length', y='sepal_width', hue='species',
                data=iris, ax=scatter_ax)
scatter_ax.set_title('Sepal Length vs Width')

# 2. Box plot of petal length for each species
sns.boxplot(x='species', y='petal_length', data=iris, ax=box_ax)
box_ax.set_title('Petal Length by Species')

# 3. Violin plot of petal width for each species
sns.violinplot(x='species', y='petal_width', data=iris, ax=violin_ax)
violin_ax.set_title('Petal Width by Species')

fig.tight_layout()
plt.show()

### Solution 2: Tips Analysis

In [None]:
# Solution 2

# 1. Catplot: kind='bar' draws the average tip per day, with one column of
#    axes per `smoker` value
g = sns.catplot(data=tips, x='day', y='tip',
                col='smoker', kind='bar',
                height=5, aspect=0.8)
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Average Tip by Day and Smoker Status', y=1.02)
plt.show()

# 2. Heatmap of the mean total bill for every (day, time) combination.
#    `observed=False` is passed explicitly: when the grouping columns are
#    categorical (presumably the case for `day`/`time` as seaborn loads them),
#    recent pandas versions warn that the implicit default will change.
#    Stating it keeps the current result and avoids the FutureWarning.
pivot = tips.pivot_table(values='total_bill', index='day', columns='time',
                         aggfunc='mean', observed=False)

plt.figure(figsize=(8, 5))
sns.heatmap(pivot, annot=True, fmt='.2f', cmap='RdYlGn')
plt.title('Average Total Bill by Day and Time')
plt.show()

### Solution 3: Distribution Comparison

In [None]:
# Solution 3

# 1. Overlapping KDE: one filled, semi-transparent density per `time` value
plt.figure(figsize=(10, 5))
sns.kdeplot(data=tips, x='total_bill', hue='time', fill=True, alpha=0.5)
plt.title('Distribution of Total Bill: Lunch vs Dinner')
plt.show()

# 2. Joint plot with regression; scatter_kws is forwarded to the joint
#    regression plot to make the points semi-transparent
g = sns.jointplot(data=tips, x='total_bill', y='tip', kind='reg',
                  height=7, scatter_kws={'alpha': 0.5})
# Use the grid's `figure` attribute (the `fig` alias is documented as
# deprecated); y=1.02 nudges the title above the top edge of the figure.
g.figure.suptitle('Total Bill vs Tip with Regression', y=1.02)
plt.show()

---

## 📌 Summary

| Plot Type | Function | Use Case |
|-----------|----------|----------|
| **Relational** | `scatterplot()`, `lineplot()`, `relplot()` | Show relationships |
| **Distribution** | `histplot()`, `kdeplot()`, `displot()` | Show distributions |
| **Categorical** | `boxplot()`, `violinplot()`, `barplot()`, `catplot()` | Compare categories |
| **Matrix** | `heatmap()`, `clustermap()` | Show patterns in matrices |
| **Regression** | `regplot()`, `lmplot()` | Show linear relationships |
| **Pairwise** | `pairplot()`, `jointplot()` | Multiple relationships |

### Key Parameters:
- `hue` - Color by category
- `style` - Marker style by category
- `size` - Size by value
- `col`, `row` - Create faceted plots

**Next:** [30_linear_regression.ipynb](30_linear_regression.ipynb) - Linear regression with visualization