Timestamp and author, course texts, common on all notebooks.

# Plots and Charts

Matplotlib and Seaborn are Python's most commonly used plotting libraries.

Matplotlib is a low-level, highly flexible library that provides full control over plot elements and is well suited for custom, publication-quality charts.

Seaborn is built on top of Matplotlib and offers a higher-level interface with attractive default styles and strong integration with pandas, making it ideal for statistical visualizations and exploratory data analysis.

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
# Select Matplotlib's 'dark_background' style sheet for the figures.
plt.style.use(['dark_background'])

import seaborn as sns
# Apply seaborn's theme, with color_codes explicitly switched on.
sns.set(color_codes=True)

In [None]:
# Reset Matplotlib to its default style sheet, undoing 'dark_background'.
style.use('default')
# Apply the seaborn theme once, with the 'ticks' style passed in directly.
# Calling sns.set(color_codes=True) *after* sns.set_style('ticks') re-applies
# seaborn's default style and silently discards 'ticks'.
sns.set(style='ticks', color_codes=True)

# Tabulation

In [None]:
# Source of the JSON file used throughout the tabulation section.
# NOTE(review): third-party endpoint — confirm it is still being served.
url = 'https://api.covid19india.org/states_daily.json'

In [None]:
import urllib.request

In [None]:
# Download the payload to a local file; the trailing semicolon keeps the
# notebook from echoing the call's return value.
urllib.request.urlretrieve(url, 'data.json');

In [None]:
# First attempt: let pandas parse the downloaded file directly.
covid_data = pd.read_json('data.json')

In [None]:
covid_data

In [None]:
import json

In [None]:
# Second attempt: read the raw JSON so its nested content can be reached.
with open('data.json') as f:
    data = json.load(f)

In [None]:
# Keep only the value stored under the top-level 'states_daily' key.
data = data['states_daily']

In [None]:
# Flatten those records into a DataFrame and replace the first attempt.
covid_data = pd.json_normalize(data)

In [None]:
covid_data

In [None]:
df = covid_data

In [None]:
df.date = pd.to_datetime(df.date)

In [None]:
df = df[df.status == 'Confirmed']

In [None]:
df.drop('status', axis=1, inplace=True)

In [None]:
df.set_index('date', inplace=True)

In [None]:
df

In [None]:
df.info()

In [None]:
df.tn

In [None]:
pd.to_numeric(df.tn)

In [None]:
df = df.drop('dateymd', axis=1)
df = df.apply(pd.to_numeric)

In [None]:
df.info()

In [None]:
df.tail(7)

## Styling tabulation

In [None]:
df = df.tail(7)

In [None]:
df.style

In [None]:
def colour_red_negative(x):
    """Return the CSS colour rule for one table cell.

    Values below zero get ``'color: red'``; every other value gets
    ``'color: white'``.
    """
    if x < 0:
        return 'color: red'
    return 'color: white'

In [None]:
# Colour each cell by passing its value through colour_red_negative.
# NOTE(review): recent pandas releases rename Styler.applymap to Styler.map —
# confirm against the pandas version in use.
df.style.applymap(colour_red_negative)

In [None]:
# Remove the 'un' column from the table, in place.
df.drop('un', axis=1, inplace=True)

In [None]:
# Render again with the same per-cell colouring, now without 'un'.
df.style.applymap(colour_red_negative)

In [None]:
# Highlight maxima in red.
df.style.highlight_max(color='red')

In [None]:
# Remove the 'dd' and 'ld' columns, in place.
df.drop(['dd', 'ld'], axis=1,inplace=True)

In [None]:
# Chain two highlights: maxima in red, minima in green.
df.style.highlight_max(color='red').highlight_min(color='green')

In [None]:
# Remove the 'tt' column, in place.
df.drop('tt', axis=1, inplace=True)

In [None]:
def bold_max_value(x):
    """Return one CSS rule per element of ``x``, bolding the maximum.

    Elements equal to ``x.max()`` map to ``'font-weight: bold'``; all other
    elements map to the empty string. The result is a plain list in the same
    order as ``x``.
    """
    peak = x.max()
    styles = []
    for hit in (x == peak):
        styles.append('font-weight: bold' if hit else '')
    return styles

In [None]:
# Apply bold_max_value through Styler.apply, which hands the function whole
# columns/rows rather than single cells.
df.style.apply(bold_max_value)

In [None]:
# Same bolding, plus minima highlighted in green.
df.style.apply(bold_max_value).highlight_min(color='green')

In [None]:
# Same bolding, with the green minimum highlight computed along axis=1.
df.style.apply(bold_max_value).highlight_min(color='green', axis=1)

In [None]:
# Same bolding, with a red maximum highlight computed along axis=1.
df.style.apply(bold_max_value).highlight_max(color='red', axis=1)

In [None]:
# Shade cell backgrounds with the 'Reds' colormap.
df.style.background_gradient(cmap='Reds')

In [None]:
# Same gradient, computed along axis=1.
df.style.background_gradient(cmap='Reds', axis=1)

In [None]:
# Restrict the gradient to three columns; the others stay unstyled.
df.style.background_gradient(cmap='Reds', subset=['mh', 'tn', 'dl'])

In [None]:
# Draw an in-cell bar for every value.
df.style.bar()

In [None]:
# In-cell bars for three columns only, while still showing the full table.
df.style.bar(subset=['mh', 'tn', 'dl'])

In [None]:
# Select the three columns first, so only they are shown and styled.
df[['mh', 'tn', 'dl']].style.bar()

In [None]:
# One bar colour per column, by chaining a bar() call for each subset.
df[['mh', 'tn', 'dl']].style.bar(subset=['mh'], color='red').bar(subset=['tn'], color='orange').bar(subset=['dl'], color='yellow')

# Distribution of data

## Distribution of a single continuous variable

### Histogram

In [None]:
x = np.random.normal(size=1000)

In [None]:
sns.distplot(x);

In [None]:
sns.distplot(x, kde=False);

In [None]:
sns.distplot(x, kde=False, rug=True);

In [None]:
sns.distplot(x, kde=False, rug=True, bins=50);

In [None]:
sns.kdeplot(x);

In [None]:
sns.kdeplot(x, fill=True);

In [None]:
y = np.random.uniform(size=1000)

In [None]:
sns.kdeplot(x,fill=True)
sns.kdeplot(y,fill=True);

In [None]:
d = sns.load_dataset('diamonds')

In [None]:
d

In [None]:
d.info()

In [None]:
sns.distplot(d.carat);

In [None]:
sns.distplot(d.price);

In [None]:
sns.distplot(d.x);

In [None]:
sns.distplot(d.x, rug=True);

In [None]:
sns.distplot(d.sample(1000).x, rug=True, bins=50);

In [None]:
sns.kdeplot(d.x, fill=True)
sns.kdeplot(d.y, fill=True)
sns.kdeplot(d.z, fill=True);

### Box plot

In [None]:
x = np.random.normal(size=1000)

In [None]:
sns.boxplot(x)

In [None]:
sns.kdeplot(x);

In [None]:
x = np.random.uniform(size=1000)

In [None]:
sns.boxplot(x);

In [None]:
sns.boxplot(x, whis=0.2)

In [None]:
x = np.random.normal(size=1000)

In [None]:
sns.boxplot(x, whis=0.5);

In [None]:
sns.boxplot(x, whis=0.5, fliersize=1);

In [None]:
sns.boxplot(x, whis=0.5, fliersize=1, orient='v');

In [None]:
sns.boxplot(d.price);

In [None]:
sns.kdeplot(d.price);

In [None]:
sns.boxplot(d.x);

In [None]:
sns.distplot(d.x);

In [None]:
sns.distplot(d.carat)

In [None]:
sns.boxplot(d.carat)

### Boxen plots

In [None]:
sns.boxplot(d.sample(5000).carat);

In [None]:
sns.boxenplot(d.sample(5000).carat);

In [None]:
sns.boxenplot(x = 'island', y = 'body_mass_g', data = p);

## Distribution of a categorical variable

### Bar plots

In [None]:
c = d.groupby('cut')['cut'].count()

In [None]:
sns.barplot(x=c.index, y=c.values)

In [None]:
c = d.groupby('clarity')['clarity'].count()

In [None]:
sns.barplot(x=c.index, y=c.values);

In [None]:
c = d.groupby('color')['color'].count()

In [None]:
sns.barplot(x=c.index, y=c.values);

## Joint distribution of two variables

### Jointplot

In [None]:
# Two independently drawn normal samples of 1,000 values each.
x = np.random.normal(size=1000)
y = np.random.normal(size=1000)

In [None]:
# Put both samples in one frame (this rebinds the name df).
df = pd.DataFrame({'x': x, 'y': y})

In [None]:
df

In [None]:
# Joint plot of the two columns with the default kind.
sns.jointplot(x="x", y="y", data=df);

In [None]:
# Same data drawn with kind='kde'.
sns.jointplot(x="x", y="y", data=df, kind='kde');

In [None]:
# Now make y depend on x: three times x plus noise scaled down by 5.
x = np.random.normal(size=1000)
y = 3 * x + np.random.normal(size=1000)/5

In [None]:
df = pd.DataFrame({'x': x, 'y': y})

In [None]:
sns.jointplot(x="x", y="y", data=df, kind='kde');

In [None]:
# Joint KDE of carat and price on the full diamonds table.
sns.jointplot(x='carat', y='price', data=d, kind='kde');

In [None]:
# Same pair of columns on a random sample of 500 rows.
sns.jointplot(x='carat', y='price', data=d.sample(500));

In [None]:
# Column 'x' against price on a fresh random sample of 500 rows.
sns.jointplot(x='x', y='price', data=d.sample(500));

In [None]:
sns.jointplot(x='x', y='price', data=d.sample(500), kind='kde');

### Swarm plot

In [None]:
sns.swarmplot(d.sample(1000).carat);

In [None]:
sns.swarmplot(d.sample(100).price);

In [None]:
d.info()

In [None]:
# Price per 'cut' level, on a random sample of 1,000 rows.
sns.swarmplot(x='cut', y='price', data=d.sample(1000));

In [None]:
# Price per 'color' level, on a fresh random sample.
sns.swarmplot(x='color', y='price', data=d.sample(1000));

In [None]:
# Price per 'clarity' level, on a fresh random sample.
sns.swarmplot(x='clarity', y='price', data=d.sample(1000));

In [None]:
# Same call again; d.sample draws a new sample on every call.
sns.swarmplot(x='clarity', y='price', data=d.sample(1000));

In [None]:
# Load seaborn's 'penguins' example dataset.
p = sns.load_dataset('penguins')

In [None]:
p

In [None]:
# Body mass per species.
sns.swarmplot(x='species', y='body_mass_g', data=p);

In [None]:
# Body mass per island.
sns.swarmplot(x='island', y='body_mass_g', data=p);

In [None]:
# Body mass alone, with no grouping variable.
sns.swarmplot(x='body_mass_g', data=p);

### Violin plot

In [None]:
# Violin plot of body mass.
sns.violinplot(x='body_mass_g', data=p);

In [None]:
# Box plot of the same column.
sns.boxplot(x='body_mass_g', data=p);

In [None]:
# Filled density estimate of the same column.
sns.kdeplot(p.body_mass_g, fill=True);

In [None]:
# Stack the four views of body mass in one figure, one Axes per row.
fig, axs = plt.subplots(nrows=4)
sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
sns.kdeplot(p.body_mass_g, fill=True, ax=axs[3]);

In [None]:
# Same figure, resized to 5 x 10 inches.
fig, axs = plt.subplots(nrows=4)
fig.set_size_inches(5, 10);
sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
sns.kdeplot(p.body_mass_g, fill=True, ax=axs[3]);

In [None]:
# Same figure again, with every panel given the same x-limits (2000-7500) so
# the four views line up.
fig, axs = plt.subplots(nrows=4)
fig.set_size_inches(5, 10);
p1 = sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
p1.set(xlim=(2000, 7500));
p2 = sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
p2.set(xlim=(2000, 7500));
p3 = sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
p3.set(xlim=(2000, 7500));
p4 = sns.kdeplot(p.body_mass_g, fill=True, ax=axs[3]);
p4.set(xlim=(2000, 7500));

In [None]:
# Back to a single violin plot of body mass.
sns.violinplot(x='body_mass_g', data=p);

In [None]:
# Vertical variant: the column goes on y. Passing it as x together with
# orient='v' does not turn the plot, because the orientation request is
# ignored when only x is given.
sns.violinplot(y='body_mass_g', data=p, orient='v');

In [None]:
# Body mass per species.
sns.violinplot(x='species', y='body_mass_g', data=p);

In [None]:
p.head()

In [None]:
# Flipper length per species.
sns.violinplot(x='species', y='flipper_length_mm', data=p);

In [None]:
# Flipper length per island.
sns.violinplot(x='island', y='flipper_length_mm', data=p);

In [None]:
# Flipper length per sex.
sns.violinplot(x='sex', y='flipper_length_mm', data=p);

In [None]:
# Flipper length per island again, for comparison with the swarm plots below.
sns.violinplot(x='island', y='flipper_length_mm', data=p);

In [None]:
# Same grouping as a swarm plot.
sns.swarmplot(x='island', y='flipper_length_mm', data=p);

In [None]:
# Colour the points by sex.
sns.swarmplot(x='island', y='flipper_length_mm', hue='sex', data=p);

In [None]:
# Colour the points by species.
sns.swarmplot(x='island', y='flipper_length_mm', hue='species', data=p);

In [None]:
# Diamonds: price per cut on a random sample of 1,000 rows.
sns.swarmplot(x='cut', y='price', data=d.sample(1000));

In [None]:
# Same grouping on a fresh sample, with the points coloured by 'color'.
sns.swarmplot(x='cut', y='price', hue='color', data=d.sample(1000));

In [None]:
sns.violinplot(x='island', y='flipper_length_mm', data=p[p.sex=='MALE']);

In [None]:
sns.violinplot(x='island', y='flipper_length_mm', data=p[p.sex=='FEMALE']);

In [None]:
sns.violinplot(x='island', y='flipper_length_mm', hue='sex', split=True, data=p);

In [None]:
sns.violinplot(x='island', y='flipper_length_mm',
               hue='sex', split=True, inner='quartile', data=p);

In [None]:
sns.violinplot(x='island', y='flipper_length_mm',
               hue='species', split=True, inner='quartile', data=p);

In [None]:
sns.violinplot(x='island', y='flipper_length_mm',
               hue='species', data=p);

In [None]:
p['binary_species'] = p.species.apply(lambda x: 0 if x == 'Gentoo' else 1)

In [None]:
p

In [None]:
sns.violinplot(x='island', y='flipper_length_mm',
               hue='binary_species', split=True, inner='quartile', data=p);

In [None]:
p['binary_species'] = p.species.apply(lambda x: 'Gentoo' if x == 'Gentoo' else 'Adelie | Chinstrap')

In [None]:
sns.violinplot(x='island', y='flipper_length_mm',
               hue='binary_species', split=True, inner='quartile', data=p);

### Faceted plotting

In [None]:
# Density of flipper length over all penguins.
sns.kdeplot(p.flipper_length_mm, fill=True);

In [None]:
# Density for the Gentoo rows only.
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, fill=True);

In [None]:
# One density per species, overlaid on the same axes.
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, fill=True);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, fill=True);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, fill=True);

In [None]:
# Same overlay with a legend; the labels are listed in the order the curves
# are drawn.
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, fill=True);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, fill=True);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, fill=True);
plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);


In [None]:
# One box plot per species, drawn on the same axes. Each vector is passed as
# x= explicitly: a bare positional vector is read as `x` by some seaborn
# releases and as `data` by others, which changes the plot's orientation.
sns.boxplot(x=p[p.species == 'Gentoo'].flipper_length_mm);
sns.boxplot(x=p[p.species == 'Adelie'].flipper_length_mm);
sns.boxplot(x=p[p.species == 'Chinstrap'].flipper_length_mm);
plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);


In [None]:
# One subplot row per species instead of overlaying the curves.
fig, axs = plt.subplots(nrows=3);
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, fill=True, ax=axs[0]);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, fill=True, ax=axs[1]);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, fill=True, ax=axs[2]);
# plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

In [None]:
# Same figure, followed by tight_layout() to adjust the subplot spacing.
fig, axs = plt.subplots(nrows=3);
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, fill=True, ax=axs[0]);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, fill=True, ax=axs[1]);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, fill=True, ax=axs[2]);
plt.tight_layout()
# plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

In [None]:
# Generic version of the per-species figure: one subplot row per distinct
# value of `column_name`.
column_name = 'species'
# Compute the distinct values once, so the subplot count and the loop below
# are guaranteed to agree.
categories = p[column_name].unique()
nrows = len(categories)
# squeeze=False always returns a 2-D array of Axes (even for a single row);
# ravel() flattens it back to the 1-D sequence the loop pairs with categories.
fig, axs = plt.subplots(nrows=nrows, squeeze=False)
axs = axs.ravel()
for ax, c_v in zip(axs, categories):
    pl = sns.kdeplot(p[p[column_name] == c_v].flipper_length_mm,
                     fill=True, ax=ax)
    pl.set_title(c_v)
plt.tight_layout()

In [None]:
g = sns.FacetGrid(p, row='species');
g.map(sns.kdeplot, 'flipper_length_mm', fill=True);

In [None]:
g = sns.FacetGrid(p, col='species');
g.map(sns.kdeplot, 'flipper_length_mm', fill=True);

In [None]:
g = sns.FacetGrid(p, col='island');
g.map(sns.kdeplot, 'flipper_length_mm', fill=True);

In [None]:
g = sns.FacetGrid(p, col='island');
g.map(sns.distplot, 'flipper_length_mm');

In [None]:
g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.distplot, 'flipper_length_mm');

In [None]:
g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.kdeplot, 'flipper_length_mm');

In [None]:
g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.violinplot, 'flipper_length_mm');

### Pair plot

In [None]:
# Joint plot of one pair of columns, passed as vectors.
sns.jointplot(x=p.body_mass_g, y=p.flipper_length_mm);

In [None]:
# Pair plot of the whole penguins table.
sns.pairplot(p);

In [None]:
# Same grid, coloured by sex.
sns.pairplot(p, hue='sex');

In [None]:
# Same grid, coloured by species.
sns.pairplot(p, hue='species');

In [None]:
# Diamonds: pair plot on a random sample of 1,000 rows.
sns.pairplot(d.sample(1000));

In [None]:
# Fresh sample, coloured by cut.
sns.pairplot(d.sample(1000), hue='cut');

In [None]:
# Fresh sample, coloured by cut, drawn with corner=True.
sns.pairplot(d.sample(1000), hue='cut', corner=True);