# 1.1 - Introduction to Seaborn

#### > Pandas

In [None]:
import pandas as pd

# Load the wine data and draw a histogram of the alcohol column through
# pandas' own plotting interface.
df = pd.read_csv("wines.csv")
df["alcohol"].plot(kind="hist")

#### > Matplotlib

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Same histogram, drawn with matplotlib's explicit figure/axes interface.
df = pd.read_csv("wines.csv")
fig, ax = plt.subplots()
ax.hist(x=df["alcohol"])

#### > Seaborn

In [None]:
import seaborn as sns

# Seaborn's distribution plot of the same alcohol column.
sns.distplot(a=df["alcohol"])

#### > Histogram vs. Distplot

In [None]:
# Pandas histogram:
#   - actual frequency of observations
#   - no automatic labels
#   - wide bins
df['alcohol'].plot.hist()
# Render this figure now; otherwise the seaborn call below draws onto the same
# current axes and the two plots end up overlaid instead of side by side.
plt.show()

# Seaborn distplot:
#   - automatic label on x axis
#   - muted color palette
#   - KDE plot
#   - narrow bins
sns.distplot(df['alcohol'])

# 1.2 - Using the distribution plot

In [None]:
# Histogram only: switch the KDE curve off and request 10 bins.
sns.distplot(
    a=df["alcohol"],
    bins=10,
    kde=False,
)

#### > Alternative data distributions

In [None]:
# Drop the histogram bars and add a rug plot instead.
sns.distplot(
    a=df["alcohol"],
    rug=True,
    hist=False,
)

#### > Further Customizations

In [None]:
# Same rug plot as before; `kde_kws` forwards extra options (here: shading)
# to the KDE part of the plot.
sns.distplot(
    a=df["alcohol"],
    rug=True,
    hist=False,
    kde_kws=dict(shade=True),
)

# 1.3 - Regression Plots in Seaborn

In [None]:
# Scatter plot with a fitted regression line: pH against alcohol.
sns.regplot(
    data=df,
    x="alcohol",
    y="pH",
)

#### > lmplot() builds on top of the base regplot()

In [None]:
# regplot - the low-level regression plot
sns.regplot(
    data=df,
    x="alcohol",
    y="quality",
)

In [None]:
# lmplot - the high-level counterpart, same variables as the regplot cell
sns.lmplot(
    data=df,
    x="alcohol",
    y="quality",
)

#### > lmplot faceting

In [None]:
# Organize data by colors (hue): one plot, one color per wine type.
sns.lmplot(
    data=df,
    x="quality",
    y="alcohol",
    hue="type",
)

# Organize data by columns (col): one subplot per wine type.
sns.lmplot(
    data=df,
    x="quality",
    y="alcohol",
    col="type",
)

# 2.1 - Using Seaborn Styles

In [None]:
# NOTE(review): the only load visible in this notebook is wines.csv; `df` here
# presumably refers to a college tuition data set loaded elsewhere - confirm.

# Apply seaborn's default styling, then draw a plain pandas histogram with it.
sns.set()
df["Tuition"].plot(kind="hist")

#### > Theme examples with sns.set_style()

In [None]:
# Draw the same distribution once per built-in seaborn style; plt.show()
# closes out each figure so every style gets its own plot.
for style_name in ("white", "dark", "whitegrid", "darkgrid", "ticks"):
    sns.set_style(style_name)
    sns.distplot(a=df["Tuition"])
    plt.show()

#### > Removing axes with despine()

In [None]:
# Use the 'white' style, plot, then call despine with left=True.
sns.set_style("white")
sns.distplot(a=df["Tuition"])
sns.despine(left=True)

# 2.2 - Colors in Seaborn

#### > Defining a color for a plot

In [None]:
# Enable seaborn's color codes, then plot using the short code 'g'.
sns.set(color_codes=True)
sns.distplot(
    a=df["Tuition"],
    color="g",
)

#### > Palettes

In [None]:
# Plot the distribution once per built-in seaborn palette.
for p in sns.palettes.SEABORN_PALETTES:
    sns.set_palette(p)
    sns.distplot(df['Tuition'])
    # Finish the figure on every iteration; without this all palettes are
    # drawn onto the same axes and cannot be told apart (matches the other
    # loops in this notebook, which also call plt.show()).
    plt.show()

#### > Displaying Palettes

In [None]:
# Show the color swatches of every built-in seaborn palette, one figure each.
for palette_name in sns.palettes.SEABORN_PALETTES:
    sns.set_palette(palette_name)
    active_colors = sns.color_palette()
    sns.palplot(active_colors)
    plt.show()

#### > Defining Custom Palettes

In [None]:
# Display 12-color versions of three named palettes.
for palette_name in ("Paired", "BrBG", "Blues"):
    sns.palplot(sns.color_palette(palette_name, 12))

# 2.3 - Customizing with matplotlib

#### > Matplotlib Axes

In [None]:
# Create the axes explicitly, let seaborn draw on it, then relabel the x axis
# through the matplotlib Axes object.
fig, ax = plt.subplots()
sns.distplot(a=df["Tuition"], ax=ax)
ax.set(
    xlabel="Tuition 2013-14",
)

#### > Further Customizations

In [None]:
# Same plot, with axis labels, x limits and a title set in one ax.set() call.
fig, ax = plt.subplots()
sns.distplot(a=df["Tuition"], ax=ax)
ax.set(
    xlabel="Tuition 2013-14",
    ylabel="Distribution",
    xlim=(0, 50000),
    title="2013-14 Tuition and Fees Distribution",
)

#### > Combining Plots

In [None]:
# Two side-by-side axes that share a y axis.
fig, (ax0, ax1) = plt.subplots(
    nrows=1,
    ncols=2,
    sharey=True,
    figsize=(7, 4),
)

# Left: all rows. Right: only the rows where State is "MN".
sns.distplot(a=df["Tuition"], ax=ax0)
sns.distplot(a=df.query('State =="MN"')["Tuition"], ax=ax1)

# Decorate the right-hand plot: label, x range, and a dashed budget marker
# that shows up in the legend.
ax1.set(
    xlabel="Tuition (MN)",
    xlim=(0, 70000),
)
ax1.axvline(
    x=20000,
    label="My Budget",
    linestyle="--",
)
ax1.legend()

# 3.1 - Categorical Plot Types

#### > Plots of each observation - stripplot

In [None]:
# NOTE(review): no load for this data set is visible in the notebook; `df`
# presumably holds the DRG / covered-charges data at this point - confirm.

# One point per observation, with jitter enabled.
sns.stripplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
    jitter=True,
)

#### > Plots of each observation - swarmplot

In [None]:
# Swarm plot of the same two columns.
sns.swarmplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
)

#### > Abstract representations - boxplot

In [None]:
# Box plot of the same two columns.
sns.boxplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
)

#### > Abstract representation - violinplot

In [None]:
# Violin plot of the same two columns.
sns.violinplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
)

#### > Abstract representation - lvplot

In [None]:
# Letter-value plot. seaborn 0.9 renamed `lvplot` to `boxenplot` and later
# releases removed the old name, so call the current function (same arguments).
sns.boxenplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
)

#### > Statistical estimates - barplot

In [None]:
# Bar plot of the same two columns, split by Region through `hue`.
sns.barplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
    hue="Region",
)

#### > Statistical estimates - pointplot

In [None]:
# Point plot of the same two columns, split by Region through `hue`.
sns.pointplot(
    data=df,
    x="Average Covered Charges",
    y="DRG Definition",
    hue="Region",
)

#### > Statistical estimates - countplot

In [None]:
# Count the rows per DRG_Code, split by Region.
sns.countplot(
    data=df,
    y="DRG_Code",
    hue="Region",
)

# 3.2 - Regression Plots

#### > Plotting with regplot()

In [None]:
# NOTE(review): no load for this data set is visible in the notebook; `df`
# presumably holds the bike-rental data at this point - confirm.

# Regression of total rentals on temperature, drawn with '+' markers.
sns.regplot(
    data=df,
    x="temp",
    y="total_rentals",
    marker="+",
)

#### > Evaluating regression with residplot()

In [None]:
# Residual plot for the same regression (total_rentals on temp).
sns.residplot(
    data=df,
    x="temp",
    y="total_rentals",
)

#### > Polynomial regression

In [None]:
# Same regression with a second-order polynomial fit.
sns.regplot(
    data=df,
    x="temp",
    y="total_rentals",
    order=2,
)

#### > Residplot with polynomial regression

In [None]:
# Residuals of the second-order polynomial fit.
sns.residplot(
    data=df,
    x="temp",
    y="total_rentals",
    order=2,
)

#### > Categorical values

In [None]:
# Month on the x axis; x_jitter spreads the points horizontally.
sns.regplot(
    data=df,
    x="mnth",
    y="total_rentals",
    x_jitter=0.1,
    order=2,
)

#### > Estimators

In [None]:
# numpy is not imported anywhere earlier in the notebook, so bring it in here;
# without it `np.mean` raises NameError on a fresh kernel.
import numpy as np

# Summarize the observations at each month with their mean (x_estimator)
# and fit a second-order polynomial.
sns.regplot(
    data=df,
    x="mnth",
    y="total_rentals",
    x_estimator=np.mean,
    order=2,
)

#### > Binning the data

In [None]:
# Bin the temperature values into 4 groups via x_bins.
sns.regplot(
    data=df,
    x="temp",
    y="total_rentals",
    x_bins=4,
)

# 3.3 - Matrix Plots

#### > Getting data in the right format

In [None]:
# Month (rows) by weekday (columns) table of mean total rentals,
# rounded to whole numbers for display.
pd.crosstab(
    index=df["mnth"],
    columns=df["weekday"],
    values=df["total_rentals"],
    aggfunc="mean",
).round(0)

#### > Build a heatmap

In [None]:
# Heatmap of the month-by-weekday mean rentals table (unrounded).
sns.heatmap(
    pd.crosstab(
        index=df["mnth"],
        columns=df["weekday"],
        values=df["total_rentals"],
        aggfunc="mean",
    )
)

#### > Customize a heatmap

In [None]:
# Build the month x weekday table of mean rentals here so this cell runs on a
# fresh kernel without relying on a previously defined `df_crosstab`.
df_crosstab = pd.crosstab(
    df["mnth"],
    df["weekday"],
    values=df["total_rentals"],
    aggfunc="mean",
)

# The cell values are float means, so annotate with ".0f"; the integer format
# code "d" raises ValueError when applied to floats.
sns.heatmap(
    df_crosstab,
    annot=True,
    fmt=".0f",
    cmap="YlGnBu",
    cbar=False,
    linewidths=.5,
)

#### > Centering a heatmap

In [None]:
# Build the month x weekday table of mean rentals here so this cell runs on a
# fresh kernel without relying on a previously defined `df_crosstab`.
df_crosstab = pd.crosstab(
    df["mnth"],
    df["weekday"],
    values=df["total_rentals"],
    aggfunc="mean",
)

# Center the colormap on the value at row label 9 (mnth) / column label 6
# (weekday). The cell values are float means, so annotate with ".0f"; the
# integer format code "d" raises ValueError when applied to floats.
sns.heatmap(
    df_crosstab,
    annot=True,
    fmt=".0f",
    cmap="YlGnBu",
    cbar=True,
    center=df_crosstab.loc[9, 6],
)

#### > Plotting a correlation matrix

In [None]:
# Correlate only the numeric/boolean columns. pandas >= 2.0 no longer drops
# non-numeric columns in DataFrame.corr() by default and raises instead, so
# select them explicitly (this works on older pandas versions as well).
sns.heatmap(df.select_dtypes(include=["number", "bool"]).corr())

# 4.1 - Using FacetGrid, factorplot and lmplot

#### > FacetGrid Categorical Example

In [None]:
# One facet per HIGHDEG value, then map a box plot of Tuition onto each facet.
g = sns.FacetGrid(data=df, col="HIGHDEG")
g.map(
    sns.boxplot,
    "Tuition",
    order=["1", "2", "3", "4"],
)

#### > factorplot()

In [None]:
# Faceted box plot in a single call. seaborn 0.9 renamed `factorplot` to
# `catplot` and later releases removed the old name; `kind` is passed
# explicitly, so the resulting plot type is unchanged.
sns.catplot(
    data=df,
    x="Tuition",
    col="HIGHDEG",
    kind="box",
)

#### > FacetGrid for regression

In [None]:
# One facet per HIGHDEG value, with a matplotlib scatter of
# SAT_AVG_ALL against Tuition mapped onto each.
g = sns.FacetGrid(data=df, col="HIGHDEG")
g.map(
    plt.scatter,
    "Tuition",
    "SAT_AVG_ALL",
)

#### > lmplot

In [None]:
# lmplot faceted by HIGHDEG with the regression fit switched off.
sns.lmplot(
    data=df,
    x="Tuition",
    y="SAT_AVG_ALL",
    col="HIGHDEG",
    fit_reg=False,
)

#### > lmplot with regression

In [None]:
# lmplot with regression lines, faceted by HIGHDEG (columns) and REGION (rows).
sns.lmplot(
    data=df,
    x="Tuition",
    y="SAT_AVG_ALL",
    col="HIGHDEG",
    row="REGION",
)

# 4.2 - Using PairGrid and pairplot

#### > Creating a PairGrid

In [None]:
# NOTE(review): no load for this data set is visible in the notebook; `df`
# presumably holds the rent / income data at this point - confirm.

# Grid over the two variables, with a scatter plot mapped onto every cell.
g = sns.PairGrid(
    data=df,
    vars=["Fair_Mrkt_Rent", "Median_Income"],
)
g = g.map(plt.scatter)

#### > Customizing the PairGrid diagonals

In [None]:
# Same grid, but histograms on the diagonal and scatter plots elsewhere.
g = sns.PairGrid(
    data=df,
    vars=["Fair_Mrkt_Rent", "Median_Income"],
)
g = g.map_diag(plt.hist)
g = g.map_offdiag(plt.scatter)

#### > Pairplot

In [None]:
# pairplot shortcut: regression plots off the diagonal, histograms on it.
sns.pairplot(
    data=df,
    vars=["Fair_Mrkt_Rent", "Median_Income"],
    kind="reg",
    diag_kind="hist",
)

#### > Customizing a pairplot

In [None]:
# Restrict to rows with BEDRMS < 3, color by BEDRMS with the 'husl' palette,
# and pass alpha=0.5 through to the plotting calls.
sns.pairplot(
    data=df.query('BEDRMS < 3'),
    vars=["Fair_Mrkt_Rent", "Median_Income", "UTILITY"],
    hue="BEDRMS",
    palette="husl",
    plot_kws=dict(alpha=0.5),
)

# 4.3 - Using JointGrid and jointplot

#### > Basic JointGrid

In [None]:
# JointGrid with a regression plot in the joint area and distplots on the margins.
g = sns.JointGrid(
    data=df,
    x="Tuition",
    y="ADM_RATE_ALL",
)
g.plot(sns.regplot, sns.distplot)

#### > Advanced JointGrid

In [None]:
# scipy.stats is not imported anywhere earlier in the notebook, so bring it in
# here; without it `stats.pearsonr` raises NameError on a fresh kernel.
from scipy import stats

# KDE in the joint area, shaded KDEs on the margins, then annotate the grid
# with the Pearson correlation.
g = sns.JointGrid(data=df, x="Tuition", y="ADM_RATE_ALL")
g = g.plot_joint(sns.kdeplot)
g = g.plot_marginals(sns.kdeplot, shade=True)
g = g.annotate(stats.pearsonr)

#### > jointplot()

In [None]:
# jointplot shortcut using the 'hex' kind.
sns.jointplot(
    data=df,
    x="Tuition",
    y="ADM_RATE_ALL",
    kind="hex",
)

#### > Customizing a jointplot

In [None]:
# Scatter jointplot on a filtered subset (UG < 2500, public ownership) with
# customized marginals; a KDE is then overlaid on the joint axes.
g = sns.jointplot(
    data=df.query('UG < 2500 & Ownership == "Public"'),
    x="Tuition",
    y="ADM_RATE_ALL",
    kind="scatter",
    xlim=(0, 25000),
    marginal_kws={"bins": 15, "rug": True},
).plot_joint(sns.kdeplot)

# 4.4 - Selecting Seaborn Plots