In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd

# Switch seaborn to its 'whitegrid' style before any figure is created.
sns.set_style('whitegrid')

## Let's set a nice palette
See seaborn's tutorial on [choosing color palettes](https://seaborn.pydata.org/tutorial/color_palettes.html) for the available options.

In [None]:
# Preview the palette that is active right now ...
sns.palplot(sns.color_palette())

# ... then preview a few named alternatives for comparison
for palette_name in ('RdBu_r', 'husl', 'Spectral'):
    sns.palplot(sns.color_palette(palette_name))

# husl is the favorite, so make it the default palette
sns.set_palette('husl')

In [None]:
# Relative path to the CSV that every later cell draws its data from
in_file = '../data/inhibition_rois.csv'
df = pd.read_csv(in_file)
# Show the first few rows as a quick check that the file loaded
df.head()

In [None]:
# Split the full DataFrame into one subset per modality. Both subsets keep
# the participant ID and the two grouping columns.
id_cols = ['subject_id', 'HIV', 'CB']

# Columns that go into the behavioral subset
behav_cols = ['n_correct_nogo', 'n_incorrect_nogo',
              'n_nogo_aware', 'n_nogo_unaware']

# Columns that go into the betas subset: three regions, each with a
# cor-err, cor, and err variant
beta_cols = ['mPFC_cor-err', 'PCC_cor-err', 'L_precuneus_cor-err',
             'mPFC_cor', 'PCC_cor', 'L_precuneus_cor',
             'mPFC_err', 'PCC_err', 'L_precuneus_err']

behav_df = df[id_cols + behav_cols]
betas_df = df[id_cols + beta_cols]

## Data wrangling
Seaborn requires long-form DataFrames, even though most of us typically store our data in wide-form.

We must wrangle our data into the right format! Get familiar with [DataFrame.melt()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.melt.html#pandas.DataFrame.melt).

Definitions:
- **Wide-form**: One row for every participant. Each variable/observation gets its own column.
- **Long-form**: One row for every observation. Participants with multiple observations/variables get multiple rows.

In [None]:
# Reshape the wide behavioral table to long form: the four count columns are
# stacked into a single 'count' column, and 'behavioral metric' records which
# original column each value came from. Rows are then ordered by participant
# and metric.
behav_df2 = (
    behav_df
    .melt(
        id_vars=['subject_id', 'HIV', 'CB'],
        value_vars=['n_correct_nogo', 'n_incorrect_nogo', 'n_nogo_aware', 'n_nogo_unaware'],
        var_name='behavioral metric',
        value_name='count')
    .sort_values(by=['subject_id', 'behavioral metric'])
)

# Translate the numeric group codes into readable labels. A code that is not
# a key of the mapping becomes NaN, which in turn makes 'group' NaN.
behav_df2['HIV-status'] = behav_df2['HIV'].map({1: 'HIV negative', 2: 'HIV positive'})
behav_df2['CB-status'] = behav_df2['CB'].map({1: 'cannabis non-user', 2: 'cannabis user'})

# Combined label for each row; the categorical plots below use it as the hue
behav_df2['group'] = behav_df2['HIV-status'] + ', ' + behav_df2['CB-status']

## A basic single-variable distribution plot
Luckily, [seaborn.distplot](https://seaborn.pydata.org/generated/seaborn.distplot.html) can take a numpy array or a pandas Series (one column from a DataFrame), so we don't need long-form data.

In [None]:
# Histogram with a KDE overlay for one column; distplot takes the Series as-is.
# NOTE(review): sns.distplot is deprecated from seaborn 0.11 onwards
# (histplot/displot replace it). It is kept because this notebook uses the
# older seaborn API throughout -- confirm the pinned seaborn version.
fig, ax = plt.subplots(figsize=(12, 3))
sns.distplot(df['n_incorrect_nogo'], ax=ax)
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

In [None]:
# We can do pretty much anything in a loop too: one figure per column
for col in ['mPFC_cor-err', 'PCC_cor-err', 'L_precuneus_cor-err']:
    fig, ax = plt.subplots(figsize=(12, 3))
    sns.distplot(df[col], ax=ax)
    # plt.show() works with every backend; Figure.show() is meant for GUI
    # backends and can emit a "non-GUI backend" warning under %matplotlib inline.
    plt.show()

## Plotting categorical data
Let's start with an easy one: a bar plot.

In [None]:
# One cluster of bars per behavioral metric, one bar per group within a cluster
fig, ax = plt.subplots(figsize=(16, 6))
sns.barplot(x='behavioral metric', y='count', hue='group', data=behav_df2, ax=ax)
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

In [None]:
# We can add in a strip plot (the individual points) by simply plotting another plot on the same axis object
fig, ax = plt.subplots(figsize=(16, 6))
sns.barplot(x='behavioral metric', y='count', hue='group', data=behav_df2, ax=ax)
# dodge=True separates the points by hue level so they line up with the bars
sns.stripplot(x='behavioral metric', y='count', hue='group', data=behav_df2, ax=ax,
              jitter=True, dodge=True, linewidth=1, edgecolor='white')
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

## [#barbarplots](https://barbarplots.github.io)

Bar plots (or even dynamite plots) assume normally-distributed data, which we aren't able to evaluate very easily from the figure itself.

Let's plot our distributions, at minimum, using a boxplot, boxenplot, violinplot, swarmplot, etc.

In [None]:
fig, ax = plt.subplots(figsize=(16, 6))
sns.boxenplot(x='behavioral metric', y='count', hue='group', data=behav_df2, ax=ax)
sns.stripplot(x='behavioral metric', y='count', hue='group', data=behav_df2, ax=ax,
              dodge=True, linewidth=1, edgecolor='white', alpha=0.4)

# Remove the dots from the legend.
# Both layers add one legend entry per group, so every label appears twice.
# Keep only the first handle found for each label instead of slicing off a
# hard-coded number of entries, so this keeps working if the number of groups
# changes. (Like the original slice, this assumes the boxenplot entries are
# listed before the stripplot ones.)
handles, labels = ax.get_legend_handles_labels()
first_handle_by_label = {}
for handle, label in zip(handles, labels):
    first_handle_by_label.setdefault(label, handle)
ax.legend(handles=list(first_handle_by_label.values()),
          labels=list(first_handle_by_label.keys()))

# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

## Making scatterplots
Use [seaborn.regplot](http://seaborn.pydata.org/generated/seaborn.regplot.html).

There are a lot of options if you don't want a linear fit, including `logx`, `logistic`, `lowess`, and `order`.

In [None]:
# Regression plots take wide-form data.
fig, ax = plt.subplots(figsize=(10, 8))
# Pass the axes explicitly, like the other plots in this notebook, rather
# than relying on pyplot's implicit "current axes".
sns.regplot(data=df, x='n_incorrect_nogo', y='PCC_err', ax=ax)
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

I want to see the distribution though.

That's what [jointplots](https://seaborn.pydata.org/generated/seaborn.jointplot.html#seaborn.jointplot) are good for.

In [None]:
# jointplot builds its own figure: a central regression plot (kind='reg')
# with the marginal distribution of each variable along the top and right.
jgrid = sns.jointplot(x='n_incorrect_nogo', y='PCC_err', data=df, kind='reg',
                      height=6)
# Keep a handle on the Figure that the grid created
fig = jgrid.fig
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

Putting multiple groups in the same jointplot is more complicated.

In [None]:
# We need a separate dataframe for each group (HIV is coded 1 or 2)
hiv_pos_df = df.loc[df['HIV'] == 2]
hiv_neg_df = df.loc[df['HIV'] == 1]

# Then we make the jointplot for one of the groups only.
# The axis limits are deliberately far wider than the final view
# (presumably so the fits and KDEs are drawn across the whole visible range
# before we zoom in below -- TODO confirm).
jgrid = sns.jointplot(x='n_incorrect_nogo', y='n_nogo_aware',
                      data=hiv_pos_df, kind='reg',
                      height=6, color='purple', label='HIV positive',
                      marginal_kws=dict(hist=False, kde_kws={'shade': True}),  # you can control axes with dicts
                      xlim=(-100, 500), ylim=(-100, 500))  # note the huge limits

# Grab the figure and its three axes so the second group can be layered on
fig = jgrid.fig
ax_joint = jgrid.ax_joint
ax_x = jgrid.ax_marg_x
ax_y = jgrid.ax_marg_y

# Add the HIV-negative central regression plot to the joint axis
sns.regplot(x='n_incorrect_nogo', y='n_nogo_aware', data=hiv_neg_df, ax=ax_joint,
            color='green', label='HIV negative')

# And the HIV-negative KDEs (hist=False) to the corresponding marginal axes;
# the right-hand one has to be drawn vertically
sns.distplot(hiv_neg_df['n_incorrect_nogo'], color='green',
             hist=False, ax=ax_x,
             kde_kws={'shade': True})
sns.distplot(hiv_neg_df['n_nogo_aware'], color='green',
             hist=False, ax=ax_y,
             kde_kws={'shade': True},
             vertical=True)

# Clean up the axes: drop the labels on the marginal axes and zoom the joint
# axis in from the wide limits used above
ax_y.set_ylabel(None)
ax_x.set_xlabel(None)
ax_joint.set_xlim(-50, 250)
ax_joint.set_ylim(-50, 250)
ax_joint.tick_params(axis='both', which='major', labelsize=14)

# Add a legend (built from the label= given to each regression layer)
ax_joint.legend(fontsize=16)

ax_joint.set_xlabel('# Incorrect No-Go Trials', fontsize=18)
ax_joint.set_ylabel('# Error-Aware No-Go Trials', fontsize=18)
fig.set_size_inches((8, 6))

fig.tight_layout()
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

What about visualizing all pairs of variables in one figure, just to get it out of the way?

In [None]:
# Let's start by making a wide-form DataFrame with just the variables we want to compare.
# NOTE: this rebinds betas_df, replacing the wider selection made earlier in
# the notebook with just these three columns.
betas_df = df[['mPFC_cor-err', 'PCC_cor-err', 'L_precuneus_cor-err']]

# PairGrid lays out one subplot for every pair of columns
g = sns.PairGrid(betas_df)
fig, axes = g.fig, g.axes

# Distributions on the diagonal, regression plots everywhere else
g = g.map_diag(sns.distplot)
g = g.map_upper(sns.regplot)
g = g.map_lower(sns.regplot)
fig.set_size_inches((10, 10))
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

In [None]:
# Maybe we don't want duplicate regplots and we don't like KDE plots,
# so we might as well get rid of the upper triangle.
# From this: https://stackoverflow.com/a/55834340/2589328

# We need a tiny function
def hide_current_axis(*args, **kwds):
    """Make pyplot's current axes invisible.

    Written to be handed to a ``PairGrid.map_*`` method: whatever positional
    and keyword arguments it is called with are accepted and ignored.
    """
    current_ax = plt.gca()
    current_ax.set_visible(False)

g = sns.PairGrid(betas_df)
fig, axes = g.fig, g.axes

# Distributions on the diagonal, regression plots below it, and nothing
# above it: the callback hides each upper-triangle subplot
g = g.map_diag(sns.distplot)
g = g.map_lower(sns.regplot)
g = g.map_upper(hide_current_axis)

fig.set_size_inches((10, 10))
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()

### The package [ptitprince](https://github.com/pog87/PtitPrince) implements raincloud plots
ptitprince wraps around seaborn. The seaborn developers [have said they don't plan to implement raincloud plots themselves](https://github.com/mwaskom/seaborn/issues/1572#issuecomment-424452104).

In [None]:
import ptitprince as pt

In [None]:
# let's just look at one behav metric across groups
nogo_inc_df = behav_df2.loc[behav_df2['behavioral metric'] == 'n_incorrect_nogo']

# and make the raincloud plot, using the palette currently set in seaborn
fig, ax = plt.subplots(figsize=(16, 6))
pt.RainCloud(data=nogo_inc_df, x='group', y='count',
             ax=ax, orient='h', palette=sns.color_palette())
# With orient='h' the counts run along the x-axis, so that is the axis that
# gets the label; the group names on the y-axis speak for themselves
ax.set_ylabel('')
ax.set_xlabel('Count')
# plt.show() works with every backend; Figure.show() is meant for GUI
# backends and can emit a "non-GUI backend" warning under %matplotlib inline.
plt.show()