# Python test notebook for custom conda environments

Tested with *dev2018*, *dev2021*, and *dev2023a*.

If rpy2 calls to R fail (e.g., with the warning "Unable to initialize JIT"), set the `R_HOME` path in your local *kernel.json* file as explained in *condaenv.md*. Alternatively, you can use the hot-fix below.

## Show path of the Python executable

In [None]:
# Print the absolute path of the interpreter running this kernel, to confirm
# that the notebook is using the intended conda environment.
# `sys` stays imported for the following cell, which prints sys.path.
import sys
print(sys.executable)

## Show import paths

In [None]:
# Module search path of this kernel (relies on `sys` imported in the previous cell).
print(sys.path)

## Test Python imports

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import statsmodels as sm
from statsmodels.sandbox.distributions.extras import pdf_moments
import sklearn as sk
import nltk as nltk
from IPython.display import display
from IPython.display import display_html

In [None]:
import matplotlib

# Report the installed version of every package this notebook exercises,
# one line per package. scipy and matplotlib are listed as well because
# later cells depend on them.
package_versions = [
    ('numpy', np.__version__),
    ('pandas', pd.__version__),
    ('seaborn', sns.__version__),
    ('statsmodels', sm.__version__),
    ('nltk', nltk.__version__),
    ('scikit-learn', sk.__version__),
    ('scipy', sp.__version__),
    ('matplotlib', matplotlib.__version__),
]
for package_name, package_version in package_versions:
    print('{} version: {}.'.format(package_name, package_version))

## Test rpy2 import

Should this cell cause the notebook to crash, see *condaenv.md* section *Using conda environments* for troubleshooting tips!

In [None]:
# Import rpy2 and report its version.
import rpy2
print('rpy2 version: {}.'.format(rpy2.__version__))

# Hot-fix for setting R_HOME: uncomment the line below and replace <env_name>.
# Environment variable values are not tilde-expanded, so '~' is expanded
# explicitly. Uses `os` from the "Test Python imports" cell.
# NOTE(review): presumably this must run before the extension below starts R — confirm.
#os.environ['R_HOME'] = os.path.expanduser('~/Anaconda3/envs/<env_name>/lib/R')

# R magic using rpy2 (enables the %%R cells used below)
%load_ext rpy2.ipython

## Test rpy2 R magic

### R paths

In [None]:
%%R -o out1,out2
# R cell: out1 and out2 are exported to Python via the -o option.
# out1: library search paths of the R session.
out1 = .libPaths()
# out2: path of the "R" entry inside R's "bin" directory.
out2 = file.path(R.home("bin"), "R")

In [None]:
# Show the two values exported from the preceding %%R cell:
# R's library paths (out1) and the path built from R.home("bin") (out2).
print(out1)
print(out2)

### Export data frame from R to Python

In [None]:
%%R -o gg,df
# R cell: gg and df are exported to Python via the -o option.
# gg: path of the "R" entry inside R's "bin" directory.
gg = file.path(R.home("bin"), "R")
# df: 5-row data frame with x ascending 1..5, y descending 5..1, z constant 5.
df = data.frame(x=1:5,y=5:1,z=rep(5,5))

In [None]:
# Render the objects exported from the preceding %%R cell, separated by a
# 30-character rule. `display` comes from the "Test Python imports" cell.
display(gg)
print('-'*30)
display(df)

## Test matplotlib

In [None]:
# Smoke test: one 12x6 inch figure with a single axes, plotting 15 random
# draws from an exponential distribution (scale 0.5) against 0..14.
fig, ax = plt.subplots(figsize=(12, 6))
x_positions = np.arange(15)
y_samples = np.random.exponential(scale=0.5, size=15)
_ = ax.plot(x_positions, y_samples)

## Test seaborn

### Facet grid test with tips data

In [None]:
# Example dataset obtained through seaborn's dataset loader.
tips = sns.load_dataset("tips")

# Histogram bin edges 0, 5, ..., 60; only used by the disabled example below.
bins = np.arange(0, 65, 5)

# Disabled alternative: histogram grid by time (columns) and smoker (rows).
# Note: seaborn 0.9 renamed FacetGrid's `size` argument to `height`.
#g = sns.FacetGrid(tips, col = "time", row = "smoker", size = 3, aspect = 1)
#g = (g.map(plt.hist, "total_bill", color = 'r', bins = bins)
#        .set_titles("{col_name}, {row_name}")
#        .set_axis_labels("Total bill (US Dollars)", "No of occurrences")
#    )

# Scatter of tip vs. total bill: one panel per "time" value, coloured by smoker status.
pal = {"Yes": "seagreen", "No": "gray"}
g2 = sns.FacetGrid(tips, col="time", hue="smoker", palette=pal)
g2 = g2.map(plt.scatter, "total_bill", "tip", edgecolor="w")
g2 = g2.add_legend()

### kde plot

In [None]:
import inspect

# Two synthetic samples of 1000 draws each: normal with mean 0 / std 1 and
# normal with mean 5 / std 2. The draws are unseeded, so the curve differs
# slightly between runs.
df = pd.DataFrame({
    'first': np.random.normal(0, 1, 1000),
    'second': np.random.normal(5, 2, 1000),
})

# Pool both samples into one array to obtain a bimodal distribution.
v3 = np.concatenate((df['first'], df['second']))
fig = plt.figure()
ax = fig.add_subplot(111)

# seaborn 0.11 introduced `fill` and deprecated `shade`; older releases only
# know `shade`. Choose the keyword the installed version accepts so the cell
# runs without errors or deprecation warnings in every environment.
fill_kwarg = 'fill' if 'fill' in inspect.signature(sns.kdeplot).parameters else 'shade'
_ = sns.kdeplot(v3, ax=ax, **{fill_kwarg: True})

## Test statsmodels and scipy

In [None]:
# Import the submodule explicitly: `sp.stats` on a bare `import scipy as sp`
# only works if some other package has already imported scipy.stats.
from scipy import stats

# Choices
p = 0.12
a = 1.5

# Other variables
bins=300
sigma = 1
# b is chosen so that p*a + (1-p)*b == 0
b = -a * (p / (1-p))
# Evaluation grid of `bins` points on [-20, 20]; dx is the grid spacing (not used below)
x, dx = np.linspace(-20, 20, bins, retstep=True)
n_draws = 8000  # not used in this cell

# Parameters
# Algebraically equal to 3 * (1 + a**2 * p / (1 - p)); presumably the kurtosis
# of a p / (1-p) mixture of the two scaled normals below — TODO confirm.
kurtosis = 3*((1-np.power(a,2))*p-1) / (p-1)
scale1 = sigma * np.sqrt(1+a)
scale2 = sigma * np.sqrt(1+b)
print("scale1: {}".format(scale1))
print("scale2: {}".format(scale2))
print("kurtosis: {}".format(kurtosis))

# pdfs
sng = stats.norm()                            # standard normal
scale1g = stats.norm(loc=0, scale=scale1)     # wider component
scale2g = stats.norm(loc=0, scale=scale2)     # narrower component
# NOTE(review): confirm the moment convention pdf_moments expects
# (variance vs. std, raw vs. excess kurtosis); with sigma == 1 the second
# entry is the same either way.
mixedg = pdf_moments([0, sigma, 0, kurtosis])

# Into frame: one column per density, indexed by the evaluation grid
df = pd.DataFrame()
df['pdf_sng'] = sng.pdf(x)
df['pdf_scale1g'] = scale1g.pdf(x)
df['pdf_scale2g'] = scale2g.pdf(x)
df['pdf_mixedg'] = mixedg(x)
df.index = x

# Sanity check: reports whether the moment-based density is positive on the whole grid
print("Min value of {} is above zero? {}".format('pdf_mixedg',np.min(df['pdf_mixedg']) > 0))

# Plot: left panel compares the standard normal with the moment-based pdf
# on [-5, 5]; right panel shows all four densities over the full grid.
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(121)
_ = df[['pdf_sng','pdf_mixedg']].plot(ax=ax)
ax.set_xlim(-5,5)
ax = fig.add_subplot(122)
_ = df.plot(ax=ax)

## Test scikit-learn

In [None]:
# Import load_iris explicitly: `sk.datasets` on a bare `import sklearn as sk`
# only resolves if the submodule was already imported elsewhere.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Seed for the train/test split so the split is reproducible
random_state = 1337

# Load iris dataset into data frame (feature columns plus a 'target' column)
iris = load_iris()
print("Variable 'iris' is of type " + str(type(iris)))
columns = list(iris.feature_names) + ['target']
iris_df = pd.DataFrame(
    np.concatenate((iris.data, np.array([iris.target]).T), axis=1),
    columns=columns,
)

# train and test data: y is the target column, X is everything else
y = iris_df['target']
X = iris_df.drop(columns=['target'])
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)

# z-score scaler, fitted on the training data only and then applied to both
# splits, so no statistics from the test split enter the scaling
standardscaler = StandardScaler()
standardscaler.fit(X_train)

# Scaled train and test exogenous variable data
X_train_stand = standardscaler.transform(X_train)
X_test_stand = standardscaler.transform(X_test)

# Fit PCA on the scaled training data and project it onto 2 components
model_pca = PCA(n_components=2).fit(X_train_stand)
X_train_pca = model_pca.transform(X_train_stand)

# Reduce dimension of test data with the PCA fitted on the training data
X_test_pca = model_pca.transform(X_test_stand)

# Show the first two rows of the scaled and of the PCA-reduced training data
display(pd.DataFrame(X_train_stand).head(2))
display(pd.DataFrame(X_train_pca).head(2))