# Inherited packages
    'altair=4.1.*' \
    'beautifulsoup4=4.8.*' \
    'conda-forge::blas=*=openblas' \
    'bokeh=1.4.*' \
    'cloudpickle=1.3.*' \
    'cython=0.29.*' \
    'dask=2.11.*' \
    'dill=0.3.*' \
    'h5py=2.10.*' \
    'hdf5=1.10.*' \
    'ipywidgets=7.5.*' \
    'ipympl=0.5.*' \
    'matplotlib-base=3.1.*' \
    'numba=0.48.*' \
    'numexpr=2.7.*' \
    'pandas=1.0.*' \
    'patsy=0.5.*' \
    'protobuf=3.11.*' \
    'scikit-image=0.16.*' \
    'scikit-learn=0.22.*' \
    'scipy=1.4.*' \
    'seaborn=0.10.*' \
    'sqlalchemy=1.3.*' \
    'statsmodels=0.11.*' \
    'sympy=1.5.*' \
    'widgetsnbextension=3.5.*' \
    'xlrd' \

In [None]:
import os

# Directory that the cells below join their output file names onto
# (HTML, PNG and HDF5 files). Nothing in the visible cells creates it,
# so it presumably has to exist and be writable beforehand -- confirm.
TESTDIR = '/usr/share/datahub/tests/datahub-base-notebook'

In [None]:
# these libraries need a UI test; this cell only checks that they import
import ipywidgets
import ipympl
import widgetsnbextension
import notebook
import jupyterhub
import jupyterlab
import nbresuse
import nbgitpuller
import nbgrader

In [None]:
# beautifulsoup4, adapted from https://www.crummy.com/software/BeautifulSoup/bs4/doc/
from bs4 import BeautifulSoup

# Sample document used as parser input.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

# Parse the document with the 'html.parser' backend.
soup = BeautifulSoup(markup=html_doc, features='html.parser')

# Last expression of the cell, so the notebook displays the <title> tag.
soup.title

In [None]:
from bokeh.plotting import figure, output_file, show

# prepare some data
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# output to static HTML file
html_path = os.path.join(TESTDIR, "lines.html")
output_file(html_path)

# create a new plot with a title and axis labels
p = figure(title="simple line example", x_axis_label='x', y_axis_label='y')

# add a line renderer with legend and line thickness
p.line(x, y, legend_label="Temp.", line_width=2)

# show the results (with output_file configured this also writes the HTML file)
show(p)

# like the other file-writing cells, verify the output really landed on disk
assert os.path.isfile(html_path)

In [None]:
# cloudpickle, referenced from https://github.com/cloudpipe/cloudpickle
import pickle

import cloudpickle

# Serialise a lambda with cloudpickle ...
squared = lambda x: x ** 2
pickled_lambda = cloudpickle.dumps(squared)

# ... and restore it with the plain pickle loader.
new_squared = pickle.loads(pickled_lambda)
assert new_squared(2) == 4

In [None]:
# dask, referenced from https://examples.dask.org/array.html
from dask.distributed import Client, progress
# Start a client with processes=False: one worker, four threads,
# and a 2GB memory limit.
client = Client(processes=False, threads_per_worker=4,
                n_workers=1, memory_limit='2GB')

import dask.array as da
# 10000x10000 random array split into 1000x1000 chunks.
# NOTE(review): nothing in this cell calls .compute(), so presumably only
# the task graph is built and the client is never exercised -- confirm.
x = da.random.random((10000, 10000), chunks=(1000, 1000))

In [None]:
import dill
import pandas as pd

# Compare (major, minor) as integers. Slicing the version string
# (``pd.__version__[:3]``) and converting to float misreads releases such
# as "1.10.0" as 1.1.
# NOTE(review): the package list at the top of this notebook pins
# pandas=1.0.*, which does not satisfy this check -- confirm which is current.
pd_major, pd_minor = (int(part) for part in pd.__version__.split('.')[:2])
assert (pd_major, pd_minor) >= (1, 1)

names = ["John", "Mary", "Mary", "Suzanne", "John", "Suzanne"]
scores = [80, 90, 90, 92, 95, 100]

records = pd.DataFrame({"name": names, "score": scores})
means = records.groupby('name').mean()

# Round-trip the frame through dill. dill is called under its own name so the
# standard-library ``pickle`` module is not shadowed by an alias.
with open('name_model.pkl', 'wb') as file:
    dill.dump(means, file)

with open('name_model.pkl', 'rb') as file:
    B = dill.load(file)

# The object restored from disk must match what was written.
pd.testing.assert_frame_equal(B, means)


def name_score_function(record):
    """Return the mean score for ``record`` from the frame restored by dill.

    Returns ``None`` when ``record`` is not one of the known ``names``.
    """
    if record not in names:
        return None
    return B.loc[record, 'score']


assert name_score_function("John") == 87.5
assert name_score_function("nobody") is None

In [None]:
# matplotlib, referenced from https://matplotlib.org/tutorials/introductory/pyplot.html
import matplotlib
import matplotlib.pyplot as plt
plt.plot([1, 2, 3, 4])
plt.ylabel('some numbers')

# Save before showing: once plt.show() has displayed the figure, pyplot may
# start a new, empty one, and savefig() would then write a blank image.
savepath = os.path.join(TESTDIR, 'testplot.png')
plt.savefig(savepath)
plt.show()

assert os.path.isfile(savepath)

In [None]:
# skimage
import os

# Imported here as well so the cell does not depend on another cell having run.
import matplotlib
import matplotlib.pyplot as plt
from skimage import data


IMG = os.path.join(TESTDIR, 'testfig.png')

matplotlib.rcParams['font.size'] = 18
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
ax = axes.ravel()

# Show the first two images of the stereo sample side by side.
images = data.stereo_motorcycle()
ax[0].imshow(images[0])
ax[1].imshow(images[1])

fig.tight_layout()
# Save through the figure handle and before plt.show(): after show() the
# current pyplot figure may be a new, empty one, which would be saved blank.
fig.savefig(IMG)
plt.show()

assert os.path.isfile(IMG)

In [None]:
# sympy: create two symbols and build a small symbolic expression from them
from sympy import symbols
x, y = symbols('x y')
expr = x + 2*y

In [None]:
# h5py, referenced from https://support.hdfgroup.org/ftp/HDF5/examples/Py/h5_crtdat.py
import h5py
filename = 'dset.h5'
h5_path = os.path.join(TESTDIR, filename)

# The context manager closes the file even if create_dataset raises.
with h5py.File(h5_path, 'w') as file:
    # 4x6 dataset stored with the STD_I32BE type
    dataset = file.create_dataset("dset", (4, 6), h5py.h5t.STD_I32BE)

assert os.path.isfile(h5_path)

In [None]:
# pandas, referenced from https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html
import pandas as pd

# Index of six consecutive dates starting at 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)

In [None]:
from patsy import dmatrices, dmatrix, demo_data

# NOTE: this rebinds the global name ``data`` for every cell that runs later.
data = demo_data("a", "b", "x1", "x2", "y", "z column")

# Build the design matrices once; the earlier duplicate call whose result was
# thrown away has been dropped.
outcome, predictors = dmatrices("y ~ x1 + x2", data)

# One response column; intercept + x1 + x2 predictor columns; same row count.
assert outcome.shape[1] == 1
assert predictors.shape[1] == 3
assert outcome.shape[0] == predictors.shape[0]

In [None]:
# sklearn, referenced from https://scikit-learn.org/stable/tutorial/basic/tutorial.html
from sklearn import datasets, svm
# Load the iris and digits datasets (iris is not used further in this cell).
iris = datasets.load_iris()
digits = datasets.load_digits()
clf = svm.SVC(gamma=0.001, C=100.)
# Fit on every sample except the last one ...
clf.fit(digits.data[:-1], digits.target[:-1])
# ... and predict that held-out last sample. As the last expression of the
# cell, the prediction is what the notebook displays.
clf.predict(digits.data[-1:])

In [None]:
# scipy, adapted from https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html
import numpy as np
from scipy.optimize import minimize


def rosen(x):
    """Evaluate the Rosenbrock function at ``x`` (a 1-D numpy array)."""
    head = x[:-1]
    tail = x[1:]
    return sum(100.0*(tail-head**2.0)**2.0 + (1-head)**2.0)


# Starting point and solver settings for the Nelder-Mead run.
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
solver_options = {'xatol': 1e-8, 'disp': True}
res = minimize(rosen, x0, method='nelder-mead', options=solver_options)

In [None]:
# seaborn, https://seaborn.pydata.org/introduction.html and https://stackoverflow.com/questions/32244753/how-to-save-a-seaborn-plot-into-a-file
import seaborn as sns
sns.set()
# NOTE(review): load_dataset presumably fetches the example data over the
# network -- confirm the test environment allows that.
tips = sns.load_dataset("tips")
# This relplot is only drawn; the cell does not write it to disk.
sns.relplot(x="total_bill", y="tip", col="time",
            hue="smoker", style="smoker", size="size",
            data=tips)

# Save a pair plot of the iris data under TESTDIR and check the file exists.
savefile = os.path.join(TESTDIR, 'sns.png')
df = sns.load_dataset('iris')
sns_plot = sns.pairplot(df, hue='species', height=2.5)
sns_plot.savefig(savefile)

assert os.path.isfile(savefile)

In [None]:
# sqlalchemy, adapted from https://leportella.com/english/2019/01/10/sqlalchemy-basics-tutorial.html
from sqlalchemy import create_engine, text

engine = create_engine('sqlite:///:memory:', echo=True)

# engine.begin() yields a connection inside a transaction: it commits on
# success, rolls back on error, and always releases the connection.
with engine.begin() as conn:
    # text() keeps the raw SQL valid on newer SQLAlchemy releases as well.
    conn.execute(text('CREATE TABLE EX1 (name)'))
    # String literals take single quotes in SQL; double quotes denote
    # identifiers and only work in SQLite through a legacy fallback.
    conn.execute(text("INSERT INTO EX1 (name) VALUES ('Hello')"))
    rows = conn.execute(text('SELECT name FROM EX1')).fetchall()

# Verify that the inserted row can be read back.
assert [tuple(row) for row in rows] == [('Hello',)]

In [None]:
# statsmodels, referenced from https://www.statsmodels.org/stable/examples/notebooks/generated/ols.html
# numpy is imported here so the cell does not depend on another cell having run.
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# Fixed seed so the simulated noise is reproducible.
np.random.seed(9876789)
nsample = 100
# Use nsample for the grid too, so x and the noise vector cannot drift apart.
x = np.linspace(0, 10, nsample)
X = np.column_stack((x, x**2))
beta = np.array([1, 0.1, 10])
e = np.random.normal(size=nsample)
# Add the constant column, then simulate y = X . beta + noise.
X = sm.add_constant(X)
y = np.dot(X, beta) + e

model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

In [None]:
# altair, https://altair-viz.github.io/
import altair as alt
# pandas is imported here so the cell does not depend on another cell having run.
import pandas as pd

ldata = pd.DataFrame({'a': list('CCCDDDEEE'),
                      'b': [2, 7, 4, 1, 2, 6, 8, 4, 7]})

# Chart the frame built above. The previous code passed the unrelated global
# ``data`` left behind by an earlier cell instead of ``ldata``.
chart = alt.Chart(ldata).mark_bar().encode(
    y='a',
    x='average(b)'
)

# Last expression of the cell, so the notebook renders the chart.
chart

In [None]:
# xlrd, https://blogs.harvard.edu/rprasad/2014/06/16/reading-excel-with-python-xlrd/
# This block is disabled by wrapping it in a string literal, because xlrd
# stopped supporting .xlsx files.
'''
import xlrd

fname = os.path.join(TESTDIR, 'excel_example.xlsx')

# Open the workbook
xl_workbook = xlrd.open_workbook(fname)
'''