In [22]:
import numpy as np
import pandas as pd
import MDAnalysis
%matplotlib inline
import matplotlib.pyplot as plt
import nglview as nv

In [23]:
import warnings
# Install a blanket 'ignore' filter: every warning raised after this cell runs
# is suppressed (presumably to keep the cell outputs uncluttered).
# NOTE(review): this also hides deprecation notices from the libraries used
# in this notebook — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [24]:
# Topology (PSF) and trajectory (XTC) files for the example run loaded below.
topology_path = './trajectory_analysis/top.psf'
trajectory_path = (
    './trajectory_analysis/example_data/'
    'LJCFS-CYL-NH_1_RAD_6.0_TEMP_0.72871_DENS_0.45_NPART_4000_.xtc'
)

# convert_units=False asks MDAnalysis not to rescale the values read from the
# files, so coordinates stay in whatever units the simulation wrote.
u = MDAnalysis.Universe(topology_path, trajectory_path, convert_units=False)

In [25]:
# Mask layers: split the particles of the first frame into an outer shell
# (drawn in red) and an inner core, based on their distance from the z axis.

# Radial distance separating shell from core, in the trajectory's own units.
# NOTE(review): the file name suggests a tube radius of 6.0, so 4.7 presumably
# picks out the layer nearest the wall — confirm.
SHELL_CUTOFF = 4.7

# Coordinates of frame 0; columns 0 and 1 are used as the in-plane components.
frame0_xyz = u.trajectory[0].positions
radial_dist = np.sqrt(frame0_xyz[:, 0]**2 + frame0_xyz[:, 1]**2)
in_shell = radial_dist > SHELL_CUTOFF

# Integer positions of the particles on either side of the cutoff.
shell_idx = np.flatnonzero(in_shell)
core_idx = np.flatnonzero(~in_shell)

# Selection strings handed to NGL: the indices joined by single spaces.
# NOTE(review): verify that NGL interprets bare integers here as intended
# (atom index vs. residue number); nglview's documentation also describes
# passing an index array directly as `selection` — TODO confirm.
shell_sele = ' '.join(map(str, shell_idx))
core_sele = ' '.join(map(str, core_idx))

# Build the viewer on all atoms, then add one representation per layer.
# `w` is displayed by a later cell.
w = nv.show_mdanalysis(u.select_atoms('all'))
w.add_representation('ball+stick', selection=shell_sele, color='red')
w.add_representation('ball+stick', selection=core_sele)

# A quick introduction to using python for simulation data analysis.

# Or

# Save yourself a headache and stop using Excel

# The basic problem

 * Run a parameter sweep of simulations 
 * Accumulate the results from every run
 * My case: MD simulation of confined fluid with variable tube-radius $R$, temperature $T$, and fluid density $\rho$
 * Goals: accumulate dynamic properties (e.g., self-diffusion), thermodynamics, static-structure, etc for all state points

In [26]:
# Bare last expression: renders the NGL viewer widget `w` built in the
# mask-layers cell as this cell's output.
w

# for example, analyze diffusion files

In [27]:
%%bash
# List files ending in .diffusion.nc anywhere under ./example_data/NPART4000
# and show only the first 8 paths that find prints.
find ./example_data/NPART4000 -name '*.diffusion.nc' | head -n 8

./example_data/NPART4000/Radius4.0/dens0.025/T0.72871/LJCFS-CYL-NH_1_RAD_4.0_TEMP_0.72871_DENS_0.025_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.025/T0.72871/LJCFS-CYL-NH_2_RAD_4.0_TEMP_0.72871_DENS_0.025_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.05/T0.72871/LJCFS-CYL-NH_1_RAD_4.0_TEMP_0.72871_DENS_0.05_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.05/T0.72871/LJCFS-CYL-NH_2_RAD_4.0_TEMP_0.72871_DENS_0.05_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.075/T0.72871/LJCFS-CYL-NH_1_RAD_4.0_TEMP_0.72871_DENS_0.075_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.075/T0.72871/LJCFS-CYL-NH_2_RAD_4.0_TEMP_0.72871_DENS_0.075_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.1/T0.72871/LJCFS-CYL-NH_1_RAD_4.0_TEMP_0.72871_DENS_0.1_NPART_4000_.diffusion.nc
./example_data/NPART4000/Radius4.0/dens0.1/T0.72871/LJCFS-CYL-NH_2_RAD_4.0_TEMP_0.72871_DENS_0.1_NPART_4000_.diffusion.nc


# How I used to do this

 * Scripts (bash/awk) to collect data
 * Scripts/Excel to analyze data
 * Scripts to generate plots (xmgrace/gnuplot)
 

## There's nothing wrong with this, but...
 
 * An increasingly large pain to keep track of everything

 * What I really wanted was a unified approach to collecting/analyzing/plotting data
 
 * Python (and its many libraries) does all the things!
 

 * Note: R is another good candidate. Great for stats.
 
 * Same for matlab.  Great for linear algebra.


# Getting things installed

 * The power of python is the extension libraries
 * These used to be a pain to install
 * Thankfully, there are now package managers like:
   - [Anaconda](https://www.continuum.io/downloads)
   - Enthought
   - Python(x,y)
   - WinPython
   - Pyzo

 * My go-to is Anaconda
   - Free
   - Cross Platform
   - Very widely used (so good support)
   - Free version uses MKL!

conda create --name demo python=2.7
source activate demo
conda install numpy

# Useful libraries

 * [scipy stack](https://www.scipy.org/stackspec.html)
    * [jupyter](http://jupyter.org/)
    * [numpy](http://www.numpy.org/)
    * [scipy](https://www.scipy.org/)
    
    * [pandas](http://pandas.pydata.org/)
    * [matplotlib](http://matplotlib.org/)
        
    * [xarray](http://xarray.pydata.org/en/stable/) [very new, but very cool]    
    
 * Trajectory analysis
   
    * [MDAnalysis](http://www.mdanalysis.org/)
    * [mdtraj](http://mdtraj.org/1.7.2/)


# Interacting with python

 * Scripts are great, and still my go-to
 * But, science is inherently interactive
 * Wanted to stop re-running scripts every time I wanted to investigate something in a slightly different way
 * Enter the [jupyter notebook!](./new_blank.ipynb)

 * This presentation was made in the notebook.
 * Not just Python. Kernels for R, MATLAB, Octave, Julia, bash ...

# Let's do some analysis!

 * [pandas demo](./pandas_demo.ipynb)

 * [calculate diffusion](./get_diffusion.ipynb)
 
 * [Trajectory analysis](./trajectory_analysis/trajectory_analysis.ipynb)

# other topics
 * [multiprocessing](./multiprocessing/demo.ipynb)
 

# other other
* [click](http://click.pocoo.org/5/)
* [pytest](http://pytest.org/latest/)
 