# Brief Tutorial on iPython Notebooks
+ What is an iPython Notebook?

Briefly, an iPython notebook is a format for running and visualizing code. It takes advantage of your browser's ability to render attractive HTML documents in order to weave together words, code fragments and output.

+ What is it good for?

Teaching, since notebooks allow notes to be interspersed with the code. They are also good for visualizing results and sharing notes.


Which is what we're going to do today.

In [None]:
import dendropy
from dendropy.calculate import treemeasure
import pandas
import sys
import glob
import numpy as np

In [None]:
# Module-level accumulators: each call to make_df() appends one proportion to
# each of these lists and also hands the lists back to its caller.
right_vecmin = []
right_vecmax = []

Above, I created a couple of global variables. I thought I might need them, and the `make_df` function defined below does append its results to them.

In [None]:
def initializer(path='P3P511/7.SCL.tre'):
    """Build the reference DataFrame of true branch lengths from a tree file.

    Parameters
    ----------
    path : str, optional
        Nexus tree file to read. Defaults to the tutorial's
        ``P3P511/7.SCL.tre`` so existing ``initializer()`` calls behave
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        A single column, ``'true'``, holding the edge lengths in preorder.
        The same lengths are also used as the row index.
    """
    # Load the tree.
    tree = dendropy.Tree.get(path=path, schema="nexus", rooting="default-unrooted")
    # Collect one length per edge, in preorder.
    edges = [edge.length for edge in tree.preorder_edge_iter()]
    # The first edge visited in preorder is forced to length 0.
    edges[0] = 0
    # Start a pandas DataFrame, using the true lengths as the row labels too.
    df = pandas.DataFrame(pandas.Series(data=edges, index=edges), columns=['true'])
    return df

Above, I read in a tree from a Nexus file (`P3P511/7.SCL.tre`), extracted the branch lengths, and loaded them into a pandas dataframe.

In [None]:
# Call once on its own; as the last expression in a notebook cell, the
# returned frame of true branch lengths is echoed as the cell's output.
initializer()

In [None]:
def get_files(pattern='test/*.con'):
    """Read every tree file matching ``pattern`` and collect its node annotations.

    Parameters
    ----------
    pattern : str, optional
        Glob pattern of the Nexus files to read. Defaults to ``'test/*.con'``
        so existing ``get_files()`` calls behave exactly as before.

    Returns
    -------
    tuple
        ``(file, node_hpd, node_med)``: the path of the last file processed,
        and for that file one ``length_hpd95`` and one ``length_median``
        annotation lookup per node, in preorder.

    Raises
    ------
    IOError
        If no file matches ``pattern``. Previously this case surfaced as an
        unbound-local error at the ``return`` statement.
    """
    paths = glob.glob(pattern)
    if not paths:
        raise IOError("no files match pattern %r" % pattern)
    for file in paths:
        print("processing file %s" % file)
        tree = dendropy.Tree.get(path=file, schema="nexus", extract_comment_metadata=True, rooting="default-unrooted")
        node_hpd = [nd.annotations.findall(name='length_hpd95') for nd in tree.preorder_node_iter()]
        node_med = [nd.annotations.findall(name='length_median') for nd in tree.preorder_node_iter()]
    # NOTE: each pass overwrites the previous file's results, so only the last
    # matching file is returned. Kept as-is because callers unpack this
    # three-element tuple.
    return file, node_hpd, node_med

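Above, we use glob to find all the files with the `.con` extension in `test/`. Then we iterate over those files, reading each one in with DendroPy and parsing any annotations on its nodes. Finally, we return the filename and the annotations to use in the next function. Note that only the last file processed is returned, because each pass through the loop overwrites the previous file's annotations.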

In [None]:
# Try the loader on its own; as the last expression in a notebook cell, the
# returned (file, node_hpd, node_med) tuple is echoed as the cell's output.
get_files()

In [None]:
def make_df(node_hpd, df):
    """Add the HPD interval bounds to ``df`` and record how often they bracket the truth.

    Parameters
    ----------
    node_hpd : iterable
        One annotation collection per row of ``df``. Each must provide
        ``values_as_dict()`` whose first value is a ``[lower, upper]`` pair
        (strings or numbers).
    df : pandas.DataFrame
        Frame with a ``'true'`` column. It is modified in place: float
        columns ``'min'`` and ``'max'`` and the boolean ``'boolcol'``
        (``min < true``) are added.

    Returns
    -------
    tuple
        ``(df, right_vecmin, right_vecmax)``. The last two are the
        module-level accumulator lists, each extended by one single-entry
        Series: the proportion of rows with ``min < true`` and with
        ``max > true`` respectively.
    """
    # list() keeps the first value indexable on Python 3, where
    # dict.values() returns a view rather than a list.
    intervals = [list(nd.values_as_dict().values())[0] for nd in node_hpd]
    lower = [interval[0] for interval in intervals]
    upper = [interval[1] for interval in intervals]

    df['min'] = pandas.Series(lower, index=df.index).astype(float)
    df['max'] = pandas.Series(upper, index=df.index).astype(float)
    df['boolcol'] = df['min'] < df['true']

    n_rows = len(df)
    # .sum() reduces per column on every pandas version, so the result stays
    # a one-entry Series labelled 'boolcol'.
    per_min = df[['boolcol']].sum() / n_rows
    # The upper-bound proportion must look at 'max'; it used to be a copy of
    # per_min. It is computed in a temporary frame so no extra column is
    # added to df, and keeps the 'boolcol' label so both accumulators hold
    # identically shaped entries.
    upper_ok = pandas.DataFrame({'boolcol': df['max'] > df['true']})
    per_max = upper_ok.sum() / n_rows

    right_vecmin.append(per_min)
    right_vecmax.append(per_max)
    return df, right_vecmin, right_vecmax

This takes the values in node_hpd and breaks them apart into individual lists of values.

Then we load them into a pandas dataframe as float `min` and `max` columns and add a boolean column, `boolcol`, that flags whether the lower bound falls below the true branch length.

In [None]:
def add_med(node_med, df):
    """Add the median branch lengths and their deviation from the truth to ``df``.

    Parameters
    ----------
    node_med : iterable
        One annotation collection per row of ``df``. Each must provide
        ``values_as_dict()`` whose first value is the median (string or
        number).
    df : pandas.DataFrame
        Frame with a ``'true'`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``'med'`` (float median) and
        ``'devcol'`` (``med - true``) columns added.
    """
    # list() keeps the first value indexable on Python 3, where
    # dict.values() returns a view rather than a list.
    medians = [float(list(nd.values_as_dict().values())[0]) for nd in node_med]
    df['med'] = pandas.Series(medians, index=df.index)
    df['devcol'] = df['med'] - df['true']
    return df

In [None]:
def io_time(df, file):
    """Write ``df`` to a CSV file named after ``file`` with ``.csv`` appended."""
    csv_path = "%s.csv" % file
    df.to_csv(csv_path)

So there, all our functions are defined. Now we can call them all.

In [None]:
# Run the whole pipeline. Every name bound here is a module-level (notebook)
# global, so later cells can keep using df, file, node_hpd and node_med.
if __name__ == "__main__":
	# Reference frame of true branch lengths.
	df = initializer()
	# Path and node annotations of the last file get_files() processed.
	file, node_hpd, node_med = get_files()
	# Adds the min/max/boolcol columns to df and rebinds the two global
	# accumulator lists to the lists make_df() returns.
	df, right_vecmin, right_vecmax = make_df(node_hpd, df)
	# add_med() returns the frame it was given, so export_data and df refer
	# to the same object.
	export_data =add_med(node_med, df)
	# Write the finished frame next to the input as "<file>.csv".
	io_time(export_data, file)

In [21]:
# Show the finished frame: the true lengths plus the min, max, boolcol, med
# and devcol columns added by the pipeline above.
print(df)

              true           min        max boolcol       med    devcol
0.000000  0.000000  0.000000e+00   0.000000   False  0.000000  0.000000
0.270591  0.270591  8.275043e+00  10.564130   False  9.285030  9.014439
6.771450  6.771450  8.275043e+00  10.564130   False  9.285030  2.513580
0.542096  0.542096  8.380176e+00  12.342090   False  9.978896  9.436800
6.229350  6.229350  1.476971e-01   2.238679    True  1.082332 -5.147018
1.261880  1.261880  7.626370e+00   8.818500   False  8.208650  6.946770
4.131570  4.131570  1.115550e+00   2.694770    True  1.928980 -2.202590
0.835902  0.835902  7.712150e-02   1.346300    True  0.687868 -0.148034
0.835902  0.835902  2.645940e+00   3.761650   False  3.179720  2.343818
1.134480  1.134480  6.014360e-02   0.586637    True  0.256691 -0.877789
1.660510  1.660510  4.056030e-06   0.146360    True  0.056308 -1.604201
2.172480  2.172480  5.823260e-08   0.113640    True  0.030346 -2.142134
0.914163  0.914163  1.820560e+00   2.209940   False  1.994710  1