In [None]:
# %load topics.py
import pandas as pd
import psutil

# Let displayed tables show up to 300 characters per cell, so long topic
# strings are not truncated.
pd.set_option("display.max_colwidth" , 300)

# High-level agenda: one (day, Topic) pair per row. The row of empty strings
# presumably separates the first course week from the second.
df_high_level = pd.DataFrame(
    data=[
        ('Monday', 'Check-In, recaps and functions'),
        ('Tuesday', 'Coding philosophy, data flow and some more useful std modules'),
        ('Wednesday', 'Test driven development, python module, sphinx'),
        ('Thursday', 'OOP - Object oriented programming'),
        ('Friday', 'Q&A and code clean up'),
        ('', ''),
        ('Monday', ''),
        ('Tuesday', ''),
        ('Wednesday', ''),
        ('Thursday', ''),
        ('Friday', 'Q&A and Tutorium'),
    ],
    columns=['day', 'Topic'],
)

# Detailed agenda: the topics are grouped per numeric day and expanded into
# one {'day', 'Topic'} record per topic, in the order listed here.
df_details = pd.DataFrame(
    data=[
        {'day': day_number, 'Topic': topic}
        for day_number, day_topics in {
            1: [
                'Check-in',
                'Procedural stuff',
                "python basic in 5'",
                'lists and generators',
                'bisect module',
            ],
            2: [
                'Functions',
                'Zen of Python and general coding philosophy',
                'csv module',
                'Collections module',
                'Exercises 1 & 2',
            ],
            3: [
                'Discussion of Excercises 1 & 2',
                'Basic plotting with plotly',
                'Exercises 3',
            ],
            4: [
                'Discussion of Excercises 3',
                "String format",
                'dicts',
                'itertools',
            ],
            5: [
                "OOP",
            ],
            # disabled entry: day 3, 'data flow'
            6: [
                "Basic Python package",
                "Test Driven development",
                "Auto documentation with Sphinx",
            ],
        }.items()
        for topic in day_topics
    ]
)


def display_topics(day=1, df=None):
    """Return the agenda rows scheduled for one course day.

    Args:
        day: Value compared against the ``day`` column (default ``1``).
        df: Table with ``day`` and ``Topic`` columns. When ``None``, the
            module-level ``df_details`` table is used.

    Returns:
        A DataFrame holding the ``day`` and ``Topic`` columns of the
        matching rows, limited to the first 20.
    """
    topics = df_details if df is None else df
    rows_for_day = topics[topics['day'] == day]
    agenda = rows_for_day[['day', 'Topic']]
    return agenda.head(20)


# Day 3
## Overview

In [None]:
# Show the detailed topics whose 'day' value is 3.
display_topics(day=3)

# Discussion of the Exercises



# A building block for exercises 1 & 2

In [None]:
# Parse the FASTA file into `seqs`: header line (including the leading ">")
# mapped to the concatenated sequence of that record.
seqs = {}
current_id = None
current_seq = ""
with open("../data/uniprot-filtered-proteome%3AUP000005640+AND+reviewed%3Ayes+AND+organism%3A%22Hom--.fasta") as fasta_file:
    for line in fasta_file:
        if line.startswith(">"):
            # A new header closes the previous record, if there was one.
            if current_id is not None:
                seqs[current_id] = current_seq
            current_seq = ""
            current_id = line.strip()
        else:
            # Sequence data may span several lines; join them without newlines.
            current_seq += line.strip()
    # Store the last record. Skip this when no header was ever seen (e.g. an
    # empty file), otherwise a spurious `None` key would end up in `seqs`.
    if current_id is not None:
        seqs[current_id] = current_seq
    

# Evaluation building block for exercise 2

In [None]:
# Pick the identifier whose sequence is longest. On ties the first one
# encountered wins; an empty `seqs` leaves the sentinels None and -1.
longest = max(seqs, key=lambda seq_id: len(seqs[seq_id]), default=None)
longest_sequence = len(seqs[longest]) if seqs else -1

In [None]:
# Display the header line of the longest sequence found by the search above.
longest

Say hello to [Titin](https://en.wikipedia.org/wiki/Titin) /Taitin/ 

# Plotting with plotly

Plotly is an interactive plotting library that supports many programming languages. Plotly's plots are rendered with JavaScript and are built on top of d3.js and stack.gl.

Advantages compared to ggplot, matplotlib (and derivatives such as seaborn), bokeh, and pygal are:
* interactive (mouse over, zoom ...)
* directly embeddable as html
* realtime plots

In [None]:
# Basic setup: import plotly and its graph-object classes, then show the
# installed plotly version.
import plotly
# `plotly.graph_objects` is the module path every later plotting cell imports;
# `plotly.graph_objs` is its legacy alias.
import plotly.graph_objects as go
display(plotly.__version__)

In [None]:
# Read the small amino acid data set from CSV into a DataFrame.
# (pandas itself is discussed in the second week ...)
aa_df = pd.read_csv("../data/amino_acid_properties.csv")

In [None]:
# Show the first rows of the amino acid table.
aa_df.head()

In [None]:
# Show the last rows of the table (the next cell drops rows that are entirely NaN).
aa_df.tail()

In [None]:
# Drop the rows in which every column is NaN (how="all"). The result is
# rebound to the name rather than mutated via inplace=True, which keeps the
# call chainable.
aa_df = aa_df.dropna(how="all")
aa_df.tail()

In [None]:
import plotly.graph_objects as go

# Bar chart: one bar per amino acid (1-letter code), bar height = pka1.
pka1_bars = go.Bar(x=aa_df["1-letter code"], y=aa_df["pka1"])
data = [pka1_bars]

fig = go.Figure(data=data)
fig.show()


In [None]:
import plotly.graph_objects as go

# Donut chart (hole=0.3): one slice per amino acid, sized by accessible surface.
surface_pie = go.Pie(
    labels=aa_df["1-letter code"],
    values=aa_df["Accessible surface"],
    hole=0.3,
)
data = [surface_pie]

fig = go.Figure(data=data)
fig.show()



In [None]:
import numpy as np

# Marker-only scatter: pI per amino acid, marker size taken from the
# "Accessible surface" column.
marker_style = {"size": aa_df["Accessible surface"]}
pi_points = go.Scatter(
    x=aa_df["1-letter code"],
    y=aa_df["pI"],
    mode="markers",
    marker=marker_style,
)
data = [pi_points]

fig = go.Figure(data=data)
fig.show()

In [None]:
import plotly.graph_objects as go

pka1_bars = go.Bar(x=aa_df["1-letter code"], y=aa_df["pka1"])
data = [pka1_bars]

# Title text and font size are supplied up front through the layout dict.
title_layout = {
    "title": {"text": "amino acid pka1's", "font_size": 20},
}
fig = go.Figure(data=data, layout=title_layout)
fig.show()


In [None]:
# Layout properties can also be set with attribute (dot) access on an
# already-built figure.
pka1_bars = go.Bar(x=aa_df["1-letter code"], y=aa_df["pka1"])
data = [pka1_bars]

fig = go.Figure(data=data)
fig.layout.title.font.size = 40
fig.layout.title.text = "amino acid pka1's"
fig.show()
# not quite consistent and api is still evolving
# not quite consistent and api is still evolving

In [None]:
# Dot access also reaches into individual traces: here the bars of the first
# trace get a black outline of width 5.
pka1_bars = go.Bar(x=aa_df["1-letter code"], y=aa_df["pka1"])
data = [pka1_bars]

fig = go.Figure(data=data)
fig.layout.title.font.size = 40
fig.layout.title.text = "amino acid pka1's"
first_trace = fig.data[0]
first_trace.marker.line.color = "black"
first_trace.marker.line.width = 5
fig.show()
# not quite consistent and api is still evolving
# not quite consistent and api is still evolving

In [None]:
# Same outlined bar chart, now with the full amino acid name as hover text.
pka1_bars = go.Bar(
    x=aa_df["1-letter code"],
    y=aa_df["pka1"],
    hovertext=aa_df["Name"],
)
data = [pka1_bars]

fig = go.Figure(data=data)
fig.layout.title.font.size = 30
fig.layout.title.text = "amino acid pka1's"
first_trace = fig.data[0]
first_trace.marker.line.color = "black"
first_trace.marker.line.width = 5
fig.show()
# not quite consistent and api is still evolving
# not quite consistent and api is still evolving

In [None]:
# pio.templates.default = "plotly"

# One bar trace per pk-related column. All traces share the x axis
# (1-letter code) and the hover text (full name); the trace name is the
# column name.
data = [
    go.Bar(
        x=aa_df["1-letter code"],
        y=aa_df[pk_column],
        hovertext=aa_df["Name"],
        name=pk_column,
    )
    for pk_column in ("pka1", "pka2", "pkaX", "pI")
]

In [None]:
# Layout for the grouped pk bar chart: figure title, plot background and
# y axis styling.
layout = {
    "title": {
        "text": "amino acid pks",
        "font_family": "Courier",
        "font_size": 30,
    },
    # alpha 0 -> fully transparent plotting area
    "plot_bgcolor": "rgba(0, 0, 0, 0)",
    "yaxis": {
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": "rgba(0, 0, 0, 0.2)",
        "color": "rgba(0,0,0,1)",
        "ticks": "outside",
        # explicit tick positions
        "tickvals": [1, 5, 10],
        "title": {
            "text": "pka",
            "font_family": "Courier",
        },
        "showline": True,
        "linewidth": 1,
        "linecolor": "black",
        "mirror": True,
        # logarithmic y axis
        "type": "log"
    }
}

# `data` is the list of go.Bar traces built in the previous cell.
fig = go.Figure(data=data, layout=layout)
# The y axis line is configured in `layout`; the x axis line is styled here,
# after the figure has been constructed.
fig.update_xaxes(showline=True, linewidth=1, linecolor="black")
# ^--- what happens if the order of the two lines is reversed?

fig.show()

More details about axes can be found [here](https://plot.ly/python/axes/).

Plot examples can be found [here](https://plot.ly/python/)

## Questions ?

## Let's plot something

Anyone go ahead, take the amino_acid_visualization and plot something!

# Exercise No. 3
* Use a sliding window of size 5 to calculate the average hydropathy at each position for each of these proteins:
   * TMEM63B
   * TM9SF
   * MS4A1
* Create a visualization for each protein: residue position vs. average hydropathy index (sliding window, maybe even with different window sizes?)

Can you explain the observed average?
