In [None]:
import json
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from json_to_dataframes import json_to_dataframes

# Load the JSON file through the project helper; the result is indexed below.
dataframes = json_to_dataframes("data/2022_mix2_rep1.json")

# Entry 0 is used as the fragment table, entry 1 as the spectrum table.
fragments_dataframe, spectra_dataframe = dataframes[0], dataframes[1]

# Preview the first rows of the fragment table (last expression -> cell output).
# Column names can be inspected with fragments_dataframe.keys() and
# spectra_dataframe.keys().
fragments_dataframe.head()

In [None]:
##### DO NOT EXECUTE
# Alternative loading path: read the raw JSON directly and build one table of
# fragment-ion annotations (`all_ions`) from the per-spectrum 'annotation' entries.
json_file = "data/2022_mix2_rep1.json"
# The with-statement closes the file on exit, so no explicit close() is needed.
with open(json_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Summarize fragment ions over all spectra
# NOTE(review): the slice starts at index 1, so the first entry of `data` is
# skipped, and at most 9999 entries are used -- confirm both are intended.
annotation_frames = [
    pd.DataFrame.from_dict(spectrum['annotation']) for spectrum in data[1:10000]
]
# Concatenate once: growing a DataFrame with concat inside the loop re-copies
# all previously collected rows on every iteration.
all_ions = pd.concat(annotation_frames) if annotation_frames else pd.DataFrame()
all_ions.info()

# Separate ion types and positions encoded in `theoretical_code`:
#   split on "@"  -> frag_types | frag_rest
#   frag_types on ":" -> frag_type_1 | frag_type_2
#   frag_rest on "("  -> frag_length (position part) | frag_rest
#   position part on ":" -> position_frag_type_1 | position_frag_type_2
all_ions[['frag_types', 'frag_rest']] = all_ions['theoretical_code'].str.split("@", expand=True)
all_ions[['frag_type_1', 'frag_type_2']] = all_ions['frag_types'].str.split(":", expand=True)
all_ions[['frag_length', 'frag_rest']] = all_ions['frag_rest'].str.split("(", expand=True)
all_ions[['position_frag_type_1', 'position_frag_type_2']] = all_ions['frag_length'].str.split(":", expand=True)
# Overwrite the temporary position string with the actual length (inclusive range).
all_ions['frag_length'] = (
    all_ions['position_frag_type_2'].astype(float)
    - all_ions['position_frag_type_1'].astype(float)
    + 1
)





In [None]:
#### Visualizations
# Join both fragment-type columns into one "<type1>-<type2>" label per fragment.
common_type = (
    fragments_dataframe['frag_type1']
    .astype(str)
    .str.cat(fragments_dataframe['frag_type2'], sep='-')
)
# Regex substitutions, applied in this order: expand "n", then strip a
# leading "t-" / trailing "-t" marker from the combined label.
common_type = (
    common_type
    .replace("n", "not annotated", regex=True)
    .replace("t-", "", regex=True)
    .replace("-t", "", regex=True)
)
# Store the label on the fragment table; later cells group by this column.
fragments_dataframe['frag_types'] = common_type
counts = common_type.value_counts()

# Frequency of ion types: first as a histogram ...
fig = go.Figure([go.Histogram(x=common_type)])
fig.show()
# ... then as a pie chart of the same counts.
fig = go.Figure([go.Pie(labels=counts.keys(), values=counts)])
fig.show()



In [None]:
# Intensity distributions of the different ion types.
# histnorm: plotly histogram normalisation, e.g. "density" or "probability".
histnorm = "probability"
types = fragments_dataframe["frag_types"].unique()
print(types)


def _values_for_type(frame, ion_type, column, transform=None):
    """Return `column` of `frame` for rows whose frag_types equals `ion_type`.

    If `transform` is given it is applied to the selected values before
    they are returned (used for the log2 intensity plots).
    """
    values = frame.loc[frame["frag_types"] == ion_type, column]
    return values if transform is None else transform(values)


def _histograms_per_type(frame, ion_types, column, title, xaxis_title,
                         histnorm="probability", transform=None):
    """Build a grouped-histogram figure with one trace per ion type.

    Parameters
    ----------
    frame : DataFrame with a "frag_types" column and the plotted `column`.
    ion_types : iterable of ion-type labels; one histogram trace per label.
    column : name of the column to plot.
    title, xaxis_title : figure title and x-axis label.
    histnorm : plotly `histnorm` setting, also used as the y-axis label.
    transform : optional callable applied to the values before plotting.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    figure = go.Figure([
        go.Histogram(
            x=_values_for_type(frame, ion_type, column, transform),
            histnorm=histnorm,
            name=ion_type,
            nbinsx=50,
        )
        for ion_type in ion_types
    ])
    figure.update_layout(
        barmode='group',
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title=histnorm)
    return figure


def _ridgelines_per_type(frame, ion_types, column, title, xaxis_title,
                         transform=None):
    """Build a ridgeline figure (horizontal one-sided violins), one per ion type.

    Parameters are as for `_histograms_per_type`. The y-axis of a horizontal
    violin plot carries the trace names, so it is labelled "ion type".

    Returns
    -------
    plotly.graph_objects.Figure
    """
    figure = go.Figure()
    for ion_type in ion_types:
        figure.add_trace(go.Violin(
            x=_values_for_type(frame, ion_type, column, transform),
            name=ion_type))
    figure.update_traces(orientation='h', side='positive', width=3, points=False)
    figure.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title="ion type")
    return figure


# --- log2 intensities ---------------------------------------------------
# np.log2 (not np.log) so that the data matches the "log2(intensity)" label.
fig = _histograms_per_type(
    fragments_dataframe, types, "frag_intensity",
    title="Histograms of logarithmic intensities per ion type",
    xaxis_title="log2(intensity)",
    histnorm=histnorm,
    transform=np.log2)
fig.show()

## Try ridgelines
fig = _ridgelines_per_type(
    fragments_dataframe, types, "frag_intensity",
    title="Ridgelines of logarithmic intensities per ion type",
    xaxis_title="log2(intensity)",
    transform=np.log2)
fig.show()

# --- intensity relative to the total intensity --------------------------
fig = _histograms_per_type(
    fragments_dataframe, types, "perc_of_total_intensity",
    title="Histograms of relative intensities per ion type",
    xaxis_title="intensity",
    histnorm=histnorm)
fig.show()

## Try ridgelines
fig = _ridgelines_per_type(
    fragments_dataframe, types, "perc_of_total_intensity",
    title="Ridgelines of relative intensities per ion type",
    xaxis_title="intensity")
fig.show()

# --- intensity relative to the base peak --------------------------------
fig = _histograms_per_type(
    fragments_dataframe, types, "prop_intensity_to_base_peak",
    title="Histograms of relative intensities to base peak per ion type",
    xaxis_title="Percentage per base peak",
    histnorm=histnorm)
fig.show()

## Try ridgelines
fig = _ridgelines_per_type(
    fragments_dataframe, types, "prop_intensity_to_base_peak",
    title="Ridgelines of relative intensities to base peak per ion type",
    xaxis_title="Percentage per base peak")
fig.show()





In [None]:
# m/z distribution of the different ion types
# histnorm: plotly histogram normalisation, e.g. "density" or "probability".
histnorm = "probability"
types = fragments_dataframe["frag_types"].unique()
print(types)

# One histogram trace per ion type, each built from that type's frag_mz values.
histograms = [
    go.Histogram(
        x=fragments_dataframe.loc[fragments_dataframe["frag_types"] == t, "frag_mz"],
        histnorm=histnorm,
        name=t,
        nbinsx=50,
    )
    for t in types
]

fig = go.Figure(histograms)
fig.update_layout(
    barmode='group',
    title="Histograms of mz values per ion type",
    xaxis_title="mz",
    yaxis_title=histnorm,
)
fig.show()



In [None]:
### Percentages of different ion types per spectrum
# One grouped histogram per figure; each trace reads the spectrum-table
# column "<prefix><type>" for the three ion-type categories below.
histnorm = "probability"
types = ["internal", "terminal", "other"]
print(types)

histograms = [
    go.Histogram(x=spectra_dataframe["perc_" + t], histnorm=histnorm, name=t, nbinsx=50)
    for t in types
]
fig = go.Figure(histograms)
fig.update_layout(
    barmode='group',
    title="Histograms of percentages of ion type per spectrum",
    xaxis_title="Percentage",
    yaxis_title=histnorm)
fig.show()

### Same for intensities
# This figure plots the total_int_* columns, so title and axis label name
# intensities instead of repeating the "percentages" labels of the first figure.
histograms = [
    go.Histogram(x=spectra_dataframe["total_int_" + t], histnorm=histnorm, name=t, nbinsx=50)
    for t in types
]
fig = go.Figure(histograms)
fig.update_layout(
    barmode='group',
    title="Histograms of total intensities of ion type per spectrum",
    xaxis_title="Total intensity",
    yaxis_title=histnorm)
fig.show()



In [None]:
## Filter for all single amino acid fragments that carry no modification
def _has_no_modification(value):
    """Return True when `value` is missing or an empty string/container.

    NOTE(review): the dtype of the `modification` column is not visible here,
    so strings, sized containers and missing scalars are all handled.
    """
    if isinstance(value, str):
        return value.strip() == ""
    try:
        return len(value) == 0
    except TypeError:
        # Scalars without a length (None, NaN, numbers) count as empty only
        # when they are missing values.
        return bool(pd.isna(value))


# Row-wise conditions must be combined with `&`: Python's `and` asks for the
# truth value of a whole Series and raises ValueError. `Series.empty` is a
# property describing the whole column, so it cannot serve as a per-row test.
is_single_residue = fragments_dataframe['frag_seq'].str.len() == 1
is_unmodified = fragments_dataframe['modification'].map(_has_no_modification)
fragments_dataframe[is_single_residue & is_unmodified]



In [None]:
# Show the first fragment-type column of `all_ions`. The column is created as
# 'frag_type_1' in the cell that builds `all_ions` (marked "DO NOT EXECUTE"),
# so this cell only works after that cell has been run.
all_ions['frag_type_1']

In [None]:
from Bio import motifs

# Define the set of peptide sequences to analyze (all of equal length)
sequences = ["SVQTLYEQAAARRNRAAAPAQTTTDYKYAP", "KAWELHKDITYLNNRAAAEYEKGEYETAIS", "TEMQIQSPTAVLIARAAAAQDEITGDGTTT"]

# The sequences are peptides, so the motif is built over the 20 standard
# amino-acid letters; without an explicit alphabet motifs.create() assumes
# the DNA alphabet "ACGT".
PROTEIN_ALPHABET = "ACDEFGHIKLMNPQRSTVWY"

# Create a Motif object
m = motifs.create(sequences, alphabet=PROTEIN_ALPHABET)

# Read the consensus of the motif. The result is stored under its own name:
# assigning it to `motifs` would shadow the imported module and make this
# cell fail when it is run a second time.
# NOTE(review): degenerate_consensus follows ambiguity rules designed for
# nucleotides; for peptides `m.consensus` may be the better summary -- confirm.
degenerate_consensus = m.degenerate_consensus

# Print the consensus
print(degenerate_consensus)


