# Welcome to the interactive Galaxy IPython Notebook.

You can access your data via the dataset number. Using a Python kernel, you can access dataset number 42 with ``handle = open(get(42), 'r')``.
To save data, write your data to a file, and then call ``put('filename.txt')``. The dataset will then be available in your galaxy history.
<br>When using a non-Python kernel, ``get`` and ``put`` are available as command-line tools, which can be accessed using system calls in R, Julia, and Ruby. For example, to read dataset number 42 into R, you can write ```handle <- file(system('get -i 42', intern = TRUE))```.
To save data in R, write the data to a file and then call ``system('put -p filename.txt')``.
Notebooks can be saved to Galaxy by clicking the large green button at the top right of the IPython interface.<br>
More help and informations can be found on the project [website](https://github.com/bgruening/docker-jupyter-notebook).

# Imports

In [1]:
from bioblend.galaxy import GalaxyInstance
import pandas as pd
from io import StringIO
import os
import requests
import pip
import matplotlib.pyplot as plt
pip.main(["install", "openpyxl"])

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.


0

# Reading Galaxy_Input tabulars to dataframes

In [None]:
vfs_amrs_counts_db = pd.read_table('galaxy_inputs/vfandamrcounts/vfs_amrs_count_table.tabular')
vfs_amrs_counts_db.set_index("key", inplace=True)
metadata_db = pd.read_table('galaxy_inputs/metadata/metadata.tabular')
metadata_db.set_index("Galaxy ID", inplace=True)
metadata_db.index.name = "key"
hosts_db = pd.read_table('galaxy_inputs/hostremovedcount/removed_hosts_percentage_tabular.tabular')
hosts_db.set_index("key", inplace=True)
heatmap_db = pd.read_table('galaxy_inputs/heatmap/heatmap_table.tabular')
heatmap_db.set_index("key", inplace=True)

# Mapping VFs and AMRs count to the Metadata Tabular

In [None]:
frames = [vfs_amrs_counts_db, hosts_db ,metadata_db]
result = pd.concat(frames, axis = 1)
print(result)

# Plots

In [5]:
outpath = "outputs"

## Bar Plot VFs/AMRs count vs. Enrichment

In [None]:
import matplotlib.pyplot as plt
import numpy as np

selection = result[(result["amrs_count"] != 0) | (result["vfs_count"] != 0)]
selection = selection.dropna(subset = ["amrs_count","vfs_count"])

selection.set_index(["Enrichment"], drop=True, inplace = True)
selection.plot( kind = "bar",
                y = ["amrs_count","vfs_count"], 
                figsize=(10,6), 
                )
plt.savefig(os.path.join(outpath, "bar_plot_vfs_amrs_counts_vs_enrichment.svg"))
plt.savefig(os.path.join(outpath, "bar_plot_vfs_amrs_counts_vs_enrichment.png"))
plt.show()

## Percentages of removed QC hosts reads vs Sample information

In [None]:
import numpy as np
import seaborn as sns


ax = sns.barplot(data=result, x="removed_hosts_percentage", y="Information")
ax.set_ylabel("Samples Informtion")
ax.set_xlabel("Reomoved QC Hosts Reads %")
plt.xticks(rotation=30, horizontalalignment="center")
plt.savefig(os.path.join(outpath, "removed_hosts_percentages_per_sample.svg"))
plt.savefig(os.path.join(outpath, "removed_hosts_percentages_per_sample.png"))
plt.show()

## Number of QC all reads vs samples information

In [None]:
ax = sns.barplot(data=result, x="quality_retained_all_reads", y="Information")
ax.set_ylabel("Samples Informtion")
ax.set_xlabel("Quality controlled all reads count")
plt.xticks(rotation=30, horizontalalignment="center")
plt.savefig(os.path.join(outpath, "quality_controlled_all_reads_count.svg"))
plt.savefig(os.path.join(outpath, "quality_controlled_all_reads_count.png"))
plt.show()

## Number of QC host reads vs samples information

In [None]:
ax = sns.barplot(data=result, x="quality_retained_hosts_reads", y="Information")
ax.set_ylabel("Samples Informtion")
ax.set_xlabel("Quality controlled host reads count")
plt.xticks(rotation=30, horizontalalignment="center")
plt.savefig(os.path.join(outpath, "quality_controlled_host_reads_count.svg"))
plt.savefig(os.path.join(outpath, "quality_controlled_host_reads_count.png"))
plt.show()

## Violin Plots

### for VFs vs Enrichment

In [None]:
ax = sns.violinplot(data=result, x="Enrichment", y="vfs_count")
ax.set_title("VFs count vs Pathogen Enrichment")
ax.set_ylabel("VFs Count")
plt.savefig(os.path.join(outpath, "violin_plot_vfs_count_vs_enrichment.svg"))
plt.savefig(os.path.join(outpath, "violin_plot_vfs_count_vs_enrichment.png"))
plt.show()

### for AMRS vs Enrichment


In [None]:
ax = sns.violinplot(data=result, x="Enrichment", y="amrs_count")
ax.set_title("AMRs count vs Pathogen Enrichment")
ax.set_ylabel("AMRs Count")
plt.savefig(os.path.join(outpath, "violin_plot_amrs_count_vs_enrichment.svg"))
plt.savefig(os.path.join(outpath, "violin_plot_amrs_count_vs_enrichment.png"))
plt.show()

### for QC Hosts Removed Reads Percentages vs Deplition

In [None]:
ax = sns.violinplot(data=result, x="Deplition", y="removed_hosts_percentage")
ax.set_title("Reomoved QC Hosts Reads Percentages vs Chicken Deplition")
ax.set_ylabel("Reomoved QC Hosts Reads %")
plt.savefig(os.path.join(outpath, "violin_plot_removed_hosts_percentage_vs_deplition.svg"))
plt.savefig(os.path.join(outpath, "violin_plot_removed_hosts_percentage_vs_deplition.png"))
plt.show()

### No. of QC Chicken reads vs Deplition

In [None]:
ax = sns.violinplot(data=result, x="Deplition", y="quality_retained_hosts_reads")
ax.set_title("Number of QC Hosts Reads vs Chicken Deplition")
ax.set_ylabel("Number of QC Hosts Reads")
plt.savefig(os.path.join(outpath, "violin_plot_removed_hosts_reads_count_vs_deplition.svg"))
plt.savefig(os.path.join(outpath, "violin_plot_removed_hosts_reads_count_vs_deplition.png"))
plt.show()

### No. of QC Total Reads vs Deplition

In [None]:
ax = sns.violinplot(data=result, x="Deplition", y="quality_retained_all_reads")
ax.set_title("Number of QC Total Reads vs Chicken Deplition")
ax.set_ylabel("Number of QC Total Reads")
plt.savefig(os.path.join(outpath, "violin_plot_QC total_reads_count_vs_deplition.svg"))
plt.savefig(os.path.join(outpath, "violin_plot_QC total_reads_count_vs_deplition.png"))
plt.show()

## Scattered Plot

### VFs and AMRs Counts vs CT Value

In [None]:
selection = selection.dropna(subset = ["amrs_count","vfs_count"])
#selection.set_index(["Enrichment"], drop=True, inplace = True)

ax = selection.plot(kind='scatter', x='CT', y='amrs_count', color='r', marker='v', s=25, label="AMR")    
selection.plot.scatter(
                x = 'CT',
                y = 'vfs_count',
                ax=ax, label="VFs")
ax.set_ylabel("# of VFs or AMR")
ax.set_xlabel("CT")
plt.savefig(os.path.join(outpath, "scattered_plot_vfs_amrs_counts_vs_ct.svg"))
plt.savefig(os.path.join(outpath, "scattered_plot_vfs_amrs_counts_vs_ct.png"))
plt.show()

### VFs and AMRs Counts vs CFU value

In [None]:
ax = selection.plot(kind='scatter', x='CFU', y='amrs_count', color='r', marker='v', s=25, label="AMR")    
selection.plot.scatter(
                x = 'CFU',
                y = 'vfs_count',
                ax=ax, label="VFs")
ax.set_ylabel("# of VFs or AMR")
ax.set_xlabel("CFU")
ax.set_xscale('log')
plt.savefig(os.path.join(outpath, "scattered_plot_vfs_amrs_counts_vs_cfu.svg"))
plt.savefig(os.path.join(outpath, "scattered_plot_vfs_amrs_counts_vs_cfu.png"))
plt.show()

## HeatMap

In [None]:
import scipy
heatmap_db
sns.set(font_scale=1)
g = sns.clustermap(heatmap_db, cmap="Reds", xticklabels=True, row_cluster=True, figsize=(15,15), cbar_pos=(0.01, .05, .02, .3), linewidths=0.5)
ax = g.ax_heatmap
ax.set_ylabel("Pathogenic Genes")
ax.set_xlabel("Samples")
# fig = g.get_figure()
plt.savefig(os.path.join(outpath, "clustermap_pathogenic_genes_vs_samples.svg"))
plt.savefig(os.path.join(outpath, "clustermap_pathogenic_genes_vs_samples.png"))