# Downloads for JunD binding prediction

This notebook downloads and prepares the data used for the JunD binding prediction model.

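To run the notebook, bedtools, samtools and wget are required. The code below also imports the Python package `pybedtools` and calls the `janggu-trim` command-line tool, so both must be installed as well.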

In [1]:
#!conda install --yes -c bioconda bedtools samtools

In [2]:
import os
from pybedtools import BedTool

In [3]:
# Every downloaded and derived file in this notebook is written below this directory.
output = '../data'
# Create the directory (and any missing parents); an already existing directory is fine.
os.makedirs(name=output, exist_ok=True)

## Downloading the data for JunD prediction


In [4]:
!wget https://www.encodeproject.org/files/ENCFF446WOD/@@download/ENCFF446WOD.bed.gz -O {output}/jund_peaks.narrowPeak.gz
!wget https://www.encodeproject.org/files/ENCFF546PJU/@@download/ENCFF546PJU.bam -O  {output}/dnase_stam_encode.bam
!wget https://www.encodeproject.org/files/ENCFF059BEU/@@download/ENCFF059BEU.bam -O  {output}/dnase_stam_roadmap.bam

!wget http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/hg38-human/hg38.blacklist.bed.gz -O  {output}/hg38.blacklisted.bed.gz
!gunzip -f  {output}/hg38.blacklisted.bed.gz

# human genome sequence hg38
!wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz -O  {output}/hg38.fa.gz
!gunzip -f  {output}/hg38.fa.gz

!wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes -O {output}/hg38.chrom.sizes

--2019-06-14 14:38:10--  https://www.encodeproject.org/files/ENCFF446WOD/@@download/ENCFF446WOD.bed.gz
Resolving www.encodeproject.org (www.encodeproject.org)... 34.211.244.144
Connecting to www.encodeproject.org (www.encodeproject.org)|34.211.244.144|:443... connected.
HTTP request sent, awaiting response... 307 Temporary Redirect
Location: https://download.encodeproject.org/https://encode-public.s3.amazonaws.com/2016/12/14/5643001d-fae4-43c3-8c6f-de56aa3e19a8/ENCFF446WOD.bed.gz?response-content-disposition=attachment%3B%20filename%3DENCFF446WOD.bed.gz&Signature=kXqA98I2Ukle8sOf4tG3ByTcAa4%3D&x-amz-security-token=AgoJb3JpZ2luX2VjEMT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLXdlc3QtMiJHMEUCIBECuWysenOZdkDKbB6XfmcaQ8zul6azEkuNauJcIB91AiEA6%2FLuT9kpu%2BIvggBn3Dp7mJBxg92zGio1NlitzdjrJ08q4wMI7f%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgwyMjA3NDg3MTQ4NjMiDCYx83zFw7htd3syviq3AzISSluwceByIAxIcFaxcorsp2pKt8ZqsPUcgjsIeWJR2CaHnhyxGLXV3ydF8C4CmxYYbgepXvPKTYy6i0UASGK8YJf2mPlzGoXjdiRZo%2BpjTJmhVO59CYgV%2B%2BoJ

Connecting to download.encodeproject.org (download.encodeproject.org)|34.211.244.144|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://encode-public.s3.amazonaws.com/2017/09/18/a18dd311-b609-424b-987b-f7cd9f06e715/ENCFF059BEU.bam?response-content-disposition=attachment%3B%20filename%3DENCFF059BEU.bam&Expires=1560645495&x-amz-security-token=AgoJb3JpZ2luX2VjEMT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLXdlc3QtMiJHMEUCIBECuWysenOZdkDKbB6XfmcaQ8zul6azEkuNauJcIB91AiEA6%2FLuT9kpu%2BIvggBn3Dp7mJBxg92zGio1NlitzdjrJ08q4wMI7f%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgwyMjA3NDg3MTQ4NjMiDCYx83zFw7htd3syviq3AzISSluwceByIAxIcFaxcorsp2pKt8ZqsPUcgjsIeWJR2CaHnhyxGLXV3ydF8C4CmxYYbgepXvPKTYy6i0UASGK8YJf2mPlzGoXjdiRZo%2BpjTJmhVO59CYgV%2B%2BoJqHYOkYqEotyfNT%2BOYxaMum0qzeOfpWjrApXqNyrBkkrowu6Ng85eDNjmZTO0DZWGJICZm%2FiBEi%2B6sewE3aGBLdTGiaKhmbDsKjuNUNcBUwZfYfjBt0OwAASJZrC6C%2BjVSfiLjgBOaeY8pXqKmZ1X218AjX6vVzCSnriYvsLY1fmgCdu66ifTUU5GL5NqyDmS76UVO1PoviFSctLBU%2B%2BJtQBWwk%

The BAM files need to be indexed.

In [6]:
!samtools index {output}/dnase_stam_encode.bam
!samtools index {output}/dnase_stam_roadmap.bam

Prepare the JunD peaks

In [6]:
# Sort the downloaded narrowPeak intervals, merge them, and save the result as a BED file.
# The parenthesised chain stays the cell's last expression, so the saved BedTool is displayed.
(
    BedTool(os.path.join(output, 'jund_peaks.narrowPeak.gz'))
    .sort()
    .merge()
    .saveas(os.path.join(output, 'jund_raw_peaks.bed'))
)

<BedTool(../datareprod/jund_raw_peaks.bed)>

Create the regions of interest (ROI)

In [7]:
# Build the regions of interest from the merged peaks:
#   1. extend every peak by 10000 bp on both sides (slop, using the hg38 chromosome sizes),
#   2. sort and merge the extended intervals,
#   3. subtract the blacklisted regions,
#   4. save the result as a BED file.
(
    BedTool(os.path.join(output, 'jund_raw_peaks.bed'))
    .slop(b=10000, g=os.path.join(output, 'hg38.chrom.sizes'))
    .sort()
    .merge()
    .subtract(os.path.join(output, 'hg38.blacklisted.bed'))
    .saveas(os.path.join(output, 'roi_jund_extended.bed'))
)



<BedTool(../datareprod/roi_jund_extended.bed)>

In [8]:
# Run janggu-trim on the ROI file and write the result to trim_roi_jund_extended.bed.
# NOTE(review): -divby 200 presumably trims each region so that its length is
# divisible by 200 — confirm against the janggu-trim help text.
!janggu-trim {output}/roi_jund_extended.bed {output}/trim_roi_jund_extended.bed -divby 200

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
