# CAL51_p300_troubleshoot - chip_seq
This notebook creates all the files, scripts, and folders needed to build a trackhub for the aforementioned project. The end goal is to gather the tracks of the ChIP-seq experiment, along with the files that most downstream analyses require.

---
### Trackhub

In [7]:
%%writefile /data/reddylab/Hazel/troubleshoot/processing/chip_seq/scripts/create_chipseq_tracks.CAL51.py
#!/usr/bin/env python
"""Build the track hub definition for the CAL51 p300 ChIP-seq samples.

For every entry in ``factors`` the script creates one aggregate (transparent
overlay) track holding the per-replicate bigWig signal tracks, plus one bigBed
peak track per replicate.  All of them are grouped under a single supertrack,
and the hub is rendered with hub.txt and trackDb.txt located under OUTDIR.
"""

from trackhub import Hub, GenomesFile, Genome, TrackDb, Track, ViewTrack, \
    SuperTrack, AggregateTrack, CompositeTrack, SubGroupDefinition
import os
from operator import add
from palettable.colorbrewer.sequential import Reds_6_r, Blues_6_r, Greens_6_r, Purples_6_r, Oranges_6_r, Greys_6_r

# One palette per factor; the palette list is cycled if there are more factors.
colors_palettes = [Purples_6_r, Oranges_6_r, Greens_6_r]

OUTDIR = '/data/reddylab/Hazel/troubleshoot/processing/chip_seq/trackhub/CAL51'
URLBASE = 'http://trackhub.genome.duke.edu/reddylab/'
REMOTE_DATA_DIR = '/nfs/trackhub/reddylab/validations/CAL51/'
# NOTE(review): the notebook's symlink cell reads the same file names from
# .../chip_seq/CAL51_p300_ts-se/ -- confirm this is the intended directory.
LOCAL_DATA_DIR = '/data/reddylab/Hazel/troubleshoot/processing/chip_seq/CAL51-se-narrow/'
GENOME = 'hg38'


def data_url(filename):
    """Return the public URL of ``filename`` inside the hub's data directory."""
    return os.path.join(URLBASE,
                        'validations',
                        'CAL51',
                        GENOME,
                        'data',
                        filename)


hub = Hub(
    hub='ChIP_seq_CAL51_p300',
    short_label='ChIP_seq_CAL51_p300',
    long_label='ChIP_seq_CAL51_p300',
    email='xa2@duke.edu')

hub.local_fn = '%s/hub.txt' % OUTDIR
genomes_file = GenomesFile()
genome = Genome(GENOME)
trackdb = TrackDb()
trackdb.local_fn = '%s/%s/trackDb.txt' % (OUTDIR, GENOME)

# Signal files are named like: CAL51.p300.DMSO.rep1.masked.dedup.sorted.rpkm.bw
factors = [
    'DMSO', 'JQ1', 'THZ531'
]
replicates = 3
supertrack = SuperTrack(
    name="CAL51_tracks",
    short_label="CAL51_tracks",
    long_label="CAL51_tracks")

for fi, factor in enumerate(factors):
    # Wrap around the palette list so extra factors do not raise IndexError.
    palette = colors_palettes[fi % len(colors_palettes)]
    # Every replicate of a factor shares the second colour of its palette,
    # formatted as a comma-separated string.
    track_color = ','.join(str(cc) for cc in palette.colors[1])
    factor_label = "%02d_%s" % (fi, factor)

    aggregate_track = AggregateTrack(
        name="aggregated%s" % (factor),
        short_label=factor_label,
        long_label=factor_label,
        tracktype='bigWig',
        showSubtrackColorOnUi='on',
        visibility='full',
        autoScale='on',
        maxHeightPixels='100:32:8',
        alwaysZero='on',
        aggregate='transparentOverlay',
        subgroups={'factor': factor}
    )
    # range() iterates identically to xrange() here and exists on Python 2 and 3.
    for rep in range(1, replicates + 1):
        # Factors whose name contains 'input' contribute only their first replicate.
        if 'input' in factor and rep > 1:
            break
        sample = "CAL51.p300.%s.rep%d.masked.dedup.sorted.rpkm.bw" % (factor, rep)
        sample_name = "%02d_%s_rep%d" % (fi, factor, rep)
        tr = Track(
                name=sample_name,
                short_label=sample_name,
                long_label=sample_name,
                local_fn=os.path.join(LOCAL_DATA_DIR, sample),
                remote_fn=os.path.join(REMOTE_DATA_DIR, sample),
                url=data_url(sample),
                tracktype='bigWig',
                color=track_color,
                visibility='full',
                maxHeightPixels='100:32:8',
            )
        aggregate_track.add_subtrack(tr)

        # Add peaks in bigBed format; the peak file name is derived from the
        # signal file name by swapping the suffix.
        peaks_file = sample.replace("sorted.rpkm.bw", "sorted_peaks.trunked_scores.broadPeak.bb")
        bigbed_peaks = Track(
            name="%s_peaks" % sample_name,
            short_label="%s_peaks" % sample_name,
            long_label="%s_peaks" % sample_name,
            url=data_url(peaks_file),
            tracktype='bigBed 6 .',
            visibility='dense',
            color='0,0,128'
        )
        # Peak tracks sit directly under the supertrack, next to the overlays.
        supertrack.add_track(bigbed_peaks)
    supertrack.add_track(aggregate_track)


# Parenthesised single-argument form prints the same text on Python 2 and 3.
print(supertrack)
trackdb.add_tracks(supertrack)

genome.add_trackdb(trackdb)
genomes_file.add_genome(genome)
hub.add_genomes_file(genomes_file)

hub.render()

Overwriting /data/reddylab/Hazel/troubleshoot/processing/chip_seq/scripts/create_chipseq_tracks.CAL51.py


In [8]:
%%bash
# Activate the "alex" conda environment before submitting
# (presumably the one with trackhub and palettable installed -- confirm).
source /data/reddylab/software/miniconda2/bin/activate alex

BASE_DIR=/data/reddylab/Hazel/troubleshoot/processing/chip_seq

# Create the hub data directory and the log directory up front: Slurm will not
# create a missing directory for the -o log file.
mkdir -p "${BASE_DIR}/trackhub/CAL51/hg38/data" "${BASE_DIR}/logs"

# Submit the track hub generation script as a batch job.
sbatch -o "${BASE_DIR}/logs/trackhub.CAL51.out" \
    -p new,all \
    --wrap="python ${BASE_DIR}/scripts/create_chipseq_tracks.CAL51.py"

Submitted batch job 2630590


In [3]:
%%bash
# Link the bigWig signal files and bigBed peak files into the hub's data directory.
# Abort if the directory is missing so the links are never created elsewhere.
cd /data/reddylab/Hazel/troubleshoot/processing/chip_seq/trackhub/CAL51/hg38/data || exit 1
# -f replaces links left by an earlier run, so the cell can be re-run safely.
ln -sf /data/reddylab/Hazel/troubleshoot/processing/chip_seq/CAL51_p300_ts-se/*rep*.masked.dedup.sorted.rpkm.bw ./
ln -sf /data/reddylab/Hazel/troubleshoot/processing/chip_seq/CAL51_p300_ts-se/*rep*.trunked_scores.broadPeak.bb ./

http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&hubUrl=http://trackhub.genome.duke.edu/reddylab/validations/CAL51/hub.txt

In [9]:
%%bash
# Publish the rendered hub from the transfer node.
# The remote commands are passed through a quoted here-document so they run on
# hardac-xfer explicitly (the "*" glob expands there, not locally); -T skips
# pseudo-terminal allocation, which is not available from a notebook cell.
# The cd guard stops the sync if the hub directory is missing; otherwise "*"
# would match the contents of the remote login directory.
# --copy-links uploads the files behind the data symlinks; --update skips files
# that are newer on the receiving side.
ssh -T hardac-xfer.genome.duke.edu <<'REMOTE'
cd /data/reddylab/Hazel/troubleshoot/processing/chip_seq/trackhub/CAL51 || exit 1
rsync -rvz --copy-links -e ssh --update \
    * \
    trackhub.genome.duke.edu:/nfs/trackhub/reddylab/validations/CAL51
REMOTE


sending incremental file list
ChIP_seq_CAL51_p300.genomes.txt
hub.txt
hg38/trackDb.txt

sent 2074 bytes  received 107 bytes  1454.00 bytes/sec
total size is 171562398  speedup is 78662.26


Pseudo-terminal will not be allocated because stdin is not a terminal.
