#!/bin/bash
# FILE: example.sh
# AUTHOR: <Richard Sandstrom>sull@u.washington.edu

# This script takes the example tagAlign file, extends the tags to the specified length, and counts coverage at nucleotide resolution.


# ---- Configuration ----------------------------------------------------------

# Directory holding the helper binaries (sort-bed, setops, signalmap).
# Remove this variable, and its uses further down, if that directory is
# already on your PATH.
utilities='../bin'

# Input tag data (the example tagAlign file).
tagdata='tag-align.bed'

# Name of the region set the signal is mapped onto. In this example the region
# data is generated from the input coordinates, so this value is only used to
# name the generated map file.
chrombed='regions.bed'

# Output directory.
outdir='.'

# Tag extension length, in bases. For DNase data set this to 1 so that only
# the cleavage site itself is counted.
tagsize='300'

# Smoothing parameters; these values mean "no smoothing". Most of the smoothing
# functionality was removed from this example because it only adds confusion.
# NOTE: window appears to be unused in this example.
bin='1'
window='0'

# Prepare the tag data for signal mapping:
#   1. extend each tag to the fragment size and give it a value of "1"
#   2. lexicographically sort the resulting BED items

#######################################
# Extend tagAlign records read from stdin to a fixed fragment length.
# "+" tags are extended downstream from their start, "-" tags upstream from
# their end. Records with any other strand value are skipped with a warning
# on stderr (they would otherwise be emitted with the previous record's
# coordinates).
# Arguments: $1 - fragment length in bases
# Outputs:   tab-separated chrom, start, end, name, "1" on stdout
#######################################
extend_tags() {
  awk -v tz="$1" '
    BEGIN { FS = "\t"; OFS = "\t" }
    {
      if ($6 == "+") {
        start = $2
        stop = $2 + tz
      } else if ($6 == "-") {
        start = $3 - tz
        stop = $3
      } else {
        msg = "extend_tags: skipping line " NR ": strand is not + or -"
        print msg > "/dev/stderr"
        next
      }
      # A "-" tag closer than tz to the chromosome start would get a negative
      # start coordinate, which is not valid BED; truncate it at 0.
      if (start < 0) {
        start = 0
      }
      print $1, start, stop, $4, "1"
    }
  '
}

extend_tags "$tagsize" < "$tagdata" \
  | "$utilities/sort-bed" - \
  > "$outdir/mapping.$tagsize.$tagdata"

# Merge the extended tags and split the merged regions up to nucleotide
# resolution.

#######################################
# Split BED intervals read from stdin into one-base records, stepping through
# each interval by the given bin size.
# Arguments: $1 - step between emitted positions (must be > 0)
# Outputs:   tab-separated chrom, pos, pos+1 on stdout
# Returns:   non-zero if the step is not a positive number
#######################################
split_regions() {
  awk -v bin="$1" '
    BEGIN {
      # A zero or non-numeric step would never advance the loop below.
      if (bin + 0 <= 0) {
        print "split_regions: bin must be a positive number" > "/dev/stderr"
        exit 1
      }
    }
    {
      for (i = $2; i < $3; i += bin) {
        print $1 "\t" i "\t" i + 1
      }
    }
  '
}

# Read the sorted tags from $outdir, which is where the previous step wrote them.
"$utilities/setops" -m "$outdir/mapping.$tagsize.$tagdata" \
  | split_regions "$bin" \
  > "$outdir/map.$tagsize.$chrombed"

# Log the signalmap invocation; the flags shown match the command run below.
echo "signalmap -sum -% 0 $outdir/map.$tagsize.$chrombed $outdir/mapping.$tagsize.$tagdata"

#######################################
# Turn "chrom start end value" records read from stdin into the final output
# format. A "NAN" in the value column is replaced by 0; the substitution is
# limited to that column so a chromosome name containing "NAN" is untouched.
# Outputs: tab-separated chrom, start, end, id-<record number>, value
#######################################
format_signal() {
  awk '
    BEGIN { OFS = "\t" }
    {
      sub(/NAN/, "0", $4)
      print $1, $2, $3, "id-" NR, $4
    }
  '
}

# Sum tag coverage counts: signalmap emits one value per map record, which is
# pasted next to the map coordinates before formatting.
"$utilities/signalmap" -sum -% 0 \
    "$outdir/map.$tagsize.$chrombed" "$outdir/mapping.$tagsize.$tagdata" \
  | paste "$outdir/map.$tagsize.$chrombed" - \
  | cut -f1-4 \
  | format_signal \
  > "$outdir/mapped.$tagsize.$tagdata"

# Intermediate files will need cleaning up; they are left in place for instructional value. The final output is the "mapped.$tagsize.$tagdata" file.
