#! /bin/bash

## Pass 2 hotspots: locate peaks shadowed by the pass-1 hotspots.
## Hotspot is re-run on the tags that fall within 50kb of, but outside,
## the z-score-thresholded hotspots called in pass 1.

## Configuration.  The _NAME_ tokens below appear to be placeholders that are
## substituted before this script is run (NOTE(review): confirm against the
## templating step); they are intentionally left untouched.

# Not referenced anywhere else in this script.
gnom=_GENOME_

# Tags file; its basename names the pass-1 and pass-2 directories.
tags=_TAGS_
# Program for uncompressing tags file.  If file is already uncompressed, use "cat".
uncompr=_UNCOMPR_

# Regions the 50kb pad set is intersected with.
mappable=_MAPPABLE_FILE_
# File handed to hotspot via -k; this path is used both before and after the
# script changes directory.
umap=_MAPPABLE_10KB_FILE_
# Sum of column 3 of $umap; handed to hotspot as -bckgnmsize.
mpblgenome=$(awk '{t+=$3}END{print t}' "$umap")
# Hotspot executable.
hotspot=_HOTSPOT_
# "T" enables the reuse-existing-output check; $chkchr is the chromosome
# suffix (matched as chr$chkchr) looked for in that existing output.
check=_CHECK_
chkchr=_CHKCHR_

# Duplicate tags OK?  (Set to T if yes - for DNaseI data, for instance; anything else - for ChIP, for instance - means no.)
dupok=_DUPOK_

echo "Hotspot pass 2..."

# All pass-2 files live in a directory named after the tags file.
name=$(basename "$tags")
outd="${name}-pass2"
mkdir -p "$outd"
# Abort rather than scatter output files in the wrong directory.
cd "$outd" || { echo "cannot cd to $outd" >&2; exit 1; }

# With check=T, reuse an existing result, but only if it mentions the check
# chromosome (chr$chkchr).  An output file lacking it is treated as
# incomplete and is recomputed.
# NOTE(review): the original skipped when the chromosome was *absent*, and
# used `continue`, which is a no-op error outside a loop; this assumes the
# script is executed, not sourced from inside a loop.
if [[ "$check" == "T" && -e "$name.pass2.hotspot.out" ]] \
    && grep -q "chr$chkchr" "$name.pass2.hotspot.out"; then
    echo "$name pass 2 already computed; skipping"
    cd ..
    exit 0
fi

# Inputs produced by pass 1: the thresholded hotspot file and the total tag
# count recorded in the pass-1 stdout log.
hot="../${name}-pass1/$name.merge150.wgt10.zgt2.wig"
ntag=$(grep TotalTagCount "../${name}-pass1/$name.stdout" | cut -d" " -f2)
# An empty count would silently shift hotspot's argument list later on.
if [[ -z "$ntag" ]]; then
    echo "could not read TotalTagCount from ../${name}-pass1/$name.stdout" >&2
    exit 1
fi

# Tag library restricted to the pad regions, in hotspot and bed form.
lib="$name.50kbpad.nohotspot.lib.txt"
libbed="$name.50kbpad.nohotspot.lib.bed"

## Take 50kb window on either side of each hotspot,
## merge,
## subtract off the hotspots,
## intersect with the mappable regions of the genome.
echo "generating 50kb pad set..."
# The first line of $hot is skipped (NR>1).  For every other line, emit the
# left flank (clamped at 0) and the right flank as separate intervals.
awk -v pad=50000 'BEGIN { OFS = "\t" }
    NR > 1 {
        left = $2 - pad
        if (left < 0) left = 0
        print $1, left, $2
        print $1, $3, $3 + pad
    }' "$hot" \
    | setops -m - \
    | setops -d - "$hot" \
    | setops -i - "$mappable" \
    > "$name.50kbpad.nohotspot.bed"

## Now find the tags falling in these regions and run hotspot on the results.
echo "extracting tags..."

# Pass tags through unchanged when duplicates are allowed (dupok=T);
# otherwise collapse runs of identical adjacent lines.
dedup_tags() {
    if [[ "$dupok" == "T" ]]; then
        cat
    else
        uniq
    fi
}

# $uncompr is deliberately left unquoted: it may be a command plus options.
# Each tag is rewritten as a 1bp interval starting at column 2; the bed form
# is saved to $libbed on its way to bed2hotspot, whose output becomes $lib.
$uncompr "$tags" \
    | setops -e - "$name.50kbpad.nohotspot.bed" \
    | awk '{print $1"\t"$2"\t"$2+1"\tI\t1"}' \
    | dedup_tags \
    | tee "$libbed" \
    | bed2hotspot - \
    > "$lib"
echo "running hotspot..."
# Arguments are quoted so that an empty $ntag or $mpblgenome stays in its own
# slot instead of disappearing and shifting the options that follow it.
"$hotspot" -fuzzy -range 200 300 50 \
    -o "$name.pass2.hotspot.out" \
    -i "$lib" \
    -k "$umap" \
    -gendw \
    -bckntags "$ntag" \
    -bckgnmsize "$mpblgenome" \
    > "$name.pass2.stdout"
hotspot_status=$?
cd ..
# Surface a hotspot failure instead of always finishing with status 0.
if (( hotspot_status != 0 )); then
    echo "hotspot exited with status $hotspot_status; see $outd/$name.pass2.stdout" >&2
    exit "$hotspot_status"
fi
