#!/bin/bash

# Merge and threshold hotspots from pass 1.  These will be used in the second pass.

# Run parameters. The _NAME_ tokens are placeholders, presumably substituted
# with real values before this script is run.

# Minimum z-score threshold; a hotspot is kept only if its score is strictly
# greater than this value.
thresh=_THRESH_
# Hotspots within this distance of each other will be merged.
maxWin=_MAX_WIN_
# Half of the merge window: every hotspot is widened by this much on each
# side before merging (bc performs integer division at its default scale).
pad=$(echo "${maxWin}/2" | bc)
# Minimum hotspot width (bp).
minSize=_MINSIZE_
# Set to "T" to check whether these results have already been computed.
check=_CHECK_
# Chromosome name searched for by those checks.
chkchr=_CHKCHR_

# Path of the tags file; only its basename is used to build file names.
tags=_TAGS_

# Quoted (and "--"-terminated) so that an unusual path cannot be word-split,
# glob-expanded or mistaken for an option.
name=$(basename -- "$tags")
# Directory holding the pass-1 results.
dir="${name}-pass1"
# Pass-1 hotspot table (input).
hot="${name}.hotspot.out"
# Thresholded hotspots (intermediate output).
out1="${name}.wgt${minSize}.zgt${thresh}.wig"
# Merged, thresholded hotspots (final output).
out2="${name}.merge${maxWin}.wgt${minSize}.zgt${thresh}.wig"

echo "Merging and thresholding hotspots..."

# Everything below reads and writes files relative to the pass-1 directory.
# Abort if it cannot be entered, so that nothing is created or deleted in the
# wrong place.
cd "$dir" || { echo "Cannot change to directory $dir; aborting." >&2; exit 1; }

# Without the pass-1 hotspot table there is nothing to do.
if [[ ! -e "$hot" ]]; then
    echo "$hot does not exist; Check results. Skipping"
    cd ..
    exit
fi

if [[ "$check" == "T" ]]; then
    # The first column of the input must mention the check chromosome.
    if ! cut -f1 "$hot" | grep -q -- "$chkchr"; then
        echo "$hot does not contain data for $chkchr; Problem? Skipping."
        cd ..
        exit
    fi

    # Skip the work if the merged output already has data for that chromosome.
    # NOTE(review): the check above searches for "$chkchr" while this one
    # searches for "chr$chkchr"; whether both patterns can match depends on
    # the form of _CHKCHR_ and of the chromosome names in $hot -- confirm.
    if [[ -e "$out2" ]] && grep -q -- "chr$chkchr" "$out2"; then
        echo "$out2 already computed; skipping"
        cd ..
        exit
    fi
fi

# Start the thresholded track file with its header line, replacing any
# previous contents.
echo "track visibility=dense name=${name}_zgt$thresh" > "$out1"

echo "extracting hotspot data..."
# Skip the first line of the hotspot table, then keep rows whose width
# ($7 - $6 + 1) is at least minSize and whose score ($8) exceeds thresh.
# Each kept row is written as: chr<$1>, $6, $7 + 1, id-<row number>, $8.
# Rows containing "inf" or "nan" (any case) are dropped after sorting.
awk -v minSize="$minSize" -v thresh="$thresh" '
    NR > 1 && ($7 - $6 + 1) >= minSize && $8 > thresh {
        print "chr" $1 "\t" $6 "\t" ($7 + 1) "\tid-" NR "\t" $8
    }' "$hot" \
    | sort-bed - \
    | grep -vi -e inf -e nan \
    >> "$out1"

echo "merging..."
# 1. Skip the track header line and widen every interval by pad on both
#    sides, clamping the start at 0.
# 2. Merge the widened intervals with setops -m (presumably the BEDOPS merge
#    operation -- confirm against the installed version).
# 3. Remove the padding again. A start of 0 is left at 0, because the
#    amount clamped away in step 1 is not known at this point.
# The result is sorted and stored in the scratch file new.bed.
awk -v pad="$pad" '
    NR > 1 {
        left = $2 - pad
        if (left < 1) left = 0
        print $1 "\t" left "\t" ($3 + pad)
    }' "$out1" \
    | setops -m - \
    | awk -v pad="$pad" '
    {
        left = ($2 == 0) ? 0 : $2 + pad
        print $1 "\t" left "\t" ($3 - pad)
    }' \
    | sort-bed - \
    > new.bed

# Start the merged track file with its header line, replacing any previous
# contents.
echo "track visibility=dense name=${name}_merge_${maxWin}_zgt$thresh" > "$out2"

echo "recovering z-scores for merged hotspots..."
# Feed the thresholded hotspots (minus their track header) to signalmap -max
# against the merged intervals in new.bed, paste its output next to those
# intervals, and keep the first four columns.
awk 'NR > 1' "$out1" \
    | signalmap -max new.bed - \
    | paste new.bed - \
    | cut -f1-4 \
    >> "$out2"

# Remove the scratch file holding the merged intervals.
rm -- new.bed

cd ..
