#! /bin/bash

## Re-score first and second-pass hotspots using the same background.
##
## NOTE(review): the right-hand sides of the first assignments below look
## like template placeholders that are presumably filled in before the
## script is run -- confirm with whatever generates this file.  Those
## assignment lines are deliberately left untouched.

## Genome name (not referenced in the visible part of this script).
gnom=_GENOME_

## Tags file; only its basename is used, to derive directory and file names.
tags=_TAGS_

## Mappable-regions file, used later to count mappable bases per hotspot.
mappable=_MAPPABLE_FILE_
## File whose third column is summed to obtain the mappable genome size.
umap=_MAPPABLE_10KB_FILE_
mpblgenome=$(awk '{t+=$3}END{print t}' "$umap")
## Hotspots are kept only if their column-8 score exceeds thresh and their
## column 6-7 extent is at least minSize.
thresh=_THRESH_
minSize=_MINSIZE_

## When check is "T", input and existing output are tested for the presence
## of chromosome chkchr before any work is done.
check=_CHECK_
chkchr=_CHKCHR_

echo "Hotspot rescoring both passes..."

name=$(basename "$tags")
outd=$name-both-passes
mkdir -p "$outd"
## Everything below writes scratch and result files relative to the current
## directory, so stop here rather than scatter them somewhere else.
cd "$outd" || { echo "Cannot change into $outd" >&2; exit 1; }

## Inputs produced by the two earlier passes (sibling directories).
pass1dir=../${name}-pass1
pass2dir=../${name}-pass2
pass1hot=$pass1dir/$name.hotspot.out
pass2hot=$pass2dir/$name.pass2.hotspot.out
libbed=$pass2dir/$name.50kbpad.nohotspot.lib.bed
## Second space-separated field of the TotalTagCount line in the pass-1 log.
ntag=$(grep TotalTagCount "$pass1dir/$name.stdout" | cut -d" " -f2)
bckmappable=$pass2dir/$name.50kbpad.nohotspot.bed
## Output names.
wign=$name.hotspot.twopass.zscore
zwig=$wign.wig
outp=$name.hotspot.twopass.pval.txt

## Pre-flight checks.  Every early exit first steps back out of the output
## directory and then exits with the status of that `cd ..`.

## The second-pass hotspot file is required input.
if [[ ! -e "$pass2hot" ]]; then
    echo "$pass2hot does not exist; Check results. Skipping"
    cd ..
    exit
fi

## The remaining checks only apply when checking is switched on.
if [[ "$check" == "T" ]]; then
    ## Column 1 of the second-pass file must mention the check chromosome
    ## (matched as a regular expression, without a "chr" prefix).
    pass2_hit=$(cut -f1 "$pass2hot" | grep -- "$chkchr" | head -1)
    if [[ -z "$pass2_hit" ]]; then
        echo "$pass2hot does not contain data for chr$chkchr; Problem? Skipping."
        cd ..
        exit
    fi

    ## Skip only when an existing z-score track really contains data for the
    ## check chromosome.  A track without such data (for example one that
    ## holds just the header line) is treated as incomplete and recomputed.
    if [[ -e "$zwig" ]]; then
        wig_hit=$(grep -- "chr$chkchr" "$zwig" | head -1)
        if [[ -n "$wig_hit" ]]; then
            echo "$zwig already computed; skipping"
            cd ..
            exit
        fi
    fi
fi

      ## Collect the hotspots from both passes that are worth scoring.
      ## tmp.bed holds seven columns per kept hotspot: chromosome, padded start,
      ## padded end, an id built from the input line number, the column-3
      ## count, and the column 6-7 extent (end shifted by one).
cat $pass1hot $pass2hot \
    | awk -v minSize=$minSize -v thresh=$thresh '
        # The very first input line is never used.  A record is kept when its
        # column 6-7 extent is at least minSize and column 8 exceeds thresh.
        NR > 1 && ($7 - $6 + 1) >= minSize && $8 > thresh {
            # Pad by half of column 5 on either side of column 2, clamping
            # the start at zero.
            half = int(.5 + $5 / 2)
            lo = $2 - half
            if (lo < 0) {
                lo = 0
            }
            print "chr" $1 "\t" lo "\t" ($2 + half) "\tid-" NR "\t" $3 "\t" $6 "\t" ($7 + 1)
        }' \
    | sort-bed - \
    | grep -vi inf \
    | grep -vi nan \
    > tmp.bed
      ## Columns 1-5 are the intervals that get scored; columns 6-7 are kept
      ## aside, line for line, to be reported as coordinates later.
cut -f1-5 tmp.bed > hotspot.both.passes.cluster.counts.bed
cut -f6-7 tmp.bed > hotspot.start.stop.txt
      ## Flank intervals: 25000 bases immediately left and right of every padded
      ## hotspot.  The left flank is clamped so it never starts below zero.
awk -v flank=25000 '{
    lo = $2 - flank
    if (lo < 0) {
        lo = 0
    }
    print $1 "\t" lo "\t" $2
}' hotspot.both.passes.cluster.counts.bed > left.flank.bed
awk -v flank=25000 '{
    print $1 "\t" $3 "\t" ($3 + flank)
}' hotspot.both.passes.cluster.counts.bed > right.flank.bed
      ## Per flank: the count of background tags, and the number of *background*
      ## mappable bases (signalmap is used just to get the number of bps
      ## overlapping).
for side in left right; do
    signalmap -sum $side.flank.bed $libbed | cut -f1 > $side.flank.bgtags.txt
    signalmap -max -% 0 $side.flank.bed $bckmappable | cut -f2 > $side.flank.mappable.txt
done
      ## Mappable bases inside the hotspots themselves.  This uses the full
      ## mappable file rather than the background-only one, because background
      ## bases may not be part of the hotspots being scored.
signalmap -max -% 0 hotspot.both.passes.cluster.counts.bed $mappable | cut -f2 > hotspot.mappable.txt
      ## Compute z-scores.  Columns of the pasted table:
      ##   1-5   chromosome, padded start, padded end, id, hotspot tag count
      ##   6-7   background tags in the left / right flank
      ##   8-9   background mappable bases in the left / right flank
      ##   10    mappable bases in the hotspot
      ##   11-12 hotspot start / stop to report
      ## tmp.all keeps every output column; tmp.bed keeps the first five.
paste hotspot.both.passes.cluster.counts.bed \
      left.flank.bgtags.txt right.flank.bgtags.txt \
      left.flank.mappable.txt right.flank.mappable.txt \
      hotspot.mappable.txt hotspot.start.stop.txt \
    | awk -v ntags=$ntag -v mpblgnm=$mpblgenome '
    {
        # Local background: binomial over hotspot plus both flanks.
        localBases = $8 + $9 + $10
        if (localBases == 0) {
            frac = 0
        } else {
            frac = $10 / localBases
        }
        localTags = $5 + $6 + $7
        mean = localTags * frac
        sdev = (localTags * frac * (1 - frac)) ^ .5
        if (sdev == 0) {
            zLocal = "NA"
        } else {
            zLocal = ($5 - mean) / sdev
        }

        # Genome-wide background: binomial over all tags and all mappable bases.
        fracGw = $10 / mpblgnm
        meanGw = ntags * fracGw
        sdevGw = (ntags * fracGw * (1 - fracGw)) ^ .5
        if (sdevGw == 0) {
            zGw = "NA"
        } else {
            zGw = ($5 - meanGw) / sdevGw
        }

        # Report the smaller of the two scores; rows where either score is
        # undefined produce no output.
        if (zLocal != "NA" && zGw != "NA") {
            score = zGw
            if (zLocal < zGw) {
                score = zLocal
            }
            print $1 "\t" $11 "\t" $12 "\tID\t" score "\t" frac "\t" fracGw "\t" localTags "\t" $5
        }
    }' \
    | tee tmp.all \
    | cut -f1-5 \
    > tmp.bed
## Write the z-score track: a header line followed by one row per interval of
## tmp.merge.bed with a signalmap -max value appended.
## NOTE(review): setops -m presumably merges the scored intervals, so each
## merged interval gets the highest z-score among its members -- confirm.
setops -m tmp.bed > tmp.merge.bed
{
    echo "track type=wiggle_0 visibility=full name=$wign"
    signalmap -max -% 0 tmp.merge.bed tmp.bed | cut -f1 | paste tmp.merge.bed -
} > $zwig

## Remove the scratch files created above, then leave the output directory.
scratch=(
    # hotspot lists
    hotspot.both.passes.cluster.counts.bed
    hotspot.start.stop.txt
    hotspot.mappable.txt
    # flank intervals and their statistics
    left.flank.bed
    right.flank.bed
    left.flank.bgtags.txt
    right.flank.bgtags.txt
    left.flank.mappable.txt
    right.flank.mappable.txt
    # z-score intermediates
    tmp.all
    tmp.bed
    tmp.merge.bed
)
rm "${scratch[@]}"
cd ..