#! /bin/bash

# Run first pass of hotspot.

# Settings for this run.
# NOTE(review): each _UPPERCASE_ value below looks like a placeholder that is
# meant to be replaced with a real value before the script is run -- confirm
# against whatever generates this file.

# Genome setting.
# NOTE(review): gnom is not referenced anywhere else in the visible part of
# this script.
gnom=_GENOME_

# Tags file, in bed format.  May be compressed (see uncompr below).
tags=_TAGS_

# Program for uncompressing the tags file.  If the file is already
# uncompressed, use "cat".
uncompr=_UNCOMPR_

# Hotspot requires a "lib" file, not a bed file.  The lib file is created by
# this script from the original bed file if it does not already exist (or is
# empty), and goes in the following directory.  Its name is the base name of
# the tags file with the added extension lib.filter.txt or
# lib.filter.nodup.txt, the latter being chosen if variable dupok is not "T".
libd=_LIBD_

# Chromosome start, stop positions.  Passed to setops together with the tags
# when the lib file is built.
chroms=_CHROM_FILE_

# Hotspot binary
hotspot=_HOTSPOT_

# A count of uniquely-mappable bases in 10kb intervals, used for background
# calculations in hotspot.  Column 3 of this file is summed below to get the
# total mappable genome size, and the file itself is handed to hotspot via -k.
umap=_MAPPABLE_10KB_FILE_

# Total number of uniquely mappable bases in genome: the sum of column 3 over
# every line of the mappability file.  Stop here if the file cannot be read or
# the total is unusable, rather than letting hotspot run later with a missing
# or meaningless -bckgnmsize value.
if [[ ! -r "$umap" ]]; then
    echo "error: cannot read mappability file '$umap'" >&2
    exit 1
fi
# printf "%.0f" writes the total as plain digits instead of relying on how a
# particular awk formats large numbers with print (assumes column 3 holds
# whole-number counts, as the description of umap says).
if ! mpblgenome=$(awk '{ t += $3 } END { printf "%.0f\n", t }' "$umap"); then
    echo "error: failed to sum mappable bases in '$umap'" >&2
    exit 1
fi
if [[ -z "$mpblgenome" || "$mpblgenome" == 0 ]]; then
    echo "error: no mappable bases found in '$umap'" >&2
    exit 1
fi

# Duplicate tags OK?  (Set to T if yes - for DNaseI data, for instance; anything else - for ChIP, for instance - means no.)
dupok=_DUPOK_

# Base name of the tags file; the lib file and the pass-1 outputs are named
# after it.
name=$(basename -- "$tags")

# Choose the lib file name.  Only the exact value "T" selects the
# duplicates-allowed lib; any other value (including an empty one) selects the
# duplicate-free lib.  The test is quoted so that an empty or multi-word dupok
# is simply "not T" instead of a test syntax error.
if [[ "$dupok" == "T" ]]; then
    lib=$libd/$name.lib.filter.txt
else
    lib=$libd/$name.lib.filter.nodup.txt
fi

echo "Hotspot pass 1..."

# All pass-1 output goes in a directory named after the tags file.
outd=$name-pass1

# The work is done in a subshell so that the caller's working directory is
# never changed, even when a step fails part-way through (this replaces the
# former "cd $outd ... cd .." pair).  The subshell's exit status is non-zero
# if the output directory cannot be created or entered, if the lib file
# cannot be built, or if hotspot itself exits non-zero.
#
# NOTE(review): $lib, $tags, $chroms and $umap are used from inside $outd, so
# relative paths would be resolved against $outd rather than the starting
# directory.  Presumably they are expected to be absolute -- confirm.
(
    # Make a failure in any stage of the lib-building pipeline visible.
    set -o pipefail

    mkdir -p -- "$outd" || exit 1
    cd -- "$outd" || exit 1

    # -s is false for a missing file as well as for an empty one.
    if [[ ! -s "$lib" ]]; then
        echo "creating lib file..."

        # Build into a temporary file next to the target and rename it only
        # when every stage succeeded, so a failed or interrupted run cannot
        # leave a truncated lib file that a later run would silently reuse.
        tmp_lib=$lib.tmp.$$

        # $uncompr is deliberately left unquoted: it names a program and may
        # carry arguments (presumably values such as "gunzip -c" -- confirm).
        # The awk stage keeps column 1 minus its first three characters
        # (presumably a "chr" prefix -- confirm) and column 2.
        if [[ "$dupok" == "T" ]]; then
            $uncompr "$tags" \
                | setops -e -0% - "$chroms" \
                | awk '{print substr($1, 4), $2}' - > "$tmp_lib"
        else
            # Duplicates not allowed: drop adjacent identical lines before
            # reducing each line to its two output columns.
            $uncompr "$tags" \
                | setops -e -0% - "$chroms" \
                | uniq - \
                | awk '{print substr($1, 4), $2}' - > "$tmp_lib"
        fi
        lib_status=$?

        if (( lib_status != 0 )) || ! mv -f -- "$tmp_lib" "$lib"; then
            rm -f -- "$tmp_lib"
            echo "error: failed to create lib file '$lib'" >&2
            exit 1
        fi
    fi

    # Run hotspot on the lib file.  Its standard output is captured in
    # $name.stdout, and its exit status becomes the status of this subshell.
    "$hotspot" -range 200 300 50 -o "$name.hotspot.out" -i "$lib" -k "$umap" \
        -gendw -bckgnmsize "$mpblgenome" > "$name.stdout"
)
