#!/bin/csh -ef
#############################
# Author:  sjn
# Date:    June 2007
# Project: Please Save Space
#############################


# Check inputs
#  At most two optional arguments are accepted: [unique-tag] [bed-file].
#  The usage text goes to stderr and the script stops with a non-zero status,
#  so a misuse is not reported as success and does not end up in whatever
#  stdout was redirected to.
if ( $#argv > 2 ) then
  sh -c 'printf "\n\t[<unique-tag>] [<bed-file>]\n" 1>&2'
  exit 1
endif


# Locals
#  machID  : node name of this machine (second field of uname -a)
#  uniqTag : suffix given to every per-chromosome output file;
#            defaults to pid<PID>.<machID>
#  bedFile : input to convert; "-" is handed to awk unchanged
#
# Argument handling:
#  0 args : defaults only
#  1 arg  : used as the input file when it is "-" or an existing path,
#           otherwise it is taken as the unique tag and the input stays "-"
#  2 args : first is the unique tag, second is the input file, which must
#           be "-" or an existing path
# All expansions are quoted so paths containing blanks or glob characters
#  stay single words.
set machID = `uname -n`
set uniqTag = "pid$$.$machID"
set bedFile = "-"
if ( $#argv == 1 ) then
  set bedFile = "$1"
  if ( "$bedFile" != "-" ) then
    if ( ! -e "$bedFile" ) then
      set uniqTag = "$1"
      set bedFile = "-"
    endif
  endif
else if ( $#argv > 1 ) then
  set uniqTag = "$1"
  set bedFile = "$2"
  if ( "$bedFile" != "-" ) then
    if ( ! -e "$bedFile" ) then
      sh -c 'printf "\n\tCannot find %s\n" "$1" 1>&2' sh "$bedFile"
      exit 1
    endif
  endif
endif


# Convert ; p25 means probe length 25
#
# Input records are tab-separated: field 1 = chromosome, field 2 = start,
#  field 3 = end; when a record has at least 5 fields, field 5 is carried
#  along as a value.
# For every run of consecutive records sharing the same field 1, awk appends
#  to the file <chrom>.<uniqTag>:
#    p<N>             whenever the record length N = (end - start) differs
#                     from that of the previous record in the run (always
#                     written for the first record of a run)
#    <pos>[TAB<val>]  one line per record; <pos> is the start for the first
#                     record of a run and (start - previous end) afterwards
# The chromosome name is printed to stdout each time a new run begins; those
#  names are sorted and collected in chroms.
#
# LC_ALL takes precedence over LANG, so both are forced to POSIX; with LANG
#  alone an inherited LC_ALL could still give sort a locale-specific order.
# uniqTag and bedFile are quoted so that each reaches awk as one argument.
setenv LANG POSIX
setenv LC_ALL POSIX
set chroms = `awk -v u="$uniqTag" \
    'BEGIN {FS="\t";OFS="\t";d="";last="";lcdiff="";out=""} ; \
    { \
      if ($1 != d) { \
        print $1; \
        close(out); \
        out=$1"."u; \
        d = $1; \
        last = ""; \
        lcdiff=""; \
      } ; \
      { \
        coorddiff = ($3-$2); \
        if ( coorddiff != lcdiff ) { \
          lcdiff = coorddiff; \
          print "p"coorddiff >> out; \
        } \
        if ( last != "" ) { \
          if ( NF >= 5 ) { \
            print ($2-last), $5 >> out \
          } else { \
            print ($2-last) >> out \
          } \
        } \
        else { \
          if ( NF >= 5 ) { \
            print $2, $5 >> out \
          } else { \
            print ($2-last) >> out \
          } \
        } \
        last = $3; \
      } ; \
    }' "$bedFile" \
  | sort -`

# Nothing was converted (empty input): stop with a clear message rather than
#  handing bzip2 a file name that does not exist
if ( $#chroms == 0 ) then
  sh -c 'printf "\n\tNo records found in %s\n" "$1" 1>&2' sh "$bedFile"
  exit 1
endif

# Per-chromosome file names, before and after compression, kept in the
#  sorted order of chroms
set rawFiles = ()
set zipFiles = ()
foreach chrom ( $chroms )
  set rawFiles = ( $rawFiles ${chrom}.${uniqTag} )
  set zipFiles = ( $zipFiles ${chrom}.${uniqTag}.bz2 )
end

# While tar has -j option, it does not zip then tar, it tars then compresses
#  --> lose direct file access capabilities
# Each file is therefore compressed on its own first
bzip2 -9 $rawFiles

# This tar cmd requires lexicographical ordering for proper usage later
# No -f is given, so the archive is written to the default destination of tar;
#  the compressed members are removed once they have been added
tar --preserve-order --preserve-permissions --remove-files -c $zipFiles

exit 0
