// Files included
#include "Assertion.hpp"
#include "FeatBedReader.hpp"
#include "Input.hpp"
#include "StandardFiles.hpp"


//=========================================================
// See README for details on how this program can be used.
//=========================================================


namespace { // unnamed

//===============
// local globals
//===============
// Per-input-file state.  Each vector below is indexed by file number
// (REF or NONREF) and receives one entry per input file in main().

// Coordinates returned by the most recent ReadLine() on each file.
std::vector<FeatDist::PType> currentCoords;
// Readers obtained from Input::NextBed() in main(); released by cleanup().
std::vector<FeatDist::BedReader*> bedFiles;
// Set to true by getNextFileLine() once a file reports no further lines.
std::vector<bool> doneFile;
// Chromosome of the most recently read line of each file.
std::vector<std::string> chrom;
// Strand of the most recently read line of each file.
std::vector<FeatDist::StrandPolarity> strand;
// ID of the most recently read line; only refreshed when includeID is set.
std::vector<std::string> idField;
// When true, every output line carries an extra ID column (set in main()).
bool includeID = false;
// Largest unsigned char value; used to build the NOCHROM placeholder.
const unsigned char UCBIGGEST = std::numeric_limits<unsigned char>::max();

} // unnamed



namespace FeatDist {

//============================================
// Define externs from FeatureDistanceDefns.h
//============================================
const PType NADA = std::make_pair(0, 0);
const std::string NOCHROM = std::string(20, UCBIGGEST);
const std::string NOID = "no-id";
typedef std::pair<Direction, std::pair<unsigned long, std::string> > CrazyPair;
const CrazyPair NOTINRANGE =
         std::make_pair(UPSTREAM, std::make_pair(
                        std::numeric_limits<unsigned long>::max(), NOID
                                                )
                       );

const CrazyPair NOGO = std::make_pair(DOWNSTREAM, NOTINRANGE.second);
const char SEP = '\t';
const std::string CHROMOSOME = "chr";
const std::string USESTDIN = "use standard input";
static const int REF    = 0;
static const int NONREF = 1;


//============
// Prototypes
//============
void cleanup();
void doDistance(int i, const Input&);
bool getNextFileLine(int i);
std::pair<Direction, std::pair<unsigned long, std::string> >
            nextDistanceLine(int, std::list<TType>&, const Input&);
void record(bool, unsigned long);
void record(const std::string&, long, const std::string&);


} // namespace FeatDist


//========
// main()
//========
// Entry point.  Parses the command line, obtains a reader for each of
// the two input files, validates column counts against the requested
// options, and hands off to doDistance().  Returns 0 on success or
// after a help request and 1 after any other caught exception.
// cleanup() runs in every case.
int main(int argc, char** argv) {
  using namespace FeatDist;

  bool failed = false;
  try {

    // Parse arguments; exactly two input files are accepted
    Input input(argc, argv);
    const int fileCount = input.NumberFiles();
    const std::string needTwoFiles = "Expect Master File and Comparison File";
    Assert<InputError>(2 == fileCount, needTwoFiles);
    includeID = input.IncludeClosestID();

    // Reserve one slot of per-file state for each input
    int slot = 0;
    while ( slot < fileCount ) {
      bedFiles.push_back(input.NextBed());
      doneFile.push_back(false);
      currentCoords.push_back(NADA);
      chrom.push_back(NOCHROM);
      strand.push_back(PLUS);
      idField.push_back(NOID);
      ++slot;
    } // while

    // Options that need particular columns must find them in the inputs
    const long refColumns = bedFiles[REF]->NumberApplicableColumns();
    const long nonRefColumns = bedFiles[NONREF]->NumberApplicableColumns();
    const bool wantStrands = input.StrandCompare();
    const std::string needSix =
      "Must have >= 6 fields in input bed files for -p option";
    Assert<InputError>(!(wantStrands && refColumns < 6), needSix);
    Assert<InputError>(!(wantStrands && nonRefColumns < 6), needSix);
    const std::string needFour =
      "Must have >= 4 fields in Comparator File for -c option";
    Assert<InputError>(!(includeID && nonRefColumns < 4), needFour);

    // Produce the output
    doDistance(fileCount, input);
  } catch(HelpException& he) {
    // A help request is reported but is not a failure
    std::cerr << he.GetMessage() << std::endl;
  } catch(InputError& ie) {
    failed = true;
    std::cerr << "Input Error Detected" << std::endl;
    std::cerr << ie.GetMessage() << std::endl;
  } catch(ProgramError& pe) {
    failed = true;
    std::cerr << "Program Error Detected" << std::endl;
    std::cerr << pe.GetMessage() << std::endl;
  } catch(std::exception& err) {
    failed = true;
    std::cerr << "Error: " << err.what() << std::endl;
  } catch(...) {
    failed = true;
    std::cerr << "Unknown Error - is input in .bed format?" << std::endl;
  }
  cleanup();
  return failed ? 1 : 0;
}



// Function implementations
namespace FeatDist {

//===========
// cleanup()
//===========
void cleanup() {
  std::vector<BedReader*>::iterator i = bedFiles.begin(),
                                    j = bedFiles.end();
  while ( i != j ) {
    if ( *i )
      delete(*i);
    ++i;
  } // while
}

//==========
// record()
//==========
// Writes one result line to standard output: the distance, preceded by
// a minus sign when the direction is UPSTREAM and the distance is
// nonzero, followed (only when includeID is set) by SEP and the ID.
void record(Direction d, unsigned long diff, const std::string& id) {
  const bool showMinus = (d == UPSTREAM) && (diff != 0);
  if ( showMinus )
    std::cout << '-';

  if ( includeID )
    std::cout << diff << SEP << id;
  else
    std::cout << diff;
  std::cout << std::endl;
}

//==========
// record()
//==========
// Writes one "no result" line to standard output: s immediately
// followed by chromosome, then (only when includeID is set) SEP and
// the literal NAN in place of an ID.
void record(const std::string& s, const std::string& chromosome) {
  std::ostream& out = std::cout;
  out << s << chromosome;
  if ( !includeID ) {
    out << std::endl;
    return;
  }
  out << SEP << "NAN" << std::endl;
}

//==============
// doDistance()
//==============
void doDistance(int numFiles, const Input& in) {
  bool done = false;
  std::string noData = "no-data-chr";
  std::pair< Direction, std::pair<unsigned long, std::string> > value;
  std::list<TType> q;
  bool strandCompare = in.StrandCompare();

  // Check for actual coords in each input file
  bool hasCoords = getNextFileLine(REF);
  Assert<InputError>(hasCoords, "Empty input file");
  hasCoords = getNextFileLine(NONREF);
  Assert<InputError>(hasCoords, "Empty input file");

  // Stick first value into q
  TType tmp;
  tmp.first               = chrom[NONREF];
  tmp.second.first.first  = strand[NONREF];
  tmp.second.first.second = idField[NONREF];
  tmp.second.second       = currentCoords[NONREF];
  q.push_back(tmp);

  while ( !done ) {
    value = nextDistanceLine(numFiles, q, in);

    if ( value == NOGO )
      break;
    else if ( value == NOTINRANGE ) {
      std::string origRef = chrom[REF];
      StrandPolarity lastStrand = strand[REF];
      do { // for each origRef in REF, nothing in NOREF
        record(noData, chrom[REF]);
        getNextFileLine(REF);
      } while ( /* strand comparison makes a bit tricky */
      /*
        don't worry about overlaps: if overlaps, and is not allowed,
        will grab closest non-overlapped region --> NOTINRANGE is
        not returned unless overlap region is only segment or it has
        opposite strand of all others in the same chromosome.  In
        both cases, the following test will fail.
      */
        !doneFile[REF] &&
        chrom[REF] == origRef &&
        (!strandCompare || strand[REF] == lastStrand)
              );
        // don't getNetFileLine(REF) again;
        // doneFile[REF] checked at nextDistanceLine() next iteration
      continue;
    }
    else
      record(value.first, value.second.first, value.second.second);
    getNextFileLine(REF); // unmerged
  } // while
}

//===================
// getNextFileLine()
//===================
bool getNextFileLine(int i) {
  // Read next line from index i without merging
  if ( !bedFiles[i]->HasNext() ) {
    doneFile[i] = true;
    return(false);
  }
  currentCoords[i] = bedFiles[i]->ReadLine();
  chrom[i]         = bedFiles[i]->Chrom();
  strand[i]        = bedFiles[i]->Strand();
  if ( includeID )
    idField[i] = bedFiles[i]->ID();
  return(true); // doneFile[i] checked on next call
}

//=================
// closestRegion()
//=================
std::pair<bool, std::pair<Direction, unsigned long> >
  closestRegion(unsigned long start, unsigned long stop, const TType& t) {
  /*
    Current requirement is to compare "left-2-left" independent of the
    strand polarities, where "left" means upstream on the + strand or
    downstream on the - strand.
  */

  unsigned long distance = 0;
  Direction d = DOWNSTREAM; // t.second.second.first relative to start
  bool isSame = false;

  if ( t.second.second.first == start &&
       t.second.second.second == stop ) // exact match
    isSame = true;    
  else if ( t.second.second.first < start )
    distance = start - t.second.second.first;
  else if ( t.second.second.first > start ) {
    distance = t.second.second.first - start;
    d = UPSTREAM;
  }
  // else defaults work; start = t.second.first, but stop != t.second.second
  return(std::make_pair(isSame, std::make_pair(d, distance)));
}

//=========
// check()
//=========
// Decides whether the NONREF file should be advanced again.  True when
// NONREF is not exhausted and its current element either
//   - is on a chromosome that sorts before REF's, or
//   - is on REF's chromosome and starts at or before REF's start, or
//   - is on REF's chromosome, starts after REF's start, but is on a
//     different strand while doCompareStrands is set.
bool check(bool doCompareStrands) {
  if ( doneFile[NONREF] )
    return false;
  if ( chrom[NONREF] < chrom[REF] )
    return true;
  if ( !(chrom[NONREF] == chrom[REF]) )
    return false; // NONREF is already on a later chromosome
  if ( currentCoords[NONREF].first <= currentCoords[REF].first )
    return true;
  // Same chromosome, NONREF starts beyond REF: only strand mismatch
  // (when strands matter) justifies reading further
  return doCompareStrands && strand[NONREF] != strand[REF];
}

//====================
// nextDistanceLine()
//====================
// Produces the result for the current REF element.
//   numFiles - not used by this function
//   compList - ordered list of NONREF elements still under
//              consideration; extended and pruned here
//   input    - supplies the strand-compare and allow-complete-overlap
//              options
// Returns NOGO when the REF file is exhausted, NOTINRANGE when no
// element of compList qualifies, and otherwise
// (direction, (distance, id)) for the closest qualifying element.
std::pair<Direction, std::pair<unsigned long, std::string> >
       nextDistanceLine(int numFiles, std::list<TType>& compList,
                        const Input& input) {

  /* Remember when dealing with TTypes:
    .first                == chromosome
    .second.first.first   == strand
    .second.first.second  == id
    .second.second.first  == start position
    .second.second.second == end position
  */

  if ( doneFile[REF] ) // we're done
    return(NOGO);
  if ( doneFile[NONREF] && compList.empty() ) // no more matches
    return(NOTINRANGE);

  // Function-local statics: initialized from `input` the first time
  // execution reaches this point and reused on every later call.
  static const bool strandCompare = input.StrandCompare();
  static const bool allowCompleteOverlap = input.AllowCompleteOverlaps();

  // If chrom[NONREF] < chrom[REF], then compList is obsolete
  if ( !doneFile[NONREF] && chrom[NONREF] < chrom[REF] )
    compList.clear();

  // Increment NONREF until it's > REF; strand sensitivity makes tricky
  while ( check(strandCompare) ) {
    /*
      Keep going until non-reference file is exhausted or
      non-reference file is beyond the reference file in terms
      of current chromosome or coordinates on the same chromosome.
      If the latter and they are not on the same strand, then keep
      searching unless we do not care about strand comparisons.
    */
    if ( !getNextFileLine(NONREF) ) // NONREF depleted
      break;
    // Only elements on REF's chromosome or a later one are queued
    if ( chrom[NONREF] >= chrom[REF] ) {
      TType tmp;
      tmp.first               = chrom[NONREF];
      tmp.second.first.first  = strand[NONREF];
      tmp.second.first.second = idField[NONREF];
      tmp.second.second       = currentCoords[NONREF];
      compList.push_back(tmp);
    }
  } // while

  // Get rid of obsolete compList items on the low end of reference
  //  NOTE: The items in compList are always ordered.
  typedef std::list<TType>::iterator IterType;
  IterType beg = compList.begin(), toErase, find, end;
  while ( beg != compList.end() ) {
    // Advance beg before any erase so it stays valid when toErase goes
    toErase = beg++;
    if ( toErase->first < chrom[REF] )
      compList.erase(toErase); // out of range on low end
    else if ( toErase->first > chrom[REF] )
      break; // > REF by chromosome
    else if ( toErase->second.second.first >= currentCoords[REF].first )
      break; // >= REF by coordinates; keep even if on opposite strand
    else { // toErase->second.second.first < currentCoords[REF].first
      /*
        potential strand sensitivity requires that we search
        possibly more than just the adjacent item.
        note that toErase->first == chrom[REF]
      */
      // toErase is dropped only when a later item also starts before
      // REF on the same chromosome (and, if strands matter, on
      // toErase's strand).
      find = beg;
      while ( find != compList.end() ) {
        if ( find->first == chrom[REF] &&
             find->second.second.first < currentCoords[REF].first &&
             (
               !strandCompare ||
               find->second.first.first == toErase->second.first.first
             )
           ) {
          compList.erase(toErase);
          break;
        }
        else if ( find->first > chrom[REF] ||
                 (find->first == chrom[REF] &&
                  find->second.second.first > currentCoords[REF].first)
                ) {
          break; // coordinates can only get bigger
        }
        ++find;
      } // while
    }
  } // while

  if ( compList.empty() )
    return(NOTINRANGE);

  // Find closest segment
  std::pair< bool, std::pair<Direction, unsigned long> > diffPair;
  Direction direction = UPSTREAM;
  // Sentinel distance meaning "no qualifying element found yet"
  const unsigned long noMatch = std::numeric_limits<unsigned long>::max();
  unsigned long currentDiff = noMatch;
  beg = compList.begin(); end = compList.end();
  std::string id = NOID;
  while ( beg != end ) {
    if ( beg->first == chrom[REF] ) { // chrom's match
      diffPair = closestRegion(currentCoords[REF].first,
                               currentCoords[REF].second, *beg);
      // Check distance, full overlaps and strands; ties go to the left
      if ( currentDiff > diffPair.second.second ) {
        // Exact-coordinate matches count only when complete overlaps
        // are allowed
        if ( allowCompleteOverlap || !diffPair.first ) {
          if ( !strandCompare ||
               beg->second.first.first == strand[REF] ) {
            direction   = diffPair.second.first;
            currentDiff = diffPair.second.second;
            id          = beg->second.first.second;
          }
        }
      }
      else if ( currentDiff != noMatch && /* even if on opp strands */
                beg->second.second.first > currentCoords[REF].first) {
        /*
          With very odd inputs, it's possible to load all > REF rows
          into compList.  Without this check, running times explode.
          Example: Master File == Comparator File in all chrom, start,
                   stop fields across all rows, but everything is on
                   the opposite strand.
          The next if statement accounts for odd opposite strand
          and coordinate possibilities.  beg->second.second.first can
          only get bigger or stay the same on the next iteration.
        */
        if ( beg->second.second.first-currentCoords[REF].first > currentDiff )
          break;
      }
    }
    else if ( beg->first > chrom[REF] )
      break; // list is ordered: nothing further is on REF's chromosome
    ++beg;
  } // while
  if ( currentDiff == noMatch )
    return(NOTINRANGE);
  return(std::make_pair(direction, std::make_pair(currentDiff, id)));
}

} // namespace FeatDist
