/*
  FILE: Bed.c
  AUTHOR: Scott Kuehn
  CREATE DATE: Tue May 16 10:06:58 PDT 2006
  PROJECT: CompBio
  ID: '$Id: Bed.c,v 1.8 2007/05/29 16:39:24 skuehn Exp $'
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include "Bed.h"


BedData * 
initializeBedData(const Genome *genome) 
{

  BedData * beds;

  beds = malloc(sizeof(BedData));
  if (beds == NULL) 
    {
      return NULL;
    }
  beds->numChroms = 0;

  if(genome == NULL)
    {
      beds->chroms = malloc(sizeof(ChromBedData *) * (NUM_CHROM_EST));
    }
  else
    {
      beds->chroms = malloc(sizeof(ChromBedData *) * (genome->numChroms));
    }
  if (beds->chroms == NULL) 
    {
      return NULL;
    }
  beds->genome = genome;

  return beds;
}


/*
  Allocate a chromosome record named after chromBuf, with room for
  NUM_BED_ITEMS_EST coordinate entries and numCoords set to 0.

  chromBuf: NUL-terminated chromosome name.

  Returns the new record, or NULL when chromBuf is NULL, when the name
  (including its terminator) does not fit in the record's chromName
  buffer, or when an allocation fails. Nothing is leaked on failure.
*/
ChromBedData * 
initializeChromBedData(char * chromBuf) {
  ChromBedData *chrom;
  size_t chromBufLen;

  if(chromBuf == NULL) 
    {
      return NULL;
    }
  chromBufLen = strlen(chromBuf);

  chrom = malloc(sizeof *chrom);
  if(chrom == NULL) 
    {
      return NULL;
    }

  /* Chrom name: refuse names that would overflow the fixed-size buffer
     instead of writing past its end. */
  if(chromBufLen >= sizeof chrom->chromName)
    {
      free(chrom);
      return NULL;
    }
  memcpy(chrom->chromName, chromBuf, chromBufLen + 1);

  /* Coords */
  chrom->coords = malloc(sizeof(BedCoordData) * NUM_BED_ITEMS_EST);
  if(chrom->coords == NULL)
    {
      free(chrom);
      return NULL;
    }

  chrom->numCoords = 0;
  return chrom;
}



/*
  Append one interval to a chromosome record.

  chrom:    record to extend; NULL is rejected.
  startPos: start coordinate stored in the new entry.
  endPos:   end coordinate stored in the new entry.
  data:     optional annotation text. When non-NULL it must be non-empty and
            a private heap copy is stored; when NULL the entry's data pointer
            is set to NULL.

  Returns the new number of entries in chrom, or -1 on error (NULL chrom,
  empty annotation string, or allocation failure). On error the record keeps
  its previous entries and entry count.
*/
int 
appendChromBedEntry(ChromBedData *chrom, int startPos, int endPos, char *data) 
{

  int index;
  size_t dataBufLen;
  char *dataCopy = NULL;
  BedCoordData *grown;

  if(chrom == NULL)
    {
      return -1;
    }

  /* Copy the annotation first so a failure leaves the record untouched. */
  if(data)
    {
      dataBufLen = strlen(data);
      if(dataBufLen == 0)
        {
          return -1;
        }
      dataCopy = malloc(dataBufLen + 1);
      if(dataCopy == NULL) 
        {
          return -1;
        }
      memcpy(dataCopy, data, dataBufLen + 1);
    }

  index = chrom->numCoords;

  /* Grow by another NUM_BED_ITEMS_EST entries whenever the entry about to
     be written is the last one of the current chunk. */
  if(((index + 1) % NUM_BED_ITEMS_EST) == 0)
    {
      /* Keep the old array reachable if realloc fails. */
      grown = realloc(chrom->coords, sizeof(BedCoordData) * (index + NUM_BED_ITEMS_EST));
      if(grown == NULL)
        {
          free(dataCopy);
          return -1;
        }
      chrom->coords = grown;
    }

  /* Coords */
  chrom->coords[index].startCoord = startPos;
  chrom->coords[index].endCoord = endPos;

  /* Always define the data pointer: it is later tested and freed. */
  chrom->coords[index].data = dataCopy;

  chrom->numCoords++;
  
  return chrom->numCoords;
}


BedData * 
readChromBedData(const char **bedFileNames, int numFiles, const Genome *genome) {
  
  FILE *bedFile;
  int i, j, k, notStdin = 0, chromEntryCount, newChrom, startPos, 
    endPos, lines = 0, fields = 0, chromCount, validChrom, inChr,
    chromAllocs = 1;
  
  /*Line reading buffers*/
  char bedLine[BED_LINE_LEN + 1];
  char chromBuf[CHROM_NAME_LEN + 1];

  BedData *beds;
  ChromBedData *chrom;

  
  beds = initializeBedData(genome);
  if (beds == NULL) 
    {
      fprintf(stderr, "Error: %s, %d: Unable to create BED structure. Aborting.\n", __FILE__, __LINE__);
      return NULL;
    }

  for(i = 0; i < numFiles; i++) 
    {
      notStdin = strcmp(bedFileNames[i], "-");
      if(notStdin) 
	{
	  bedFile = fopen(bedFileNames[i], "r");
	  if (!bedFile) 
	    {
	      fprintf(stderr, "Unable to access %s\n", bedFileNames[i]);
	      return NULL;
	    }
	} 
      else 
	{
	  bedFile = stdin;
	}

      while(fgets(bedLine, BED_LINE_LEN, bedFile))
	{
	  fields = sscanf(bedLine, "%s\t%d\t%d\t%[^\n]s\n", chromBuf, &startPos, &endPos, bedLine);
	  if(fields < 3)
	    {
	      fprintf(stderr, "Improper BED format at line %d\n", lines);
	      return NULL;
	    }

	  lines++;
	  newChrom = 1;
	  chromCount = beds->numChroms;

	  /* Validate Coords */
	  if ((startPos < 0) || (endPos < 0)) 
	    {
	      fprintf(stderr, "Error on line %d in %s. Genomic position must be greater than 0.\n", 
		      lines, bedFileNames[i]);
	      return NULL;
	    }
	  if (endPos < startPos) 
	    {
	      fprintf(stderr, "Error on line %d in %s. Genomic end coordinate is less than start coordinate.\n", 
		      lines, bedFileNames[i]);
	      return NULL;
	    }

	  /*Find the chrom*/
	  for(j = 0; j < beds->numChroms; j++) 
	    {

	      if(strcmp(beds->chroms[j]->chromName, chromBuf) == 0) 
		{

		  /* Append data to current chrom */
		  if(fields > 3)
		    {
		      chromEntryCount = appendChromBedEntry(beds->chroms[j], startPos, endPos, bedLine);
		    }
		  else
		    {
		      chromEntryCount = appendChromBedEntry(beds->chroms[j], startPos, endPos, NULL);
		    }
		  if (chromEntryCount < 0) 
		    {
		      fprintf(stderr, "Error: %s, %d: Unable to create BED structure. Aborting.\n", __FILE__, __LINE__);
		      return NULL;
		    }
		  newChrom = 0;
		  break;
		}
	    }

	  if((newChrom) || (chromCount == 0)) 
	    {

	      errno = 0;

	      /* Is chrom valid? */
	      if(genome != NULL)
		{
		  validChrom = 0;
		  for(k = 0; k < genome->numChroms; k++)
		    {
		      inChr = strcmp(genome->chroms[k], chromBuf);
		      if (inChr == 0)
			{
			  validChrom = 1;
			  break;
			}
		    }
		  if(!validChrom)
		    {
		      fprintf(stderr, "Error: %s is not a valid chromosome. Aborting.\n", chromBuf);
		      return NULL;
		    }
		}

	      /* Create a new chrom */

	      /* Resize Chrom Structure */
	      if(beds->numChroms >= ((NUM_CHROM_EST * chromAllocs) - 1))
		{
		  /* fprintf(stderr, "Reallocating...\n"); */
		  chromAllocs++;
		  beds->chroms = realloc(beds->chroms, sizeof(ChromBedData *) * NUM_CHROM_EST * chromAllocs);
		  if(beds->chroms == NULL)
		    {
		      fprintf(stderr, "Error: %s, %d: Unable to expand Chrom structure: %s. Aborting.\n", __FILE__, 
			      __LINE__, strerror(errno));
		    }
		}

	      chrom = initializeChromBedData(chromBuf);
	      if (chrom == NULL) 
		{
		  fprintf(stderr, "Error: %s, %d: Unable to create Chrom structure: %s. Aborting.\n", __FILE__, 
			  __LINE__, strerror(errno));
		  return NULL;
		}

	      if(fields > 3)
		{
		  chromEntryCount = appendChromBedEntry(chrom, startPos, endPos, bedLine);
		}
	      else
		{
		  chromEntryCount = appendChromBedEntry(chrom, startPos, endPos, NULL);
		}
	      if (chromEntryCount < 0) 
		{
		  fprintf(stderr, "Error: %s, %d: Unable to create BED structure. Aborting.\n", __FILE__, __LINE__);
		  return NULL;
		}

	      beds->chroms[chromCount] = chrom;
	      beds->numChroms++;      
	    }
	}
      
      if(notStdin) 
	{
	  fclose(bedFile);
	}
      
    }
  return beds;
}

/*
  Write every entry of beds to stdout, one per line, as
  "chrom<TAB>start<TAB>end", followed by "<TAB>data" when the entry's data
  pointer is non-NULL. Chromosomes and entries are printed in their stored
  order. Does nothing when beds is NULL.
*/
void 
printBed(BedData *beds) 
{
  int chromIdx, entryIdx;

  if(beds == NULL) 
    {
      return;
    }

  for(chromIdx = 0; chromIdx < beds->numChroms; chromIdx++) 
    {
      ChromBedData *chrom = beds->chroms[chromIdx];

      for(entryIdx = 0; entryIdx < chrom->numCoords; entryIdx++) 
        {
          printf("%s\t%d\t%d", chrom->chromName, chrom->coords[entryIdx].startCoord, 
                 chrom->coords[entryIdx].endCoord);

          if(chrom->coords[entryIdx].data)
            {
              printf("\t%s\n", chrom->coords[entryIdx].data);
            }
          else
            {
              printf("\n");
            }
        }
    }
}

/*
  Release a BedData structure and everything it owns: each entry's data
  string, each chromosome's coordinate array, each chromosome record, the
  chromosome pointer table and the container itself. The genome pointer is
  not freed. Does nothing when beds is NULL.

  Each entry's data pointer is passed to free(), so it must be either NULL
  or a heap-allocated string.
*/
void 
freeBedData(BedData *beds) 
{

  int i, j;
  ChromBedData *chrom;

  if(beds == NULL) 
    {
      return;
    }
  
  if(beds->chroms != NULL)
    {
      for(i = 0; i < beds->numChroms; i++) 
        {
          chrom = beds->chroms[i];
          if(chrom == NULL)
            {
              continue;
            }

          if(chrom->coords != NULL)
            {
              for(j = 0; j < chrom->numCoords; j++) 
                {
                  free(chrom->coords[j].data);
                }
              /* The coordinate array is a separate allocation. */
              free(chrom->coords);
            }
          free(chrom);
        }
      /* The pointer table is a separate allocation as well. */
      free(beds->chroms);
    }
  free(beds);
}



/*
  Sort beds in place: the entries of every chromosome are ordered with
  numCompareBedData, then the chromosomes themselves are ordered with
  lexCompareBedData (string comparison of their names).
  Does nothing when beds is NULL.
*/
void
lexSortBedData(BedData *beds)
{
  unsigned long chromIdx;

  if(beds != NULL) 
    {
      /* Order the intervals inside each chromosome first. */
      for(chromIdx = 0; chromIdx < beds->numChroms; chromIdx++) 
        {
          ChromBedData *chrom = beds->chroms[chromIdx];

          qsort(chrom->coords, chrom->numCoords, 
                sizeof(BedCoordData), numCompareBedData);
        }

      /* Then order the chromosome table by name. */
      qsort(beds->chroms, beds->numChroms, sizeof(ChromBedData *), lexCompareBedData); 
    }
}

/*
  Sort beds in place: the entries of every chromosome are ordered with
  numCompareBedData, then the chromosomes are reordered to follow the
  order in which their names appear in beds->genome->chroms. A chromosome
  whose name is not found in the genome is treated as having index 0.

  Does nothing when beds is NULL. When beds has no genome attached, or the
  ordering table cannot be allocated, the entries are still sorted but the
  chromosomes keep their current order.
*/
void numSortBedData(BedData *beds)
{
  unsigned long i, j, tmp, *chromOrder;
  ChromBedData *tmpP;

  if(beds == NULL) 
    {
      return;
    }

  /*Sort coords*/
  for(i = 0; i < beds->numChroms; i++) 
    {
      qsort(beds->chroms[i]->coords, beds->chroms[i]->numCoords, 
            sizeof(BedCoordData), numCompareBedData);
    }

  /*Sort chroms*/

  /* Without a genome there is no reference order, and fewer than two
     chromosomes need no reordering. */
  if((beds->genome == NULL) || (beds->numChroms < 2))
    {
      return;
    }
  
  /*Get the indices of all chroms*/
  chromOrder = calloc((size_t)beds->numChroms, sizeof(unsigned long));
  if(chromOrder == NULL)
    {
      fprintf(stderr, "Error: %s, %d: Unable to allocate chromosome order table. Chromosomes left unsorted.\n", 
              __FILE__, __LINE__);
      return;
    }

  for(i = 0; i < beds->numChroms; i++) 
    {
      for(j = 0; j < beds->genome->numChroms; j++) 
        {
          if (strcmp(beds->chroms[i]->chromName, beds->genome->chroms[j]) == 0) 
            {
              chromOrder[i] = j;
              break;
            }
        }
    }

  /*Linear insertion sort on chrom indices; the chromosome pointers are
    swapped in lock-step with their genome indices. */
  for(i = 1; i < beds->numChroms; i++) 
    {  /* Look for an insertion point */
      for (j = i; j > 0 && GT(chromOrder[j - 1], chromOrder[j]); j--) 
        {
          
          /*Move the others down and insert it*/
          tmp = chromOrder[j];
          tmpP = beds->chroms[j];
          
          beds->chroms[j] = beds->chroms[j - 1];
          chromOrder[j] = chromOrder[j - 1];
          
          beds->chroms[j - 1] = tmpP;
          chromOrder[j - 1] = tmp;
        }
    }

  free(chromOrder);
  
  return;

}

int 
numCompareBedData(const void *pos1, const void *pos2) 
{
  long int diff;
  diff = ((BedCoordData *)pos1)->startCoord - ((BedCoordData *)pos2)->startCoord;
  if (diff)
    {
      return diff;
    }
  else
    {
      return ((BedCoordData *)pos1)->endCoord - ((BedCoordData *)pos2)->endCoord;
    }

}

int 
lexCompareBedData(const void *chrPos1, const void *chrPos2) 
{
  return(strcmp((*((ChromBedData **)chrPos1))->chromName, (*((ChromBedData **)chrPos2))->chromName));
}


#ifdef STANDALONE

/*
  Stand-alone driver, compiled only when STANDALONE is defined.

  Loads a hard-coded position file with readChromPosData, sorts it with
  lexSortPosData, prints it with printPos and frees it. Exits with
  EXIT_FAILURE when loading fails, EXIT_SUCCESS otherwise.

  NOTE(review): PosData, readChromPosData, lexSortPosData, printPos,
  freePosData and `human` are not defined in this file; presumably they
  come from another module - confirm before building with STANDALONE.
  The command-line arguments are not used.
*/
int 
main(int argc, char **argv) 
{
  PosData *poss = readChromPosData("/home/skuehn/dev/CompBio/utility/Count/test/big.bed", 
                                   &human);

  if (poss == NULL) 
    {
      exit(EXIT_FAILURE);
    }

  lexSortPosData(poss);
  printPos(poss);
  freePosData(poss);

  exit(EXIT_SUCCESS);
}


#endif /*STANDALONE*/
