#include <assert.h>
#include <sys/stat.h>

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <list>
#include <map>
#include <vector>

#include "analysisFunction.h"
#include "aio.h"

double angsd::addProtect2(double a,double b){
  // Returns log(exp(a)+exp(b)).
  // The larger argument is factored out before exponentiating so the
  // dominant term becomes exp(0) and the sum cannot underflow to zero.
  const double pivot = (a>b) ? a : b;
  const double scaled = exp(a-pivot)+exp(b-pivot);
  return log(scaled) + pivot;
}


double angsd::addProtect3(double a,double b, double c){
  // Returns log(exp(a)+exp(b)+exp(c)).
  // All three terms are shifted by the largest argument before being
  // exponentiated, which protects the sum against underflow.
  double pivot = c;
  if(a>b&&a>c)
    pivot = a;
  else if(b>c)
    pivot = b;
  const double scaled = exp(a-pivot)+exp(b-pivot)+exp(c-pivot);
  return log(scaled) + pivot;
}

double angsd::addProtectN(double a[],int len){
  // Returns log(sum_i exp(a[i])) over the `len` entries of `a`, shifting by
  // the largest entry so the exponentials do not underflow.
  // `a` must hold at least one element (a[0] is read unconditionally).
  double maxVal = a[0];
  for(int i=1;i<len;i++)
    if(maxVal<a[i])
      maxVal=a[i];

  // The sum has to include a[0] as well; starting at index 1 silently drops
  // the first term (and yields log(0) for a one-element input).
  double sumVal = 0;
  for(int i=0;i<len;i++)
    sumVal += exp(a[i]-maxVal);

  return log(sumVal) + maxVal;
}

double angsd::getMax(double a,double b, double c){
  // Returns the largest of a, b and c.
  if(a>b&&a>c)
    return a;
  return (b>c) ? b : c;
}




int angsd::fexists(const char* str){
  /// @param str Filename given as a string.
  /// @return 1 if stat() succeeds on the path, 0 otherwise.
  struct stat info;
  const int rc = stat(str, &info);
  return rc==0;
}



angsd::Matrix<double> angsd::getMatrix(const char *name,int doBinary,int lens){
  // Reads a whitespace (space/tab) delimited text file into a Matrix<double>.
  //  name     : path of the file to read
  //  doBinary : non-zero -> fields are parsed with atoi, otherwise with atof
  //  lens     : size of the line buffer; a line must fit in lens-1 characters
  // Empty lines are skipped. The returned matrix has one heap-allocated row
  // per non-empty line; retMat.y is the field count of the LAST row read.
  // On any read problem the function prints a message and terminates.
  if(!angsd::fexists(name)){
    fprintf(stderr,"\t-> Problems opening file: %s\n",name);
    exit(0);
  }
  const char* delims = " \t";
  std::ifstream pFile(name,std::ios::in);

  // Heap-backed buffer instead of a variable-length stack array.
  std::vector<char> buffer(lens>0 ? lens : 1);
  std::vector<double *> rows;
  int ncols =0;

  // Loop on the result of getline(): a loop on !eof() never terminates once
  // failbit is set (over-long line, unreadable file) because eof is then
  // never reached.
  while(pFile.getline(&buffer[0],buffer.size())){
    if(buffer[0]=='\0')
      continue;
    std::vector<double> items;
    for(char *tok = strtok(&buffer[0],delims);tok!=NULL;tok = strtok(NULL,delims)){
      if(doBinary)
	items.push_back(atoi(tok));
      else
	items.push_back(atof(tok));
    }
    ncols = items.size();
    double *drows = new double[items.size()];
    for(size_t i=0;i<items.size();i++)
      drows[i] = items[i];
    rows.push_back(drows);
  }
  // Reading stopped before the end of the file: report instead of returning
  // a silently truncated matrix.
  if(!pFile.eof()){
    fprintf(stderr,"\t-> Problems reading file: %s (line longer than %d characters?)\n",name,lens);
    exit(0);
  }

  double **data = new double*[rows.size()];
  for(size_t i=0;i<rows.size();i++)
    data[i] = rows[i];

  Matrix<double> retMat;
  retMat.matrix=data;
  retMat.x = rows.size();
  retMat.y = ncols;
  return retMat;

}
angsd::Matrix<int> angsd::getMatrixInt(const char *name,int lens){
  // Reads a whitespace (space/tab) delimited text file into a Matrix<int>.
  //  name : path of the file to read
  //  lens : size of the line buffer; a line must fit in lens-1 characters
  // Every field is parsed with atoi. Empty lines are skipped. The returned
  // matrix has one heap-allocated row per non-empty line; retMat.y is the
  // field count of the LAST row read.
  // On any read problem the function prints a message and terminates.
  if(!angsd::fexists(name)){
    fprintf(stderr,"\t-> Problems opening file: %s\n",name);
    exit(0);
  }
  const char* delims = " \t";
  std::ifstream pFile(name,std::ios::in);

  // Heap-backed buffer instead of a variable-length stack array.
  std::vector<char> buffer(lens>0 ? lens : 1);
  std::vector<int *> rows;
  int ncols =0;

  // Loop on the result of getline(): a loop on !eof() never terminates once
  // failbit is set (over-long line, unreadable file).
  while(pFile.getline(&buffer[0],buffer.size())){
    if(buffer[0]=='\0')
      continue;
    std::vector<int> items;
    for(char *tok = strtok(&buffer[0],delims);tok!=NULL;tok = strtok(NULL,delims))
      items.push_back(atoi(tok));
    ncols = items.size();
    int *drows = new int[items.size()];
    for(size_t i=0;i<items.size();i++)
      drows[i] = items[i];
    rows.push_back(drows);
  }
  // Reading stopped before the end of the file: report instead of returning
  // a silently truncated matrix.
  if(!pFile.eof()){
    fprintf(stderr,"\t-> Problems reading file: %s (line longer than %d characters?)\n",name,lens);
    exit(0);
  }

  int **data = new int*[rows.size()];
  for(size_t i=0;i<rows.size();i++)
    data[i] = rows[i];

  Matrix<int> retMat;
  retMat.matrix=data;
  retMat.x = rows.size();
  retMat.y = ncols;
  return retMat;

}

void angsd::deleteMatrixInt(Matrix<int> mat){
  // Frees each of the mat.x rows and then the array of row pointers.
  // `mat` is passed by value, so the final NULL assignment only resets this
  // function's own copy of the handle.
  assert(mat.matrix!=NULL);
  int row = 0;
  while(row<mat.x){
    delete [] mat.matrix[row];
    ++row;
  }
  delete[] mat.matrix;
  mat.matrix = NULL;
}

void angsd::deleteMatrix(Matrix<double> mat){
  // Frees each of the mat.x rows and then the array of row pointers.
  // `mat` is passed by value, so the final NULL assignment only resets this
  // function's own copy of the handle.
  assert(mat.matrix!=NULL);
  int row = 0;
  while(row<mat.x){
    delete [] mat.matrix[row];
    ++row;
  }
  delete[] mat.matrix;
  mat.matrix = NULL;
}



void angsd::printMatrix(Matrix<double> mat,FILE *file){
  // Writes a one-line header (address and dimensions) to stderr, then the
  // matrix itself to `file`: one row per line, each value followed by a tab.
  fprintf(stderr,"Printing mat:%p with dim=(%d,%d)\n",mat.matrix,mat.x,mat.y);
  int r = 0;
  while(r<mat.x){
    int c = 0;
    while(c<mat.y){
      fprintf(file,"%f\t",mat.matrix[r][c]);
      ++c;
    }
    fprintf(file,"\n");
    ++r;
  }
}

// Ordering functor for C-string keys: lets a std::map keyed on char* compare
// the strings by content (strcmp) instead of by pointer value.
struct cmp_strEmil{
  bool operator()(char const *a, char const *b) const{
    const int order = std::strcmp(a, b);
    return order < 0;
  }
};


angsd::doubleTrouble<double> angsd::getSample(const char *name,int lens, char* whichPhe, char* whichCov){
  // Parses a .sample file into a phenotype matrix (matrix0) and a covariate
  // matrix (matrix1).
  //  name     : path of the .sample file
  //  lens     : size of the line buffer; a line must fit in lens-1 characters
  //  whichPhe : comma separated phenotype column names to keep, or NULL for all
  //  whichCov : comma separated covariate column names to keep, or NULL for all
  // NOTE: whichPhe and whichCov are tokenised in place with strtok, so the
  // caller's strings are modified.
  // File layout: line 1 holds the column names (first must be ID or ID_1),
  // line 2 holds one type code per column (0 ID or missing, D discrete covar,
  // C continuous covar, B discrete pheno, P continuous pheno), and every
  // following non-empty line is one sample. The string "NA" is stored as -999.
  // Any format or read problem prints a message and terminates the program.

  if(!angsd::fexists(name)){
    fprintf(stderr,"\t-> Problems opening file: %s\n",name);
    exit(0);
  }

  const char* delims = " \t";
  std::ifstream pFile(name,std::ios::in);

  // Heap-backed line buffer instead of a variable-length stack array.
  std::vector<char> lineBuf(lens>0 ? lens : 1);
  char *buffer = &lineBuf[0];
  const std::streamsize bufLen = lineBuf.size();

  int hasMissing = 0;
  int hasID_2 = 0;
  int hasPheno = 0;

  // requested phenotype / covariate names (keys are strdup'ed copies)
  typedef std::map <char*,int, cmp_strEmil> nameMap;
  nameMap pheMap;
  nameMap covMap;

  // column indices that go into the phenotype / covariate matrices
  std::map <int,int> pheMap2;
  std::map <int,int> covMap2;

  const char* delims2 = ",";

  if(whichPhe!=NULL){
    for(char* id = strtok(whichPhe,delims2);id!=NULL;id = strtok(NULL,delims2))
      pheMap[strdup(id)] = 1;
  }

  if(whichCov!=NULL){
    for(char* id = strtok(whichCov,delims2);id!=NULL;id = strtok(NULL,delims2))
      covMap[strdup(id)] = 1;
  }

  //read first line to know how many cols
  pFile.getline(buffer,bufLen);
  if(pFile.fail() && !pFile.eof()){
    fprintf(stderr,"\t-> Problems reading file: %s (line longer than %d characters?)\n",name,lens);
    exit(0);
  }
  int ncols=0;
  char* tmp = strtok(buffer,delims);
  while(tmp!=NULL){

    if(ncols==0 && (strcmp(tmp,"ID")!=0 && strcmp(tmp,"ID_1")!=0) ){
      fprintf(stderr,"\t-> First column first row of file must be 'ID' or 'ID_1' is %s\n",tmp);
      exit(0);
    }

    if(ncols==1 && strcmp(tmp,"ID_2")==0 ){
      hasID_2 = 1;
    } else if(ncols==1 && strcmp(tmp,"missing")==0 ){
      hasMissing = 1;
    }

    if(ncols==2 && strcmp(tmp,"missing")==0){
      if(hasMissing){
	fprintf(stderr,"\t-> Cannot have two 'missing' columns in .sample file\n");
	exit(0);
      }
      hasMissing = 1;
    }

    // A column is selected when no explicit list was given, or when its
    // name is in the list.
    if(whichPhe==NULL || pheMap.count(tmp)>0)
      pheMap2[ncols] = 1;

    if(whichCov==NULL || covMap.count(tmp)>0)
      covMap2[ncols] = 1;

    ncols++;
    tmp = strtok(NULL,delims);
  }

  // The name maps are not needed any more: release the strdup'ed keys and
  // drop the entries so no dangling key stays in a map.
  for(nameMap::iterator it = pheMap.begin(); it != pheMap.end(); it++)
    free(it->first);
  pheMap.clear();

  for(nameMap::iterator it = covMap.begin(); it != covMap.end(); it++)
    free(it->first);
  covMap.clear();

  //read second line to know types of each
  std::vector <char> sampleMap;
  pFile.getline(buffer,bufLen);
  if(pFile.fail() && !pFile.eof()){
    fprintf(stderr,"\t-> Problems reading file: %s (line longer than %d characters?)\n",name,lens);
    exit(0);
  }
  int count=0;
  tmp = strtok(buffer,delims);
  while(tmp!=NULL){

    if(count==0 && tmp[0]!='0'){
      fprintf(stderr,"\t-> First column second row of file must be '0' is %c\n",tmp[0]);
      exit(0);
    }

    if(count<3 && hasID_2 && hasMissing && tmp[0]!='0'){
      fprintf(stderr,"\t-> Second and third column second row of file must be '0' is %c\n",tmp[0]);
      exit(0);
    }

    if(count<2 && !hasID_2 && hasMissing && tmp[0]!='0'){
      fprintf(stderr,"\t-> Second column second row of file must be '0' is %c\n",tmp[0]);
      exit(0);
    }

    if(count<2 && hasID_2 && !hasMissing && tmp[0]!='0'){
      fprintf(stderr,"\t-> Second column second row of file must be '0' is %c\n",tmp[0]);
      exit(0);
    }

    // keep track of which column is what - 0 ID or missing, D discrete covar, C continuous covar, B discrete pheno, P continuous pheno
    sampleMap.push_back(tmp[0]);
    count++;
    tmp = strtok(NULL,delims);

  }

  assert(count==ncols);

  // number of kept columns, taken from the last data row read
  int pheCols = 0;
  int covCols = 0;

  // to keep track of pheno either only binary or quant
  int isBinary = 0;

  std::vector<double*> covRows;
  std::vector<double*> pheRows;

  //NA string used in .sample file has to give -999 value for this
  const char * test = "NA";

  //create matrix for covar and pheno
  // Loop on the result of getline(): a loop on !eof() never terminates once
  // failbit is set (e.g. by a line that does not fit in the buffer).
  while(pFile.getline(buffer,bufLen)){
    if(buffer[0]=='\0')
      continue;

    std::vector<double> covRow;
    std::vector<double> pheRow;

    int column = 0;

    for(char *tok = strtok(buffer,delims);tok!=NULL;tok = strtok(NULL,delims)){

      // a row with more fields than the header would index past sampleMap
      if(column>=(int)sampleMap.size()){
	fprintf(stderr,"\t-> Row in .sample file has more columns than the header (%d)\n",ncols);
	exit(0);
      }

      const char type = sampleMap[column];
      const double value = (strcmp(tok,test)==0) ? -999 : atof(tok);

      if(type == '0'){
	// ID or missing column: nothing to store
      } else if(type == 'D' || type == 'C'){
	//covar
	if(covMap2.count(column)>0)
	  covRow.push_back(value);
      } else if(type == 'B'){
	//pheno, binary (stored as double)
	if(pheMap2.count(column)>0){
	  pheRow.push_back(value);
	  isBinary = 1;
	  hasPheno = 1;
	}
      } else if(type == 'P'){
	//pheno, quantitative - cannot be mixed with binary phenotypes
	if(pheMap2.count(column)>0){
	  pheRow.push_back(value);
	  assert(isBinary==0);
	  hasPheno = 1;
	}
      } else{
	fprintf(stderr,"error .sample file has unreconigsed column type (D, C, B, 0 and P are allowed): %c \n",type);
	exit(0);
      }

      column++;
    }

    double *crows = new double[covRow.size()];
    double *prows = new double[pheRow.size()];

    covCols = covRow.size();
    pheCols = pheRow.size();

    for(size_t i=0;i<covRow.size();i++)
      crows[i] = covRow[i];

    for(size_t i=0;i<pheRow.size();i++)
      prows[i] = pheRow[i];

    covRows.push_back(crows);
    pheRows.push_back(prows);

  }
  // Reading stopped before the end of the file: report instead of returning
  // silently truncated data.
  if(!pFile.eof()){
    fprintf(stderr,"\t-> Problems reading file: %s (line longer than %d characters?)\n",name,lens);
    exit(0);
  }

  //checks that any phenos have been read
  if(!hasPheno){
    fprintf(stderr,"##############################################\n");
    fprintf(stderr,"## WARNING: NO PHENOTYPE PRESENT IN .sample FILE!!\n");
    fprintf(stderr,"##############################################\n");
  }

  double **covData = new double*[covRows.size()];
  double **pheData = new double*[pheRows.size()];

  for(size_t i=0;i<covRows.size();i++)
    covData[i] = covRows[i];

  for(size_t i=0;i<pheRows.size();i++)
    pheData[i] = pheRows[i];

  doubleTrouble<double> dT;
  dT.matrix0=pheData;
  dT.x0 = pheRows.size();
  dT.y0 = pheCols;
  dT.isBinary = isBinary;

  dT.matrix1=covData;
  dT.x1 = covRows.size();
  dT.y1 = covCols;

  return dT;

}


void angsd::deleteDoubleTrouble(doubleTrouble<double> dT){
  // Frees both matrices of `dT`: every row of matrix0 (x0 rows) and
  // matrix1 (x1 rows), followed by the two row-pointer arrays.
  // `dT` is passed by value, so the NULL assignments only reset this
  // function's own copy.
  assert(dT.matrix0!=NULL && dT.matrix1!=NULL);

  int row = 0;
  while(row<dT.x0){
    delete [] dT.matrix0[row];
    ++row;
  }
  delete[] dT.matrix0;
  dT.matrix0=NULL;

  row = 0;
  while(row<dT.x1){
    delete [] dT.matrix1[row];
    ++row;
  }
  delete[] dT.matrix1;
  dT.matrix1=NULL;
}



void angsd::printDoubleTrouble(doubleTrouble<double> dT,FILE *file){
  // Writes the dimensions of both matrices to stderr, then prints to `file`
  // one line per row: the y0 phenotype values followed by the y1 covariate
  // values, each followed by a tab.
  // Rows are counted with x0 only, so matrix1 is indexed with the row range
  // of matrix0.
  fprintf(stderr,"Printing phe doubleTrouble:%p with dim=(%d,%d)\n",dT.matrix0,dT.x0,dT.y0);
  fprintf(stderr,"Printing cov doubleTrouble:%p with dim=(%d,%d)\n",dT.matrix1,dT.x1,dT.y1);

  int r = 0;
  while(r<dT.x0){
    for(int c=0;c<dT.y0;c++)
      fprintf(file,"%f\t",dT.matrix0[r][c]);
    for(int c=0;c<dT.y1;c++)
      fprintf(file,"%f\t",dT.matrix1[r][c]);
    fprintf(file,"\n");
    ++r;
  }
}



double **angsd::get3likes(funkyPars *pars){
  // Allocates a numSites x (3*nInd) array and, for every kept site, copies
  // out of the 10 genotype likelihoods per individual the three that belong
  // to major/major, major/minor and minor/minor.
  // Rows of sites with keepSites[s]==0 are allocated but left unfilled.
  // The caller owns the returned array.
  const int nSites = pars->numSites;
  double **loglike = new double*[nSites];
  for(int s=0;s<nSites;s++)
    loglike[s] = new double[3*pars->nInd];

  for(int s=0;s<nSites;s++){
    if(pars->keepSites[s]==0)
      continue;

    for(int i=0;i<pars->nInd;i++){
      const int offAA = angsd::majorminor[pars->major[s]][pars->major[s]];
      const int offAa = angsd::majorminor[pars->major[s]][pars->minor[s]];
      const int offaa = angsd::majorminor[pars->minor[s]][pars->minor[s]];
      double *out = loglike[s]+i*3;
      out[0]=pars->likes[s][i*10+offAA];
      out[1]=pars->likes[s][i*10+offAa];
      out[2]=pars->likes[s][i*10+offaa];
    }
  }
  return loglike;
}

double **angsd::get3likesRescale(funkyPars *pars){
  // Same extraction as get3likes(pars): a numSites x (3*nInd) array holding
  // the major/major, major/minor and minor/minor likelihoods of every kept
  // site. In addition each individual's triple is rescaled by subtracting
  // its largest value, unless that largest value is infinite.
  // A NaN produced by the rescaling prints the maximum and terminates.
  // Rows of sites with keepSites[s]==0 are allocated but left unfilled.
  const int nSites = pars->numSites;
  double **loglike = new double*[nSites];
  for(int s=0;s<nSites;s++)
    loglike[s] = new double[3*pars->nInd];

  for(int s=0;s<nSites;s++){
    if(pars->keepSites[s]==0)
      continue;

    for(int i=0;i<pars->nInd;i++){
      const int offAA = angsd::majorminor[pars->major[s]][pars->major[s]];
      const int offAa = angsd::majorminor[pars->major[s]][pars->minor[s]];
      const int offaa = angsd::majorminor[pars->minor[s]][pars->minor[s]];
      double *out = loglike[s]+i*3;
      out[0]=pars->likes[s][i*10+offAA];
      out[1]=pars->likes[s][i*10+offAa];
      out[2]=pars->likes[s][i*10+offaa];

      // largest of the three values
      double mmax = out[0];
      for(int g=1;g<3;g++)
	if(out[g]>mmax)
	  mmax = out[g];

      // an infinite maximum cannot be subtracted meaningfully
      if(std::isinf(mmax))
	continue;

      for(int g=0;g<3;g++){
	out[g] -= mmax;
	if(std::isnan(out[g])){
	  fprintf(stderr,"mmax: %f\n",mmax);
	  exit(0);
	}
      }
    }
  }
  return loglike;
}


double **angsd::get3likesRMlow(funkyPars *pars,int *keepInd){
  // Builds a numSites x (3*nKeep) array with the major/major, major/minor
  // and minor/minor likelihoods of the individuals flagged in keepInd
  // (nKeep = number of non-zero flags), for every kept site.
  // A triple whose three values are all below -20 is replaced by 0,0,0.
  // Rows of sites with keepSites[s]==0 are allocated but left unfilled.
  int nKeep=0;
  for(int i=0;i<pars->nInd;i++)
    if(keepInd[i])
      nKeep++;

  const int nSites = pars->numSites;
  double **loglike = new double*[nSites];
  for(int s=0;s<nSites;s++)
    loglike[s] = new double[3*nKeep];

  for(int s=0;s<nSites;s++){
    if(pars->keepSites[s]==0)//always extract this, to avoid problems in multithreading
      continue;

    double *out = loglike[s];
    for(int i=0;i<pars->nInd;i++){
      if(keepInd[i]==0)
	continue;
      const int offAA = angsd::majorminor[pars->major[s]][pars->major[s]];
      const int offAa = angsd::majorminor[pars->major[s]][pars->minor[s]];
      const int offaa = angsd::majorminor[pars->minor[s]][pars->minor[s]];
      out[0]=pars->likes[s][i*10+offAA];
      out[1]=pars->likes[s][i*10+offAa];
      out[2]=pars->likes[s][i*10+offaa];

      const bool allLow = out[0] < -20 && out[1] < -20 && out[2] < -20;
      if(allLow){
	out[0] = 0;
	out[1] = 0;
	out[2] = 0;
      }
      out += 3;
    }
  }
  return loglike;
}

double **angsd::get3likes(funkyPars *pars,int *keepInd){
  // Builds a numSites x (3*nKeep) array with the major/major, major/minor
  // and minor/minor likelihoods of the individuals flagged in keepInd
  // (nKeep = number of non-zero flags), for every kept site.
  // Rows of sites with keepSites[s]==0 are allocated but left unfilled.
  int nKeep=0;
  for(int i=0;i<pars->nInd;i++)
    if(keepInd[i])
      nKeep++;

  const int nSites = pars->numSites;
  double **loglike = new double*[nSites];
  for(int s=0;s<nSites;s++)
    loglike[s] = new double[3*nKeep];

  for(int s=0;s<nSites;s++){
    if(pars->keepSites[s]==0)//always extract this, to avoid problems in multithreading
      continue;

    double *out = loglike[s];
    for(int i=0;i<pars->nInd;i++){
      if(keepInd[i]==0)
	continue;
      const int offAA = angsd::majorminor[pars->major[s]][pars->major[s]];
      const int offAa = angsd::majorminor[pars->major[s]][pars->minor[s]];
      const int offaa = angsd::majorminor[pars->minor[s]][pars->minor[s]];
      out[0]=pars->likes[s][i*10+offAA];
      out[1]=pars->likes[s][i*10+offAa];
      out[2]=pars->likes[s][i*10+offaa];
      out += 3;
    }
  }

  return loglike;
}

double **angsd::getlikes(funkyPars *pars,int *keepInd){
  // Builds a numSites x (10*nKeep) array holding, for every kept site, all
  // 10 likelihood values of each individual flagged in keepInd
  // (nKeep = number of non-zero flags), packed in individual order.
  // Rows of sites with keepSites[s]==0 are allocated but left unfilled.
  int nKeep=0;
  for(int i=0;i<pars->nInd;i++)
    if(keepInd[i])
      nKeep++;

  const int nSites = pars->numSites;
  double **loglike = new double*[nSites];
  for(int s=0;s<nSites;s++)
    loglike[s] = new double[10*nKeep];

  for(int s=0;s<nSites;s++){
    if(pars->keepSites[s]==0)
      continue;
    double *out = loglike[s];
    for(int i=0;i<pars->nInd;i++){
      if(keepInd[i]==0)
	continue;
      for(int g=0;g<10;g++)
	out[g]=pars->likes[s][i*10+g];
      out += 10;
    }
  }
  return loglike;
}
//DRAGON just use std::swap
// Exchanges the values of the two referenced doubles.
void angsd::swapDouble (double& first, double& second)
{
        const double held = second;
        second = first;
        first = held;
}

int angsd::matinv( double x[], int n, int m, double space[])
{
  //from rasmus nielsens code
  // In-place Gauss-Jordan inversion with row pivoting.
  //  x     : row-major array with n rows and m columns per row (m>=n); the
  //          leading n x n part is replaced by its inverse
  //  space : scratch memory, reinterpreted as n ints that record the pivot
  //          row chosen for every column
  // Returns 0 on success, or -1 (after printing a message) as soon as the
  // best available pivot is smaller than 1e-20.
  // The obsolete `register` specifier (removed in C++17) is no longer used,
  // and the loops are written out explicitly.
  int *irow=(int*) space;
  const double ee=1.0e-20;

  for(int i=0;i<n;i++){
    // choose the row (from i downwards) with the largest entry in column i
    double xmax = 0.;
    for(int j=i;j<n;j++){
      if(xmax < fabs(x[j*m+i])){
	xmax = fabs( x[j*m+i] );
	irow[i] = j;
      }
    }
    if(xmax < ee){
      fprintf(stderr,"\nDeterminant becomes zero at %3d!\t\n", i+1);
      return(-1);
    }
    // bring the pivot row into position i
    if(irow[i] != i){
      for(int j=0;j<m;j++){
	const double t = x[i*m+j];
	x[i*m+j] = x[irow[i] * m + j];
	x[ irow[i] * m + j] = t;
      }
    }
    // eliminate column i from every other row, then scale the pivot row
    const double t = 1./x[i*m+i];
    for(int j=0;j<n;j++){
      if(j == i) continue;
      const double t1 = t*x[j*m+i];
      for(int k=0;k<m;k++)
	x[j*m+k] -= t1*x[i*m+k];
      x[j*m+i] = -t1;
    }
    for(int j=0;j<m;j++)
      x[i*m+j] *= t;
    x[i*m+i] = t;
  }
  // undo the row exchanges as column exchanges, in reverse order
  for(int i=n-1;i>=0;i--){
    if(irow[i] == i) continue;
    for(int j=0;j<n;j++){
      const double t = x[j*m+i];
      x[j*m+i] = x[ j*m + irow[i] ];
      x[ j*m + irow[i] ] = t;
    }
  }
  return (0);
}



void angsd::logrescale(double *ary,int len){
  // Shifts the `len` values of `ary` in place so that the largest becomes 0
  // (i.e. subtracts the maximum from every entry).
  double top = ary[0];
  for(int i=1;i<len;i++)
    if(ary[i]>top)
      top = ary[i];

  int i = 0;
  while(i<len){
    ary[i] -= top;
    ++i;
  }
}

// Writes the `len` doubles of `ary` to `fp` as one comma separated line
// (format %f) terminated by a newline.
// An empty array (len<=0) produces just the newline; without that guard
// the final element access would read ary[-1].
void print_array(FILE *fp,double *ary,int len){
  if(len<=0){
    fprintf(fp,"\n");
    return;
  }
  for(int i=0;i<len-1;i++)
    fprintf(fp,"%f,",ary[i]);
  fprintf(fp,"%f\n",ary[len-1]);
}

// Writes the `len` ints of `ary` to `fp` as one comma separated line
// terminated by a newline.
// An empty array (len<=0) produces just the newline; without that guard
// the final element access would read ary[-1].
void print_array(FILE *fp,int *ary,int len){
  if(len<=0){
    fprintf(fp,"\n");
    return;
  }
  for(int i=0;i<len-1;i++)
    fprintf(fp,"%d,",ary[i]);
  fprintf(fp,"%d\n",ary[len-1]);
}


double angsd::sigm(double x){
  // Logistic (sigmoid) function 1/(1+exp(-x)).
  const double denom = 1+exp(-x);
  return 1/denom;
}


double angsd::lbico(double n, double k){
  // Natural log of the binomial coefficient "n choose k", computed from
  // log-gamma values.
  const double logNfact = lgamma(n+1);
  return logNfact-lgamma(k+1)-lgamma(n-k+1);
}

double angsd::myComb2(int k,int r, int j){
  // Evaluates C(r,j)*C(2k-r,2-j)/C(2k,2) through log binomial coefficients.
  // j>r is reported on stderr, but the value is still computed and returned.
  if(j>r)
    fprintf(stderr,"%s error in k=%d r=%d j=%d\n",__FUNCTION__,k,r,j);

  const double logNumerator = lbico(r,j)+lbico(2*k-r,2-j);
  const double logDenominator = lbico(2*k,2);

  return exp(logNumerator-logDenominator);
}



double *angsd::readDouble(const char*fname,int hint){
  // Reads all whitespace (tab/newline/space) separated numbers of a text
  // file and returns them as a new[]-allocated array owned by the caller.
  //  fname : path of the file
  //  hint  : number of values the file is expected to contain
  // If the file cannot be read completely, or the number of values differs
  // from `hint`, a message is printed and the program terminates.
  FILE *fp = aio::getFILE(fname,"r");
  if(fp==NULL){
    fprintf(stderr,"\t-> Problems opening file: %s\n",fname);
    exit(0);
  }

  // The whole file is loaded into a heap buffer (the file size is not
  // bounded, so a stack array is not safe here).
  const size_t nBytes = aio::fsize(fname);
  std::vector<char> buf(nBytes+1);
  if(nBytes!=fread(&buf[0],sizeof(char),nBytes,fp)){
    fprintf(stderr,"Problems reading file: %s\n will exit\n",fname);
    exit(0);
  }
  fclose(fp);
  buf[nBytes]='\0';

  // strtok returns NULL straight away for an empty or all-whitespace file,
  // so the first token must not be handed to atof unchecked.
  std::vector<double> res;
  for(char *tok=strtok(&buf[0],"\t\n ");tok!=NULL;tok=strtok(NULL,"\t\n "))
    res.push_back(atof(tok));

  if(static_cast<size_t>(hint)!=res.size()){
    fprintf(stderr,"\t-> File: \'%s\' should contain %d values, but has %lu\n",fname,hint,res.size());
    fprintf(stderr,"\t-> If you are supplying an estimated sfs, make sure your input file is a single line (an estimate for a single region)\n");
    for(size_t i=0;i<res.size();i++)
      fprintf(stderr,"%zu=%f\n",i,res[i]);
    exit(0);
  }

  double *ret = new double[res.size()];
  for(size_t i=0;i<res.size();i++)
    ret[i] = res[i];
  return ret;
}

int angsd::whichMax(double *d,int len){
  // Index of the largest of the `len` values (the first one on ties), or -1
  // when index 0 is the winner and no other value differs from d[0], i.e.
  // the values carry no information.
  int best=0;
  for(int i=1;i<len;i++)
    if(d[i]>d[best])
      best=i;

  if(best!=0)
    return best;

  // Index 0 won: it only counts if at least one other value is different.
  for(int i=1;i<len;i++)
    if(d[i]!=d[0])
      return 0;
  return -1;
}


// Draws one base (0..3) for individual i with probability proportional to
// its count; the four counts of individual i are counts[4*i .. 4*i+3].
// depth==-1 means "compute the depth as the sum of those four counts".
// Returns 4 when the depth is 0 or when no base is selected.
int angsd::getRandomCount(suint *counts, int i,int depth){

  const suint *base = counts+4*i;

  if(depth==-1){
    depth=0;
    for(int b=0;b<4;b++)
      depth+=base[b];
  }

  if(depth==0)
    return 4;

  // walk the cumulative counts until they pass the random draw
  const int draw = std::rand() % depth;
  int running=0;
  for(int b=0;b<4;b++){
    running+=base[b];
    if(running>draw)
      return b;
  }
  return 4;
}

// Most frequent base (0..3) of individual i; ties are broken at random.
// The four counts of individual i are counts[4*i .. 4*i+3].
// depth is without N; depth==-1 means "sum the four counts".
// Returns 4 when the depth is not positive.
int angsd::getMaxCount(suint *counts,int i, int depth){

  const suint *base = counts+4*i;

  if(depth==-1){
    depth=0;
    for(int b=0;b<4;b++)
      depth+=base[b];
  }

  if(depth<=0)
    return 4;

  // first index holding the maximum, and how many bases share it
  int top = 0;
  int nTop = 1;
  for(int b=1;b<4;b++){
    if(base[b]>base[top]){
      top = b;
      nTop = 1;
    }else if(base[b]==base[top]){
      nTop++;
    }
  }

  if(nTop<=1)
    return top;

  // Tie: pick the pick-th tied base among indices 1..3. If none is reached
  // the first maximum found above is kept.
  const int pick = std::rand() % nTop;
  int seen=0;
  for(int b=1;b<4;b++){
    if(base[b]!=base[top])
      continue;
    if(seen==pick)
      return b;
    seen++;
  }
  return top;
}

// Combines the bases of individual i into an IUPAC code index.
// A base is "present" when its share of the depth is above iRatio.
// The four counts of individual i are counts[4*i .. 4*i+3].
// depth is without N; depth==-1 means "sum the four counts".
// Returns 14 when the depth is not positive.
int angsd::getIupacCount(suint *counts,int i, double iRatio, int depth){

  if(depth==-1){
    depth=0;
    for(int b=0;b<4;b++)
      depth+=counts[b+4*i];
  }

  if(depth<=0)
    return 14;

  // Every present base b adds (b+1)^2, i.e. A=1, C=4, G=9, T=16, so each
  // combination of bases has its own sum.
  int score = 0;
  for(int b=0;b<4;b++){
    if(double(counts[b+4*i])/double(depth)>iRatio)
      score += (b+1)*(b+1);
  }

  switch(score){
  case 0:  return 14; //N
  case 1:  return 0;  //A
  case 4:  return 1;  //C
  case 9:  return 2;  //G
  case 16: return 3;  //T
  case 10: return 4;  //A+G
  case 20: return 5;  //C+T
  case 13: return 6;  //G+C
  case 17: return 7;  //A+T
  case 25: return 8;  //G+T
  case 5:  return 9;  //A+C
  case 29: return 10; //C+G+T
  case 26: return 11; //A+G+T
  case 21: return 12; //A+C+T
  case 14: return 13; //A+C+G
  case 30: return 14; //A+C+G+T
  default: break;
  }
  return 0;
}

// Draws one base (0..3) with probability proportional to its count summed
// over all nInd individuals; counts holds 4 values (A C G T) per individual.
// Returns 4 when the total depth is 0 or when no base is selected.
int angsd::getRandomCountTotal(suint *counts, int nInd){

  size_t totalCounts[4]={0,0,0,0};
  for(int ind=0;ind<nInd;ind++)
    for(int b=0;b<4;b++)
      totalCounts[b] += counts[4*ind+b];

  const size_t depth = totalCounts[0]+totalCounts[1]+totalCounts[2]+totalCounts[3];
  if(depth==0)
    return 4;

  // walk the cumulative counts until they pass the random draw
  const size_t draw = std::rand() % depth;
  size_t running=0;
  for(int b=0;b<4;b++){
    running+=totalCounts[b];
    if(running>draw)
      return b;
  }
  return 4;
}

// Most frequent base (0..3) over all nInd individuals; ties are broken at
// random. counts holds 4 values (A C G T) per individual.
// Returns 4 when the total depth is 0.
int angsd::getMaxCountTotal(suint *counts,int nInd){

  size_t totalCounts[4]={0,0,0,0};
  for(int ind=0;ind<nInd;ind++)
    for(int b=0;b<4;b++)
      totalCounts[b] += counts[4*ind+b];

  const size_t depth = totalCounts[0]+totalCounts[1]+totalCounts[2]+totalCounts[3];
  if(depth==0)
    return 4;

  // first index holding the maximum, and how many bases share it
  int top = 0;
  int nTop = 1;
  for(int b=1;b<4;b++){
    if(totalCounts[b]>totalCounts[top]){
      top = b;
      nTop = 1;
    }else if(totalCounts[b]==totalCounts[top]){
      nTop++;
    }
  }

  if(nTop<=1)
    return top;

  // Tie: pick the pick-th tied base among indices 1..3. If none is reached
  // the first maximum found above is kept.
  const int pick = std::rand() % nTop;
  int seen=0;
  for(int b=1;b<4;b++){
    if(totalCounts[b]!=totalCounts[top])
      continue;
    if(seen==pick)
      return b;
    seen++;
  }
  return top;
}

// Combines the bases seen over all nInd individuals into an IUPAC code
// index. counts holds 4 values (A C G T) per individual; a base is
// "present" when its share of the total depth is above iRatio.
// Returns 14 when the total depth is 0.
int angsd::getIupacCountTotal(suint *counts,int nInd, double iRatio){

  size_t totalCounts[4]={0,0,0,0};
  for(int ind=0;ind<nInd;ind++)
    for(int b=0;b<4;b++)
      totalCounts[b] += counts[4*ind+b];

  const size_t depth = totalCounts[0]+totalCounts[1]+totalCounts[2]+totalCounts[3];
  if(depth==0)
    return 14;

  // Every present base b adds (b+1)^2, i.e. A=1, C=4, G=9, T=16, so each
  // combination of bases has its own sum.
  int score = 0;
  for(int b=0;b<4;b++){
    if(double(totalCounts[b])/double(depth)>iRatio)
      score += (b+1)*(b+1);
  }

  switch(score){
  case 0:  return 14; //N
  case 1:  return 0;  //A
  case 4:  return 1;  //C
  case 9:  return 2;  //G
  case 16: return 3;  //T
  case 10: return 4;  //A+G
  case 20: return 5;  //C+T
  case 13: return 6;  //G+C
  case 17: return 7;  //A+T
  case 25: return 8;  //G+T
  case 5:  return 9;  //A+C
  case 29: return 10; //C+G+T
  case 26: return 11; //A+G+T
  case 21: return 12; //A+C+T
  case 14: return 13; //A+C+G
  case 30: return 14; //A+C+G+T
  default: break;
  }
  return 0;
}




// In-place LU decomposition of the n x n matrix `a` (array of row pointers)
// with implicit row scaling and partial pivoting.
//  a    : on return holds both triangular factors (unit diagonal of the
//         lower factor is not stored)
//  indx : indx[j] receives the row that was exchanged with row j
//  d    : set to +1 or -1 depending on whether the number of row exchanges
//         was even or odd
// Returns 0 on success and 1 if a row consists of zeros only (singular).
// An exactly zero pivot is replaced by 1e-20.
int ludcmp(double **a, int *indx, double &d,int n)
{
  int imax = 0;
  double big, dum, sum, temp;
  // per-row scaling factors; a std::vector replaces the non-standard
  // variable-length stack array
  std::vector<double> vv(n>0 ? n : 0);
  d=1;

  for (int i=0; i<n; i++){
    big=0;
    for (int j=0; j<n; j++){
      if ((temp=fabs(a[i][j])) > big)
	big=temp;
    }
    if(big==0){
      // singular matrix
      return(1);
    }
    vv[i]=1/big;
  }

  for (int j=0; j<n; j++){
    // entries above the diagonal of column j
    for (int i=0; i<j; i++){
      sum = a[i][j];
      for (int k=0; k<i; k++)
	sum -= a[i][k] * a[k][j];
      a[i][j]=sum;
    }
    // entries on and below the diagonal, looking for the best scaled pivot
    big=0;
    for (int i=j; i<n; i++)	{
      sum=a[i][j];
      for (int k=0; k<j; k++)
	sum -= a[i][k] * a[k][j];
      a[i][j]=sum;
      if ((dum=vv[i]*fabs(sum)) >= big) {
	big = dum;
	imax = i;
      }
    }
    // exchange rows if the pivot is not already on the diagonal
    if (j != imax){
      for (int k=0; k<n; k++){
	dum=a[imax][k];
	a[imax][k]=a[j][k];
	a[j][k]=dum;
      }
      d = -d;
      vv[imax]=vv[j];
    }
    indx[j]=imax;
    if (a[j][j] == 0)
      a[j][j] = 1.0e-20;
    if (j != n-1){
      dum = 1/(a[j][j]);
      for (int i=j+1; i<n; i++)
	a[i][j] *= dum;
    }
  }
  return 0;
}


// Solves the linear system for one right-hand side using the factors and
// the permutation produced by ludcmp: forward substitution followed by
// back substitution. `b` holds the right-hand side on entry and the
// solution on return.
void lubksb(double **a, int *indx, double *b,int n)
{
  // 1-based position of the first non-zero right-hand-side element,
  // 0 while none has been seen (leading zeros are skipped)
  int firstNonZero=0;

  for (int i=0; i<n; i++){
    const int pivotRow=indx[i];
    double acc=b[pivotRow];
    b[pivotRow]=b[i];
    if (firstNonZero == 0){
      if (acc != 0.0)
	firstNonZero=i+1;
    }else{
      for (int j=firstNonZero-1; j<i; j++)
	acc -= a[i][j]*b[j];
    }
    b[i]=acc;
  }

  for (int i=n-1; i>=0; i--){
    double acc=b[i];
    for (int j=i+1; j<n; j++)
      acc -= a[i][j]*b[j];
    b[i]=acc/a[i][i];
  }
}

// Small tokenizer: returns the start of the current token of *str, cuts the
// string at the first occurrence of `split` (overwriting it with '\0') and
// advances *str past it. Without a separator *str ends up on the
// terminating '\0'.
char *angsd::strpop(char **str,char split){
  char *head=*str;
  char *cur=*str;
  while(*cur && *cur!=split)
    ++cur;
  if(*cur){
    *cur='\0';
    ++cur;
  }
  *str=cur;
  return head;
}




int angsd::svd_inverse(double mat[],int xLen, int yLen){
  // Inverts the square matrix `mat` (xLen*yLen values) in place using an LU
  // decomposition (ludcmp) and one back substitution (lubksb) per column.
  // Returns 0 on success and 1 if ludcmp reports a singular matrix, in
  // which case `mat` is left unchanged. A non-square request prints a
  // message and terminates.
  // All scratch memory is held in std::vector, so nothing is leaked on the
  // singular early return and no variable-length stack array is needed.
  if(xLen !=yLen){

    fprintf(stderr,"non square matrix [%s]\t[%s]\n",__FILE__,__FUNCTION__);
    exit(0);

  }
  if(xLen<=0)
    return 0;

  const size_t n = xLen;

  // working copy (transposed), addressed through row pointers as ludcmp
  // and lubksb expect
  std::vector<double> work(n*n);
  std::vector<double*> tm(n);
  for(size_t i=0;i<n;i++)
    tm[i] = &work[i*n];

  for(size_t i=0;i<n;i++)
    for(size_t j=0;j<n;j++)
      tm[i][j]=mat[j*n+i];

  std::vector<int> indx(n);
  double d;
  const int singular=ludcmp(&tm[0],&indx[0],d,xLen);
  if(singular)
    return 1 ;

  // solve for every unit vector
  std::vector<double> col(n);
  std::vector<double> y(n*n);
  for(size_t j=0;j<n;j++){
    for(size_t i=0;i<n;i++)
      col[i]=0;
    col[j]=1;
    lubksb(&tm[0],&indx[0],&col[0],xLen);
    for(size_t i=0;i<n;i++)
      y[j*n+i]=col[i];
  }

  for(size_t k=0;k<n*n;k++)
    mat[k]=y[k];

  return 0;
}



//density of the normal distribution with the given mean and sd, with a
//safeguard against underflow
//by emil added 24-11-2018
// ifLog!=0 returns the log density.
// When the exponential part drops below 1e-20 the function returns the
// floor itself (1e-20, or log(1e-20) in log mode) instead of the density.
double angsd::dnorm(double x,double mean,double sd,int ifLog){

  const double lower_bound=1e-20;//emil - not for users

  const double fac = 1.0/(sd*sqrt(2.0*M_PI));
  const double val = exp(-(((x-mean)*(x-mean))/(2*sd*sd)));

  // val can reach 0 through underflow of the exponential
  const bool tooSmall = val<lower_bound;

  if(ifLog)
    return tooSmall ? log(lower_bound) : (log(fac)+log(val));

  return tooSmall ? lower_bound : fac*val;
}

//probability of a bernoulli outcome, with safeguards so that the log is
//never taken of zero
//by emil added 24-11-2018
//  k     : outcome (0 or 1)
//  p     : success probability
//  ifLog : non-zero returns the log probability
// Returns p^k * (1-p)^(1-k) (or its log). Both p and 1-p are limited to
// the range [1e-20, 1].
double angsd::bernoulli(int k, double p, int ifLog){
  const double lower_bound=1e-20;//emil - not for users

  // 1-p is taken from the unclamped p and bounded on its own: in double
  // precision 1-1e-20 equals 1, so clamping p to 1-lower_bound cannot keep
  // 1-p away from zero.
  double q = 1-p;

  if(p>1){
    p = 1;
  } else if(p<lower_bound){
    p = lower_bound;
  }

  if(q>1){
    q = 1;
  } else if(q<lower_bound){
    q = lower_bound;
  }

  const double prob = pow(p,k)*pow(q,1-k);
  if(ifLog){
    return( log(prob) );
  } else{
    return( prob );
  }
}



// spread of a set of data around its mean
//by emil added 24-11-2018
// Returns the sum of squared deviations from the mean divided by (size-1).
// NOTE(review): no square root is taken, so the returned value is the
// sample variance even though the function is named sd - confirm what the
// callers expect before changing it.
double angsd::sd(double* phe, int size ){
  double total = 0;
  for(int i=0;i<size;i++)
    total += phe[i];
  const double mean = total/(1.0*size);

  double sqDev = 0;
  for(int i=0;i<size;i++){
    const double diff = phe[i]-mean;
    sqDev += diff*diff;
  }
  return sqDev/(1.0*(size-1.0));
}

double angsd::to_pval(Chisqdist *chisq,double f){
  // Upper tail of the given chi-square distribution at f: 1 - cdf(f).
  // A negative statistic gives 1.
  if(f<0)
    return 1;
  return 1-chisq->cdf(f);
}

// probability mass of the poisson distribution with rate lambda at k
//by emil added 12-04-2019
// from: http://www.masaers.com/2013/10/08/Implementing-Poisson-pmf.html
// The value is computed in log space, k*log(lambda) - lgamma(k+1) - lambda;
// ifLog!=0 returns that log value, otherwise its exponential.
double angsd::poisson(double k,  double lambda, int ifLog) {
  const double logMass = k * log(lambda) - lgamma(k + 1.0) - lambda;
  if(ifLog)
    return logMass;
  return exp(logMass);
}




// a,c,g,t,n
// A,C,G,T,N
// 0,1,2,3,4
// Lookup table indexed by a byte value (0..255) giving a base code.
// Entries 0-3 map to themselves, the characters '0'-'3' (48-51) map to 0-3,
// 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3.
// Every other byte, including 'N'/'n', maps to 4.
// The trailing comment on each row is the index of its last entry.
int refToInt[256] = {
  0,1,2,3,4,4,4,4,4,4,4,4,4,4,4,4,//15
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//31
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//47
  0,1,2,3,4,4,4,4,4,4,4,4,4,4,4,4,//63
  4,0,4,1,4,4,4,2,4,4,4,4,4,4,4,4,//79
  4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,//95
  4,0,4,1,4,4,4,2,4,4,4,4,4,4,4,4,//111
  4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,//127
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//143
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//159
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//175
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//191
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//207
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//223
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//239
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4//255
};

// Base code (0..4) -> base letter; code 4 is 'N'.
char intToRef[5] = {'A','C','G','T','N'};

// IUPAC code index (0..14) -> IUPAC letter; index 14 is 'N'.
char intToIupac[15] = {'A','C','G','T','R','Y','S','W','K','M','B','D','H','V','N'};

// Lookup table indexed by a byte value (0..255) giving a base code stored
// as char; the entries are the same values as in refToInt.
// Entries 0-3 map to themselves, '0'-'3' map to 0-3, A/a -> 0, C/c -> 1,
// G/g -> 2, T/t -> 3, every other byte -> 4.
char refToChar[256] = {
    0,1,2,3,4,4,4,4,4,4,4,4,4,4,4,4,//15
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//31
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//47
    0,1,2,3,4,4,4,4,4,4,4,4,4,4,4,4,//63
    4,0,4,1,4,4,4,2,4,4,4,4,4,4,4,4,//79
    4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,//95
    4,0,4,1,4,4,4,2,4,4,4,4,4,4,4,4,//111
    4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,//127
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//143
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//159
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//175
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//191
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//207
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//223
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,//239
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4//255
};

void angsd::norm(double *d,size_t len){
  // Normalises the `len` values of `d` in place so that they sum to one
  // (every entry is divided by the total).
  // The loop index has the same type as `len`, so there is no
  // signed/unsigned comparison and no overflow for very long arrays.
  double ts=0;
  for(size_t i=0;i<len;i++)
    ts += d[i];

  for(size_t i=0;i<len;i++)
    d[i] /= ts;

}


// EM estimate of the allele frequency assuming HWE
//  loglike : 3 log likelihoods per individual
//  numInds : number of individuals
// Starts at 0.1 and runs at most 100 iterations, stopping once the estimate
// changes by less than 1e-5. If the estimate turns into NaN, diagnostics
// are written to stderr and -999 is returned.
double angsd::estFreq(double *loglike,int numInds){

  const double accu=0.00001;
  const double accu2=0;
  const int maxIter=100;

  // single precision is used on purpose here, as in the original estimate
  float W0;
  float W1;
  float W2;
  float sum;
  float p= 0.1;
  float temp_p=p;

  for(int round=0;round<maxIter;round++){
    // expected allele count over all individuals, given the current p
    sum=0;
    for(int i=0;i<numInds;i++){
      W0=exp(loglike[i*3+0])*pow(1-p,2);
      W1=exp(loglike[i*3+1])*2*p*(1-p);
      W2=exp(loglike[i*3+2])*(pow(p,2));
      sum+=(W1+2*W2)/(2*(W0+W1+W2));
    }

    p=sum/numInds;

    const bool absConverged = (p-temp_p<accu&&temp_p-p<accu);
    const bool relConverged = (p/temp_p<1+accu2&&p/temp_p>1-accu2);
    if(absConverged||relConverged)
      break;
    temp_p=p;
  }

  if(!std::isnan(p))
    return(p);

  // NaN estimate: dump the input and the per-individual weights
  fprintf(stderr,"[%s] caught nan will not exit\n",__FUNCTION__);
  fprintf(stderr,"logLike (3*nInd). nInd=%d\n",numInds);
  fprintf(stderr,"keepList (nInd)\n");
  fprintf(stderr,"used logLike (3*length(keep))=%d\n",numInds);

  for(int ind=0;ind<numInds;ind++){
    fprintf(stderr,"1\t");
    for(int g=0;g<3;g++)
      fprintf(stderr,"%f\t",loglike[ind*3+g]);
    fprintf(stderr,"\n");
  }

  sum=0;
  for(int i=0;i<numInds;i++){
    W0=exp(loglike[i*3+0])*pow(1-p,2);
    W1=exp(loglike[i*3+1])*2*p*(1-p);
    W2=exp(loglike[i*3+2])*(pow(p,2));
    sum+=(W1+2*W2)/(2*(W0+W1+W2));
    fprintf(stderr,"p=%f W %f\t%f\t%f sum=%f loglike: %f\n",p,W0,W1,W2,sum,exp(loglike[i*3+2])*pow(1-p,2));
  }
  p=-999;

  return(p);
}

// Draws a base index (0..3) with probability proportional to the four
// counts a[0..3].
// The uniform draw on [0,total) is compared against the running
// (cumulative) sum of the counts; comparing it against the individual
// counts does not select the bases in proportion to their counts.
// Returns 3 when all counts are zero.
int countsSample(suint *a){
  const double r = drand48()*(a[0]+a[1]+a[2]+a[3]);

  double upper = a[0];
  if(r<upper)
    return 0;
  upper += a[1];
  if(r<upper)
    return 1;
  upper += a[2];
  if(r<upper)
    return 2;
  return 3;
}








//public domain from here http://www.johndcook.com/cpp_phi.html
// Approximation of the standard normal cumulative distribution function,
// evaluated with the polynomial of Abramowitz & Stegun formula 7.1.26.
double phi(double x){
    // polynomial coefficients and scale constant of the formula
    const double a1 =  0.254829592;
    const double a2 = -0.284496736;
    const double a3 =  1.421413741;
    const double a4 = -1.453152027;
    const double a5 =  1.061405429;
    const double p  =  0.3275911;

    // the formula is evaluated for |x|/sqrt(2); the sign is applied at the end
    const int sign = (x < 0) ? -1 : 1;
    const double z = fabs(x)/sqrt(2.0);

    const double t = 1.0/(1.0 + p*z);

    // nested (Horner) evaluation of the polynomial in t
    double poly = a5*t + a4;
    poly = poly*t + a3;
    poly = poly*t + a2;
    poly = poly*t + a1;
    const double y = 1.0 - poly*t*exp(-z*z);

    return 0.5*(1.0 + sign*y);
}
