/**********************************************************************
 *
 *  lmEQTL.c
 *
 *  Created by Wei Sun on 2/21/06.
 *  
 *  as I expected, this c program is much faster than the R version
 *  one job need about 5 minutes in c, but 150 minutes in R
 *  
 *  to my surprise
 *  lmEQTL_backward is not really much faster than lmEQTL_complete
 *  200 genes in adipose: 302 secs vs. 256 secs
 *  200 genes in brain:   259 secs vs. 218 secs
 *  200 genes in liver:   324 secs vs. 275 secs
 *  200 genes in muscle:  328 secs vs. 275 secs
 *
 * these computation time is recorded in my (poor) laptop, MacBook Pro 15"
 * 2GHz Intel Core Duo 1.66G RAM
 *
 **********************************************************************/

#include <stdio.h>
#include <stddef.h>
#include <time.h>
#include <string.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_multifit.h>
#include <R.h>
#include "utility.h"


/**********************************************************************
 * lmEQTL_complete
 *
 * use linear model including sex, additive effect, dominant effect 
 * and possible interactions to find eQTL. Reduce the result so that 
 *
 * "complete" means to evaluate all the following 6 models 
 *
 * coding sex by 0 and 1, coding marker by -1, 0, and 1
 *
 * model 0: g ~ sex
 * model 1: g ~ sex + add
 * model 2: g ~ sex + add + sex:add
 * model 3: g ~ sex + add + dom
 * model 4: g ~ sex + add + dom + sex:add
 * model 5: g ~ sex + add + dom + sex:add + sex:dom
 *
 **********************************************************************/

int lmEQTL_complete (int* dims, double* ve, double* vm, int* ve_na, int* vm_na, 
double* sex, char** output, double* RP_cut, int* add_only, int* detail, 
double* Rtol, double* Rnna_percent, int* succeed) {

  int i, j, k, c, which;
  // chisq (residual sum square)
  double chisq, chisq0;
  // rank of model matrix for different models
  size_t rank;
  // LR statistics, p-values
  double LR[6];
  double LR_P[6];
  // t-statistics
  double t_stat;
  // minimum p-values of the 5 models
  double P_min;
  // p-value frequencies
  double tmp;
  unsigned long freqs[101];
  for(i=0; i<=100; i++){
    freqs[i] = 0;
  }
  
  const int nrow_e = dims[0];
  const int nrow_m = dims[1];
  const int ncol   = dims[2];
  const double P_cut = RP_cut[0];
  const double tol = Rtol[0];
  const double nna_percent = Rnna_percent[0];
  
  Rprintf("\n------------------------------------------------------\n");
  Rprintf("dims=(%d, %d, %d)\t", nrow_e, nrow_m, ncol);
  Rprintf("P_cut=%e\n", P_cut);

  gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol); // gene expression matrix
  gsl_matrix * mm = gsl_matrix_alloc (nrow_m, ncol); // marker genotype matrix
  gsl_matrix_int * ie = gsl_matrix_int_alloc (nrow_e, ncol); // missing index
  gsl_matrix_int * im = gsl_matrix_int_alloc (nrow_m, ncol); // missing index
  
  gsl_vector * vs = gsl_vector_alloc (ncol); // vector of sex data
  gsl_vector * gd = gsl_vector_alloc (ncol); // gene expression data
  gsl_vector * sd = gsl_vector_alloc (ncol); // sex data
  gsl_vector * md = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector * md_dom = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector_int * gi = gsl_vector_int_alloc (ncol); // missing value index
  gsl_vector_int * mi = gsl_vector_int_alloc (ncol); // missing value index
  
  // intercept to be used in model matrix
  gsl_vector * intercept = gsl_vector_alloc (ncol); 
  gsl_vector_set_all(intercept, 1.0);

  // the model matrix
  gsl_matrix * X = gsl_matrix_alloc (ncol, 6);
  gsl_matrix_view Xa;

  // reorganize the data into gsl_vector/matrix structure
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_e; i++){
      gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
      gsl_matrix_int_set(ie, i, j, ve_na[j*nrow_e+i]);
    }
  }
  
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_m; i++){
      gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
      gsl_matrix_int_set(im, i, j, vm_na[j*nrow_m+i]);
    }
  }

  for(j=0; j<ncol; j++){
    gsl_vector_set(vs, j, sex[j]);
  }
  

  /**
   * model 0: g ~ sex
   * model 1: g ~ sex + add
   * model 2: g ~ sex + add + sex:add
   * model 3: g ~ sex + add + dom
   * model 4: g ~ sex + add + dom + sex:add
   * model 5: g ~ sex + add + dom + sex:add + sex:dom
   **/

  // coefficients different models
  gsl_vector * c0 = gsl_vector_alloc (2);
  gsl_vector * c0_p = gsl_vector_alloc (2);
  gsl_matrix * c0_cov = gsl_matrix_alloc (2, 2);
  gsl_vector * c1 = gsl_vector_alloc (3);
  gsl_vector * c1_p = gsl_vector_alloc (3);
  gsl_matrix * c1_cov = gsl_matrix_alloc (3, 3);
  gsl_vector * c2 = gsl_vector_alloc (4);
  gsl_vector * c2_p = gsl_vector_alloc (4);
  gsl_matrix * c2_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c3 = gsl_vector_alloc (4);
  gsl_vector * c3_p = gsl_vector_alloc (4);
  gsl_matrix * c3_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c4 = gsl_vector_alloc (5);
  gsl_vector * c4_p = gsl_vector_alloc (5);
  gsl_matrix * c4_cov = gsl_matrix_alloc (5, 5);
  gsl_vector * c5 = gsl_vector_alloc (6);
  gsl_vector * c5_p = gsl_vector_alloc (6);
  gsl_matrix * c5_cov = gsl_matrix_alloc (6, 6);
        
  // work space for linear model fitting
  gsl_multifit_linear_workspace * work;

  // output file handles
  FILE * fo, *ff;
    
  // time records
  time_t sec_s;
  time_t sec_e;
  time_t sec_st;
  time_t sec_et;

  sec_s  = time(NULL);
  sec_st = time(NULL);
  
  /*
  Rprintf("matrix of gene expression[1, 1:8]\n");
  Rprint_matrix(me, 0, 0, 0, 7, 1);
  Rprintf("matrix of markers[1, 1:8]\n");
  Rprint_matrix_i(mm, 0, 0, 0, 7, 1);
  Rprintf("vector of sex\n");
  Rprint_vector_i(vs, 0, 7);
  */
    
  sec_et = time(NULL);
  Rprintf("time spent on reading data is %ld secs\n", sec_et-sec_st);

  fo  = fopen (output[0], "w");
 /**
  * coding sex by 0 and 1, coding marker by -1, 0, and 1
  *
  * model 0: g ~ sex
  * model 1: g ~ sex + add
  * model 2: g ~ sex + add + sex:add
  * model 3: g ~ sex + add + dom
  * model 4: g ~ sex + add + dom + sex:add
  * model 5: g ~ sex + add + dom + sex:add + sex:dom
  *
  */

  fprintf(fo, "Gene_ID\tMarker_ID\tN\tMk");
  fprintf(fo, "\tLR_Mk_M0\tLR_Mk_M0_P");
  fprintf(fo, "\tLR_M1_M0\tLR_M1_M0_P");
  fprintf(fo, "\tLR_M2_M0\tLR_M2_M0_P");

  if(*add_only == 0){
    fprintf(fo, "\tLR_M3_M0\tLR_M3_M0_P");
    fprintf(fo, "\tLR_M4_M0\tLR_M4_M0_P");
    fprintf(fo, "\tLR_M5_M0\tLR_M5_M0_P");
  }
    
  if(*detail == 1){
    
    fprintf(fo, "\tM0_Int\tM0_Int_P\tM0_Sex\tM0_Sex_P");

    fprintf(fo, "\tM1_Int\tM1_Int_P\tM1_Sex\tM1_Sex_P");
    fprintf(fo, "\tM1_Add\tM1_Add_P");

    fprintf(fo, "\tM2_Int\tM2_Int_P\tM2_Sex\tM2_Sex_P");
    fprintf(fo, "\tM2_Add\tM2_Add_P\tM2_SAdd\tM2_SAdd_P");
    
    if(*add_only == 0){
      fprintf(fo, "\tM3_Int\tM3_Int_P\tM3_Sex\tM3_Sex_P");
      fprintf(fo, "\tM3_Add\tM3_Add_P\tM3_Dom\tM3_Dom_P");
  
      fprintf(fo, "\tM4_Int\tM4_Int_P\tM4_Sex\tM4_Sex_P");
      fprintf(fo, "\tM4_Add\tM4_Add_P\tM4_Dom\tM4_Dom_P");
      fprintf(fo, "\tM4_SAdd\tM4_SAdd_P");
  
      fprintf(fo, "\tM5_Int\tM5_Int_P\tM5_Sex\tM5_Sex_P");
      fprintf(fo, "\tM5_Add\tM5_Add_P\tM5_Dom\tM5_Dom_P");
      fprintf(fo, "\tM5_SAdd\tM5_SAdd_P\tM5_SDom\tM5_SDom_P");
    }
  }
  fprintf(fo, "\n");

  for(i=0; i<nrow_e; i++){ // the (i+1)-th gene expression profile
  
    if(i%1000 == 0){ Rprintf("finish %d-th gene expression profile\n",i); }
    gsl_vector_view gd_v = gsl_matrix_row (me, i);
    gsl_matrix_int_get_row (gi, ie, i);
    
    // check whether there is enough observations for gi
    c = 0;
    for(k = 0; k<ncol; k++){
      if(gsl_vector_int_get(gi,k) == 1){ c++; }
    }
    
    if(c < ncol * nna_percent){ continue; }
        
    for(j=0; j<nrow_m; j++){ // the (j+1)-th marker genotype profile

      gsl_vector_view md_v = gsl_matrix_row (mm, j);
      gsl_matrix_int_get_row (mi, im, j);
      gsl_vector_set_all (md_dom, -1.0);
            
      // after multiplication, mi is the index whether there is 
      // missing value, either gene expression or genotype
      gsl_vector_int_mul (mi, gi);
      
      c = 0;
      for(k = 0; k<ncol; k++){
        if(gsl_vector_int_get(mi,k) == 1){
          gsl_vector_set(gd, c, gsl_vector_get(&gd_v.vector,k));
          gsl_vector_set(md, c, gsl_vector_get(&md_v.vector,k));
          if(gsl_vector_get(md, c) == 0.0){
            gsl_vector_set(md_dom, c, 1.0);
          }
          // make sure if there is missing value in gd or md
          // the corresponding item in vector sex is not used
          gsl_vector_set(sd, c, gsl_vector_get(vs,k));
          c++;
        }
      }

      // if there is not enough observations left, exit
      if(c < ncol * nna_percent){ continue; }
                  
      // generate the model matrix
      // g = intercept + sex + m.add + m.dom + sex*m.add + sex*m.dom
      gsl_matrix_set_col(X, 0, intercept);
      gsl_matrix_set_col(X, 1, sd);
      gsl_matrix_set_col(X, 2, md);
      gsl_matrix_set_col(X, 3, md_dom);
      gsl_vector_mul(md, sd);
      gsl_matrix_set_col(X, 4, md);
      gsl_vector_mul(md_dom, sd);
      gsl_matrix_set_col(X, 5, md_dom);
      
      // Rprint_matrix_i(X, 0, ncol-1, 0, 5, 0);
      // reset LR, LR_P
      for(k=0; k<=5; k++){
        LR[k] = -1.0;
        LR_P[k] = 9.0;
      }

      // vector include non-missing value
      gsl_vector_view gd_a = gsl_vector_subvector(gd, 0, c);
      
      /*
       * model 0: g ~ sex
       * model 1: g ~ sex + add
       * model 2: g ~ sex + add + sex:add
       * model 3: g ~ sex + add + dom
       * model 4: g ~ sex + add + dom + sex:add
       * model 5: g ~ sex + add + dom + sex:add + sex:dom
       */

      // model 0: g ~ sex
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 2);
      work = gsl_multifit_linear_alloc (c,2);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                &rank, c0, c0_cov, &chisq0, work);
      gsl_multifit_linear_free(work);

      if(rank < 2){ continue; }

      // model 1: g ~ sex + add
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 3);
      work = gsl_multifit_linear_alloc (c,3);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                &rank, c1, c1_cov, &chisq, work);
      gsl_multifit_linear_free(work);
      
      if(rank < 3){ 
        continue; 
      }else{
        LR[1] = c*log(chisq0/chisq);
        LR_P[1] = gsl_cdf_chisq_Q(LR[1], 1);
      }
      
      // model 2: g ~ sex + add + sex:add
      gsl_matrix_swap_columns(X, 3, 4);
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
      work = gsl_multifit_linear_alloc (c,4);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                &rank, c2, c2_cov, &chisq, work);
      gsl_multifit_linear_free(work);

      if(rank == 4){
        LR[2] = c*log(chisq0/chisq);
        LR_P[2] = gsl_cdf_chisq_Q(LR[2], 2);
      }

      if(*add_only == 0){
        // model 3: g ~ sex + add + dom
        gsl_matrix_swap_columns(X, 3, 4);
              Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
        work = gsl_multifit_linear_alloc (c,4);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                  &rank, c3, c3_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 4){
          LR[3] = c*log(chisq0/chisq);
          LR_P[3] = gsl_cdf_chisq_Q(LR[3], 2);
        }
  
        // model 4: g ~ sex + add + dom + sex:add
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 5);
        work = gsl_multifit_linear_alloc (c,5);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                  &rank, c4, c4_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 5){
          LR[4] = c*log(chisq0/chisq);
          LR_P[4] = gsl_cdf_chisq_Q(LR[4], 3);
        }
  
        // model 5: g ~ sex + add + dom + sex*add + sex*dom
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 6);
        work = gsl_multifit_linear_alloc (c,6);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                  &rank, c5, c5_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 6){
          LR[5] = c*log(chisq0/chisq);
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 4);
        }
      }
          
      // if the minimum p-value is smaller than the cutoff
      if(*add_only==0){
        P_min = min(LR_P, 1, 5, &which);
      }else{
        P_min = min(LR_P, 1, 2, &which);
      }
      k = (int) (P_min / 0.01);
      freqs[k] += 1;
            
      if(P_min < P_cut){
        freqs[100] += 1;
      }else{
        continue;
      }
            
      if(*detail==1){
        for(k=0; k<2; k++){
          t_stat = gsl_vector_get(c0,k)/sqrt(gsl_matrix_get(c0_cov,k,k));
          gsl_vector_set(c0_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-2));
        }
        
        for(k=0; k<3; k++){
          t_stat = gsl_vector_get(c1,k)/sqrt(gsl_matrix_get(c1_cov,k,k));
          gsl_vector_set(c1_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-3));
        }
        
        for(k=0; k<4; k++){
          t_stat = gsl_vector_get(c2,k)/sqrt(gsl_matrix_get(c2_cov,k,k));
          gsl_vector_set(c2_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-4));
        }
        
        if(*add_only == 0){
          for(k=0; k<4; k++){
            t_stat = gsl_vector_get(c3,k)/sqrt(gsl_matrix_get(c3_cov,k,k));
            gsl_vector_set(c3_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-4));
          }
  
          for(k=0; k<5; k++){
            t_stat = gsl_vector_get(c4,k)/sqrt(gsl_matrix_get(c4_cov,k,k));
            gsl_vector_set(c4_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-5));
          }
  
          for(k=0; k<6; k++){
            t_stat = gsl_vector_get(c5,k)/sqrt(gsl_matrix_get(c5_cov,k,k));
            gsl_vector_set(c5_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-6));
          }
        }
      }
      
      fprintf(fo, "%d\t%d\t%d\t%d", i+1, j+1, c, which);
      fprintf(fo, "\t%.3f\t%.2e", LR[which], LR_P[which]);

      fprintf(fo, "\t%.3f\t%.2e", LR[1], LR_P[1]);
      fprintf(fo, "\t%.3f\t%.2e", LR[2], LR_P[2]);

      if(*add_only==0){      
        fprintf(fo, "\t%.3f\t%.2e", LR[3], LR_P[3]);
        fprintf(fo, "\t%.3f\t%.2e", LR[4], LR_P[4]);
        fprintf(fo, "\t%.3f\t%.2e", LR[5], LR_P[5]);
      }
      
      if(*detail == 1){
        for(k=0; k<2; k++){
          fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c0, k), gsl_vector_get(c0_p,k));
        }

        for(k=0; k<3; k++){
          fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c1, k), gsl_vector_get(c1_p,k));
        }
        
        for(k=0; k<4; k++){
          fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c2, k), gsl_vector_get(c2_p,k));
        }
        
        if(*add_only==0){
          for(k=0; k<4; k++){
            fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c3, k), gsl_vector_get(c3_p,k));
          }
          
          for(k=0; k<5; k++){
            fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c4, k), gsl_vector_get(c4_p,k));
          }
          
          for(k=0; k<6; k++){
            fprintf(fo, "\t%.3f\t%.2e", gsl_vector_get(c5, k), gsl_vector_get(c5_p,k));
          }
        }
      }
      fprintf(fo, "\n");
    }
  }
  
  fclose(fo);
  
  // print out the frequencies
  ff  = fopen (output[1], "w");
  tmp = 0.0;
  
  fprintf(ff, "<%.2e", P_cut);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%.2f-%.2f", tmp, tmp+0.01);
    tmp += 0.01;
  }
  fprintf(ff, "\n");
  
  fprintf(ff, "%lu", freqs[100]);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%lu", freqs[i]);
  }
  fprintf(ff, "\n");
  
  fclose(ff);

  // finish up
  sec_e  = time(NULL);
  Rprintf("total time spent is %ld secs\n", sec_e-sec_s);
  Rprintf("------------------------------------------------------\n");

  gsl_matrix_free(me);
  gsl_matrix_free(mm);
  gsl_matrix_int_free(ie);
  gsl_matrix_int_free(im);
  gsl_vector_free(vs);
  gsl_vector_free(gd);
  gsl_vector_free(md);
  gsl_vector_free(md_dom);
  gsl_vector_int_free(gi);
  gsl_vector_int_free(mi);
  gsl_vector_free(sd);
  gsl_vector_free(c0);
  gsl_vector_free(c0_p);
  gsl_matrix_free(c0_cov);
  gsl_vector_free(c1);
  gsl_vector_free(c1_p);
  gsl_matrix_free(c1_cov);
  gsl_vector_free(c2);
  gsl_vector_free(c2_p);
  gsl_matrix_free(c2_cov);
  gsl_vector_free(c3);
  gsl_vector_free(c3_p);
  gsl_matrix_free(c3_cov);
  gsl_vector_free(c4);
  gsl_vector_free(c4_p);
  gsl_matrix_free(c4_cov);
  gsl_vector_free(c5);
  gsl_vector_free(c5_p);
  gsl_matrix_free(c5_cov);
  gsl_vector_free(intercept);
  gsl_matrix_free(X);
    
  *succeed = 1;
  return(1);
}

/**********************************************************************
 * lmEQTL_complete_byChr
 *
 * use linear model including sex, additive effect, dominant effect 
 * and possible interactions to find eQTL. Reduce the result so that 
 * for each gene, keep the most significant linkage in each chromosome
 *
 * "complete" means to evaluate all the following 6 models 
 *
 * coding sex by 0 and 1, coding marker by -1, 0, and 1
 *
 * model 0: g ~ sex
 * model 1: g ~ sex + add
 * model 2: g ~ sex + add + sex:add
 * model 3: g ~ sex + add + dom
 * model 4: g ~ sex + add + dom + sex:add
 * model 5: g ~ sex + add + dom + sex:add + sex:dom
 *
 **********************************************************************/

int lmEQTL_complete_byChr (int* dims, double* ve, double* vm, int* ve_na, 
int* vm_na, int* chr_freq, int* n_chr, double* sex, char** output, double* RP_cut, 
int* detail, double* Rtol, double* Rnna_percent, int* succeed) {

  int i, u, j, j0, j1, k, c, which;
  // chisq (residual sum square)
  double chisq, chisq0;
  // rank of model matrix for different models
  size_t rank;
  // LR statistics, p-values
  double LR[6];
  double LR_P[6];
  // t-statistics
  double t_stat;
  // minimum p-values of the 5 models
  double P_min;
  // minimum P_min for all the markers in one chromosome
  double P_chr;
  // p-value frequencies
  double tmp;
  unsigned long freqs[101];
  for(i=0; i<=100; i++){ freqs[i] = 0; }
  
  const int nrow_e = dims[0];
  const int nrow_m = dims[1];
  const int ncol   = dims[2];
  const double P_cut = RP_cut[0];
  const double tol = Rtol[0];
  const double nna_percent = Rnna_percent[0];
  
  Rprintf("\n------------------------------------------------------\n");
  Rprintf("dims=(%d, %d, %d)\t", nrow_e, nrow_m, ncol);
  Rprintf("P_cut=%e\n", P_cut);

  gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol); // gene expression matrix
  gsl_matrix * mm = gsl_matrix_alloc (nrow_m, ncol); // marker genotype matrix
  gsl_matrix_int * ie = gsl_matrix_int_alloc (nrow_e, ncol); // missing index
  gsl_matrix_int * im = gsl_matrix_int_alloc (nrow_m, ncol); // missing index
  gsl_vector * vs = gsl_vector_alloc (ncol); // vector of sex data
  
  gsl_vector * gd = gsl_vector_alloc (ncol); // gene expression data
  gsl_vector * sd = gsl_vector_alloc (ncol); // sex data
  gsl_vector * md = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector * md_dom = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector_int * gi = gsl_vector_int_alloc (ncol); // missing value index
  gsl_vector_int * mi = gsl_vector_int_alloc (ncol); // missing value index
  
  // intercept to be used in model matrix
  gsl_vector * intercept = gsl_vector_alloc (ncol); 
  gsl_vector_set_all(intercept, 1.0);

  // the model matrix
  gsl_matrix * X = gsl_matrix_alloc (ncol, 6);
  gsl_matrix_view Xa;

  // reorganize the data into gsl_vector/matrix structure
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_e; i++){
      gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
      gsl_matrix_int_set(ie, i, j, ve_na[j*nrow_e+i]);
    }
  }
  
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_m; i++){
      gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
      gsl_matrix_int_set(im, i, j, vm_na[j*nrow_m+i]);
    }
  }

  for(j=0; j<ncol; j++){
    gsl_vector_set(vs, j, sex[j]);
  }
  
  /**
   * model 0: g ~ sex
   * model 1: g ~ sex + add
   * model 2: g ~ sex + add + sex:add
   * model 3: g ~ sex + add + dom
   * model 4: g ~ sex + add + dom + sex:add
   * model 5: g ~ sex + add + dom + sex:add + sex:dom
   **/

  // coefficients different models
  gsl_vector * c0 = gsl_vector_alloc (2);
  gsl_vector * c0_p = gsl_vector_alloc (2);
  gsl_matrix * c0_cov = gsl_matrix_alloc (2, 2);
  gsl_vector * c1 = gsl_vector_alloc (3);
  gsl_vector * c1_p = gsl_vector_alloc (3);
  gsl_matrix * c1_cov = gsl_matrix_alloc (3, 3);
  gsl_vector * c2 = gsl_vector_alloc (4);
  gsl_vector * c2_p = gsl_vector_alloc (4);
  gsl_matrix * c2_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c3 = gsl_vector_alloc (4);
  gsl_vector * c3_p = gsl_vector_alloc (4);
  gsl_matrix * c3_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c4 = gsl_vector_alloc (5);
  gsl_vector * c4_p = gsl_vector_alloc (5);
  gsl_matrix * c4_cov = gsl_matrix_alloc (5, 5);
  gsl_vector * c5 = gsl_vector_alloc (6);
  gsl_vector * c5_p = gsl_vector_alloc (6);
  gsl_matrix * c5_cov = gsl_matrix_alloc (6, 6);
        
  // work space for linear model fitting
  gsl_multifit_linear_workspace * work;

  // output file handles
  FILE * fo, *ff;
  char row[1024];
    
  // time records
  time_t sec_s;
  time_t sec_e;

  sec_s  = time(NULL);
  
  /*
  Rprintf("matrix of gene expression[1, 1:8]\n");
  Rprint_matrix(me, 0, 0, 0, 7, 1);
  Rprintf("matrix of markers[1, 1:8]\n");
  Rprint_matrix_i(mm, 0, 0, 0, 7, 1);
  Rprintf("vector of sex\n");
  Rprint_vector_i(vs, 0, 7);
  */
    
  Rprintf("Finish reading data\n");

  fo  = fopen (output[0], "w");
  
  /**
   * coding sex by 0 and 1, coding marker by -1, 0, and 1
   *
   * model 0: g ~ sex
   * model 1: g ~ sex + add
   * model 2: g ~ sex + add + sex:add
   * model 3: g ~ sex + add + dom
   * model 4: g ~ sex + add + dom + sex:add
   * model 5: g ~ sex + add + dom + sex:add + sex:dom
   *
   */

  fprintf(fo, "Gene_ID\tMarker_ID\tN\tMk\t");
  fprintf(fo, "LR_Mk_M0\tLR_Mk_M0_P\t");
  fprintf(fo, "LR_M1_M0\tLR_M1_M0_P\t");
  fprintf(fo, "LR_M2_M0\tLR_M2_M0_P\t");
  fprintf(fo, "LR_M3_M0\tLR_M3_M0_P\t");
  fprintf(fo, "LR_M4_M0\tLR_M4_M0_P\t");
  
  if(*detail == 0){
    fprintf(fo, "LR_M5_M0\tLR_M5_M0_P\n");
  }else{
    fprintf(fo, "LR_M5_M0\tLR_M5_M0_P\t");
  
    fprintf(fo, "M0_Int\tM0_Int_P\tM0_Sex\tM0_Sex_P\t");
    
    fprintf(fo, "M1_Int\tM1_Int_P\tM1_Sex\tM1_Sex_P\t");
    fprintf(fo, "M1_Add\tM1_Add_P\t");
    
    fprintf(fo, "M2_Int\tM2_Int_P\tM2_Sex\tM2_Sex_P\t");
    fprintf(fo, "M2_Add\tM2_Add_P\tM2_SAdd\tM2_SAdd_P\t");
    
    fprintf(fo, "M3_Int\tM3_Int_P\tM3_Sex\tM3_Sex_P\t");
    fprintf(fo, "M3_Add\tM3_Add_P\tM3_Dom\tM3_Dom_P\t");
    
    fprintf(fo, "M4_Int\tM4_Int_P\tM4_Sex\tM4_Sex_P\t");
    fprintf(fo, "M4_Add\tM4_Add_P\tM4_Dom\tM4_Dom_P\t");
    fprintf(fo, "M4_SAdd\tM4_SAdd_P\t");
    
    fprintf(fo, "M5_Int\tM5_Int_P\tM5_Sex\tM5_Sex_P\t");
    fprintf(fo, "M5_Add\tM5_Add_P\tM5_Dom\tM5_Dom_P\t");
    fprintf(fo, "M5_SAdd\tM5_SAdd_P\tM5_SDom\tM5_SDom_P\n");
  }
    
  for(i=0; i<nrow_e; i++){ // the (i+1)-th gene expression profile
    if(i%1000 == 0){ Rprintf("finish %d-th gene expression profile\n",i); }
    gsl_vector_view gd_v = gsl_matrix_row (me, i);
    gsl_matrix_int_get_row (gi, ie, i);
    
    // check whether there is enough observations for gi
    c = 0;
    for(k = 0; k<ncol; k++){
      if(gsl_vector_int_get(gi,k) == 1){ c++; }
    }
    if(c < ncol * nna_percent){ continue; }
    
    j0 = j1 = 0;
    for(u=0; u < *n_chr; u++){ // for each chromosome
      j0 = j1;
      j1 = j0 + chr_freq[u];
      P_chr = 1.0;
      
      for(j=j0; j<j1; j++){ // the (j+1)-th marker in chrom u+1
        gsl_vector_view md_v = gsl_matrix_row (mm, j);
        gsl_matrix_int_get_row (mi, im, j);
        gsl_vector_set_all (md_dom, -1.0);
        
        // after multiplication, mi is the index whether there is 
        // missing value, either gene expression or genotype
        gsl_vector_int_mul (mi, gi);
        
        c = 0;
        for(k = 0; k<ncol; k++){
          if(gsl_vector_int_get(mi,k) == 1){
            gsl_vector_set(gd, c, gsl_vector_get(&gd_v.vector,k));
            gsl_vector_set(md, c, gsl_vector_get(&md_v.vector,k));
            if(gsl_vector_get(md, c) == 0.0){
                gsl_vector_set(md_dom, c, 1.0);
            }
            // make sure if there is missing value in gd or md
            // the corresponding item in vector sex is not used
            gsl_vector_set(sd, c, gsl_vector_get(vs,k));
            c++;
          }
        }
    
        // if there is not enough observations left, exit
        if(c < ncol * nna_percent){ continue; }
                      
        // generate the model matrix
        // g = intercept + sex + m.add + m.dom + sex*m.add + sex*m.dom
        gsl_matrix_set_col(X, 0, intercept);
        gsl_matrix_set_col(X, 1, sd);
        gsl_matrix_set_col(X, 2, md);
        gsl_matrix_set_col(X, 3, md_dom);
        gsl_vector_mul(md, sd);
        gsl_matrix_set_col(X, 4, md);
        gsl_vector_mul(md_dom, sd);
        gsl_matrix_set_col(X, 5, md_dom);

        // Rprint_matrix_i(X, 0, ncol-1, 0, 5, 0);
        
        // reset LR, LR_P
        for(k=0; k<=5; k++){
          LR[k] = -1.0;
          LR_P[k] = 9.0;
        }

        // vector include no non-missing value
        gsl_vector_view gd_a = gsl_vector_subvector(gd, 0, c);
        
        // model 0: g ~ sex
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 2);
        work = gsl_multifit_linear_alloc (c,2);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c0, c0_cov, &chisq0, work);
        gsl_multifit_linear_free(work);

        if(rank < 2){ continue; }

        // model 1: g ~ sex + add
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 3);
        work = gsl_multifit_linear_alloc (c,3);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c1, c1_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank < 3){ 
          continue; 
        }else{
          LR[1] = c*log(chisq0/chisq);
          LR_P[1] = gsl_cdf_chisq_Q(LR[1], 1);
        }
        
        // model 2: g ~ sex + add + sex:add
        gsl_matrix_swap_columns(X, 3, 4);
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
        work = gsl_multifit_linear_alloc (c,4);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c2, c2_cov, &chisq, work);
        gsl_multifit_linear_free(work);

        if(rank == 4){
          LR[2] = c*log(chisq0/chisq);
          LR_P[2] = gsl_cdf_chisq_Q(LR[2], 2);
        }

        // model 3: g ~ sex + add + dom
        gsl_matrix_swap_columns(X, 3, 4);
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
        work = gsl_multifit_linear_alloc (c,4);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c3, c3_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 4){
          LR[3] = c*log(chisq0/chisq);
          LR_P[3] = gsl_cdf_chisq_Q(LR[3], 2);
        }

        // model 4: g ~ sex + add + dom + sex:add
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 5);
        work = gsl_multifit_linear_alloc (c,5);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c4, c4_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 5){
          LR[4] = c*log(chisq0/chisq);
          LR_P[4] = gsl_cdf_chisq_Q(LR[4], 3);
        }

        // model 5: g ~ sex + add + dom + sex*add + sex*dom
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 6);
        work = gsl_multifit_linear_alloc (c,6);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            &rank, c5, c5_cov, &chisq, work);
        gsl_multifit_linear_free(work);
        
        if(rank == 6){
          LR[5] = c*log(chisq0/chisq);
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 4);
        }

        // if the minimum p-value is smaller than the cutoff
        P_min = min(LR_P, 1, 5, &which);
        if(P_chr > P_min){
          P_chr = P_min;
          if(P_chr >= P_cut){ continue; }
          
          /**
           * well it is kind of waste here to compute the t-stat 
           * because we only keep the most significant one for 
           * each chromosome, though not that much computation 
           * it is just the program looks long :)
           */
          if(*detail==1){
            for(k=0; k<2; k++){
              t_stat = gsl_vector_get(c0,k)/sqrt(gsl_matrix_get(c0_cov,k,k));
              gsl_vector_set(c0_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-2));
            }
            
            for(k=0; k<3; k++){
              t_stat = gsl_vector_get(c1,k)/sqrt(gsl_matrix_get(c1_cov,k,k));
              gsl_vector_set(c1_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-3));
            }
            
            for(k=0; k<4; k++){
              t_stat = gsl_vector_get(c2,k)/sqrt(gsl_matrix_get(c2_cov,k,k));
              gsl_vector_set(c2_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-4));
            }
            
            for(k=0; k<4; k++){
              t_stat = gsl_vector_get(c3,k)/sqrt(gsl_matrix_get(c3_cov,k,k));
              gsl_vector_set(c3_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-4));
            }

            for(k=0; k<5; k++){
              t_stat = gsl_vector_get(c4,k)/sqrt(gsl_matrix_get(c4_cov,k,k));
              gsl_vector_set(c4_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-5));
            }

            for(k=0; k<6; k++){
              t_stat = gsl_vector_get(c5,k)/sqrt(gsl_matrix_get(c5_cov,k,k));
              gsl_vector_set(c5_p,k,2*gsl_cdf_tdist_Q(fabs(t_stat),c-6));
            }
          }
          
          /**
           * store the result here in case this one is actually the most 
           * significant one in this chromosome, it will write out 
           */   
          sprintf(row, "%d\t%d\t%d\t%d\t", i+1, j+1, c, which);
          sprintf(row, "%s%.3f\t%.2e\t", row, LR[which], LR_P[which]);

          sprintf(row, "%s%.3f\t%.2e\t", row, LR[1], LR_P[1]);
          sprintf(row, "%s%.3f\t%.2e\t", row, LR[2], LR_P[2]);
          sprintf(row, "%s%.3f\t%.2e\t", row, LR[3], LR_P[3]);
          sprintf(row, "%s%.3f\t%.2e\t", row, LR[4], LR_P[4]);
          
          if(*detail == 0){
            sprintf(row, "%s%.3f\t%.2e\n", row, LR[5], LR_P[5]);
          }else{
            sprintf(row, "%s%.3f\t%.2e\t", row, LR[5], LR_P[5]);

            for(k=0; k<2; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c0, k), gsl_vector_get(c0_p,k));
            }

            for(k=0; k<3; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c1, k), gsl_vector_get(c1_p,k));
            }
            
            for(k=0; k<4; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c2, k), gsl_vector_get(c2_p,k));
            }
            
            for(k=0; k<4; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c3, k), gsl_vector_get(c3_p,k));
            }
            
            for(k=0; k<5; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c4, k), gsl_vector_get(c4_p,k));
            }
            
            for(k=0; k<5; k++){
              sprintf(row, "%s%.3f\t%.2e\t", row, gsl_vector_get(c5, k), gsl_vector_get(c5_p,k));
            }
            sprintf(row, "%s%.3f\t%.2e\n", row, gsl_vector_get(c5, k), gsl_vector_get(c5_p,k));
          }
                
        }
      } /* finish one chromosome for one gene */
        
      k = (int) (P_chr / 0.01);
      freqs[k] += 1;
        
      if(P_chr < P_cut){
        freqs[100] += 1;
        fputs(row, fo);
      }
    } /* finish all the chromosomes for one gene */
  } /* finish all the genes */
  
  fclose(fo);
  
  // print out the frequencies
  ff  = fopen (output[1], "w");
  tmp = 0.0;
  
  fprintf(ff, "<%.2e", P_cut);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%.2f-%.2f", tmp, tmp+0.01);
    tmp += 0.01;
  }
  fprintf(ff, "\n");
  
  fprintf(ff, "%lu", freqs[100]);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%lu", freqs[i]);
  }
  fprintf(ff, "\n");
  
  fclose(ff);

  // finish up
  sec_e  = time(NULL);
  Rprintf("total time spent is %ld secs\n", sec_e-sec_s);
  Rprintf("------------------------------------------------------\n");

  gsl_matrix_free(me);
  gsl_matrix_free(mm);
  gsl_matrix_int_free(ie);
  gsl_matrix_int_free(im);
  gsl_vector_free(vs);
  gsl_vector_free(gd);
  gsl_vector_free(md);
  gsl_vector_free(md_dom);
  gsl_vector_int_free(gi);
  gsl_vector_int_free(mi);
  gsl_vector_free(sd);
  gsl_vector_free(c0);
  gsl_vector_free(c0_p);
  gsl_matrix_free(c0_cov);
  gsl_vector_free(c1);
  gsl_vector_free(c1_p);
  gsl_matrix_free(c1_cov);
  gsl_vector_free(c2);
  gsl_vector_free(c2_p);
  gsl_matrix_free(c2_cov);
  gsl_vector_free(c3);
  gsl_vector_free(c3_p);
  gsl_matrix_free(c3_cov);
  gsl_vector_free(c4);
  gsl_vector_free(c4_p);
  gsl_matrix_free(c4_cov);
  gsl_vector_free(c5);
  gsl_vector_free(c5_p);
  gsl_matrix_free(c5_cov);
  gsl_vector_free(intercept);
  gsl_matrix_free(X);
    
  *succeed = 1;
  return(1);
}

/**********************************************************************
 * backward_1row
 * 
 * handle one row in backward model selection
 * update p-value frequencies and write out into file/string
 *
 **********************************************************************/

void backward_1row (FILE * fo, int g, int m, int n, int mk, 
unsigned long* freqs, double* lr, double* lr_p, double P_cut){
  int i, k;
  k = (int) (lr_p[5] / 0.01);
  freqs[k] += 1;
  
  if(lr_p[5] < P_cut){
    freqs[100] += 1;

    fprintf(fo, "%d\t%d\t%d\t", g, m, n);
    fprintf(fo, "%d\t", mk);
    for(i = 0; i<5; i++){
      fprintf(fo, "%.3f\t%.2e\t", lr[i], lr_p[i]);
    }
    fprintf(fo, "%.3f\t%.2e\n", lr[5], lr_p[5]);
  }
}

void sbackward_1row (char* row, int g, int m, int n, int mk, 
double* lr, double* lr_p, double P_cut){
  int i;
  
  if(lr_p[5] < P_cut){
    sprintf(row, "%d\t%d\t%d\t%d\t", g, m, n, mk);
    for(i=0; i<5; i++){
      sprintf(row, "%s%.3f\t%.2e\t", row, lr[i], lr_p[i]);
    }
    sprintf(row, "%s%.3f\t%.2e\n", row, lr[5], lr_p[5]);
  }
}

/**********************************************************************
 * lmEQTL_backward
 *
 * backward model selection
 *
 * coding sex by 0 and 1, coding marker by -1, 0, and 1
 *
 * model 0: g ~ sex
 * model 1: g ~ sex + add
 * model 2: g ~ sex + add + sex:add
 * model 3: g ~ sex + add + dom
 * model 4: g ~ sex + add + dom + sex:add
 * model 5: g ~ sex + add + dom + sex:add + sex:dom
 *
 **********************************************************************/

int lmEQTL_backward (int* dims, double* ve, double* vm, int* ve_na, int* vm_na, 
double* sex, char** output, double* RP_cut, double* RLRT_P_cut, double* Rtol, 
double* Rnna_percent, int* succeed) {

  int i, j, k, c;
  // chisq (residual sum square)
  double chisq[6];
  // rank of model matrix for different models
  size_t rank[6];
  // LR statistics, p-values
  double LR[6];
  double LR_P[6];
  // for record the histogram of p-values
  double tmp;
  unsigned long freqs[101];
  for(i=0; i<=100; i++){
     freqs[i] = 0;
  }
  const int nrow_e = dims[0];
  const int nrow_m = dims[1];
  const int ncol   = dims[2];
  const double P_cut = RP_cut[0];
  const double LRT_P_cut = RLRT_P_cut[0];
  const double tol = Rtol[0];
  const double nna_percent = Rnna_percent[0];
  
  Rprintf("\n------------------------------------------------------\n");
  Rprintf("dims=(%d, %d, %d)\t", nrow_e, nrow_m, ncol);
  Rprintf("P_cut=%e\n", P_cut);

  gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol); // gene expression matrix
  gsl_matrix * mm = gsl_matrix_alloc (nrow_m, ncol); // marker genotype matrix
  gsl_matrix_int * ie = gsl_matrix_int_alloc (nrow_e, ncol); // missing index
  gsl_matrix_int * im = gsl_matrix_int_alloc (nrow_m, ncol); // missing index
  
  gsl_vector * vs = gsl_vector_alloc (ncol); // vector of sex data
  gsl_vector * gd = gsl_vector_alloc (ncol); // gene expression data
  gsl_vector * sd = gsl_vector_alloc (ncol); // sex data
  gsl_vector * md = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector * md_dom = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector_int * gi = gsl_vector_int_alloc (ncol); // missing value index
  gsl_vector_int * mi = gsl_vector_int_alloc (ncol); // missing value index
  
  // intercept to be used in model matrix
  gsl_vector * intercept = gsl_vector_alloc (ncol); 
  gsl_vector_set_all(intercept, 1.0);

  // the model matrix
  gsl_matrix * X  = gsl_matrix_alloc (ncol, 6);
  gsl_matrix_view Xa;
  
  // reorganize the data into gsl_vector/matrix structure
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_e; i++){
      gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
      gsl_matrix_int_set(ie, i, j, ve_na[j*nrow_e+i]);
    }
  }
  
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_m; i++){
      gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
      gsl_matrix_int_set(im, i, j, vm_na[j*nrow_m+i]);
    }
  }

  for(j=0; j<ncol; j++){
    gsl_vector_set(vs, j, sex[j]);
  }

  /**
   * model 0: g ~ sex
   * model 1: g ~ sex + add
   * model 2: g ~ sex + add + sex:add
   * model 3: g ~ sex + add + dom
   * model 4: g ~ sex + add + dom + sex:add
   * model 5: g ~ sex + add + dom + sex:add + sex:dom
   **/

  // coefficients different models
  gsl_vector * c0 = gsl_vector_alloc (2);
  gsl_matrix * c0_cov = gsl_matrix_alloc (2, 2);
  gsl_vector * c1 = gsl_vector_alloc (3);
  gsl_matrix * c1_cov = gsl_matrix_alloc (3, 3);
  gsl_vector * c2 = gsl_vector_alloc (4);
  gsl_matrix * c2_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c3 = gsl_vector_alloc (4);
  gsl_matrix * c3_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c4 = gsl_vector_alloc (5);
  gsl_matrix * c4_cov = gsl_matrix_alloc (5, 5);
  gsl_vector * c5 = gsl_vector_alloc (6);
  gsl_matrix * c5_cov = gsl_matrix_alloc (6, 6);
    
  // work space for linear model fitting
  gsl_multifit_linear_workspace * work;

  // output file handles
  FILE * fo, * ff;

  // time records
  time_t sec_s;
  time_t sec_e;
  time_t sec_st;
  time_t sec_et;

  sec_s  = time(NULL);
  sec_st = time(NULL);
  
  /*
  Rprintf("matrix of gene expression[1, 1:8]\n");
  Rprint_matrix(me, 0, 0, 0, 7, 1);
  Rprintf("matrix of markers[1, 1:8]\n");
  Rprint_matrix_i(mm, 0, 0, 0, 7, 1);
  Rprintf("vector of sex\n");
  Rprint_vector_i(vs, 0, 7);
  */

  sec_et = time(NULL);
  Rprintf("time spent on reading data is %ld secs\n", sec_et-sec_st);

  fo  = fopen (output[0], "w");

  fprintf(fo, "Gene_ID\tMarker_ID\tN\t");
  fprintf(fo, "Mk\t");
  fprintf(fo, "LR_M5_M4\tLR_M5_M4_P\t");
  fprintf(fo, "LR_M4_M2\tLR_M4_M2_P\t");
  fprintf(fo, "LR_M4_M3\tLR_M4_M3_P\t");
  fprintf(fo, "LR_M2_M1\tLR_M2_M1_P\t");
  fprintf(fo, "LR_M1_M0\tLR_M1_M0_P\t");
  fprintf(fo, "LR_Mk_M0\tLR_Mk_M0_P\n");

  for(i=0; i<nrow_e; i++){
    if(i%1000 == 0){ Rprintf("finish %d-th gene expression profile\n",i); }
    gsl_vector_view gd_v = gsl_matrix_row (me, i);
    gsl_matrix_int_get_row (gi, ie, i);
    
    // check whether there is enough observations for this gene expression trait
    c = 0;
    for(k = 0; k<ncol; k++){
      if(gsl_vector_int_get(gi,k) == 1){ c++; }
    }
    if(c < ncol * nna_percent){ continue; }
        
    for(j=0; j<nrow_m; j++){
    
      gsl_vector_view md_v = gsl_matrix_row (mm, j);
      gsl_matrix_int_get_row (mi, im, j);
      gsl_vector_set_all (md_dom, -1.0);
            
      // after multiplication, mi is the index whether there is 
      // missing value, either gene expression or genotype
      gsl_vector_int_mul (mi, gi);
      
      c = 0;
      for(k = 0; k<ncol; k++){
        if(gsl_vector_int_get(mi,k) == 1){
          gsl_vector_set(gd, c, gsl_vector_get(&gd_v.vector,k));
          gsl_vector_set(md, c, gsl_vector_get(&md_v.vector,k));
          if(gsl_vector_get(md, c) == 0.0){
            gsl_vector_set(md_dom, c, 1.0);
          }
          gsl_vector_set(sd, c, gsl_vector_get(vs,k));
          c++;
        }
      }
      // printf("c=%d\n\n", c);
      // if there is not enough observations left, try next marker
      if(c < ncol * nna_percent){ continue; }
                  
      // generate the model matrix
      // g ~ sex + m.add + m.dom + sex*m.add + sex*m.dom
      gsl_matrix_set_col(X, 0, intercept);
      gsl_matrix_set_col(X, 1, sd);
      gsl_matrix_set_col(X, 2, md);
      gsl_matrix_set_col(X, 3, md_dom);
      gsl_vector_mul(md, sd);
      gsl_matrix_set_col(X, 4, md);
      gsl_vector_mul(md_dom, sd);
      gsl_matrix_set_col(X, 5, md_dom);
      
      // Rprint_matrix_i(X, 0, ncol-1, 0, 5, 0);
      
      // reset LR, LR_P
      for(k=0; k<6; k++){
        LR[k] = -1.0;
        LR_P[k] = 9.0;
      }
      // vector include no missing value
      gsl_vector_view gd_a = gsl_vector_subvector(gd, 0, c);
      
      // model 0: g ~ sex
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 2);
      work = gsl_multifit_linear_alloc (c,2);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                rank, c0, c0_cov, chisq, work);
      gsl_multifit_linear_free(work);

      if(rank[0] < 2){ continue; }
            
      // model 5: g ~ sex + add + dom + sex*add + sex*dom
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 6);
      work = gsl_multifit_linear_alloc (c,6);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                rank+5, c5, c5_cov, chisq+5, work);
      gsl_multifit_linear_free(work);      

      // model 4: g ~ sex + add + dom + sex:add
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 5);
      work = gsl_multifit_linear_alloc (c,5);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                rank+4, c4, c4_cov, chisq+4, work);
      gsl_multifit_linear_free(work);
            
      if(rank[5]<6){ 
        // if both model 5 and model 4 are not full rank, 
        // we will not explore the linkage
        if(rank[4]<5){ continue; }
      }else{
        // LR statistic between model 5 and model 4
        LR[0] = c*log(chisq[4]/chisq[5]);
        LR_P[0] = gsl_cdf_chisq_Q(LR[0], 1);

        if(LR_P[0] < LRT_P_cut){ // choose model 5
          LR[5] = c*(log(chisq[0]/chisq[5]));
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 4);
          backward_1row (fo, i+1, j+1, c, 5, freqs, LR, LR_P, P_cut);
          continue;
        }
      }
      
      // model 2: g ~ sex + add + sex:add
      gsl_matrix_swap_columns(X, 3, 4);
      Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
      work = gsl_multifit_linear_alloc (c,4);
      gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
                rank+2, c2, c2_cov, chisq+2, work);
      gsl_multifit_linear_free(work);

      // LR statistic between model 4 and model 2
      LR[1] = c*log(chisq[2]/chisq[4]);
      LR_P[1] = gsl_cdf_chisq_Q(LR[1], 1);
            
      if(LR_P[1] < LRT_P_cut){
        // model 3: g ~ sex + add + dom
        gsl_matrix_swap_columns(X, 3, 4);
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
        work = gsl_multifit_linear_alloc (c,4);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank+3, c3, c3_cov, chisq+3, work);
        gsl_multifit_linear_free(work);
  
        // LR statistic between model 4 and model 3
        LR[2] = c*log(chisq[3]/chisq[4]);
        LR_P[2] = gsl_cdf_chisq_Q(LR[2], 1);
  
        if(LR_P[2] < LRT_P_cut){ // choose model 4
          LR[5] = c*(log(chisq[0]/chisq[4]));
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 3);
          backward_1row (fo, i+1, j+1, c, 4, freqs, LR, LR_P, P_cut);
          continue;
        }else{ // choose model 3
          LR[5] = c*(log(chisq[0]/chisq[3]));
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 2);
          backward_1row (fo, i+1, j+1, c, 3, freqs, LR, LR_P, P_cut);
          continue;
        }
      }else{
        // model 1: g ~ sex + add
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 3);
        work = gsl_multifit_linear_alloc (c,3);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank+1, c1, c1_cov, chisq+1, work);
        gsl_multifit_linear_free(work);

        // LR statistic between model 2 and model 1
        LR[3] = c*log(chisq[1]/chisq[2]);
        LR_P[3] = gsl_cdf_chisq_Q(LR[3], 1);

        if(LR_P[3] < LRT_P_cut){ // choose model 2
          LR[5] = c*(log(chisq[0]/chisq[2]));
          LR_P[5] = gsl_cdf_chisq_Q(LR[5], 2);
          backward_1row (fo, i+1, j+1, c, 2, freqs, LR, LR_P, P_cut);
          continue;
        }

        // LR statistic between model 1 and model 0
        LR[4] = c*log(chisq[0]/chisq[1]);
        LR_P[4] = gsl_cdf_chisq_Q(LR[4], 1);

        // choose model 1, this is default
        LR[5] = LR[4];
        LR_P[5] = LR_P[4];
        backward_1row (fo, i+1, j+1, c, 1, freqs, LR, LR_P, P_cut);
        continue;
      }
    }
  }
  
  fclose(fo);

  // print out the frequencies
  ff  = fopen (output[1], "w");
  tmp = 0.0;
  
  fprintf(ff, "<%.2e", P_cut);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%.2f-%.2f", tmp, tmp+0.01);
    tmp += 0.01;
  }
  fprintf(ff, "\n");
  
  fprintf(ff, "%lu", freqs[100]);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%lu", freqs[i]);
  }
  fprintf(ff, "\n");
  
  fclose(ff);

  sec_e  = time(NULL);
  Rprintf("total time spent is %ld secs\n", sec_e-sec_s);
  Rprintf("------------------------------------------------------\n");
    
  gsl_matrix_free(me);
  gsl_matrix_free(mm);
  gsl_matrix_int_free(ie);
  gsl_matrix_int_free(im);
  gsl_vector_free(vs);
  gsl_vector_free(gd);
  gsl_vector_free(md);
  gsl_vector_free(md_dom);
  gsl_vector_free(sd);
  gsl_vector_int_free(gi);
  gsl_vector_int_free(mi);
  gsl_vector_free(c0);
  gsl_matrix_free(c0_cov);
  gsl_vector_free(c1);
  gsl_matrix_free(c1_cov);
  gsl_vector_free(c2);
  gsl_matrix_free(c2_cov);
  gsl_vector_free(c3);
  gsl_matrix_free(c3_cov);
  gsl_vector_free(c4);
  gsl_matrix_free(c4_cov);
  gsl_vector_free(c5);
  gsl_matrix_free(c5_cov);
  gsl_vector_free(intercept);
  gsl_matrix_free(X);
    
  *succeed = 1;
  return(1);
}

/**********************************************************************
 * lmEQTL_backward_byChr
 *
 * backward model selection, for each gene, only select the most 
 * significant linkage per chromosome 
 *
 * coding sex by 0 and 1, coding marker by -1, 0, and 1
 *
 * model 0: g ~ sex
 * model 1: g ~ sex + add
 * model 2: g ~ sex + add + sex:add
 * model 3: g ~ sex + add + dom
 * model 4: g ~ sex + add + dom + sex:add
 * model 5: g ~ sex + add + dom + sex:add + sex:dom
 *
 **********************************************************************/

int lmEQTL_backward_byChr (int* dims, double* ve, double* vm, int* ve_na, 
int* vm_na, int* chr_freq, int* n_chr, double* sex, char** output, double* RP_cut, 
double* RLRT_P_cut, double* Rtol, double* Rnna_percent, int* succeed) {

  int i, u, j, j0, j1, k, c;
  // chisq (residual sum square)
  double chisq[6];
  // rank of model matrix for different models
  size_t rank[6];
  // LR statistics, p-values
  double LR[6];
  double LR_P[6];
  // minimum P_min for all the markers in one chromosome
  double P_chr;
  // for record the histogram of p-values
  double tmp;
  unsigned long freqs[101];
  for(i=0; i<=100; i++){
    freqs[i] = 0;
  }
  const int nrow_e = dims[0];
  const int nrow_m = dims[1];
  const int ncol   = dims[2];
  const double P_cut = RP_cut[0];
  const double LRT_P_cut = RLRT_P_cut[0];
  const double tol = Rtol[0];
  const double nna_percent = Rnna_percent[0];
  
  Rprintf("\n------------------------------------------------------\n");
  Rprintf("dims=(%d, %d, %d)\t", nrow_e, nrow_m, ncol);
  Rprintf("P_cut=%e\n", P_cut);

  gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol); // gene expression matrix
  gsl_matrix * mm = gsl_matrix_alloc (nrow_m, ncol); // marker genotype matrix
  gsl_matrix_int * ie = gsl_matrix_int_alloc (nrow_e, ncol); // missing index
  gsl_matrix_int * im = gsl_matrix_int_alloc (nrow_m, ncol); // missing index
  
  gsl_vector * vs = gsl_vector_alloc (ncol); // vector of sex data
  gsl_vector * gd = gsl_vector_alloc (ncol); // gene expression data
  gsl_vector * sd = gsl_vector_alloc (ncol); // sex data
  gsl_vector * md = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector * md_dom = gsl_vector_alloc (ncol); // marker genotype data
  gsl_vector_int * gi = gsl_vector_int_alloc (ncol); // missing value index
  gsl_vector_int * mi = gsl_vector_int_alloc (ncol); // missing value index
  
  // intercept to be used in model matrix
  gsl_vector * intercept = gsl_vector_alloc (ncol); 
  gsl_vector_set_all(intercept, 1.0);

  // the model matrix
  gsl_matrix * X  = gsl_matrix_alloc (ncol, 6);
  gsl_matrix_view Xa;
  
  // reorganize the data into gsl_vector/matrix structure
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_e; i++){
      gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
      gsl_matrix_int_set(ie, i, j, ve_na[j*nrow_e+i]);
    }
  }
  
  for(j=0; j<ncol; j++){
    for(i=0; i<nrow_m; i++){
      gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
      gsl_matrix_int_set(im, i, j, vm_na[j*nrow_m+i]);
    }
  }

  for(j=0; j<ncol; j++){
    gsl_vector_set(vs, j, sex[j]);
  }

  /**
   * model 0: g ~ sex
   * model 1: g ~ sex + add
   * model 2: g ~ sex + add + sex:add
   * model 3: g ~ sex + add + dom
   * model 4: g ~ sex + add + dom + sex:add
   * model 5: g ~ sex + add + dom + sex:add + sex:dom
   **/

  // coefficients different models
  gsl_vector * c0 = gsl_vector_alloc (2);
  gsl_matrix * c0_cov = gsl_matrix_alloc (2, 2);
  gsl_vector * c1 = gsl_vector_alloc (3);
  gsl_matrix * c1_cov = gsl_matrix_alloc (3, 3);
  gsl_vector * c2 = gsl_vector_alloc (4);
  gsl_matrix * c2_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c3 = gsl_vector_alloc (4);
  gsl_matrix * c3_cov = gsl_matrix_alloc (4, 4);
  gsl_vector * c4 = gsl_vector_alloc (5);
  gsl_matrix * c4_cov = gsl_matrix_alloc (5, 5);
  gsl_vector * c5 = gsl_vector_alloc (6);
  gsl_matrix * c5_cov = gsl_matrix_alloc (6, 6);
    
  // work space for linear model fitting
  gsl_multifit_linear_workspace * work;

  // output file handles
  FILE * fo, * ff;
  char row[1024];

  // time records
  time_t sec_s;
  time_t sec_e;
  time_t sec_st;
  time_t sec_et;

  sec_s  = time(NULL);
  sec_st = time(NULL);
  
  /*
  Rprintf("matrix of gene expression[1, 1:8]\n");
  Rprint_matrix(me, 0, 0, 0, 7, 1);
  Rprintf("matrix of markers[1, 1:8]\n");
  Rprint_matrix_i(mm, 0, 0, 0, 7, 1);
  Rprintf("vector of sex\n");
  Rprint_vector_i(vs, 0, 7);
    */

  sec_et = time(NULL);
  Rprintf("time spent on reading data is %ld secs\n", sec_et-sec_st);

  fo  = fopen (output[0], "w");

  fprintf(fo, "Gene_ID\tMarker_ID\tN\t");
  fprintf(fo, "Mk\t");
  fprintf(fo, "LR_M5_M4\tLR_M5_M4_P\t");
  fprintf(fo, "LR_M4_M2\tLR_M4_M2_P\t");
  fprintf(fo, "LR_M4_M3\tLR_M4_M3_P\t");
  fprintf(fo, "LR_M2_M1\tLR_M2_M1_P\t");
  fprintf(fo, "LR_M1_M0\tLR_M1_M0_P\t");
  fprintf(fo, "LR_Mk_M0\tLR_Mk_M0_P\n");

  for(i=0; i<nrow_e; i++){
    if(i%1000 == 0){ Rprintf("finish %d-th gene expression profile\n",i); }
    gsl_vector_view gd_v = gsl_matrix_row (me, i);
    gsl_matrix_int_get_row (gi, ie, i);
    
    // check whether there is enough observations for this gene expression trait
    c = 0;
    for(k = 0; k<ncol; k++){
      if(gsl_vector_int_get(gi,k) == 1){ c++; }
    }
    if(c < ncol * nna_percent){ continue; }
    
    j0 = j1 = 0;
    
    for(u=0; u < *n_chr; u++){ // for each chromosome
      j0 = j1;
      j1 = j0 + chr_freq[u];
      P_chr = 1.0;

      for(j=j0; j<j1; j++){

        gsl_vector_view md_v = gsl_matrix_row (mm, j);
        gsl_matrix_int_get_row (mi, im, j);
        gsl_vector_set_all (md_dom, -1.0);
        
        // after multiplication, mi is the index whether there is 
        // missing value, either gene expression or genotype
        gsl_vector_int_mul (mi, gi);
        
        c = 0;
        for(k = 0; k<ncol; k++){
          if(gsl_vector_int_get(mi,k) == 1){
            gsl_vector_set(gd, c, gsl_vector_get(&gd_v.vector,k));
            gsl_vector_set(md, c, gsl_vector_get(&md_v.vector,k));
            if(gsl_vector_get(md, c) == 0.0){
              gsl_vector_set(md_dom, c, 1.0);
            }
            gsl_vector_set(sd, c, gsl_vector_get(vs,k));
            c++;
          }
        }
        // printf("c=%d\n\n", c);
        // if there is not enough observations left, try next marker
        if(c < ncol * nna_percent){ continue; }
                    
        // generate the model matrix
        // g ~ sex + m.add + m.dom + sex*m.add + sex*m.dom
        gsl_matrix_set_col(X, 0, intercept);
        gsl_matrix_set_col(X, 1, sd);
        gsl_matrix_set_col(X, 2, md);
        gsl_matrix_set_col(X, 3, md_dom);
        gsl_vector_mul(md, sd);
        gsl_matrix_set_col(X, 4, md);
        gsl_vector_mul(md_dom, sd);
        gsl_matrix_set_col(X, 5, md_dom);
        
        // Rprint_matrix_i(X, 0, ncol-1, 0, 5, 0);
        
        // reset LR, LR_P
        for(k=0; k<6; k++){
          LR[k] = -1.0;
          LR_P[k] = 9.0;
        }
        // vector include no missing value
        gsl_vector_view gd_a = gsl_vector_subvector(gd, 0, c);
        
        /*
         * model 0: g ~ sex
         * model 1: g ~ sex + add
         * model 2: g ~ sex + add + sex:add
         * model 3: g ~ sex + add + dom
         * model 4: g ~ sex + add + dom + sex:add
         * model 5: g ~ sex + add + dom + sex:add + sex:dom
         */

        // model 0: g ~ sex
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 2);
        work = gsl_multifit_linear_alloc (c,2);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank, c0, c0_cov, chisq, work);
        gsl_multifit_linear_free(work);

        if(rank[0] < 2){ continue; }
        
        // model 5: g ~ sex + add + dom + sex*add + sex*dom
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 6);
        work = gsl_multifit_linear_alloc (c,6);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank+5, c5, c5_cov, chisq+5, work);
        gsl_multifit_linear_free(work);      

        // model 4: g ~ sex + add + dom + sex:add
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 5);
        work = gsl_multifit_linear_alloc (c,5);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank+4, c4, c4_cov, chisq+4, work);
        gsl_multifit_linear_free(work);

        if(rank[5]<6){ 
          Rprintf("i=%d, j=%d, rank[5]=%d\n", i, j, rank[5]);
          // if both model 5 and model 4 are not full rank, 
          // we will not explore the linkage
          if(rank[4]<5){ continue; }
        }else{
          // LR statistic between model 5 and model 4
          LR[0] = c*log(chisq[4]/chisq[5]);
          LR_P[0] = gsl_cdf_chisq_Q(LR[0], 1);
          
          if(LR_P[0] < LRT_P_cut){ // choose model 5
            LR[5] = c*(log(chisq[0]/chisq[5]));
            LR_P[5] = gsl_cdf_chisq_Q(LR[5], 4);
            if(P_chr > LR_P[5]){
              P_chr = LR_P[5];
              sbackward_1row (row, i+1, j+1, c, 5, LR, LR_P, P_cut);
            }
            continue;
          }
        }

        // model 2: g ~ sex + add + sex:add
        gsl_matrix_swap_columns(X, 3, 4);
        Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
        work = gsl_multifit_linear_alloc (c,4);
        gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
            rank+2, c2, c2_cov, chisq+2, work);
        gsl_multifit_linear_free(work);

        // LR statistic between model 4 and model 2
        LR[1] = c*log(chisq[2]/chisq[4]);
        LR_P[1] = gsl_cdf_chisq_Q(LR[1], 1);
        
        if(LR_P[1] < LRT_P_cut){
          // model 3: g ~ sex + add + dom
          gsl_matrix_swap_columns(X, 3, 4);
          Xa  = gsl_matrix_submatrix(X, 0, 0, c, 4);
          work = gsl_multifit_linear_alloc (c,4);
          gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
              rank+3, c3, c3_cov, chisq+3, work);
          gsl_multifit_linear_free(work);

          // LR statistic between model 4 and model 3
          LR[2] = c*log(chisq[3]/chisq[4]);
          LR_P[2] = gsl_cdf_chisq_Q(LR[2], 1);

          if(LR_P[2] < LRT_P_cut){ // choose model 4
            LR[5] = c*(log(chisq[0]/chisq[4]));
            LR_P[5] = gsl_cdf_chisq_Q(LR[5], 3);
            if(P_chr > LR_P[5]){
              P_chr = LR_P[5];
              sbackward_1row (row, i+1, j+1, c, 4, LR, LR_P, P_cut);
            }
            continue;
          }else{ // choose model 3
            LR[5] = c*(log(chisq[0]/chisq[3]));
            LR_P[5] = gsl_cdf_chisq_Q(LR[5], 2);
            if(P_chr > LR_P[5]){
              P_chr = LR_P[5];
              sbackward_1row (row, i+1, j+1, c, 3, LR, LR_P, P_cut);
            }
            continue;
          }
        }else{
          // model 1: g ~ sex + add
          Xa  = gsl_matrix_submatrix(X, 0, 0, c, 3);
          work = gsl_multifit_linear_alloc (c,3);
          gsl_multifit_linear_svd (&Xa.matrix, &gd_a.vector, tol, 
              rank+1, c1, c1_cov, chisq+1, work);
          gsl_multifit_linear_free(work);

          // LR statistic between model 2 and model 1
          LR[3] = c*log(chisq[1]/chisq[2]);
          LR_P[3] = gsl_cdf_chisq_Q(LR[3], 1);

          if(LR_P[3] < LRT_P_cut){ // choose model 2
            LR[5] = c*(log(chisq[0]/chisq[2]));
            LR_P[5] = gsl_cdf_chisq_Q(LR[5], 2);
            if(P_chr > LR_P[5]){
              P_chr = LR_P[5];
              sbackward_1row (row, i+1, j+1, c, 2, LR, LR_P, P_cut);
            }
            continue;
          }

          // LR statistic between model 1 and model 0
          LR[4] = c*log(chisq[0]/chisq[1]);
          LR_P[4] = gsl_cdf_chisq_Q(LR[4], 1);

          // choose model 1, this is default
          LR[5] = LR[4];
          LR_P[5] = LR_P[4];
          if(P_chr > LR_P[5]){
            P_chr = LR_P[5];
            sbackward_1row (row, i+1, j+1, c, 1, LR, LR_P, P_cut);
          }
          continue;
        }
      }
      
      k = (int) (P_chr / 0.01);
      freqs[k] += 1;
      
      if(P_chr < P_cut){
        freqs[100] += 1;
        fputs(row, fo);
      }
    }
        
  }
  
  fclose(fo);

  // print out the frequencies
  ff  = fopen (output[1], "w");
  tmp = 0.0;
  
  fprintf(ff, "<%.2e", P_cut);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%.2f-%.2f", tmp, tmp+0.01);
    tmp += 0.01;
  }
  fprintf(ff, "\n");
  
  fprintf(ff, "%lu", freqs[100]);
  for(i=0; i<100; i++){
    fprintf(ff, "\t%lu", freqs[i]);
  }
  fprintf(ff, "\n");
  
  fclose(ff);

  sec_e  = time(NULL);
  Rprintf("total time spent is %ld secs\n", sec_e-sec_s);
  Rprintf("------------------------------------------------------\n");
    
  gsl_matrix_free(me);
  gsl_matrix_free(mm);
  gsl_matrix_int_free(ie);
  gsl_matrix_int_free(im);
  gsl_vector_free(vs);
  gsl_vector_free(gd);
  gsl_vector_free(md);
  gsl_vector_free(md_dom);
  gsl_vector_free(sd);
  gsl_vector_int_free(gi);
  gsl_vector_int_free(mi);
  gsl_vector_free(c0);
  gsl_matrix_free(c0_cov);
  gsl_vector_free(c1);
  gsl_matrix_free(c1_cov);
  gsl_vector_free(c2);
  gsl_matrix_free(c2_cov);
  gsl_vector_free(c3);
  gsl_matrix_free(c3_cov);
  gsl_vector_free(c4);
  gsl_matrix_free(c4_cov);
  gsl_vector_free(c5);
  gsl_matrix_free(c5_cov);
  gsl_vector_free(intercept);
  gsl_matrix_free(X);
    
  *succeed = 1;
  return(1);
}
