/*
 *  anovaEQTL.c
 *
 *  Created by Wei Sun on 9/20/05.
 *  Copyright 2005 __MyCompanyName__. All rights reserved.
 *
 */

#include <stdio.h>
#include <time.h>
#include <string.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>
#include <R.h>

void Rprint_matrix(gsl_matrix * m, long nrl, long nrh, long ncl, long nch, 
int rid)
{
	int i, j;
	double mij = 0.0;
	for (i = nrl; i <= nrh; i++){
		if(rid>0){
			Rprintf ("%d\t", i);
		}
		for (j = ncl; j < nch; j++){
			mij = gsl_matrix_get (m, i, j);
			Rprintf ("%f\t", mij);
		}
		mij = gsl_matrix_get (m, i, j);
		Rprintf ("%f\n", mij);
	}
	Rprintf ("\n");
}

void Rprint_matrix_i(gsl_matrix * m, long nrl, long nrh, long ncl, long nch, 
int rid)
{
	int i, j;
	double mij = 0.0;
	for (i = nrl; i <= nrh; i++){
		if(rid>0){
			Rprintf ("%d\t", i);
		}
		for (j = ncl; j < nch; j++){
			mij = gsl_matrix_get (m, i, j);
			Rprintf ("%.0f\t", mij);
		}
		mij = gsl_matrix_get (m, i, j);
		Rprintf ("%.0f\n", mij);
	}
	Rprintf ("\n");
}

void Rprint_matrix_e(gsl_matrix * m, long nrl, long nrh, long ncl, long nch, 
int rid)
{
	int i, j;
	double mij = 0.0;
	for (i = nrl; i <= nrh; i++){
		if(rid>0){
			Rprintf ("%d\t", i);
		}
		for (j = ncl; j < nch; j++){
			mij = gsl_matrix_get (m, i, j);
			Rprintf ("%e\t", mij);
		}
		mij = gsl_matrix_get (m, i, j);
		Rprintf ("%e\n", mij);
	}
	Rprintf ("\n");
}


void Rprint_vector(gsl_vector * v, long nrl, long nrh)
{
	int i;
	double vi = 0.0;
	for (i = nrl; i < nrh; i++){
		vi = gsl_vector_get (v, i);
		Rprintf ("%f\t", vi);
	}
	vi = gsl_vector_get (v, i);
	Rprintf ("%f\n", vi);
}

void Rprint_vector_i(gsl_vector * v, long nrl, long nrh)
{
	int i;
	double vi = 0.0;
	for (i = nrl; i < nrh; i++){
		vi = gsl_vector_get (v, i);
		Rprintf ("%.0f\t", vi);
	}
	vi = gsl_vector_get (v, i);
	Rprintf ("%.0f\n", vi);
}

void Rprint_vector_int(gsl_vector_int * v, long nrl, long nrh)
{
	int i;
	int vi = 0;
	for (i = nrl; i < nrh; i++){
		vi = gsl_vector_int_get (v, i);
		Rprintf ("%d\t", vi);
	}
	vi = gsl_vector_int_get (v, i);
	Rprintf ("%d\n", vi);
}

void fprint_vector_e(FILE * fo, gsl_vector * v, long nrl, long nrh)
{
	int i;
	double vi = 0.0;
	for (i = nrl; i < nrh; i++){
		vi = gsl_vector_get (v, i);
		fprintf (fo, "%.6e\t", vi);
	}
	vi = gsl_vector_get (v, i);
	fprintf (fo, "%.6e\n", vi);
}

/*
 * given two markers vm1, vm2, each 3 levles: -1, 0, 1 
 * or each two levels: -1, 1 or 0, 1
 * 
 * columns in model_matrix (three levels in one marker)
 * (Intercept) md10 md11 md20 md21 md10:md20 md11:md20 md10:md21 md11:md21
 * columns in model_matrix (binary marker)
 * (Intercept) md11 md21 md11:md21
 */
void model_matrix(gsl_matrix * model_m, gsl_vector * vm1, gsl_vector * vm2, 
int ncol, int binary)
{
	int i,  md1i, md2i;
	gsl_matrix_set_zero (model_m);
	// Rprintf("binary = %d\n", binary);
	for(i=0; i<ncol; i++){
		md1i = (int) gsl_vector_get(vm1, i);
		md2i = (int) gsl_vector_get(vm2, i);
		gsl_matrix_set(model_m, i, 0, 1.0);
    	// Rprintf("md1i = %d, md2i = %d\n", md1i, md2i);
		if(binary==1){
            if(md1i==1){ gsl_matrix_set(model_m, i, 1, 1.0); } 
            if(md2i==1){ gsl_matrix_set(model_m, i, 2, 1.0); }
            if(md1i==1 && md2i==1){ gsl_matrix_set(model_m, i, 3, 1.0); }
		}else{
            if(md1i==0){ gsl_matrix_set(model_m, i, 1, 1.0); } 
            else if(md1i==1){ gsl_matrix_set(model_m, i, 2, 1.0); }
            if(md2i==0){ gsl_matrix_set(model_m, i, 3, 1.0); } 
            else if(md2i==1){ gsl_matrix_set(model_m, i, 4, 1.0); }
            if(md1i==0 && md2i==0){ gsl_matrix_set(model_m, i, 5, 1.0);  } 
            else if(md1i==1 && md2i==0){ gsl_matrix_set(model_m, i, 6, 1.0); }
            else if(md1i==0 && md2i==1){ gsl_matrix_set(model_m, i, 7, 1.0); }
            else if(md1i==1 && md2i==1){ gsl_matrix_set(model_m, i, 8, 1.0); }
        }
	}
}

/*
 * calculate the mean value of a gsl_vector, length ncol
 */

double gsl_vector_mean(gsl_vector * v, int ncol)
{
	int i;
	double ave = 0.0;
	for(i=0; i<ncol; i++){
		ave += gsl_vector_get(v, i);
	}
	ave/=ncol;
	return(ave);
}

/*
 * calculate the variance of a gsl_vector, length ncol
 */

double gsl_vector_var(gsl_vector * v, int ncol, int mle)
{
	int i;
	double ave  = 0.0;
	double ave2 = 0.0;
	double vi   = 0.0;
	double var  = 0.0;
	for(i=0; i<ncol; i++){
		vi = gsl_vector_get(v, i);
		ave += vi;
		ave2 += vi*vi;
	}
	var = ave2 - ave*ave/ncol;
	if(mle){
		var /= ncol;
	}else{
		var /= (ncol-1);
	}
	return(var);
}

/*
 * calculate the product of one gsl_matrix m multiplies one gsl_vector v
 * store the result in gslvector * result, length(v) == ncol(m)
 */

void gsl_matrix_mul(gsl_matrix * m, gsl_vector * v, gsl_vector * result, int nrow, int ncol)
{
	int i, j;
	double tmp;
	for(i=0; i<nrow; i++){
		tmp = 0.0;
		for(j=0; j<ncol; j++){
			tmp += gsl_matrix_get(m, i, j)*gsl_vector_get(v, j);
		}
		gsl_vector_set(result, i, tmp);
	}
}

/*
 * 1-way ANOVA
 *
-------------------------------------------------------------------------------
Problem:
    l = lm(y~x1), anova(l) 
    to get significance of one main effect.
    
    y:  gene expression profile
    x1: profile of marker 1
    
Todo:
    implement it in C, calculate partial sum, no need to do regression
    
-------------------------------------------------------------------------------
*/

int anova1(int* dims, double* ve, double* vm, int* ve_na, int* vm_na, 
double* cut, int* minLen, char** output, int* succeed)
{
    // minLen:	the minimum length required for each level of m
	int nrow_e, nrow_m, ncol;
	double P_cut;
	double *e, *m;
    double **me;
    double **mm;
    int *e_na, *m_na;
    int **ie;
    int **im;
	const double pi=3.14159266;

  	int a, b, i, j, len, remainLevels, nLevels=3;
	double SST;        /* Sum Square for Treatments */
  	double SSE, MSE;   /* Sum (Mean) Square for Error      */
  	int df1, df2;      /* 2 degrees of freedom of F test */
	double s, ave, F, pval, var0, ebar, ebar2, log10L1, log10L0, lod;
	
    // tmpData:	the data matrix which will store the data of different levels
    // tmpLen:	the number of observations of different levels
    double **tmpData;
    int *tmpLen;
    
	/* dimensions of the data */
	nrow_e  = dims[0];
	nrow_m  = dims[1];
	ncol    = dims[2];
	P_cut   = *cut;

    /* data matrix */
	me = (double **) malloc(nrow_e * sizeof(double*));
	me[0] = (double *) calloc(nrow_e * ncol, sizeof(double));
	for (i=1; i<nrow_e; i++)
		me[i] = me[0] + i*ncol;

	mm = (double **) malloc(nrow_m * sizeof(double*));
	mm[0] = (double *) calloc(nrow_m * ncol, sizeof(double));
	for (i=1; i<nrow_m; i++)
		mm[i] = mm[0] + i*ncol;
		
    /* index matrix, indicating the NA value */
	ie = (int **) malloc(nrow_e * sizeof(int*));
	ie[0] = (int *) calloc(nrow_e * ncol, sizeof(int));
	for (i=1; i<nrow_e; i++)
		ie[i] = ie[0] + i*ncol;

	im = (int **) malloc(nrow_m * sizeof(int*));
	im[0] = (int *) calloc(nrow_m * ncol, sizeof(int));
	for (i=1; i<nrow_m; i++)
		im[i] = im[0] + i*ncol;

	tmpData = (double **) malloc(nLevels * sizeof(double*));
	tmpData[0] = (double *) calloc(nLevels * ncol, sizeof(double));
	for (i=1; i<nLevels; i++)
		tmpData[i] = tmpData[0] + i*ncol;

	tmpLen = (int *) calloc(nLevels, sizeof(int));

	/* timers */
	time_t sec_s;
	time_t sec_e;
	time_t sec_st;
	time_t sec_et;

	sec_st = time(NULL);
	
	/* load data into matrix */
	/* make a copy of data is safe way to avoid changing original data */
	/* remember, R uses column major order! */

    Rprintf("dims=(%d, %d, %d) ", nrow_e, nrow_m, ncol);
    Rprintf("P_cut=%f\n", P_cut);

	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_e; i++){
			me[i][j]=ve[j*nrow_e+i];
			ie[i][j]=ve_na[j*nrow_e+i];
		}
	}
	// for(j=10; j<20; j++){ Rprintf("%f\t", me[22][j]); }
    // Rprintf("\n");
	// for(j=10; j<20; j++){ Rprintf("%d\t", ie[22][j]); }
    // Rprintf("\n");

	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_m; i++){
			mm[i][j]=vm[j*nrow_m+i];
			im[i][j]=vm_na[j*nrow_m+i];
		}
	}
	// for(j=10; j<20; j++){ Rprintf("%f\t", mm[22][j]); }
    // Rprintf("\n");
	// for(j=10; j<20; j++){ Rprintf("%d\t", im[22][j]); }
    // Rprintf("\n");

	Rprintf("output file is %s \n", output[0]);
	FILE * fo = fopen (output[0], "w");
    fprintf(fo, "GENE_ID\tMARKER_ID\tMSE_MLE\tDF1\tDF2\tF\tP\tLOD\n");

    /* anova computation */
    for(a=0; a<nrow_e; a++){
        for(b=0; b<nrow_m; b++){
            // Rprintf("a=%d\n", a);
            // Rprintf("b=%d\n", b);
            e = me[a];
            e_na = ie[a];
            m = mm[b];
            m_na = im[b];

            ebar = 0.0;
            ebar2= 0.0;
            len  = 0;
            for (i=0;i<ncol;i++){
                len  += e_na[i]*m_na[i];
                ebar += e[i]*m_na[i];
                ebar2+= e[i]*e[i]*m_na[i];
            }
            var0 = ebar2 - ebar*ebar/len;
            var0 /= len; /* mle of var, divided by (len-1) in least square est*/
            ebar /= len;
            
            SST = 0.0;	/* Sum Square for Treatments */
            SSE = 0.0;	/* Sum Square for Error      */

            /* initiate number of observations per level */
            for(j=0; j<nLevels; j++){
                tmpLen[j]=0;
            }
            
            /* iterate over all the factor levels */
            /* suppose marker level is -1, 0 , 1 or -1, 1 or 0, 1*/
             for(i=0; i<ncol; i++){
                if(m_na[i]==1 && e_na[i]==1){
                    j = (int)m[i]+1;
                    tmpData[j][tmpLen[j]] = e[i];
                    tmpLen[j] += 1;
                }
            }
            
            remainLevels = nLevels;
            for(i=0; i<nLevels; i++){
                if(tmpLen[i] < *minLen){
                    remainLevels--;
                    if(remainLevels < 2) break;
                }else{
                    ave = 0.0;
                    for (j=0;j<tmpLen[i];j++){ ave += tmpData[i][j];}
                    ave=ave/tmpLen[i];
                    for (j=0;j<tmpLen[i];j++) {
                        s = tmpData[i][j]-ave;
                        SSE += s*s;
                    }
                    SST += tmpLen[i]*(ave-ebar)*(ave-ebar);
                }
            }
            
            if(remainLevels < 2){
                pval = 9.0;
            }else{
                df1 = remainLevels - 1;
                df2 = len - remainLevels;
                /* get MST and MSE */
                SST /= df1;
                SSE /= df2;
                F = SST/SSE;
                pval = gsl_cdf_fdist_Q(F, df1, df2);
            }
            if(pval < P_cut){
                /* MLE of MSE */
                MSE = SSE; MSE *= df2; MSE /= len;

                log10L1 = (-0.5)*len*(log(2*pi) + 1 -log(len) + log(MSE*len));
                log10L1 /= log(10);
        
                //Rprintf("var0=%f\t", MSE);
        
                log10L0 = (-0.5)*len*(log(2*pi) + 1 -log(len) + log(var0*len));
                log10L0 /= log(10);
        
                //Rprintf("log10L0=%f, log10L1=%f\n", log10L0, log10L1);
                
                lod = log10L1 - log10L0;

                fprintf(fo, "%d\t%d\t%.6e\t", a+1,b+1,MSE);
                fprintf(fo, "%d\t%d\t%.2f\t%.2e\t%.2f\n", df1,df2,F,pval,lod);
            }
        }
    }
            
    free(me);
    free(mm);
    fclose(fo);

	*succeed = 1;
    return(1);
}


/*
 * 2-way ANOVA
 *
-------------------------------------------------------------------------------
Problem:
    l = lm(y~x1+x2), anova(l) 
    to get significance of two main effects and one interaction effect.
    
    y:  gene expression profile
    x1: profile of marker 1
    x2: profile of marker 2
    
Todo:
    implement it in C, only use QR decomposition once, no need to do regression
    
-------------------------------------------------------------------------------
*/

int anova2(int* dims, double* ve, double* vm, double* vp, double* cuts, 
double* cuts_direction, char** output, int* print_coef, int* binary, 
int* succeed)
{
	int i, j, k, d;
	int nrow_e, nrow_m, ncol;
	double p = 0, effect = 0.0, effect_sum = 0.0, sse, inv_sse, mse;
	double P1, P2, P12, P1_cut, P2_cut, P12_cut, chisqP_cut;
    double ss[3] = {0.0, 0.0, 0.0};
    double df[3] = {0.0, 0.0, 0.0};

	/* dimensions of the data */
	nrow_e  = dims[0];
	nrow_m  = dims[1];
	ncol    = dims[2];

	/* cut off of anova p-values */
	P1_cut  = cuts[0];
	P2_cut  = cuts[1];
	P12_cut = cuts[2];
	
	/* cut off of chisq test, only consider marker pairs chisqP > chisqP_cut */
	chisqP_cut = cuts[3];

	/* the rank of model.matrix, i.e. the number of column of model matrix  */
	double rank;
	if(*binary == 1){ rank = 4.0; } else { rank = 9.0;}
	// Rprintf("rank is %.0f \n", rank);
	/* degree of freedom of residual */
	double dfr = (double) ncol - rank;

	/* allocate memory for vector and matirx */
	gsl_vector * ve1  = gsl_vector_alloc (ncol);
	gsl_vector * vm1  = gsl_vector_alloc (ncol);
	gsl_vector * vm2  = gsl_vector_alloc (ncol);
	gsl_vector * tau  = gsl_vector_alloc (rank);
	gsl_vector * coef = gsl_vector_alloc (rank);
	gsl_vector * comp = gsl_vector_alloc (ncol);
	gsl_vector * resd = gsl_vector_alloc (ncol);
	gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol);
	gsl_matrix * mm = gsl_matrix_alloc (nrow_m, ncol);
	gsl_matrix * mp = gsl_matrix_alloc (nrow_m, nrow_m);
	gsl_matrix * model_m = gsl_matrix_alloc (ncol, rank);

	/* timers */
	time_t sec_st;
	time_t sec_et;

	sec_st = time(NULL);
		
	// Rprintf("output file is %s \n", output[0]);
	FILE * fo = fopen (output[0], "w");
    if(*print_coef == 1){
        fprintf(fo, "M1_ID\tM2_ID\tGENE_ID\tMSE_MLE\tP1\tP2\tP12");
        for(i=1; i<10; i++){
            fprintf(fo, "\tCOEF%d", i);
        }
        fprintf(fo, "\n");
    }else{
        fprintf(fo, "M1_ID\tM2_ID\tGENE_ID\tMSE_MLE\tP1\tP2\tP12\n");
    }

	/* load data into the gsl_matrix */
	/* make a copy of data is safe way to avoid changing original data */
	/* remember, R uses column major order! */

    // Rprintf("dims=(%d, %d, %d) ", nrow_e, nrow_m, ncol);
    // Rprintf("P_cut=(%f %f %f)\n", P1_cut, P2_cut, P12_cut);

	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_e; i++){
			gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
		}
	}
	// Rprint_matrix(me, 0, 9, 0, 9, 1);
	
	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_m; i++){
			gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
		}
	}
	// Rprint_matrix_i(mm, 0, 9, 0, 9, 1);

	for(j=0; j<nrow_m; j++){
		for(i=0; i<nrow_m; i++){
			gsl_matrix_set(mp, i, j, vp[j*nrow_m+i]);
		}
	}
	// Rprint_matrix_e(mp, 0, 9, 0, 9, 1);
	
    if(*binary == 1){
        df[0] = 1.0;
        df[1] = 1.0;
        df[2] = 1.0;
    }else{
        df[0] = 2.0;
        df[1] = 2.0;
        df[2] = 4.0;
    }
    
	for(i=0; i<nrow_m-1; i++){
		for(j=i+1; j<nrow_m; j++){			
			p = gsl_matrix_get(mp,i,j);
			if((p - chisqP_cut)*cuts_direction[3] > 0){
				gsl_matrix_set_zero (model_m);
                gsl_matrix_get_row (vm1, mm, i);
                gsl_matrix_get_row (vm2, mm, j);

				// Rprintf("marker 1\n");
				// Rprint_vector_i(vm1, 0, ncol-1);
				// Rprintf("marker 2\n");
				// Rprint_vector_i(vm2, 0, ncol-1);

				model_matrix(model_m, vm1, vm2, ncol, *binary);
				// Rprint_matrix_i(model_m, 0, 20, 0, rank-1, 1);
				/* do QR decomposition and store the QR matrix in model_m */
				gsl_linalg_QR_decomp (model_m, tau);
                // Rprint_matrix(model_m, 0, 20, 0, rank-1, 1);
				for(k=0; k<nrow_e; k++){
					sse = 0.0;
					for(d=0; d<3; d++){ ss[d] = 0.0; }
					gsl_matrix_get_row (ve1, me, k);
					gsl_vector_memcpy(comp, ve1);
					gsl_linalg_QR_QTvec (model_m, tau, comp);
					
					effect_sum = 0.0;
					if(*binary == 1){
                        effect = gsl_vector_get (comp, 1);
                        effect_sum += effect*effect;
					}else{
                        for(d=1; d<=2; d++){
                            effect = gsl_vector_get (comp, d);
                            effect_sum += effect*effect;
                        }
                    }
					ss[0] = effect_sum;

					effect_sum = 0.0;
					if(*binary == 1){
                        effect = gsl_vector_get (comp, 2);
                        effect_sum += effect*effect;
					}else{
                        for(d=3; d<=4; d++){
                            effect = gsl_vector_get (comp, d);
                            effect_sum += effect*effect;
                        }
                    }
					ss[1] = effect_sum;
					
					effect_sum = 0.0;
					if(*binary == 1){
                        effect = gsl_vector_get (comp, 3);
                        effect_sum += effect*effect;
					}else{
                        for(d=5; d<=8; d++){
                            effect = gsl_vector_get (comp, d);
                            effect_sum += effect*effect;
                        }
                    }
					ss[2] = effect_sum;

                    for(d=0; d<3; d++){ ss[d] = ss[d]/df[d]; }
					// now, ss is actually changed to ms
					// Rprintf("ms is: ");
					// Rprint_vector(ss, 0, 2);

					gsl_linalg_QR_lssolve (model_m, tau, ve1, coef, resd);
					
					for(d=0; d<ncol; d++){
						effect = gsl_vector_get (resd, d);
						sse += effect*effect;
					}
					// Rprintf("sse is: %f\n", sse);
					sse /= dfr;
					inv_sse = 1/sse;
					
					// now, ss is transformed to F statistics
					for(d=0; d<3; d++){ ss[d] = ss[d]*inv_sse; }
					
					// Rprintf("f-stat is: ");
					// Rprint_vector(ss, 0, 2);
					P1  = gsl_cdf_fdist_Q(ss[0], df[0], dfr);
					P2  = gsl_cdf_fdist_Q(ss[1], df[1], dfr);
					P12 = gsl_cdf_fdist_Q(ss[2], df[2], dfr);
					
					
					if( (P12 - P12_cut)*cuts_direction[2] > 0 && 
					    (P1 - P1_cut)*cuts_direction[0] > 0 && 
					    (P2 > P2_cut)*cuts_direction[1] > 0 ){
                        // calculate the mse of mle
                        mse = sse; mse *= dfr; mse /= ncol;
                        if(*print_coef == 1){
				            fprintf(fo, "%d\t%d\t%d\t%.6e\t",i+1,j+1,k+1,mse);
				            fprintf(fo, "%.2e\t%.2e\t%.2e\t", P1, P2, P12);
				            fprint_vector_e(fo, coef, 0, 8);
				        }else{
				            fprintf(fo, "%d\t%d\t%d\t%.6e\t",i+1,j+1,k+1,mse);
				            fprintf(fo, "%.2e\t%.2e\t%.2e\n", P1, P2, P12);
						}
					}
				}
			}
		}
	}
	sec_et = time(NULL);
	Rprintf("total time spent is %ld secs\n", sec_et-sec_st);
	Rprintf("\n");
	
	gsl_vector_free(tau);
	gsl_vector_free(coef);
	gsl_vector_free(comp);
	gsl_vector_free(resd);
	gsl_matrix_free(me);
	gsl_matrix_free(mm);
	gsl_matrix_free(mp);
	gsl_matrix_free(model_m);

	fclose(fo);
	*succeed = 1;
	return(1);
}

/**
 * 2 way ANOVA, with permuations
 *
 */

int anova2p(int* dims, double* ve, double* vm, double* vp, double* cuts, 
    double* cuts_direction, double* p12cuts, double* Ds, double* FDs,
    int* binary, int* showPer, int* succeed)
{
	int i, j, k, d, pp;
	int nrow_e, nrow_m, ncol, nP12Cuts, nper;
	int *permute;
	double p = 0, effect = 0.0, effect_sum = 0.0, sse, inv_sse, mse;
	double P1, P2, P12, P1_cut, P2_cut, P12_cut, chisqP_cut;
    gsl_vector_view column;
    const gsl_rng_type * T;
    gsl_rng * r;
    double ss[3] = {0.0, 0.0, 0.0};
    double df[3] = {0.0, 0.0, 0.0};

	// the most significant p-value for each gene, in original/permuted data
	double *gP;

	/* dimensions of the data */
	nrow_e  = dims[0];
	nrow_m  = dims[1];
	ncol    = dims[2];
	nP12Cuts= dims[3];
	nper    = dims[4];
	
	permute = (int *) calloc(ncol, sizeof(int));
	gP  = (double *) calloc(nrow_e, sizeof(double));

	
	/* cut off of anova p-values */
	P1_cut  = cuts[0];
	P2_cut  = cuts[1];
	P12_cut = cuts[2];
	
	/* cut off of X-sq test, only consider marker pairs chisqP > chisqP_cut */
	chisqP_cut = cuts[3];

	/* the rank of model.matrix, i.e. the number of column of model matrix  */
	double rank;
	if(*binary == 1){ rank = 4.0; } else { rank = 9.0;}
	// Rprintf("rank is %.0f \n", rank);
	/* degree of freedom of residual */
	double dfr = (double) ncol - rank;

	/* allocate memory for vector and matirx */
	gsl_vector * ve1  = gsl_vector_alloc (ncol);
	gsl_vector * vm1  = gsl_vector_alloc (ncol);
	gsl_vector * vm2  = gsl_vector_alloc (ncol);
	gsl_vector * tau  = gsl_vector_alloc (rank);
	gsl_vector * coef = gsl_vector_alloc (rank);
	gsl_vector * comp = gsl_vector_alloc (ncol);
	gsl_vector * resd = gsl_vector_alloc (ncol);
	gsl_matrix * me   = gsl_matrix_alloc (nrow_e, ncol);
	gsl_matrix * mm   = gsl_matrix_alloc (nrow_m, ncol);
	gsl_matrix * mp   = gsl_matrix_alloc (nrow_m, nrow_m);
	gsl_matrix * pme  = gsl_matrix_alloc (nrow_e, ncol);
	gsl_matrix * model_m = gsl_matrix_alloc (ncol, rank);

	/* timers */
	time_t sec_st;
	time_t sec_et;

	/* load data into the gsl_matrix */
	/* make a copy of data is safe way to avoid changing original data */
	/* remember, R uses column major order! */

    // Rprintf("dims=(%d, %d, %d) ", nrow_e, nrow_m, ncol);
    // Rprintf("P_cut=(%f %f %f)\n", P1_cut, P2_cut, P12_cut);

	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_e; i++){
			gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
		}
	}
	// Rprint_matrix(me, 0, 9, 0, 9, 1);
	
	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_m; i++){
			gsl_matrix_set(mm, i, j, vm[j*nrow_m+i]);
		}
	}
	// Rprint_matrix_i(mm, 0, 9, 0, 9, 1);

	for(j=0; j<nrow_m; j++){
		for(i=0; i<nrow_m; i++){
			gsl_matrix_set(mp, i, j, vp[j*nrow_m+i]);
		}
	}
	// Rprint_matrix_e(mp, 0, 9, 0, 9, 1);
	
    if(*binary == 1){
        df[0] = 1.0;
        df[1] = 1.0;
        df[2] = 1.0;
    }else{
        df[0] = 2.0;
        df[1] = 2.0;
        df[2] = 4.0;
    }
    
    gsl_rng_env_setup();
    T = gsl_rng_default;
    r = gsl_rng_alloc(T);
    
    for(i=0; i<ncol; i++){ permute[i] = i; }
    
    if(*showPer==1){
        Rprintf ("initial permutation:\n");  
        for(i=0; i<ncol; i++){
            Rprintf("%d ", permute[i]);
        }
        Rprintf("\n");
    }
    
    for(pp=0; pp<=nper; pp++){
    
    	sec_st = time(NULL);

        if(pp > 0){
            gsl_ran_shuffle (r, permute, ncol, sizeof(int));
            if(*showPer==1){
                Rprintf("permuation %d\n", pp);
                for(i=0; i<ncol; i++){
                    Rprintf("%d ", permute[i]);
                }
                Rprintf("\n");
            }
        }
        
        for(d=0; d<ncol; d++){
            column = gsl_matrix_column (me, permute[d]);
            gsl_matrix_set_col (pme, d, &column.vector);
        }
        
        // Rprint_matrix(pme, 0, 4, 0, 9, 0);
        
        for(k=0; k<nrow_e; k++){ gP[k] = 1.0; } // reset gP
        
        for(i=0; i<nrow_m-1; i++){
            for(j=i+1; j<nrow_m; j++){
                p = gsl_matrix_get(mp,i,j);
                if((p - chisqP_cut)*cuts_direction[3] > 0){
                    gsl_matrix_set_zero (model_m);
                    gsl_matrix_get_row (vm1, mm, i);
                    gsl_matrix_get_row (vm2, mm, j);

                    model_matrix(model_m, vm1, vm2, ncol, *binary);
                    // do QR decomposition and store the QR matrix in model_m 
                    gsl_linalg_QR_decomp (model_m, tau);
                    
                    for(k=0; k<nrow_e; k++){
                        sse = 0.0;
                        for(d=0; d<3; d++){ ss[d]=0.0; }
                        gsl_matrix_get_row (ve1, pme, k);
                        gsl_vector_memcpy(comp, ve1);
                        gsl_linalg_QR_QTvec (model_m, tau, comp);
                        
                        effect_sum = 0.0;
                        if(*binary == 1){
                            effect = gsl_vector_get (comp, 1);
                            effect_sum += effect*effect;
                        }else{
                            for(d=1; d<=2; d++){
                                effect = gsl_vector_get (comp, d);
                                effect_sum += effect*effect;
                            }
                        }
                        ss[0] = effect_sum;
    
                        effect_sum = 0.0;
                        if(*binary == 1){
                            effect = gsl_vector_get (comp, 2);
                            effect_sum += effect*effect;
                        }else{
                            for(d=3; d<=4; d++){
                                effect = gsl_vector_get (comp, d);
                                effect_sum += effect*effect;
                            }
                        }
                        ss[1] = effect_sum;
                        
                        effect_sum = 0.0;
                        if(*binary == 1){
                            effect = gsl_vector_get (comp, 3);
                            effect_sum += effect*effect;
                        }else{
                            for(d=5; d<=8; d++){
                                effect = gsl_vector_get (comp, d);
                                effect_sum += effect*effect;
                            }
                        }
                        ss[2] = effect_sum;
                        
                        for(d=0; d<3; d++){ ss[d] = ss[d]/df[d]; }
                        // now, ss is actually changed to ms
                        // Rprintf("ms is: ");
                        // Rprint_vector(ss, 0, 2);
    
                        gsl_linalg_QR_lssolve (model_m, tau, ve1, coef, resd);
                        
                        for(d=0; d<ncol; d++){
                            effect = gsl_vector_get (resd, d);
                            sse += effect*effect;
                        }
                        // Rprintf("sse is: %f\n", sse);
                        sse /= dfr;
                        inv_sse = 1/sse;
                        
                        // now, ss is transformed to F statistics
                        for(d=0; d<3; d++){ ss[d] = ss[d]*inv_sse; }
                        
                        // Rprintf("f-stat is: ");
                        // Rprint_vector(ss, 0, 2);
                        P1  = gsl_cdf_fdist_Q(ss[0], df[0], dfr);
                        P2  = gsl_cdf_fdist_Q(ss[1], df[1], dfr);
                        P12 = gsl_cdf_fdist_Q(ss[2], df[2], dfr);
					
                        
                        if( (P12 - P12_cut)*cuts_direction[2] > 0 && 
                        (P1 - P1_cut)*cuts_direction[0] > 0 && 
                        (P2 > P2_cut)*cuts_direction[1] > 0 ){
                            if(P12 < gP[k]){ gP[k] = P12; }
                        }
                    }// end of loop for each gene
                }
            }// end of loop for marker 2
        }// end of loop for marker 1
        
        if(pp == 0){
            for(k=0; k<nrow_e; k++){
                for(d=0; d<nP12Cuts; d++){
                   if(gP[k] <= p12cuts[d]){ Ds[d]+=1; }
                }
            }
        }else{
            for(k=0; k<nrow_e; k++){
                for(d=0; d<nP12Cuts; d++){
                   if(gP[k] <= p12cuts[d]){ FDs[d]+=1; }
                }
            }
        }
                    
        sec_et = time(NULL);
        Rprintf("time spent for this permutation: %ld secs\n", sec_et-sec_st);
        Rprintf("\n");
        
    }

    for(d=0; d<nP12Cuts; d++){
       FDs[d] /= (double)nper;
    }

    free(permute);
    free(gP);
	gsl_vector_free(tau);
	gsl_vector_free(coef);
	gsl_vector_free(comp);
	gsl_vector_free(resd);
	gsl_matrix_free(me);
	gsl_matrix_free(mm);
	gsl_matrix_free(mp);
	gsl_matrix_free(pme);
	gsl_matrix_free(model_m);
    gsl_rng_free(r);
    
	*succeed = 1;
	return(1);
}


/*
 * calculate LOD score based anova analysis result
 *
 */

int get_lod(int* dims, double *mse, int *eID, double *ve, double * lods)
{
	int i, j, e_id, nrow_anova, nrow_e, ncol;
	const double pi=3.14159266;
	double log10L0, log10L1, var0, var1; 
	time_t sec_s;
	time_t sec_e;
	
	/* dimensions of the data */
	nrow_anova  = dims[0];
	nrow_e  = dims[1];
	ncol    = dims[2];

	gsl_vector * e = gsl_vector_calloc (ncol);
	gsl_matrix * me = gsl_matrix_alloc (nrow_e, ncol);
	gsl_vector * vars = gsl_vector_calloc (nrow_e);
	gsl_vector_set_zero(vars);
	
	for(j=0; j<ncol; j++){
		for(i=0; i<nrow_e; i++){
			gsl_matrix_set(me, i, j, ve[j*nrow_e+i]);
		}
	}
	// Rprint_matrix(me, 0, 9, 0, 9, 1);
	// Rprintf("nrow_anova = %d\n", nrow_anova);

	sec_s  = time(NULL);
    for(i=0; i<nrow_anova; i++){
        e_id  = eID[i] - 1;
        // Rprintf("e_id=%d\t", e_id);
		
		// get variance of mle
        var1  = mse[i];
        // Rprintf("var1=%f\t", var1);

		// rss = var1*(ncol), here var1 is variance of mle 
		// Rprintf("var1=%f, rss=%f\n", var1, var1*ncol);
		log10L1 = (-0.5)*ncol*(log(2*pi) + 1 + log(var1));
		log10L1 /= log(10);
		
        if(gsl_vector_get(vars, e_id)==0.0){
            gsl_matrix_get_row (e, me, e_id);
            var0 = gsl_vector_var(e, ncol, 1);
            gsl_vector_set(vars, e_id, var0);
        }else{
            var0 = gsl_vector_get(vars, e_id);
        }
        // Rprintf("var0=%f\t", var0);

		log10L0 = (-0.5)*ncol*(log(2*pi) + 1 + log(var0));
		log10L0 /= log(10);

		// Rprintf("log10L0=%f, log10L1=%f\n", log10L0, log10L1);
        lods[i] = log10L1 - log10L0;
		// Rprintf("lod=%f\n", lods[i]);
	}
	Rprintf("\n");

	gsl_vector_free(e);
	gsl_vector_free(vars);
	gsl_matrix_free(me);
	return(1);
}

/**
 * chi-square test
 * n is length of x, y
 * m is number of levels of x or y
 * minLen is the minimum length for each factor level
 */
double chisq(int *x, int *y, int n, int m, double minLen){
    int i, j, k;
    double tmp, minL, **t; // t store the contingency table 
    double *ti, *tj; // ti, tj stores the marginal frequencies
    // stat is the chi-square statistics
    // T is the sum of all frequencies 
    // nu is the degree of freedom of chi-square test
    double stat, T, nu, pval;
        
	t  = (double**) malloc(m*sizeof(double*));
    t[0] = (double*) calloc(m*m, sizeof(double));
    for(i=1; i<m; i++){
        t[i] = t[0] + i*m;
    }
    
    ti = (double*) calloc(m, sizeof(double));
    tj = (double*) calloc(m, sizeof(double));

    for(k=0; k<n; k++){
        t[x[k]][y[k]] += 1.0;
    }
    
    minL = (double)n;
    for(i=0; i<m; i++){
        for(j=0; j<m; j++){
            tmp = t[i][j];
            ti[i] += tmp;
            tj[j] += tmp;
            if(tmp < minL){ minL = tmp; }
        }
    }
    // if at least one factor level does not have enough observations
    if(minL < minLen){ return 0.0; }
    
    T = 0.0;
    stat = 0.0;
    for(i=0; i<m; i++){
        T += ti[i];
        for(j=0; j<m; j++){
            stat += (t[i][j]*t[i][j])/(ti[i]*tj[j]);
        }
    }
    
    stat = T*(stat - 1);
    nu   = (double)(m-1)*(m-1);
    pval = gsl_cdf_chisq_Q(stat, nu);

    return(pval);
}

/**
 * 
 * reorg
 *
 * Reorganize a vector to a matrix of given size. 
 *
 * Allocation done by R_alloc, so that R does the cleanup.
 *
 */
 
void reorg_int(int *v, int ***m, int nrow, int ncol)
{
    int i;
    
    *m = (int **)R_alloc(nrow, sizeof(int*));
    
    (*m)[0] = v;
    if(nrow>1){
        for(i=1; i<nrow; i++){
            (*m)[i] = (*m)[i-1] + ncol;
        }
    }
}



void reorg(double *v, double ***m, int nrow, int ncol)
{
    int i;
    
    *m = (double **)R_alloc(nrow, sizeof(double*));
    
    (*m)[0] = v;
    if(nrow>1){
        for(i=1; i<nrow; i++){
            (*m)[i] = (*m)[i-1] + ncol;
        }
    }
}

/**
 * Chi-square test, taking the maker data matrix as input
 */

int chisqAll(int* dims, int *vm, double *vp, int *binary, double *minLen){
    int i, j, nr, nc;
    int **mm;
    double **mp;
    
    nr = dims[0];
    nc = dims[1];
    
    reorg_int(vm, &mm, nr, nc);
    reorg(vp, &mp, nr, nr);
    
    for(i=0; i<nr-1; i++){
        for(j=i+1; j<nr; j++){
            if(*binary == 1){
                mp[i][j] = chisq(mm[i], mm[j], nc, 2, *minLen);
            }else{
                mp[i][j] = chisq(mm[i], mm[j], nc, 3, *minLen);
            }
        }
    }
    
    return(1);
}

