\name{anova1}
\alias{anova1}
\title{ One-way ANOVA }
\description{
    ANOVA for one continuous response and one factor predictor.
    It is fast when handling a large number of responses and predictors.
}
\usage{
anova1(me, mm, cut, output, minLen=5)
}
\arguments{
  \item{me}{ matrix of responses, each row is one response. This matrix is gene
   expression data in eQTL analysis. Only numerical values allowed.
  }
  \item{mm}{ matrix of predictors, each row is one predictor. This matrix is
   marker genotype data in eQTL analysis. Only values -1, 0, 1 allowed. 
  }
  \item{cut}{ cut-off of ANOVA p-value.}
  \item{minLen}{ minimum number of observations in one factor level. It must
    be a non-negative integer. Levels with fewer than minLen observations
    are dropped. Cases in which fewer than two levels remain are
    skipped. }
  \item{output}{ name  of output file.}
}
\details{
  The variance decomposition is obtained by iterating over the factor levels,
  instead of fitting a linear model.
}
\value{
    Returns 1 if it succeeds, 0 otherwise. The ANOVA computation result is
    written into the output file.

    There are five columns in the output file:
    \item{GENE\_ID}{ The corresponding row number of the gene in the input gene 
      expression matrix me }
    \item{MARKER\_ID}{ The corresponding row number of the marker in the input 
      marker data matrix mm }
    \item{MSE\_MLE}{ The maximum likelihood estimate of mean square error, which
      is used in the \code{get.lod} function }
    \item{F}{ F statistic }
    \item{P}{ ANOVA P-value }
}
\references{  }
\author{ Wei Sun \email{sunwei@stat.ucla.edu} }
\note{ 
    The ANOVA p-value is set to NA if there are fewer than 2 factor levels with
    at least minLen observations.
}

\seealso{ \code{\link{anova2}}, \code{\link{get.lod}} }
\examples{
# Load the yeast expression and marker data sets.
data(yeastExpressions)
data(yeastMarkers)

# Numeric measurements go into matrices (one gene / one marker per row);
# the leading columns of each data set are kept separately in ie and im.
me <- data.matrix(yeastExpressions[1:1000, 3:42])
mm <- data.matrix(yeastMarkers[, 4:43])
ie <- yeastExpressions[1:1000, 1:2]
im <- yeastMarkers[, 1:3]

# Drop rows or columns with too many missing values. Only columns that are
# retained in both matrices are kept, so me and mm stay column-aligned.
meKeep <- dropNA(me)
mmKeep <- dropNA(mm)
sharedCols <- intersect(meKeep$colKeep, mmKeep$colKeep)
me <- me[meKeep$rowKeep, sharedCols]
mm <- mm[mmKeep$rowKeep, sharedCols]
ie <- ie[meKeep$rowKeep, ]
im <- im[mmKeep$rowKeep, ]

# Impute missing gene expression values from the 10 nearest neighbours.
# NOTE(review): some versions of impute.knn return a list whose data element
# holds the imputed matrix - confirm against the installed impute version.
library(impute)
me <- impute.knn(me, k = 10, rowmax = 0.6, colmax = 0.8, maxp = nrow(me))

# Impute missing genotype values simply by nearest neighbour.
mm <- impute.geno(mm, im)

# Generate marker blocks and compare the dimensions before and after.
mb <- get.mb(mm, im, 2.0)
dim(mm)
dim(mb$data)

# Run the one-way ANOVA with a p-value cut-off of 1e-3; the result is
# written to the named output file.
cut <- 1e-3
a <- anova1(me, mb$data, cut, "1g1mbAnova.txt")
}
\keyword{ methods }
