#############################################################
##### This is an R function used to estimate the corresponding 
##### phenotype distributions of putative QTL for backcross 
##### design. Data is in the same format as that of R/qtl.
##### The inputs are: data set name, trait to be considered;
##### the qtl position, and the mapping increment.  The outputs
##### are: estimated CDFs and their pointwise variance estimates,
##### of the two corresponding phenotype distributions, as well
##### as PDFs, means of two distributions and pointwise
##### 95% confidence intervals
##### Author:	 Fei Zou
##### For details, please check paper:
##### Nonparametric Estimation of Mixture Models, with
##### Application to Quantitative Trait Loci
##### Jason P. Fine, Fei Zou and Brian S. Yandell
##### Biostatistics (To appear)
#############################################################

library(qtl)  #include R/qtl library
data(hyper)   #use hyper as an example

#yourdat:  name of your data set (same format as R/qtl)
#whichPheno: the phenotype to be considered
#whichChrom: the  chromosome(s) to be considered
#whichLoci:  the locus(loci) on chromosome(s) considered
#Step: the mapping increment

	
noncdf<- function(yourdat = hyper, whichPheno= 1, whichChrom = c(1), 
whichLoci =c(22), Step=1){ 
	
#calculate the conditional probability given multiple markers	
yourdat1 <- calc.genoprob(yourdat, step=Step, error.prob=0.01)

#get the unique phenotype values and sort them
ytrait<- yourdat$pheno[,whichPheno]
realtt<- sort(unique(ytrait))

n<- length(ytrait) # number of phenotyped individuals
	
#calculate the number of unique phenotypes
numofphenopoints<- length(realtt)	
	
distf<-rep(0,numofphenopoints)
distg<-rep(0,numofphenopoints)
varfEst<-rep(0,numofphenopoints)
vargEst<-rep(0,numofphenopoints)

		
for(i in 1: length(whichChrom)){
    #the prob below are 3 dimensions with n ind * n pos * n genotype
    #get the conditional probabilities on ith considered chromosome
    cond<- yourdat1$geno[[whichChrom[i]]]$prob[,whichLoci[i],]

	
    #mm<-matrix(cbind(lamb,lamb2),ncol=2,byrow=FALSE)
	
    md<-t(cond)%*%cond

    inversemd<-solve(md)

    mdesign<-inversemd%*%t(cond)

    mid<-matrix(0,ncol=n,nrow=n)

  
    for(i in 1:numofphenopoints){
      count<-rep(1,n)

      for(j in 1:n){
        if(ytrait[j]>realtt[i])
        count[j]<-0
    }


  fm<-lm(count~cond[,1])

  distg[i]<-fm$coefficients[1]
  distf[i]<-fm$coefficients[2]+distg[i]

  if(i==1&&distg[i]<0) distg[i]<-0
  if(i==1&&distf[i]<0) distf[i]<-0

  if(i>1 && distg[i]<distg[i-1]) distg[i]<-distg[i-1]
  if(i>1 && distf[i]<distf[i-1]) distf[i]<-distf[i-1]
    
  
  distg[i]<-min(distg[i],1)
  distf[i]<-min(distf[i],1)

  groupF<-distf[i]*cond[,1]+distg[i]*cond[,2]
  varF<-groupF*(1-groupF)
  
    
  for(k in 1:n)
    mid[k,k]<-varF[k]
  
  varEst<-mdesign%*%mid%*%t(mdesign)


  varfEst[i]<-max(varEst[1,1],0)
  vargEst[i]<-max(varEst[2,2],0)


  
  }

}


#upper and lower 95% CI of F and G
distfl<-distf-1.96*sqrt(varfEst)
distfu<-distf+1.96*sqrt(varfEst)

distgl<-distg-1.96*sqrt(vargEst)
distgu<-distg+1.96*sqrt(vargEst)

#Point estimates for discrete F and G
pdistf<-rep(0,numofphenopoints)
pdistg<-rep(0,numofphenopoints)

pdistf[1]<-distf[1]
pdistg[1]<-distg[1]

for (i in 2:numofphenopoints){
   pdistf[i]<-distf[i]-distf[i-1]
   pdistg[i]<-distg[i]-distg[i-1]
  }

#means of F and G	
meanf<-sum(pdistf*realtt)
meang<-sum(pdistg*realtt)

output<- list(cdfF = distf, cdfG = distg, varF = varfEst, varG = vargEst,
	      pdfF = pdistf, pdfG = pdistg, meanF = meanf, meanG = meang,
	      lowerF = distfl, upperF = distfu, 
	      lowerG = distgl, upperG = distgu) 
return(output)
	
}





