################################################################################################# ########################################## RIST ############################################ ################################################################################################# library(survival) Muti_ERT_fit <-function(dataset, M=50, K=5, L=3, nmin=6, SupLogRank=1, tao=25, impute=c("random", "expected")) { newset=dataset newp=dim(dataset)[2] P=newp-2 need=ceiling(dim(dataset)[1]/nmin) time_intrest=sort(dataset[dataset[,P+1]==1, P+2]) Forest=matrix(0,2*need,7*M) Large=rep(0,M) Forestdata=matrix(0,dim(dataset)[1],M) SurvMat=list() for (i in 1:M) #fit M trees { Treefit=Fit_ERT_Tree(newset,dataset, K, nmin, SupLogRank, Step=0, tao) Forest[1:dim(Treefit$Tree)[1],(7*(i-1)+1):(7*i)]=Treefit$Tree Large[i]=dim(Treefit$Tree)[1] Forestdata[,i]=Treefit$UpdateData[,dim(dataset)[2]] SurvMat[[i]]=Treefit$Survival_All } censornum=seq(1,dim(dataset)[1])[dataset[,P+1]==0] censorset = dataset[dataset[,P+1]==0,] censor_surv = Muti_ERT_Predict(censorset, Forest, SurvMat, time_intrest)$Surv_predict cond_sample_set=list() cond_sample_rate=list() len=length(time_intrest) for (q in 1:dim(censorset)[1]) { sub_surv=c(1,censor_surv[q,]) locate=sum(time_intrest<=censorset[q,P+2])+1 con_surv1=sub_surv[locate:length(sub_surv)] con_surv2=c(1,con_surv1)-c(con_surv1,0) con_surv=con_surv2[2:length(con_surv2)] cond_sample_rate[[q]]=con_surv/sum(con_surv) cond_sample_set[[q]]=c(time_intrest[time_intrest>censorset[q,P+2]],tao) } Forest_0=Forest SurvMat_0=SurvMat Forest_seq=list() SurvMat_seq=list() if (L>0) { for (j in 1:L) { Forest=matrix(0,3*need,7*M) Large=rep(0,M) Forestdata=matrix(0,dim(dataset)[1],M) SurvMat=list() for (i in 1:M) #fit M trees after imputation { newobtime=rep(0,dim(censorset)[1]) newobtime2=rep(0,dim(censorset)[1]) for (q in 1:dim(censorset)[1]) { samplelong=length(cond_sample_rate[[q]]) if (cond_sample_rate[[q]][samplelong]==1) { newobtime[q]=tao newobtime2[q]=tao }else {newsample=rmultinom(1,size=1, prob= cond_sample_rate[[q]]) newobtime[q]=cond_sample_set[[q]][newsample==1] samplerate=cond_sample_rate[[q]][1:(samplelong-1)]/sum(cond_sample_rate[[q]][1:(samplelong-1)]) exptime=sum(cond_sample_set[[q]][1:(samplelong-1)]*samplerate) newsample2=rbinom(1, 1, prob=cond_sample_rate[[q]][samplelong]) newobtime2[q]=tao*(newsample2==1)+exptime*(newsample2==0) } } if (impute=="random") { newcensorind=1-(newobtime==tao) newset[censornum,P+1]=newcensorind newset[censornum,P+2]=newobtime } if (impute=="expected") { newcensorind=1-(newobtime2==tao) newset[censornum,P+1]=newcensorind newset[censornum,P+2]=newobtime2 } Treefit=Fit_ERT_Tree(newset,dataset, K, nmin, SupLogRank, Step=1 ,tao) Forest[1:dim(Treefit$Tree)[1],(7*(i-1)+1):(7*i)]=Treefit$Tree Large[i]=dim(Treefit$Tree)[1] Forestdata[,i]=Treefit$UpdateData[,dim(dataset)[2]] SurvMat[[i]]=Treefit$Survival_All } censor_surv = Muti_ERT_Predict(censorset, Forest, SurvMat, time_intrest)$Surv_predict cond_sample_set=list() cond_sample_rate=list() len=length(time_intrest) for (q in 1:dim(censorset)[1]) { sub_surv=c(1,censor_surv[q,]) locate=sum(time_intrest<=censorset[q,P+2])+1 con_surv1=sub_surv[locate:length(sub_surv)] con_surv2=c(1,con_surv1)-c(con_surv1,0) con_surv=con_surv2[2:length(con_surv2)] cond_sample_rate[[q]]=con_surv/sum(con_surv) cond_sample_set[[q]]=c(time_intrest[time_intrest>censorset[q,P+2]],tao) } Forest_seq[[j]]=Forest SurvMat_seq[[j]]=SurvMat } } return(list("time_intrest"=time_intrest, "Forest_seq"=Forest_seq, "SurvMat_seq"=SurvMat_seq, "Forest_0"=Forest_0, "SurvMat_0"=SurvMat_0)) } Muti_ERT_Predict <- function (testset, Forest, SurvMat, time_intrest) { XP=dim(testset)[2]-2 M=dim(Forest)[2]/7 newsubject=testset[,1:XP] newn=dim(newsubject)[1] TMT_predict=rep(0,newn) Surv_predict=matrix(0,newn,length(time_intrest)) TMT_temp=rep(0,M) Surv_temp=matrix(0, M, length(time_intrest)) for (s in 1:newn) { new_cov=newsubject[s,] for (i in 1:M) { Tree=Forest[,(7*(i-1)+1):(7*i)] Whole_predict=Evaluate_X(new_cov, Tree, SurvMat[[i]]) TMT_temp[i]=Whole_predict$TMT_pre Surv_temp[i,]=Whole_predict$Surv_pre } TMT_predict[s]=mean(TMT_temp) Surv_predict[s,]=colMeans(Surv_temp) } return(list("TMT_predict"=TMT_predict, "Surv_predict"=Surv_predict, "time_intrest"=time_intrest)) } ################### build a tree ############## Fit_ERT_Tree <- function (dataset, dataset2, K, nmin, SupLogRank=0, Step, tao) #nmin is the minimal number of observed data { data_n <<- dim(dataset)[1] One_Tree <<- matrix(0,2*data_n/nmin,7) # Global variable to save the tree BookingTable <<- matrix(0, 2*data_n/nmin ,1) # Global variable to save used locations BookingTable[1,1] <<- 1 P <<- dim(dataset)[2]-2 Tao <<- tao obs <- seq(1,dim(dataset)[1]) rate <- rep(1, dim(dataset)[1]) time_points <<- sort(dataset2[dataset2[, (P+1)]==1, (P+2) ]) Survival_All <<- matrix(0, 2*data_n/nmin , length(time_points)) dataset <- cbind(dataset,obs) backset <<- cbind(dataset2,obs,rate) Further_Split_A_Node(dataset, K, nmin, 1, SupLogRank, Step) output <- list() output$Tree <- One_Tree[BookingTable[,1]==1,] output$UpdateData <- backset output$Survival_All <- Survival_All[BookingTable[,1]==1,] out <- output } ################## split a node ################ # use functions: Stop_Split, Find_A_Split, # ################################################ Further_Split_A_Node <- function (subset, K, nmin, node, SupLogRank, Step) { Split_result=Stop_Split(subset, nmin) if (Split_result$Stop==1) { One_Tree[node, 1]<<-NA One_Tree[node, 2]<<-NA One_Tree[node, 3]<<-NA One_Tree[node, 4]<<-NA UpdateCensor(subset, node, Step) One_Tree[node, 7]<<-mean(subset[,P+2]) ########## mean of expected observed time (consideing survival function) } else { split <- Find_A_Split(subset, Split_result$SplitVariable, Split_result$SplitValue, SupLogRank) a <- split[1] ac <- split[2] NAobs=(is.na(subset[,a]>=ac)==1) noneNALeft=(subset[,a]=ac) noneNALeft[is.na(noneNALeft)==1]=0 noneNARight[is.na(noneNARight)==1]=0 Left_size=sum(subset[,a]=ac, na.rm = TRUE) goleft=rbinom(length(NAobs),1,NAobs*Left_size/(Left_size+Right_size)) goright=NAobs-goleft Left_obs=noneNALeft+goleft Right_obs=noneNARight+goright subsetL=subset[Left_obs==1,] subsetR=subset[Right_obs==1,] ########### we search for a place for left tree: LeftNode = node+1; while (BookingTable[LeftNode,1]==1) LeftNode=LeftNode+1 BookingTable[LeftNode,1] <<- 1 ############ we search a place for right tree: RightNode = LeftNode+1; while (BookingTable[RightNode,1]==1) RightNode=RightNode+1; BookingTable[RightNode,1] <<- 1; ########### finishing building the current node One_Tree[node, 1] <<- a; One_Tree[node, 2] <<- ac; One_Tree[node, 3] <<- LeftNode; One_Tree[node, 4] <<- RightNode; One_Tree[node, 5] <<- dim(subsetL)[1]; One_Tree[node, 6] <<- dim(subsetR)[1]; One_Tree[node, 7] <<- NA; #One_Tree[node, 7] <<- dim(subset)[1]; Further_Split_A_Node(subsetL, K, nmin, LeftNode, SupLogRank, Step); Further_Split_A_Node(subsetR, K, nmin, RightNode, SupLogRank, Step); } } UpdateCensor <- function (subset, node, Step) { if (Step==0) { #calculate KM estimate of failset #need output for further use Survival_All[node,] <<- KM_func(subset) } if (Step==1) { #calculate emperical estimate of failset #need output for further use obpoints=subset[,P+2] for (i in 1:length(time_points)) { Survival_All[node,i] <<- mean(obpoints>time_points[i]) } } } KM_func <- function(subset) { failset=subset[,(P+1):(P+2)] uniq=sort(unique(failset[failset[,1]==1,2])) nuniq=length(uniq) d=rep(0,nuniq) y=rep(0,nuniq) failset2=failset[failset[,1]==1,2] for (i in 1:nuniq) { d[i]=sum(failset2==uniq[i]) y[i]=length(failset[failset[,2]>=uniq[i],2]) } Survive=c(1,cumprod(1-d/y)) Surv_all=rep(0, length(time_points)) for (j in 1:length(time_points)) { Surv_all[j] <- Survive[sum(time_points[j]>=uniq)+1] } return(Surv_all) } ################# stop a spliting ################# Stop_Split <- function(subset, nmin) { nobs=sum(subset[,P+1]) # number of observed data if (nobs<(nmin*2)) {out<-list("Stop"=1,"SplitVariable"=NA, "SplitValue"=NA)} else { obsset=subset[subset[,P+1]==1,] sampleMin=rep(0,P) sampleMax=rep(0,P) for (i in 1:P) { sampleMin[i]=sort(obsset[,i])[nmin] sampleMax[i]=tail(sort(obsset[,i]),nmin)[1] } stop=1-(sum(sampleMin0) if (stop==1){out<-list("Stop"=1, "SplitVariable"=NA, "SplitValue"=NA)} else { cansample=seq(1,P)[sampleMinsplit),(P+2)] leftvar=max(c(0, var(leftY, na.rm = TRUE)), na.rm = TRUE) rightvar=max(c(0, var(rightY, na.rm = TRUE)), na.rm = TRUE) sl=length(na.omit(leftY)) sr=length(na.omit(rightY)) out<- (totalvar - sl*leftvar/n - sr*rightvar/n)/totalvar } ################################ evaluating ########################### Evaluate_DataX <- function(X, Tree, SurvMat) { n=dim(X)[1] y=rep(0,n) for (i in 1:n) { y[i]=Evaluate_X(X[i,],Tree, SurvMat) } out<-y } Evaluate_X <- function(X, Tree, SurvMat) { if(is.matrix(Tree)==0) {out<- list("TMT_pre"=Tree[7], "Surv_pre"=SurvMat)} else out<- Evaluate_It(X, Tree, 1, SurvMat) } Evaluate_It <- function(X, Tree, node, SurvMat) { if (is.na(Tree[node,2])==1) {out <- list("TMT_pre"=Tree[node,7],"Surv_pre"=SurvMat[node,]) } else { if (is.na(X[Tree[node,1]])) { GoRight = rbinom( 1, 1, Tree[node,6]/( Tree[node,6] + Tree[node, 5]) ) } else { GoRight=(X[Tree[node,1]]>=Tree[node,2]) } out<- Evaluate_It(X, Tree, Tree[node,3]+GoRight, SurvMat) } } ############################################################################################################# ############################################################################################################# # Sup Log-Rank Test ############################################################################################################# ############################################################################################################# sup.G<-function(x,m=10) { k<-m:0 (4/pi)*sum(((-1)^k)/(2*k+1)*exp(-(pi^2)*((2*k+1)^2)/(8*x^2))) } sup.g<-function(x,m=10) { k<-m:0 (pi/x^3)*sum(((-1)^k)*(2*k+1)*exp(-(pi^2)*((2*k+1)^2)/(8*x^2))) } cnorm<-function(z,thresh=3.6,delta=0.6,kk=4){ check<-F if(z<0){ z<-(-1)*z check<-T } if(z1){ for(k in 1:(kk-1)){ term<-(-1)*term*(2*k-1)/z^2 tally<-tally+term } } out<-tally*dnorm(z)/z if(zerror) { yx<-sup.G(x,m=m) dg<-sup.g(x,m=m) delta<-(1-alpha-yx)/dg x<-x+delta interror<-sup.G(x)-(1-alpha) } x } sup.r<-function(alpha, beta, error=1e-8) { u<-sup.inverse(alpha,error=error) y<-1-beta ml<-qnorm(1-alpha/2)+qnorm(1-beta) x<-ml delta<-1 while(delta>error) { yx<-cnorm(u-x)+exp(2*x*u)*cnorm(u+x) dp<-dnorm(u-x)-exp(2*u*x)*dnorm(u+x)+2*u*exp(2*u*x)*cnorm(u+x) delta<-(y-yx)/dp x<-x+delta } (x/ml)^2 } surv.Rtest<-function (time, delta, group, rho=0, gamma=0, logrank=F, error=1.0e-8) { otime <- order(time) time <- time[otime] delta <- delta[otime] n<-length(time) if((rho+gamma)==0){ weight<-rep(1,n) } else{ km.left<-KM.left(time,delta) weight<-km.left^rho*(1-km.left)^gamma } group <- group[otime] - 1 n2 <- sum(group) atrisk2 <- n2 - cumsum(group) + group n1 <- n-n2 atrisk1 <- n1 - cumsum(1 - group) + 1 - group delta1 <- delta * (1 - group) delta2 <- delta * group y1 <- tapply(atrisk1, time, "max") y2 <- tapply(atrisk2, time, "max") d1 <- tapply(delta1, time, "sum") d2 <- tapply(delta2, time, "sum") weight<-tapply(weight,time,"max") w <- (y1 * y2)/(y1 + y2) terms <- (d1/y1 - d2/y2)[w > 0] temp<-y1+y2-1 temp<-ifelse(temp<1,1,temp) cc<-1-(d1+d2-1)/temp vterms <- (cc*(d1 + d2)/(y1 + y2))[w > 0] weight<-weight[w > 0] w <- w[w > 0] terms <- (weight * w * terms)/sqrt(sum(weight^2 * w * vterms)) temp<-c(0,cumsum(terms)) xs<-max(temp, na.rm = TRUE) # changed from xs<-max(temp) to deal with missing values xi<-min(temp, na.rm = TRUE) # changed from xi<-min(temp) to deal with missing values if(abs(xs)>abs(xi)){test<-xs} else{test<-xi} x <- abs(test) m<-ceiling(max(c(1,(x*sqrt(2)/pi)*sqrt(max(c(1,log(1/(pi*error)))))-0.5))) p<-1-sup.G(x,m=m) out <- NULL out$test <- test out$p <- p if(logrank){ x<-temp[length(temp)] out$test.logrank<-x out$p.logrank<-2*cnorm(abs(x)) } out } KM.left<-function(time,delta){ n<-length(time) dtime<-tapply(time,time,"max") ddelta<-tapply(delta,time,"sum") dy<-tapply(rep(1,n),time,"sum") m<-length(dy) y<-rep(n,m)-c(0,cumsum(dy)[1:(m-1)]) km<-1 km.left<-rep(0,m) for(i in 1:m){ km.left[i]<-km km<-km*(1-ddelta[i]/y[i]) } out<-rep(0,n) for(i in 1:n){ out[i]<-min(km.left[dtime==time[i]]) } out }