###################################################################################### # # A sample script for the calculation of beta estimates and their variance estimates # ###################################################################################### # Some modification on the data set is needed before the calculation. # 1. Set delta (failure indicator) = 0 for nonsubcohort cases who are not sampled (weight=0) # : This modified failure indicator will be used in place of delta in coxph function # # 2. Set time (observed time) = 0 for nonsubcohort cases who are not sampled (weight=0) # : The data set needs to be complete, i.e., K observations per subject even if # some of them are not sampled # # 3. Assign arbitrary positive values to weights where weight = 0 # : weights option in coxph does not work if weight=0. This modification shouldn't # make any difference if we do step 1 and 2 with step 3. # # 4. Creating a data set for the analysis which is composed only of sampled individuals # Data set modification # 1. modified failure indicator: delta = 0 if delta=1 but not sampled (weight=0) DELTA.MOD <- data.new$DELTA - (data.new$DELTA==1 & WEIGHT==0) # 2. add the modified failure indicator to the existing data set data.new2 <- cbind(data.new,DELTA.MOD) # 3. modified time: time = 0 if weight = 0 data.new2$TIME <- data.new2$TIME*(WEIGHT>0) # 4. modified weight: make weights greater than zero (arbitrary weights: 2 in this case) data.new2$WEIGHT <- data.new2$WEIGHT + 2*(WEIGHT==0) # 5. Creating a data set for analysis: only with sampled individuals # SAMCASE_ALL: indicator whether a SUBJECT is sampled data.new5 <- data.new2[data.new2$SUBCOH_IND==1 | data.new2$SAMCASE_ALL > 0,] # beta estimation: WLW method with weights option # Note: Modified failure indicator (DELTA.MOD) was used in place of DELTA coxfit<-coxph(Surv(TIME,DELTA.MOD)~Z+cluster(ID)+strata(MEMBER),weights=WEIGHT,data=data.new5) # calculation of dfbeta for variance estimates dfb<-as.matrix(resid(coxfit,type='dfbeta')) # for convenience attach(data.new5) # initialization of sums of dfbetas which will be used for each component of variance estimates dfb.sum1 <- matrix(rep(0,(sum(SUBCOH_IND==1)/n.strata)*p),ncol=p) dfb.sum2 <- matrix(rep(0,(sum(SUBCOH_IND==1)/n.strata)*p),ncol=p) var3.temp <- matrix(rep(0,p^2),ncol=p) for (i in 1:n.strata){ # extracting row numbers of subcohort members in ith strata sub.ind.st <- (1:length(TIME))[MEMBER==i & SUBCOH_IND==1] # extracting row numbers of sampled non-subcohort cases in ith strata nsub.ind.case.st <- (1:length(TIME))[MEMBER==i & CASE_IND==1] # sum of dfbetas for the first variance component dfb.sum1 <- dfb.sum1 + (DELTA[sub.ind.st]+alpha.scnc*(1-DELTA[sub.ind.st]))*dfb[sub.ind.st,] # sum of dfbetas for the second variance component dfb.sum2 <- dfb.sum2 + (1-DELTA[sub.ind.st])*dfb[sub.ind.st,] # calculation of the third variance component var3.temp <- var3.temp + n.case[i]*(1-alpha.nsc[i])*alpha.nsc[i]*var(dfb[nsub.ind.case.st,]) } var1 <- (1/alpha.scnc)*t(dfb.sum1)%*%dfb.sum1 # first variance component var2 <- n.subcohort*(1 - alpha.scnc)*var(dfb.sum2) # second variance component var3 <- (1 - alpha.scnc)*var3.temp # third variance component var.temp <- (var1 + var2 + var3) # variance estimates