################################################################ # PROGRAM 13.1 # Estimating the mean outcome within levels of treatment # and confounders: Data from NHEFS ################################################################ #install.packages("readxl") # install package if required library("readxl") nhefs <- read_excel("V:/otago/NHEFS.xls") # some preprocessing of the data nhefs$cens <- ifelse(is.na(nhefs$wt82), 1, 0) fit <- glm(wt82_71 ~ qsmk + sex + race + age + I(age*age) + as.factor(education) + smokeintensity + I(smokeintensity*smokeintensity) + smokeyrs + I(smokeyrs*smokeyrs) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71*wt71) + qsmk*smokeintensity, data=nhefs) summary(fit) nhefs$predicted.meanY <- predict(fit, nhefs) nhefs[which(nhefs$seqn==24770), c("predicted.meanY", "qsmk", "sex", "race", "age", "education", "smokeintensity", "smokeyrs", "exercise", "active", "wt71")] summary(nhefs$predicted.meanY[nhefs$cens==0]) summary(nhefs$wt82_71[nhefs$cens==0]) ################################################################ # PROGRAM 13.2 # Standardizing the mean outcome to the baseline confounders # Data from Table 2.2 ################################################################ id <- c("Rheia", "Kronos", "Demeter", "Hades", "Hestia", "Poseidon", "Hera", "Zeus", "Artemis", "Apollo", "Leto", "Ares", "Athena", "Hephaestus", "Aphrodite", "Cyclope", "Persephone", "Hermes", "Hebe", "Dionysus") N <- length(id) L <- c(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) A <- c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) Y <- c(0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0) interv <- rep(-1, N) observed <- cbind(L, A, Y, interv) untreated <- cbind(L, rep(0, N), rep(NA, N), rep(0, N)) treated <- cbind(L, rep(1, N), rep(NA, N), rep(1, N)) table22 <- as.data.frame(rbind(observed, untreated, treated)) table22$id <- rep(id, 3) glm.obj <- glm(Y~ A*L, data = table22) summary(glm.obj) table22$predicted.meanY <- predict(glm.obj, table22) mean(table22$predicted.meanY[table22$interv==-1]) mean(table22$predicted.meanY[table22$interv==0]) mean(table22$predicted.meanY[table22$interv==1]) ################################################################ # PROGRAM 13.3 # Standardizing the mean outcome to the baseline confounders: # Data from NHEFS ################################################################ # create a dataset with 3 copies of each subject nhefs$interv <- -1 # 1st copy: equal to original one interv0 <- nhefs # 2nd copy: treatment set to 0, outcome to missing interv0$interv <- 0 interv0$qsmk <- 0 interv0$wt82_71 <- NA interv1 <- nhefs # 3rd copy: treatment set to 1, outcome to missing interv1$interv <- 1 interv1$qsmk <- 1 interv1$wt82_71 <- NA onesample <- rbind(nhefs, interv0, interv1) # combining datasets # linear model to estimate mean outcome conditional on treatment and confounders # parameters are estimated using original observations only (nhefs) # parameter estimates are used to predict mean outcome for observations with # treatment set to 0 (interv=0) and to 1 (interv=1) std <- glm(wt82_71 ~ qsmk + sex + race + age + I(age*age) + as.factor(education) + smokeintensity + I(smokeintensity*smokeintensity) + smokeyrs + I(smokeyrs*smokeyrs) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71*wt71) + I(qsmk*smokeintensity), data=onesample) summary(std) onesample$predicted_meanY <- predict(std, onesample) # estimate mean outcome in each of the groups interv=0, and interv=1 # this mean outcome is a weighted average of the mean outcomes in each combination # of values of treatment and confounders, that is, the standardized outcome mean(onesample[which(onesample$interv==-1),]$predicted_meanY) mean(onesample[which(onesample$interv==0),]$predicted_meanY) mean(onesample[which(onesample$interv==1),]$predicted_meanY) ################################################################ # PROGRAM 13.4 # Computing the 95% confidence interval of the standardized means # and their difference: Data from NHEFS ################################################################ #install.packages("boot") # install package if required library(boot) # function to calculate difference in means standardization <- function(data, indices) { # create a dataset with 3 copies of each subject d <- data[indices,] # 1st copy: equal to original one` d$interv <- -1 d0 <- d # 2nd copy: treatment set to 0, outcome to missing d0$interv <- 0 d0$qsmk <- 0 d0$wt82_71 <- NA d1 <- d # 3rd copy: treatment set to 1, outcome to missing d1$interv <- 1 d1$qsmk <- 1 d1$wt82_71 <- NA d.onesample <- rbind(d, d0, d1) # combining datasets # linear model to estimate mean outcome conditional on treatment and confounders # parameters are estimated using original observations only (interv= -1) # parameter estimates are used to predict mean outcome for observations with set # treatment (interv=0 and interv=1) fit <- glm(wt82_71 ~ qsmk + sex + race + age + I(age*age) + as.factor(education) + smokeintensity + I(smokeintensity*smokeintensity) + smokeyrs + I(smokeyrs*smokeyrs) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71*wt71), data=d.onesample) d.onesample$predicted_meanY <- predict(fit, d.onesample) # estimate mean outcome in each of the groups interv=-1, interv=0, and interv=1 return(c(mean(d.onesample$predicted_meanY[d.onesample$interv==-1]), mean(d.onesample$predicted_meanY[d.onesample$interv==0]), mean(d.onesample$predicted_meanY[d.onesample$interv==1]), mean(d.onesample$predicted_meanY[d.onesample$interv==1])- mean(d.onesample$predicted_meanY[d.onesample$interv==0]))) } # bootstrap results <- boot(data=nhefs, statistic=standardization, R=5) # generating confidence intervals se <- c(sd(results$t[,1]), sd(results$t[,2]), sd(results$t[,3]), sd(results$t[,4])) mean <- results$t0 ll <- mean - qnorm(0.975)*se ul <- mean + qnorm(0.975)*se bootstrap <- data.frame(cbind(c("Observed", "No Treatment", "Treatment", "Treatment - No Treatment"), mean, se, ll, ul)) bootstrap