# Adapted from programs found here https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/ ################################################################ # PROGRAM 13.1 # Estimating the mean outcome within levels of treatment # and confounders: Data from NHEFS ################################################################ # rm(list=ls(all=TRUE)) nhefs <- read.csv("V:\\sismid2020\\nhefs.csv") nhefs <- as.data.frame(nhefs) # restricting data for uncensored, for comparison with observed outcome nhefs$cens <- as.numeric(is.na(nhefs$wt82)) nhefs0 <- subset(nhefs, cens == 0) dim(nhefs0) table(nhefs0$qsmk) # untreated vs treated summary(nhefs0$wt82_71) ################################################################ # PROGRAM 13.3 # Standardizing the mean outcome to the baseline confounders: # Data from NHEFS ################################################################ # 1st copy: equal to original one nhefs0$interv <- rep(-1, nrow(nhefs0)) # 2nd copy: treatment set to 0, outcome to missing nhefs.untr <- nhefs0 nhefs.untr$interv <- rep(0, nrow(nhefs.untr)) nhefs.untr$qsmk <- rep(0, nrow(nhefs.untr)) nhefs.untr$wt82_71 <- rep(NA, nrow(nhefs.untr)) # 3rd copy: treatment set to 1, outcome to missing nhefs.tr <- nhefs0 nhefs.tr$interv <- rep(1, nrow(nhefs.tr)) nhefs.tr$qsmk <- rep(1, nrow(nhefs.tr)) nhefs.tr$wt82_71 <- rep(NA, nrow(nhefs.tr)) # create a dataset with 3 copies of each subject onesample <- as.data.frame(rbind(nhefs0, nhefs.untr, nhefs.tr)) #onesample$qsmk dim(onesample) # Estimates # linear model to estimate mean outcome conditional on treatment & confounders, # parameters are estimated using original observations only (interv= -1), # parameter estimates are used to predict mean outcome for observations # with treatment set to 0 (interv=0) and to 1 (innterv=1); glm.obj <- glm(wt82_71~ as.factor(qsmk) + as.factor(sex) + as.factor(race) + age + I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2), data = onesample) summary(glm.obj) #onesample$meanY <- c(predict(glm.obj, nhefs0, type = "response"), # predict(glm.obj, nhefs.untr, type = "response"), # predict(glm.obj, nhefs.tr, type = "response")) onesample$meanY <- predict(glm.obj, onesample, type = "response") # estimate mean outcome in each of the groups interv=0, and interv=1; # this mean outcome is a weighted average of the mean outcomes # in each combination of values of treatment and confounders, # that is, the standardized outcome; #index <- sort(unique(onesample$interv)) #ia <- mean(onesample$meanY[onesample$interv == index[1]]) #ib <- mean(onesample$meanY[onesample$interv == index[2]]) #ic <- mean(onesample$meanY[onesample$interv == index[3]]) #predicted.values <- c(ia, ib, ic) #cbind(index, predicted.values) with(onesample, tapply(meanY, list(interv), mean))