# Adapted from programs found here https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/ ################################################################ # PROGRAM 14.1 # Preprocessing, ranks of extreme observations, IP weights for censoring # Data from NHEFS ################################################################ # rm(list=ls(all=TRUE)) setwd("V:\\sismid2020") # contains SAS/CSV datafile nhefs <- read.csv("nhefs.csv") nhefs <- as.data.frame(nhefs) dim(nhefs) # some preprocessing of the data nhefs$id <- 1:nrow(nhefs) nhefs$cens <- as.numeric(is.na(nhefs$wt82)) # restricting data for uncensored, for comparison with observed outcome nhefs0 <- subset(nhefs, cens == 0) dim(nhefs0) # estimation of denominator of ip weights for C cens.prob.est <- glm(as.factor(cens)~ as.factor(qsmk) + as.factor(sex) + as.factor(race) + age + I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2), data = nhefs, family = binomial("logit")) # summary(cens.prob.est) nhefs0$w.cens <- 1/(1-predict(cens.prob.est, nhefs0, type = "response")) ################################################################## # PROGRAM 14.2 # G-estimation of a 1-parameter structural nested mean model # Brute force search # Data from NHEFS ################################################################## #install.packages("geepack") require(geepack) nhefs.g.est <- nhefs0 ################################################################## # G-estimation: Checking multiple possible values of psi*/ ################################################################## data <- nhefs.g.est grid <- seq(from = 2,to = 5, by = 0.1) # set by = 0.001 for finer estimate j = 0 store.Hpsi.coefs <- double(length(grid)) for (i in grid){ psi = i j = j+1 data$Hpsi <- data$wt82_71 - psi * data$qsmk gee.obj <- geeglm(qsmk ~ as.factor(sex) + as.factor(race) + age + I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2)+Hpsi, data = data, weight = w.cens, id=id, corstr="independence", family = binomial(logit)) store.Hpsi.coefs[j] <- coef(gee.obj)["Hpsi"] cat("Iteration", j, "completed\n") } store.results <- as.data.frame(cbind(grid, abs(store.Hpsi.coefs))) names(store.results) names(store.results) <- c("grid", "Hpsi.est") store.results[store.results$Hpsi.est == min(store.results$Hpsi.est),] # for 0.001 interval: # grid Hpsi.est #1447 3.446 1.903e-06 ################################################################## # G-estimation: Checking single possible values of psi*/ ################################################################## nhefs.g.est$psi <- 3.446 nhefs.g.est$Hpsi <- nhefs.g.est$wt82_71 - nhefs.g.est$psi * nhefs.g.est$qsmk gee.obj <- geeglm(qsmk ~ as.factor(sex) + as.factor(race) + age + I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2)+Hpsi, data = nhefs.g.est, weight = w.cens, id=id, corstr="independence", family = binomial(logit)) summary(gee.obj) coef(gee.obj)["Hpsi"] # Hpsi #-1.9e-06