/*************************************************************** PROGRAM 15.1 Estimating the average causal effect within levels of confounders under the assumption of effect-measure modification by smoking intensity ONLY Data from NHEFS ***************************************************************/ clear use "P:\CIBook\NHEFSdata\nhefs.dta" gen byte cens = (wt82 == .) /* provisionally ignore subjects with missing values*/ *Analysis restricted to N=1566 with no missing values*/ foreach var of varlist wt82 education{ drop if `var'==. } * regression on covariates, allowing for some effect modfication regress wt82_71 c.qsmk##c.smokeintensity c.smokeintensity##c.smokeintensity sex race c.age##c.age /// ib(last).education c.smokeyrs##c.smokeyrs ib(last).exercise ib(last).active c.wt71##c.wt71 /* regression on covaraites, not allowing for effect modification*/ regress wt82_71 qsmk c.smokeintensity##c.smokeintensity sex race c.age##c.age /// ib(last).education c.smokeyrs##c.smokeyrs ib(last).exercise ib(last).active c.wt71##c.wt71 /*************************************************************** PROGRAM 15.2 Estimating and plotting the propensity score Data from NHEFS ***************************************************************/ clear use "P:\CIBook\NHEFSdata\nhefs.dta" gen byte cens = (wt82 == .) /* provisionally ignore subjects with missing values*/ /*Analysis restricted to N=1629 with no missing covariates*/ /*leave missing wt81_72 in the data*/ foreach var of varlist education{ drop if `var'==. } logit qsmk sex race c.age##c.age ib(last).education c.smokeintensity##c.smokeintensity /// c.smokeyrs##c.smokeyrs ib(last).exercise ib(last).active c.wt71##c.wt71 predict ps, pr bys qsmk: summarize ps /*plotting the estimated propensity score*/ histogram ps, width(0.05) start(0.025) frequency fcolor(none) lcolor(black) lpattern(solid) addlabel addlabopts(mlabcolor(black) mlabposition(12) mlabangle(zero)) /// ytitle(No. Subjects) ylabel(#4) xtitle(Estimated Propensity Score) xlabel(#15) /// by(, title(Estimated Propensity Score Distribution) subtitle(By Quit Smoking Status)) /// by(, legend(off)) by(qsmk, style(compact) colfirst) subtitle(, size(small) box bexpand) /*************************************************************************** PROGRAM 15.3 Stratification and outcome regression using deciles of the propensity score Data from NHEFS ***************************************************************************/ /*calculation of deciles of ps*/ xtile ps_dec = ps, nq(10) by ps_dec qsmk, sort: summarize ps /*stratification on PS deciles, allowing for effect modification*/ by ps_dec: ttest wt82_71, by(qsmk) /*regression on PS deciles, not allowing for effect modifciation*/ regress wt82_71 qsmk ib(last).ps_dec /*************************************************************** PROGRAM 15.4 Standardization and outcome regression using the propensity score Data from NHEFS ***************************************************************/ clear use "P:\CIBook\NHEFSdata\nhefs.dta" gen byte cens = (wt82 == .) /*only discard individuals with missing covariates for PS estimation*/ drop if education==. logit qsmk sex race c.age##c.age ib(last).education c.smokeintensity##c.smokeintensity /// c.smokeyrs##c.smokeyrs ib(last).exercise ib(last).active c.wt71##c.wt71 predict ps, pr gen meanY_b = . save nhefs_ps /*regression on the propensity score, not allowing for effect modification*/ regress wt82_71 qsmk ps /*standardization by propensity score, agnostic regarding effect modification*/ gen copy1 = 2 expand copy1, generate(interv) replace interv = -1 if interv == 0 replace interv = 0 if interv == 1 replace copy1 = 1 if interv == 0 expand copy1, generate(interv2) replace interv = 1 if interv2 ==1 drop interv2 drop copy1 tab interv replace wt82_71 = . if interv != -1 replace qsmk = 0 if interv == 0 replace qsmk = 1 if interv == 1 by interv, sort: summarize qsmk /*estimation in original sample*/ quietly regress wt82_71 qsmk ps predict predY by interv, sort: summarize predY /*store original sample results in a matrix for later comparison*/ gen meanY=. summarize predY if(interv == -1) replace meanY = r(mean) if(interv==-1) summarize meanY if(interv==-1) matrix input observe = (-1,`r(mean)') summarize predY if(interv == 0) replace meanY = r(mean) if(interv==0) summarize meanY if(interv==0) matrix observe = (observe \0,`r(mean)') summarize predY if(interv == 1) replace meanY = r(mean) if(interv==1) summarize meanY if(interv==1) matrix observe = (observe \1,`r(mean)') /*check that matrix is correct*/ matrix list observe by interv, sort: summarize predY program drop bootstdz /*bootstrap program*/ capture program drop bootstdz program define bootstdz, rclass u nhefs_ps, clear preserve /*draw bootstrap sample from original observations*/ bsample /*create copies with each value of qsmk in bootstrap sample*/ gen copy1 = 2 expand copy1, generate(interv) replace interv = -1 if interv == 0 replace interv = 0 if interv == 1 replace copy1 = 1 if interv == 0 expand copy1, generate(interv2) replace interv = 1 if interv2 ==1 drop interv2 drop copy1 replace wt82_71 = . if interv != -1 replace qsmk = 0 if interv == 0 replace qsmk = 1 if interv == 1 tab interv /*run regression*/ regress wt82_71 qsmk ps predict predY_b summarize predY_b if(interv == -1) replace meanY_b = r(mean) if(interv==-1) summarize meanY_b if(interv==-1) return scalar boot_obs = r(mean) summarize predY_b if(interv == 0) replace meanY_b = r(mean) if(interv==0) summarize meanY_b if(interv==0) return scalar boot_0 = r(mean) summarize predY_b if(interv == 1) replace meanY_b = r(mean) if(interv==1) summarize meanY_b if(interv==1) return scalar boot_1 = r(mean) restore end simulate boot_obs=r(boot_obs), reps(500) seed(1): bootstdz gen interv = -1 rename boot_obs meanY_b save boot_obs simulate boot_0=r(boot_0), reps(500) seed(1): bootstdz gen interv = 0 rename boot_0 meanY_b save boot_0 simulate boot_1=r(boot_1), reps(500) seed(1): bootstdz gen interv = 1 rename boot_1 meanY_b save boot_1 use boot_obs append using boot_0 append using boot_1 gen std=. summarize meanY_b if(interv == -1) replace std = r(sd) if(interv==-1) matrix input boot_std = (-1,`r(sd)') summarize meanY_b if(interv == 0) replace std = r(sd) if(interv==0) matrix boot_std= (boot_std \0,`r(sd)') summarize meanY_b if(interv == 1) replace std = r(sd) if(interv==1) matrix boot_std = (boot_std \1,`r(sd)') matrix list boot_std matrix list observe matrix lb = J(3,2,0) matrix list lb matrix ub = J(3,2,0) matrix list ub forvalues i = 1/3{ matrix lb[`i',2] = (observe[`i',2]-1.96*boot_std[`i',2]) matrix lb[`i',1] = observe[`i',1] matrix ub[`i',2] = (observe[`i',2]+1.96*boot_std[`i',2]) matrix ub[`i',1] = observe[`i',1] } matrix list lb matrix list ub matrix lb = lb[1...,2] matrix ub = ub[1...,2] matrix results=observe,lb,ub matrix list results