/*------------------------------------------------------------------------------------------------* | PROGRAM: cr_npmle_8.sas | AUTHOR: Michael Hudgens | EMAIL: mhudgens@bios.unc.edu | WEB: http://www.bios.unc.edu/~mhudgens/soft.html | DATE: 1 April 2004 | | This is a Proc IML macro to compute nonparametric maximum likelhood estimator | for interval censored competing risks survival data based on | Hudgens, Satten and Longini (Biometrics, March 2001). | Contrary to the paper's EM algorithm, maximization is done via an IML constrained | optimization routine based on Quasi-Newton Methods. Confidence intervals | are computed via bootstrap as described in Hudgens, Satten and Longini. | | Note: 1. present version does not allow for truncation | 2. as in the Biometrics paper, intervals are assumed to be of the form [l, r]. | if intervals of the form (l, r] or [l, r) are desired, it is suggested that a small | number be added or subtracted to either the left or right endpoint, respectively, of all intervals. | | INPUT PARAMETERS, DATA and OUTPUT DATA: | | &data_in should be a matrix with each row corresponding to a | subject and the 3 columns containing variables l r type, which are the left and right interval endpoints | and the failure type. Failures with unknown type (including right censored observations) | should have failure type equal to 0. Other failure types should be numbered 1, 2, ... | up to the total number of failure types present in the data. For right censored observations, | the right endpoint of the interval should be set to a large number exceeding the right endpoint | of all observations that are not right censored. An example data set: | | data zido; | input l r type; | cards; | 1 112 1 | 30 33 1 | 5 255 2 | 320 400 2 | 50 10000 0 | | ×_in is a set of exact time points at which estimates and confidence | intervals are returned | | &boots is the number of bootstrap samples drawn for CI's. | --> Let &boots=0 if no bootstrap samples are desired. | | &est_out is an output data set containing the variables: | FTYPE Q_DATA P_DATA MASS CIF | where FTYPE is the failure type, | [Q_DATA, P_DATA] is an interval potentially containing mass, | MASS is the amount of mass contained in the interval, and | CIF is the cumulative incidence function at the right endpoint. | | ×_out is an output data set containing the variables: | FTYPE TIMES LEST UEST L_CI U_CI SE | where FTYPE is the failure type, | TIMES is one of the specific time points given in ×_in, | LEST is the lower bound on the cumulative incidence function at time point, | UEST is upper bound on time point, | L_CI is the lower 95% confidence interval based on bootstrap iterations, | U_CI is the upper 95% confidence interval based on bootstrap iterations, and | SE is the estimate of the standard error based on bootstrap iterations. | | &boot_out contains each bootstrap iteration's estimate of time points specified | in ×_in. These values are returned primarily for use in the accompanying | macro: cr_net_2.sas. Note there are two values returned for each time point - a lower | and upper bound - see Biometrics paper for details. | --> If the user choses, &boot_out can be left blank to conserve memory. | | An example call to the macro: | | %cr_npmle( data_in=zido, times_in = {48 102 187 366 568 730} , boots=0, | est_out=zest, times_out=ztest, boot_out=) | | | Comment: If largest observation has unknown failure type the NPMLE is non-unique beyond the largest observation time | In this scenario, assign all mass to first cumulative incidence function | | | DISCLAIMER: | | THIS INFORMATION IS PROVIDED PROVIDED "AS IS". THERE ARE NO | WARRANTIES, EXPRESSED OR IMPLIED, AS TO MERCHANTABILITY OR | FITNESS FOR A PARTICULAR PURPOSE REGARDING THE ACCURACY OF | THE MATERIALS OR CODE CONTAINED HEREIN. | | *------------------------------------------------------------------------------------------------*/ %macro cr_npmle(data_in=,times_in=,boots=, est_out=,times_out=,boot_out=); use &data_in; read all var{l r type}; nobs= nrow(l); J=max(type); /******************************************************** module to get support (maximal cliques) ********************************************************/ start mclique(l,r,type) global(_x, nobs, nparm, J, p, q, m); ppp= unique(r); nppp= ncol(ppp); q= j(J,nppp, .); m= j(J,1, .); p= j(J,nppp, .); * is largest observation right censored? if yes, flag=1; * check (1) by looking at largest r from observations with known type; * and (2) by looking at largest l from right censored observations; flag=1; ind=type>0; rmx=max(r[loc(ind=1)]); lmx=max(l); if (rmx>lmx) then flag=0; ind=(l=lmx & type>0); if (sum(ind)>0) then flag=0; if flag=1 then do; print("Largest observation has unknown failure type for dataset: &data_in."); print("All mass assigned to cumulative incidence function for type 1 failure."); print("Subsequent warnings concerning uniqueness are regarding earlier time points."); end; do tt = 1 to J; * get unique r values for type tt or 0 events; ind=(type=tt | type=0); if (flag=1 & tt>1) then ind=((type=tt | type=0) & l npp then nq= npp; else nq= i; m[tt]=ncol(unique(q[tt,1:nq])); q[tt,1:m[tt]] = unique(q[tt,1:nq]); do i= 1 to m[tt]; do jj= npp to 1 by -1; if ( pp[jj] >= q[tt,i] ) then p[tt,i]= pp[jj]; end; end; end; /* tt = 1 to J */ nparm=sum(m); finish mclique; /******************************************************** module to assign _x matrix for likelihood ********************************************************/ start assign_x(l,r,type) global(_x, nobs, nparm, J, m, q, p); _x= j(nobs, nparm, 0); counter=1; do tt = 1 to J; do jj= 1 to m[tt]; _x[,counter]= choose( (l <= q[tt,jj] & p[tt,jj] <= r) & (tt=type | type=0), 1, 0); counter=counter+1; end; end; finish assign_x; /******************************************************** log-likelihood function (LL) ********************************************************/ start LL(theta) global(_x,nparm); f=-1000000; if (min (_x*theta`) >0) then do; xlt= log(_x * theta`); f= xlt[+]; end; return(f); finish LL; /******************************************************** gradient vector (GRAD) *******************************************************/ start GRAD(theta) global(_x,nparm); g= j(1,nparm,1000); if (min (_x*theta`) >0) then do; tmp= _x # (1 / (_x * theta`) ); g= tmp[+,]; end; return(g); finish GRAD; /******************************************************** constraints and initial vector module *******************************************************/ start init; con= j(3, nparm + 2, .); con[1, 1:nparm]= 1.e-8; /* lower bound on parameter ranges */ con[2:3, 1:nparm]= 1; /* upper bound on parameter ranges + part of linear constraint */ con[3,nparm + 1]=0; /* linear constraint is an equality constraint */ con[3,nparm + 2]=1; /* linear constraint equals 1 */ x0= j(1, nparm, 1/nparm); * initial estimates ; finish init; /************************************************************* estimate the masses using quasi-newton technique *************************************************************/ optn= {1 0}; /* options */ tc={400 1000}; call mclique(l,r,type); * get maximal cliques matrix ; call assign_x(l,r,type); * assign x matrix ; call init; call nlpqn(rc,rx,"LL",x0,optn,con,tc,,,"GRAD"); * call the optimization routine ; /************************************************************* check uniqueness of npmle via projected hessian cf. hudgens (emory dissertation, 2000), gentleman and geyer (1994, biometrika), fletcher (1987) lack of uniqueness can only arise from right censored observations ************************************************************/ start unique_pg; * get columns of _x that correspond to possible paths consistent with data; i=loc(rx>(con[1,1]*100)); proj_x=_x[,i]; call svd(u,svdq,v,proj_x); rank_proj_x=nrow(svdq[loc(svdq>1E-14)]); if (rank_proj_x1) then lower[counter,iterate]=sdf[jj,ss-1]; upper[counter,iterate]=sdf[jj,ss]; end; if (p[jj,ss]<=time[1,tt] & time[1,tt]<=q[jj,ss+1]) then do; lower[counter,iterate]=sdf[jj,ss]; upper[counter,iterate]=sdf[jj,ss]; end; end; if (q[jj,m[jj]]0 then do; do h=1 to &boots; index=j(nobs,1,0); do i=1 to nobs; index[i,1]=int(uniform(1)*nobs+1); end; ltemp=l[index]; rtemp=r[index]; typetemp=type[index]; call mclique(ltemp,rtemp,typetemp); * get maximal cliques matrix ; call assign_x(ltemp,rtemp,typetemp); * assign x matrix ; call init; call nlpqn(rc,rx,"LL",x0,optn,con,tc,,,"GRAD"); * call the optimization routine ; call ciest; iterate=h+1; call lowup; end; start QUANT(ans,arg,q); temp=arg; temp[1,rank(arg)]=arg; nq=ncol(arg); qindex=int(q*nq); qmod=mod(q*nq,1); ans=min(temp); if (qindex>0 & qmod=0) then ans=temp[1,qindex]; if (qindex>0 & qmod>0) then ans=(temp[1,qindex]+temp[1,qindex+1])/2; finish QUANT; start STE(ans,arg); nq=ncol(arg); mean=sum(arg)/nq; smean=arg-repeat(mean,1,nq); ss=sum(smean#smean) ; ans=sqrt(ss/(nq-1)); finish STE; do tt=1 to ntimes; do jj=1 to J; counter=tt+ (jj-1)*ntimes; call QUANT(temp,lower[counter,2:(&boots+1)],.025); l_ci[counter,1]=temp; call QUANT(temp,upper[counter,2:(&boots+1)],.975); u_ci[counter,1]=temp; call STE(temp,upper[counter,2:(&boots+1)]); se[counter,1]=temp; end; end; bounds=j(ntimes*J,&boots*2,0); bounds[,1:&boots]=lower[,2:(&boots+1)]; bounds[,(&boots+1):(&boots*2)]=upper[,2:(&boots+1)]; %if "&boot_out" ne "" %then %do; create &boot_out from bounds; append from bounds; close &boot_out; %end; end; /************************************************************** write estimates and bootstrap confidence intervals for timepoints of interest *************************************************************/ lest=lower[,1]; uest=upper[,1]; create ×_out var {ftype times lest uest l_ci u_ci se}; append; close ×_out; %mend cr_npmle;