*--------------------------------------------------------------------*
| PROGRAM:      npmle_con_9.sas 
| AUTHOR:       Michael Hudgens         
| EMAIL:        mhudgens@bios.unc.edu
| WEB:          http://www.bios.unc.edu/~mhudgens/soft.html         
| DATE:         25 March 2002
|		3 May 2002 	minor comments added
|		7 June 2002	1. convergence criteria changed
|				 - allow for more iterations (400) and function calls (1000)
|				2. freed several matrices to conserve memory
|				3. added nonotes option to suppress iml print out in log file
|		4 Oct 2002	allowed for complete data	
|		18 Aug 2003	changed starting values for profile likelihood base CI
|				bisection search algorithm	
|		27 Aug 2003	increased number of iterations allowed in optimization algorithm
|		1 Apr 2004	posted on UNC website
|
|
| DESCRIPTION: proc iml macro to compute maximum likelhood estimator
|       for path probabilities of repeated binary responses with missing data.
|       in turn, we compute the marginal probability of an event at
|       each time point and the cumulative probability of 1st (and 2nd) event
|       up to each time point. the mle's are computed using a sas optimizer
|       for nonlinear functions subject to linear constraints. confidence
|       intervals are obtained via profile likelihood. an accompanying manuscript
|       can be found in Statistics in Medicine (2003, 22:463-479).
|
| INPUT PARAMETERS, DATA and OUTPUT DATA:
|       &data_in should have a record for each volunteer*day combination
|       for which there is a response. variables must include volunteer, day and response.
|       other variables are permitted, but will be ignored. for example, the following
|       (fictitious) data set would work:
|
|                Obs  volunteer   day    response  rx         PROTOCOL
|
|                1    J21700      40       0       vCP205       999   
|                2    R22511     180       0       vCP205       999   
|                3    V23407      40       0       vCP205       999   
|                4    S24312     180       1       vCP205       999   
|                5    V25919      40       1       vCP205       999   
|                6    S26631     180       1       vCP205       999   
|
|       the mle is then computed for all days present in data set. if there is a day  
|       for which some volunteer does not have a response, it is assumed to be missing.
|               
|       
|       &path_out is output data set containting path probability estimates and confidence intervals
|       the variables are:
|
|               PATH      EST      LCI    UCI
|
|                 
|       &marg_out is output data set containing marginal, cumulative and cumulative
|       repeat probibabilites and CI's. the variables are: 
|
|               DAY MARGINAL LMARG UMARG CUMULATIVE LCUM  UCUM SECOND LCUM2 UCUM2
|
|       &ci indicator variable for whether profile likelihood CI's are computed 
|		use 0 for no CIs
|		use 1 for CIs for marginal and cumulative probabilities only
|		use 2 for CIs for marginal, cumulative and path probabilities (warning: can be slow)
|
| TROUBLESHOOTING:
|	in the event that convergence problems occur, there are a few options readily available.
|
|	1. consider increasing the number of function iterations allowed in the termination criteria.
|	to do this, search "termination criteria" below and increase the values accordingly.
|
|	2. one might also consider changing the optimatization call from "nlpqn" to "nlpcg".
|	to do this, search "nlpqn" below and make appropriate changes.
|
|	before trying either 1 or 2, please make a copy of this macro, leaving the original unchanged.
|
| DISCLAIMER:
| 
|       THIS INFORMATION IS PROVIDED PROVIDED "AS IS".  THERE ARE NO
|       WARRANTIES, EXPRESSED OR IMPLIED, AS TO MERCHANTABILITY OR
|       FITNESS FOR A PARTICULAR PURPOSE REGARDING THE ACCURACY OF
|       THE MATERIALS OR CODE CONTAINED HEREIN.
|
*--------------------------------------------------------------------*;

        
%macro  npmle_con(data_in=,     path_out=,      marg_out=,      ci=);
 option nonotes;

proc sort data=&data_in out=sort1;   by volunteer day;
options nolabel;
proc transpose data=sort1 out=tran1;   by volunteer;   id day;   var response;
data data1; set tran1 (drop= volunteer _NAME_);

proc iml;        

        /******** data manipulation *******/
      use data1;
       read all into temp;
         var_names=contents();
         rdays=t(num(substr(var_names,2)));      
         free var_names;
         r=rank(rdays);
         i=r;
         i[,r]=1:ncol(rdays);
         day=unique(rdays);
      _data=temp[,i];
      free temp;
         
      ntimes= ncol(_data);
      nobs=nrow(_data);
      paths=2##ntimes;
     
      index=(loc(_data^=.));
      mindex=(loc(_data=.));
      temp=nrow(index)*ncol(index);
      if (temp<ntimes*nobs) then _data[mindex]=2;
 

        /********** matrices for computing marginal probabilities *****/
        _miss=j(ntimes,paths,0);
        pathmat=j(paths,ntimes,0);
        
        do ii = 1 to ntimes;
                steps=2##(ii-1);
                len=2##(ntimes-ii);             
                start=1;
                do jj = 1 to steps;
                        index=start:(start+len-1);
                        _miss[ntimes-ii+1,index]=1;
                        start=start+2*len;
                        end;
                end;
                
        do ii = 1 to paths;
                pathmat[ii,]=_miss[ntimes:1,paths-ii+1]`;
                end;
    
       /* for 4 time points, pathmat of the form:     0         0         0         0
                                                      0         0         0         1
                                                      0         0         1         0
                                                      0         0         1         1
                                                      0         1         0         0
                                                      0         1         0         1
                                                      0         1         1         0
                                                      0         1         1         1
                                                      1         0         0         0
                                                      1         0         0         1
                                                      1         0         1         0
                                                      1         0         1         1
                                                      1         1         0         0
                                                      1         1         0         1
                                                      1         1         1         0
                                                      1         1         1         1                     */            
    
    
        /****** index for cumulative probabilities */
       cindex=j(paths,ntimes,1);
       do ii = 1 to ntimes;
          index=1:(paths/(2##ii));
          cindex[index,ii]=0;
          end;  
                                                           
       /* for 4 time points, cindex of the form:
                                                      0         0         0         0
                                                      0         0         0         1
                                                      0         0         1         1
                                                      0         0         1         1
                                                      0         1         1         1
                                                      0         1         1         1
                                                      0         1         1         1
                                                      0         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1
                                                      1         1         1         1                   */


      /****** index for second event cumulative probabilities */
       cindex2=j(paths,ntimes,0);
       do ii = 2 to ntimes;
          temp=pathmat[,1:ii];
          index=unique((temp[,+]>1)#(1:paths)`);
          ntemp=ncol(index);
          cindex2[index[2:ntemp],ii]=1;
          end;                  
       free index temp;   
  /* for 4 time points, cindex of the form:           0         0         0         0
                                                      0         0         0         0
                                                      0         0         0         0
                                                      0         0         0         1
                                                      0         0         0         0
                                                      0         0         0         1
                                                      0         0         1         1
                                                      0         0         1         1
                                                      0         0         0         0
                                                      0         0         0         1
                                                      0         0         1         1
                                                      0         0         1         1
                                                      0         1         1         1
                                                      0         1         1         1
                                                      0         1         1         1
                                                      0         1         1         1                   */
        
                  
     /********************************************************
         module to assign _x matrix
     ********************************************************/
        start assign_x(x_data) global(_x,nobs,ntimes,_miss,paths);      
                _x= j(nobs, paths, 0);
                do ii = 1 to nobs;
                
                        ind=(x_data[ii,]=2);
                        nmiss=sum(ind);                 /* how many missing observations ?      */
                
                        npaths=2##nmiss;
                        counter=j(1,npaths,1);          /* how many possible paths ?            */
                
                        nmisscounter=1;
                        do jj= 1 to ntimes;
                
                                /* if data at time point not missing, easy */
                                if (x_data[ii,ntimes-jj+1]<2) then  
                                        counter[1,]=counter[1,]+2##(jj-1)*x_data[ii,ntimes-jj+1];
                        
                                /* if data missing at time point, use _miss matrix */
                                else do;                                        
                                        counter[1,]=counter[1,] + 2##(jj-1)*_miss[nmisscounter,1:npaths];
                                        nmisscounter=nmisscounter+1;
                                        end;
                                end;
                         _x[ii,counter]=1;      
                        end;
        finish assign_x;
        
      /********************************************************
        log-likelihood function (LL)
      ********************************************************/
      start LL(theta) global(_x,paths);
          f=-1000000;
         if (min (_x*theta`) >0) then do;  
                 xlt= log(_x * theta`);
                f= xlt[+];
                end;
           return(f);
      finish LL;
      
    
      /********************************************************
        gradient vector (GRAD)
      *******************************************************/
      start GRAD(theta) global(_x,paths);
         g= j(1,paths,0);
         tmp= _x # (1 /  (_x * theta`) );
         g= tmp[+,];
         return(g);
      finish GRAD;
       
      /********************************************************
        hessian (HESS)
      *******************************************************/
      start HESS(theta) global(_x,paths);
         g= j(paths,paths,0);
         tmp = diag (-  (1 /  (_x * theta`)##2 ));
         g=_x` * tmp * _x;
         return(g);
      finish HESS;

    
      /*************************************************************
        estimation using sas-iml nonlinear optimizer
      *************************************************************/
      
      optn= j(1,11,.); optn[1]= 1; optn[2]= 0;          * maximization, printing and other options;
      tc={10000 50000};					* termination criteria;
 
      
      con= j(3, paths + 2, .);                          * constraints;
      con[1, 1:paths]= 1.e-8;                           * lower bound on parameter ranges;
      con[2:3, 1:paths]= 1;                             * upper bound on parameter ranges + part of linear constraint;
      con[3,paths + 1]=0;                               * linear constraint is an equality constraint;
      con[3,paths + 2]=1;                               * linear constraint equals 1;

      x0= j(1, paths, 1/paths);                         * initial estimates;
      call assign_x(_data);                             * assign x matrix;
      free _miss _data;
      call nlpqn(rc,rx,"LL",x0,optn,con,tc,,,"GRAD");     * call the optimization routine;
      * call nlpcg(rc,rx,"LL",x0,optn,con,tc,,,"GRAD");   * alternative optimization routine;


      * print rc;                                       * return code concerning convergence;

        npmle=rx;
        marginal=rx*pathmat;                            * calculate marginal probabilities;
        cumulative=rx*cindex;                           * calculate cumulative probabilities;
        second=rx*cindex2;

      /*************************************************************
       check uniqueness of npmle via projected hessian  methods
       cf. gentleman and geyer (1994, biometrika), fletcher (1987)
      ************************************************************/
       start unique_pg;
        
                * get columns of _x that correspond to positive path probability estimates;
                i=loc(rx>(con[1,1]*100));
                proj_x=_x[,i];  
                call svd(u,q,v,proj_x);
                rank_proj_x=nrow(q[loc(q>1E-14)]);
                unique_flag=1;
                if (rank_proj_x<ncol(proj_x)) then do;
                        print "Warning: All nonzero path probability estimates are not unique for dataset: &data_in.";
                        unique_flag=0;
                end;
                free proj_x u q v;
                 
        finish UNIQUE_PG;
        
        call unique_pg; 
        free con;
     
     /*************************************************************
       profile likelihood confidence intervals for path probabilities
      ************************************************************/
      plow=j(1,paths, 0);       pup=j(1,paths, 0);      
    
      if (&ci>1) then do;
      * profile constraints ;
      pcon= j(4, paths + 2, .);         * number of rows equals two more than the number of linear constraints ;
                                        * first two rows are inequality constraints on parameteres eg [0, 1] here ;
                                        * next two rows are equality constraints (row 3 - sum to 1, row 4 - profile constraint);
      pcon[1, 1:paths]= 1.e-8;          * lower bound on parameter ranges ;
      pcon[2, 1:paths]= 1;              * upper bound on parameter ranges  ;
      pcon[3,1:paths]=1;                * coefficients on linear combination of parameters forming linear constraint ;
      pcon[3,paths + 1]=0;              * linear constraint is an equality constraint ;
      pcon[3,paths + 2]=1;              * linear constraint equals 1 ;
      pcon[4,paths + 1]=0;              * linear constraint is an equality constraint ;
                                        * other properties of second equality constraint are specified below in loop ;
      maxl=LL(npmle);
      
      do g = 1 to paths;                  
      
              * pcon below are coefficients on linear combination of parameters forming linear constraint;
              pcon[4,1:paths]=0;
              pcon[4,g]=1;
              mle=rx[1,g];
    
              do i = 1 to 2;            * i indexes upper and lower bound calculations ; 
                 diff=0;
                 if i=1 then do;        lower=mle; upper=0.999999; end;        * bracket the confidence interval endpoint;
                 if i=2 then do;        lower=1e-8; upper=mle; end;
                 eps=upper-lower;
                              
                 do  while(eps>.001);                                                   * bisection method;
 
                        pcon[4,paths + 2]= (upper+lower)/2;                             * midpoint;
    
                        * choose starting values which satisfy profile constraints;
                        mass1=pcon[4,paths+2]/sum(pcon[4,1:paths]);
                        mass2=(1-pcon[4,paths+2])/sum(1-pcon[4,1:paths]);
                        x0=mass1#t(pcon[4,1:paths]) + mass2#t(1-pcon[4,1:paths]);
    
                        call nlpqn(rc1,rx,"LL",x0,optn,pcon,tc,,,"GRAD"); 
                        diff=2*(maxl-LL(rx));   

                        if (i=1) then do;
                                if (diff<3.841) then lower=(upper+lower)/2;
                                if (diff>3.841) then upper=(upper+lower)/2;
                                end;
           
                        if (i=2) then do;
                                if (diff<3.841) then upper=(upper+lower)/2;
                                if (diff>3.841) then lower=(upper+lower)/2;
                                end;
                        eps=upper-lower;
                         
                end;
            
                if (i=1) then pup[1,g]=upper;
                if (i=2) then plow[1,g]=lower;
            
            
            end;
        end;
      end; 
     
       /*************************************************************
       profile likelihood confidence intervals for marginal,] cumulative and cumulative repeat probabilities
      ************************************************************/
        low=j(3,ntimes, 0);     up=j(3,ntimes, 0);

      if (&ci>0) then do;
      * profile constraints ;
      pcon= j(4, paths + 2, .);         * number of rows equals two more than the number of linear constraints ;
                                        * first two rows are inequality constraints on parameteres eg [0, 1] here ;
                                        * next two rows are equality constraints (row 3 - sum to 1, row 4 - profile constraint);
      pcon[1, 1:paths]= 1.e-8;          * lower bound on parameter ranges ;
      pcon[2, 1:paths]= 1;              * upper bound on parameter ranges  ;
      pcon[3,1:paths]=1;                * coefficients on linear combination of parameters forming linear constraint ;
      pcon[3,paths + 1]=0;              * linear constraint is an equality constraint ;
      pcon[3,paths + 2]=1;              * linear constraint equals 1 ;
      pcon[4,paths + 1]=0;              * linear constraint is an equality constraint ;
                                        * other properties of second equality constraint are specified below in loop ;
      maxl=LL(npmle);
      
      do g = 1 to 3;                    * g indexes marginal, cumulative and second ;
        start=1;
        if g>1 then start=2;            * cumulative and second need only be computed after first time point;
        do h = start to ntimes;     
      
                * coefficients on linear combination of parameters forming linear constraint assigned below;
                if (g=1) then do;       pcon[4,1:paths]=t(pathmat[,h]);         mle=marginal[1,h];      end;
                if (g=2) then do;       pcon[4,1:paths]=t(cindex[,h]);          mle=cumulative[1,h];    end;
                if (g=3) then do;       pcon[4,1:paths]=t(cindex2[,h]);         mle=second[1,h];        end;
 
              do i = 1 to 2;            * i indexes upper and lower bound calculations ; 
                 diff=0;
                 if i=1 then do;        lower=mle; upper=0.999999; end;        * bracket the confidence interval endpoint;
                 if i=2 then do;        lower=1e-8; upper=mle; end;
  
                 eps=upper-lower;
                              
                 do  while(eps>.001);                                                   * bisection method;
 
                        pcon[4,paths + 2]= (upper+lower)/2;                             * midpoint;
    
                        * choose starting values which satisfy profile constraints;
                        mass1=pcon[4,paths+2]/sum(pcon[4,1:paths]);
                        mass2=(1-pcon[4,paths+2])/sum(1-pcon[4,1:paths]);
                        x0=mass1#t(pcon[4,1:paths]) + mass2#t(1-pcon[4,1:paths]);
    
                        call nlpqn(rc1,rx,"LL",x0,optn,pcon,tc,,,"GRAD"); 
                        diff=2*(maxl-LL(rx));   

                        if (i=1) then do;
                                if (diff<3.841) then lower=(upper+lower)/2;
                                if (diff>3.841) then upper=(upper+lower)/2;
                                end;
           
                        if (i=2) then do;
                                if (diff<3.841) then upper=(upper+lower)/2;
                                if (diff>3.841) then lower=(upper+lower)/2;
                                end;
                        eps=upper-lower;
                         
                end;
            
                if (i=1) then up[g,h]=upper;
                if (i=2) then low[g,h]=lower;
            
            
            end;
        end;
        end;
     end; 
     
     
        /************************************************************** 
          write to marginal/cumulative output file
        **************************************************************/
   
        lmarg=round(low[1,],.01); umarg=round(up[1,],.01); lcum=round(low[2,],.01); ucum=round(up[2,],.01); lcum2=round(low[3,],.01); ucum2=round(up[3,],.01);  
        marginal=round(marginal,.01);
        cumulative=round(cumulative,.01);
        second=round(second,.01);
    
        create &marg_out var {day marginal lmarg umarg cumulative lcum ucum second lcum2 ucum2};        
                append; close &marg_out;
                
        path=rowcat(char(pathmat,1));   est=round(npmle`,.01);     lci=round(plow`,.01);      uci=round(pup`,.01);

        create &path_out var {path est lci uci};
                append; close &path_out;                
	  
         
option notes;        
%mend  npmle_con;