%MACRO REDMON( DATA = _LAST_, /* NAME OF SAS DATASET */ X = X, /* NAME OF PREDICTOR VARIABLE */ Y = Y, /* NAME OF RESPONSE VARIABLE */ WEIGHT =, /* OPTIONAL WEIGHT VARIABLE */ METHOD =BEST, /* SPECIFY directn OF TREND */ XMODE =, /* SPECIFY MAXIMUM OR MINIMUM FOR UMBRELLA FIT */ ALPHA =, /* TARGET OVERALL TYPE-I ERROR */ SLS =, /* BACKWARD ELIMINATION SIGNIFICANCE LEVEL TO STAY */ PLOT =, /* REQUEST HIGH RESOLUTION PLOTS */ TEMPCHAR=__, /* FIRST TWO CHARACTERS OF OUTPUT DATA SETS */ MINSETS =1, /* MINIMUM NUMBER OF LEVEL SETS DESIRED */ MAXSETS =999999999, /* MAXIMUM NUMBER OF LEVEL SETS DESIRED */ EBAR =CHOOSE, /* EXACT PVALUE IF N<200, APPROXIMATE OTHERWISE */ NITER =2000, /* NUMBER OF ITERATIONS FOR PIECEWISE-LINEAR ESTIMATE */ TWOPHASE=YES, /* REQUEST TWO-PHASE LINEAR FIT */ CLEANUP =NO, /* REMOVE OUTPUT DATASETS */ DETAILS =NO ); /* SAS MACRO FOR REDUCED MONOTONIC REGRESSION */ /* version 2.0a last modified 7/29/01 */ ************************************************************************************; %local error; %let error=0; %if %length(&X)=0 %then %do; %put E R R O R : Independent variable(s) must be specified.; %let error=1; %end; %if %length(&Y) = 0 %then %do; %put E R R O R : Dependent variable must be specified.; %let error=1; %end; %let METHOD=%upcase(&Method); %if not %eval( (%upcase(&Method) eq BEST) or (%upcase(&Method) eq UP) or (%upcase(&Method) eq UPDOWN) or (%upcase(&Method) eq DOWNUP) or (%upcase(&Method) eq RAWDATA) or (%upcase(&Method) eq DOWN)) %then %do; %put E R R O R: Invalid Method (&Method); %let error=1; %end; proc print data=&data(obs=0); var &X &Y &WEIGHT; run; %if &syserr ne 0 %then %let error=1; %if ((%datatyp(&minsets)=CHAR)) %then %do; %put E R R O R: MINSETS= should be numeric; %let error=1; %end; %if ((%datatyp(&maxsets)=CHAR)) %then %do; %put E R R O R: MAXSETS= should be numeric; %let error=1; %end; %if (&minsets gt &maxsets) %then %do; %put E R R O R: MINSETS should not be greater than MAXSETS; %let error=1; %end; %if (%length(&alpha)>0) %then %do; %if (%datatyp(&alpha)=CHAR) %then %do; %put E R R O R: ALPHA= should be numeric; %let error=1; %end; %IF(%sysevalf(&alpha>1) or %sysevalf(&alpha<0)) %THEN %DO; %put E R R O R: ALPHA should be between 0 and 1; %let error=1; %END; %end; %if (%length(&sls)>0) %then %do; %if (%datatyp(&sls)=CHAR) %then %do; %put E R R O R: SLS= should be numeric; %let error=1; %end; %IF(%sysevalf(&sls>1) or %sysevalf(&sls<0)) %THEN %DO; %put E R R O R: SLS should be between 0 and 1; %let error=1; %END; %end; %if &error=1 %then %goto theend; %if ((&alpha ne) and (&sls ne)) %then %do; %put; %put W A R N I N G: ALPHA= and SLS= should not both be specified. ;; %put . . . . . . .: ALPHA= will be ignored.;; %end; %if (&alpha eq) %then %let alpha=.05; %if (&sls ne) %then %put N O T E: SLS= &sls has been requested by the user.; %if (&alpha gt .2) %then %put W A R N I N G: ALPHA=&alpha > .2 ==> Formula used to determine SLS may not be accurate.; %if (&alpha lt .01) %then %put W A R N I N G: ALPHA=&alpha < .01 ==> Formula used to determine SLS may not be accurate.; %let version=REDMON 2.0; ************************************************************************************; ****************************CALCULATE LOESS FIT*************************************; ods listing close; ods output fitsummary=&tempchar.fit(where= (label1 in('Residual Sum of Squares','Equivalent Number of Parameters'))); proc loess data=&data ; model &y=&x /dfmethod=exact degree=2; %if (&weight ne) %then %do; weight &weight;where &weight>0;;%end;run; proc transpose data=&tempchar.fit(keep=nvalue1) out=&tempchar.loess(drop=_name_);run; ods listing; data &tempchar.data0; set &data; __sort__=_N_;run; PROC IML; START GETSLS2(METHOD,N,ALPHA); **********************************; ********** IF N > 10 *************; **********************************; IF ALPHA=0 then return(0); IF METHOD="ISO" THEN DO; CUT1=.09; CUT2=.35; CUT3=.85;END; IF METHOD="MONO" THEN DO; CUT1=.10; CUT2=.35; CUT3=.90;END; c1 =log10(cut1);c2=log10(cut2);c3=log10(cut3); IPARMS = {0.274183629 -0.61339802 0.0719624247 1.2611012317 0.0317470792 0.003155934 0.0732998728 -0.027452909 0.0076558526 0.1631296174 0.0217868174 20.988615482 0.004849829 0.9057589487 17.045849192 -0.004498265 -0.139142841 -1.445676052}`; MPARMS = { -0.204249698 -0.541116967 0.0518902964 1.0750432418 0.163572902 -0.029442796 0.0331070031 0.0045048746 0.0001108053 0.0783485229 0.0485448463 -2.551458784 -0.004644009 0.6984586961 127.14358373 0.010996428 -0.105769239 -23.01426022}`; *** CREATE REGRESSION VARIABLES *** CREATE CORRECT VARIABLES; INT=1;LOGN=LOG10(N);LOGN2=LOGN**2; LNX = LOG10(ALPHA);LOGNLNX = LOGN*LNX;LOGN2LNX=LOGN2*LNX; LNX2 = LNX**2; LOGNLNX2=LOGN*LNX2; LNN2LNX2=LOGN2*LNX2; S1=MAX(0,lnx-c1)**2;S2=MAX(0,lnx-c2)**2;S3=MAX(0,lnx-c3)**2; LOGNS1=LOGN*S1;LOGNS2=LOGN*S2;LOGNS3=LOGN*S3; LOGN2S1=LOGN2*S1;LOGN2S2=LOGN2*S2;LOGN2S3=LOGN2*S3; X=INT||LOGN||LOGN2||LNX||LOGNLNX||LOGN2LNX||LNX2||LOGNLNX2||LNN2LNX2||S1||S2||S3||LOGNS1||LOGNS2||LOGNS3||LOGN2S1||LOGN2S2||LOGN2S3; IF METHOD="ISO" THEN DO; **** ISOTONIC QUANTILE ****; IF ALPHA >= 1-1/N THEN ASTAR = 1; ELSE IF (N<=50) THEN DO; IF (ALPHA <= 1/exp(lgamma(N+1))) THEN ASTAR = 0; ELSE ASTAR=min(1,10**(X*IPARMS)); END; ELSE ASTAR = min(1,10**(X*IPARMS)); END; *** END IF METHOD="ISO" ***; IF METHOD="MONO" THEN DO; *** MONOTONIC QUANTILE ***; IF (N<=50) THEN DO; IF (ALPHA <= 2/exp(lgamma(N+1))) THEN ASTAR = 0; ELSE ASTAR=min(1,10**(X*MPARMS)); END; ELSE ASTAR = min(1,10**(X*MPARMS)); END; *** END IF METHOD ="MONO"; RETURN(ASTAR); FINISH; ***********************************************************************************; ***********************************************************************************; START GETSLS1(METHOD,N,ALPHA); **********************************; *********** IF N < 10 ************; **********************************; IF N < 11 THEN DO; iparms={ -0.333333333 2 0 0 0 0, -0.032505144 0.7289546211 1.1158420043 -0.115954327 -0.086689866 -2.51114634, -0.00340173 0.4358599033 1.0985380616 -0.059505263 -0.034615598 2.2949399171 , -0.000071855 0.3383265822 1.0315538836 -0.011586897 -0.056733998 3.282091571, 0.000158649 0.2983828763 0.9817900899 0.00037095 -0.076633943 3.7080799225 , 0.0001194891 0.2687178021 0.9362563641 0.0067186214 -0.086020047 3.8065494943, 0.0001357423 0.2441620358 0.9011831041 0.0142412769 -0.095635737 3.9048289996, 0.0000446032 0.2297589914 0.8665104196 0.0134899479 -0.097464078 4.009904154}; mparms={ -0.333333333 1 0 0 0 0 , -0.037869437 0.4284554817 0.2390149202 0 -0.131138732 0.459634995, -0.00388654 0.2331141885 0.2467902018 -0.015020859 -0.02357981 0.612502056, -0.000407838 0.1819129037 0.2179957771 -0.009553287 -0.000383983 0.831936387, -0.000014936 0.1558718227 0.2011540262 -0.000545641 -0.000516251 0.9467970359, -0.000012193 0.1393318743 0.1908597483 0.0044980836 -0.003093665 1.026669995, -0.000038927 0.1283303159 0.1828617837 0.0046909015 -0.003256167 1.0745714796, -0.000082433 0.1195370928 0.1797960651 0.0056329456 -0.006357914 1.087573025}; c1=.05; c2=.10; c3=.70; INT=1; X=ALPHA; X2=X**2;s1=MAX(0,X-c1);S2=MAX(0,X-c2);S3=MAX(0,X-c3); XX=INT||X||X2||S1||S2||S3; *** ISOTONIC QUANTILE ***; IF ALPHA<=1/exp(lgamma(N+1)) THEN ISLS=0; ELSE IF ALPHA>=1-1/N THEN ISLS = 1; ELSE ISLS=min(1,XX*t(IPARMS[N-2,])); *** MONOTONIC QUANTILE ***; IF ALPHA<=2/exp(lgamma(N+1)) THEN MSLS=0; ELSE MSLS=min(1,XX*t(MPARMS[N-2,])); IF METHOD="ISO" THEN RETURN(ISLS); IF METHOD="MONO" THEN RETURN(MSLS); END; ******** END IF N < 11 ***********; FINISH; ***********************************************************************************; **********************************************************************************; START PADJUST1(METHOD,N,PRAW); **********************************; *********** IF N < 10 ************; **********************************; IF N < 11 THEN DO; iparms={ -0.333333333 2.0000000000 0.0000000000 0 0 0, -0.032505144 0.7289546211 1.1158420043 -0.115954327 -0.086689866 -2.51114634, -0.00340173 0.4358599033 1.0985380616 -0.059505263 -0.034615598 2.2949399171 , -0.000071855 0.3383265822 1.0315538836 -0.011586897 -0.056733998 3.282091571, 0.000158649 0.2983828763 0.9817900899 0.00037095 -0.076633943 3.7080799225 , 0.0001194891 0.2687178021 0.9362563641 0.0067186214 -0.086020047 3.8065494943, 0.0001357423 0.2441620358 0.9011831041 0.0142412769 -0.095635737 3.9048289996, 0.0000446032 0.2297589914 0.8665104196 0.0134899479 -0.097464078 4.009904154}; mparms={ -0.333333333 1.0000000000 0.0000000000 0 0 0.000000000, -0.037869437 0.4284554817 0.2390149202 0 -0.131138732 0.459634995, -0.00388654 0.2331141885 0.2467902018 -0.015020859 -0.02357981 0.612502056, -0.000407838 0.1819129037 0.2179957771 -0.009553287 -0.000383983 0.831936387, -0.000014936 0.1558718227 0.2011540262 -0.000545641 -0.000516251 0.9467970359, -0.000012193 0.1393318743 0.1908597483 0.0044980836 -0.003093665 1.026669995, -0.000038927 0.1283303159 0.1828617837 0.0046909015 -0.003256167 1.0745714796, -0.000082433 0.1195370928 0.1797960651 0.0056329456 -0.006357914 1.087573025}; c1=.05; c2=.10; c3=.70; IF PRAW=. THEN RETURN(.); IF PRAW=1 THEN RETURN(1); IF PRAW=0 THEN DO; IF METHOD="ISO" THEN RETURN (1/exp(lgamma(N+1))); IF METHOD="MONO" THEN RETURN (2/exp(lgamma(N+1)));END; IF METHOD="ISO" THEN PARMS=IPARMS[N-2,]; ELSE IF METHOD="MONO" THEN PARMS=MPARMS[N-2,]; b0=parms[1];b1=parms[2];b2=parms[3]; IF praw>(b0+b1*c1+b2*c1**2) then do; b0=b0-parms[4]*c1;b1=b1+parms[4];end; IF praw>(b0+b1*c2+b2*c2**2) then do; b0=b0-parms[5]*c2;b1=b1+parms[5];end; IF praw>(b0+b1*c3+b2*c3**2) then do; b0=b0+parms[6]*c3**2;b1=b1-2*c3*parms[6];b2=b2+parms[6];end; if b2=0 then x = (praw-b0)/b1; else x = 1/b2*(-b1+sqrt(b1**2+4*b2*praw-4*b2*b0))/2; *return(max(praw,min(1,x))); if method="ISO" then return(max(min(1,x),1/exp(lgamma(N+1)))); if method="MONO" then return(max(min(1,x),2/exp(lgamma(N+1)))); END; ******** END IF N < 11 ***********; **********************************; ********** IF N > 10 *************; **********************************; FINISH; ***********************************************************************************; ***********************************************************************************; ***********************************************************************************; **********************************************************************************; START PADJUST2(METHOD,N,PRAW); **********************************; ********** IF N > 10 *************; **********************************; if praw=. then return(.); if praw=1 then return(1); else if praw=0 then do; if N>50 then return(0); else if method="ISO" then return(1/exp(lgamma(N+1))); else if method="MONO" then return(2/exp(lgamma(N+1))); end; else do; IF METHOD="ISO" THEN DO; CUT1=.09; CUT2=.35; CUT3=.85;END; IF METHOD="MONO" THEN DO; CUT1=.10; CUT2=.35; CUT3=.90;END; c1 =log10(cut1);c2=log10(cut2);c3=log10(cut3); cuts=c1||c2||c3; logn=log10(N);logn2=logn**2; IPARMS = {0.274183629 -0.61339802 0.0719624247 1.2611012317 0.0317470792 0.003155934 0.0732998728 -0.027452909 0.0076558526 0.1631296174 0.0217868174 20.988615482 0.004849829 0.9057589487 17.045849192 -0.004498265 -0.139142841 -1.445676052}`; MPARMS = { -0.204249698 -0.541116967 0.0518902964 1.0750432418 0.163572902 -0.029442796 0.0331070031 0.0045048746 0.0001108053 0.0783485229 0.0485448463 -2.551458784 -0.004644009 0.6984586961 127.14358373 0.010996428 -0.105769239 -23.01426022}`; IF METHOD = "ISO" THEN B = IPARMS; IF METHOD = "MONO" THEN B = MPARMS; lny=log10(praw); b0 = b[1]+b[2]*logn+b[3]*logn2; b1 = b[4]+b[5]*logn+b[6]*logn2; b2 = b[7]+b[8]*logn+b[9]*logn2; do i = 1 to 3; lncut=(cuts[i]); if(lny>b0+b1*lncut+b2*lncut**2) then do; incrmnt = b[10+i-1]+b[10+3+i-1]*logn+b[10+2*3+i-1]*logn2; b0=b0+incrmnt*lncut**2;b1=b1-incrmnt*2*lncut; b2=b2+incrmnt; end;end; arg=b1**2+4*b2*lny-4*b2*b0; if arg<0 then padj=10**(-b1/2/b2); else do; lnx = 1/b2*(-b1+sqrt(b1**2+4*b2*lny-4*b2*b0))/2; padj = min(1,10**lnx); end; return(max(padj,praw)); end; FINISH; ***********************************************************************************; ***********************************************************************************; ***********************************************************************************; ***********************************************************************************; START PAVA(y_in,w_in,directn,y_out1,groupid,y_out2,w_out2,frstout,lastout,sse,kpava); /* PERFORMS ISOTONIC REGRESSION ON y_in, CALCULATES SSE */ /* y_in ==> vector of independent variables, sorted */ /* w_in ==> vector of weights */ /* DIRECTN ==> "UP" or "DOWN" for isotonic or antitonic */ /* y_out1 ==> vector of fitted values (same length as y_in) */ /* GROUPID ==> vector of group id's (same length as y_in) */ /* y_out2 ==> vector of unique fitted values (length = #pava sets)*/ /* w_out2 ==> vector of weights (length = #pava sets) */ /* frstout ==> index of first member of group (length=#pava sets) */ /* lastout ==> index of last member of group (length=#pava sets) */ /* sse ==> sum(w_in#(y_in-y_out1)##2) */ /* kpava ==> # pava sets */ y=y_in; w=w_in; if directn = "DOWN" then y=-y; n=nrow(y);kt=t(1:n); if n=1 then goto end; do while (1); same=1; do i = 2 to n; if (kt[i] ^= kt[i-1]) then if y[i-1]>=y[i] then do; if y[i-1]>y[i] then same=0; k1=kt[i]; k2=kt[i-1]; do j=1 to n; if kt[j]=k1 then kt[j]=k2; end; wnew=w[i-1]+w[i]; ynew=(w[i-1]*y[i-1]+w[i]*y[i])/wnew; do j=1 to n; if kt[j]=k2 then do; y[j]=ynew;w[j]=wnew;end; end; *same=0; end; end; if same then goto end; end; end: if directn="DOWN" then y=-y; y_out1=y; sse=sum(w_in#(y_in-y_out1)##2); index = t(1:N); IFIRST=J(N,1,0); ILAST = J(N,1,0); IFIRST[1]=1; ILAST[N]=1; groupid=J(N,1,.); groupid[1]=1;mylevel=1; do i = 1 to (N-1); if y[i] ^= y[i+1] then do; ilast[i]=1; ifirst[i+1]=1; mylevel=mylevel+1;end; groupid[i+1]=mylevel; end; y_out2=y[loc(ifirst)]; w_out2=w[loc(ifirst)]; frstout=index[loc(ifirst)]; lastout =index[loc(ilast) ]; kpava=nrow(y_out2); finish; ***********************************************************************************; ***********************************************************************************; start reduce(y_in,w_in,last_in,sse_in,n_in,sls,kmin,kmax,yhat,groupid, y_out,w_out, frstout,lastout,sse_out,kred,f_out,p_out,p2lev,details,history); /* PERFORMS BACKWARD ELIMINATION BY AVERAGING ADJACENT MEANS */ /* y_in ==> vector of sample means */ /* w_in ==> vector of weights */ /* last_in ==> index of last subject in each group */ /* sse_in ==> within-group sse (weighted) i.e. estimate of variance */ /* n_in ==> total number of observations in all k groups */ /* sls ==> selection level to stay */ /* kmin ==> stop backward elimination to keep #groups >= kmin */ /* kmax ==> continue backward elimination until #groups <= kmax */ /* yhat ==> vector of fitted values (same length as y_in) */ /* groupid ==> vector of group id's (same length as y_in) */ /* y_out ==> vector of unique fitted values (length = #reduced sets) */ /* w_out ==> vector of weights (length = #reduced sets) */ /* frstout ==> index of first member of each reduced group */ /* lastout ==> index of last member of group (based on last_in) */ /* sse_out ==> weighted sse between original data and reduced model */ /* kred ==> number of reduced sets */ /* f_out ==> ftests for each group remaining in the model */ /* p_out ==> pvalues for each group remaining in the model */ /* p2lev ==> pvalue of last two remaining level sets (missing if kred>2) */ p2lev=1; k=nrow(y_in); sse=sse_in; mn=y_in; w=w_in; n=n_in; w2=J(k,1,.);mn2=w2;ss2=w2;ssh=w2; history={"HISTORY NOT REQUESTED"};; if (k=1) then do; p2lev=1; history={"ONLY 1 LEVEL SET IN ORIGINAL REGRESSION"};goto end; end; if (sse_in = 0) then do; p2lev=0; history={"PERFECT FIT TO DATA (SSE=0), NO ELIMINATION POSSIBLE"}; goto end; end; if (details=1) then do; history=J(k,k,.); rownames=rowcatc(J(k,1,"levset_")||char(t(1:k))||J(k,1,",")||char(t(2:(k+1)))); colnames=rowcatc(J(k,1,"iteration_")||char(t(1:k))); mattrib history rowname=rownames colname=colnames; end; PREV=t({.}||(1:(k-1))); NEXT=t((2:k)||{.}); *** GET INITITAL SSH- AND P- VALUES ***; do i=1 to (k-1); w2[i]= w[i]+w[i+1]; mn2[i]=(w[i]*mn[i]+w[i+1]*mn[i+1])/w2[i]; ssh[i]=w[i]*(mn[i]-mn2[i])**2+w[i+1]*(mn[i+1]-mn2[i])**2; end; iii=1; if(details=1)then history[,iii]=1-probf(SSH/(SSE/(N-k)),1,N-k); E=LOC(SSH=MIN(SSH))[1]; F=SSH[E]/(SSE/(N-k)); P=1-probf(F,1,N-k); ********************************; do while(((P>SLS)|k>kmax)&(k>kmin)); k=k-1; SSE=SSE+SSH[e]; new=next[e]; /* ASSIGN MEAN AND WEIGHT OF AMALGAMATED SET */ w[new]=w2[e];mn[new]=mn2[e]; /* UPDATE PREV AND NEXT */ PREV[NEXT[e]]=PREV[e]; IF PREV[e]^=. THEN NEXT[PREV[e]]=NEXT[e]; PREV[e]=0;NEXT[e]=0; /* IF NOT THE LAST TWO */ if (NEXT[new]^=.) then do; w2[new]= w[new]+w[NEXT[new]]; mn2[new]=(w[new]*mn[new]+w[NEXT[new]]*mn[NEXT[new]])/w2[new]; ssh[new]=w[new]*(mn[new]-mn2[new])**2+w[NEXT[new]]*(mn[NEXT[new]]-mn2[new])**2; end; /* IF NOT THE FIRST TWO */ if (prev[new] ^=.) then do; w2[PREV[new]]=w[PREV[new]]+w[new]; mn2[PREV[new]]=(w[PREV[new]]*mn[PREV[new]]+w[new]*mn[new])/w2[PREV[new]]; ssh[PREV[new]] =w[PREV[new]]*(mn[PREV[new]]-mn2[PREV[new]])**2+w[new]*(mn[new]-mn2[PREV[new]])**2; end; ssh[e]=.;mn[e]=.;w[e]=.; mn2[e]=.; if (k>1) then do; E=LOC(SSH=MIN(SSH))[1]; F=SSH[E]/(SSE/(N-k)); P=1-probf(F,1,N-k); p2lev=min(p,p2lev); iii=iii+1; if(details=1)then history[,iii]=1-probf(SSH/(SSE/(N-k)),1,N-k); end; end; if (details=1) then do;history=history[1:(nrow(y_in)-1),1:iii];end; end: i=loc(mn ^= .); ssh=ssh[i]; if sse=0 then ssetmp=.; else ssetmp=sse; f_out =ssh/(ssetmp/(N-k)); p_out =1-probf(F_out,1,N-k); y_out=mn[i]; w_out=w[i]; kred=nrow(y_out); lastout=last_in[i]; frstout =J(kred,1,.); frstout[1]=1; if kred>1 then do i = 2 to kred; frstout[i]=lastout[i-1]+1; end; sse_out=sse; index=t(1:n); yhat=J(n,1,.); groupid=J(n,1,.);level=t(1:kred); do i=1 to kred; ii=loc((index>=frstout[i])&(index<=lastout[i])); yhat[ii]=y_out[i]; groupid[ii]=i;end; finish; ***********************************************************************************; /*BEGIN*/ METHOD="&METHOD"; /************************* READ DATA INTO IML **********************************/ use &tempchar.loess var{col1 col2}; read all var {col1 col2} into loess; close &tempchar.loess; sse6=loess[1];k6=loess[2]; %IF(&METHOD=UP) %THEN %STR(SORT &TEMPCHAR.DATA0 OUT = &TEMPCHAR.DATA BY &X DESCENDING &Y;); %ELSE %IF(&METHOD=BEST) %THEN %STR(SORT &TEMPCHAR.DATA0 OUT = &TEMPCHAR.DATA BY &X DESCENDING &Y;); %ELSE %IF(&METHOD=DOWN) %THEN %STR(SORT &TEMPCHAR.DATA0 OUT = &TEMPCHAR.DATA BY &X &Y;); USE &tempchar.DATA VAR{&Y};READ ALL VAR {&Y} INTO YTMP;CLOSE &tempchar.DATA; USE &tempchar.DATA VAR{&Y &X &WEIGHT __SORT__} WHERE ((&X ^= .)&(&Y ^= .) %IF(&WEIGHT NE) %THEN &(&WEIGHT>0););;; READ ALL VAR {&Y} INTO Y; READ ALL VAR {&X} INTO X; READ ALL VAR {__SORT__} INTO SORTKEY; NTMP=nrow(YTMP);N=NROW(Y); %IF (&WEIGHT NE) %THEN %STR(READ ALL VAR {&WEIGHT} INTO W;); %ELSE %STR(W = J(N,1,1);); CLOSE &tempchar.DATA; /********************************************************************************/ /*********************** CALCULATE ORIGINAL REGRESSION FIT *************************/ %IF &METHOD=UP %THEN %STR(CALL PAVA (Y,W,"UP" ,yhat1,groupid1,y1,w1,first1,last1,sse1,k1);); %IF &METHOD=DOWN %THEN %STR(CALL PAVA (Y,W,"DOWN",yhat1,groupid1,y1,w1,first1,last1,sse1,k1);); %IF &METHOD=BEST %THEN %DO; CALL PAVA (Y,W,"UP" ,yhat1 ,groupid1 ,y1 ,w1 ,first1 ,last1 ,sse1 ,k1 ); SORT &TEMPCHAR.DATA0 OUT = &TEMPCHAR.DATA BY &X &Y;;; USE &tempchar.DATA VAR{&Y &X &WEIGHT __SORT__} WHERE ((&X ^= .)&(&Y ^= .) %IF(&WEIGHT NE) %THEN &(&WEIGHT>0););;; READ ALL VAR {&Y} INTO Y2; READ ALL VAR {&X} INTO X2;; READ ALL VAR {__SORT__} INTO SORTKEY2; %IF (&WEIGHT NE) %THEN %DO;READ ALL VAR {&WEIGHT} INTO W2;%END; %IF (&WEIGHT EQ) %THEN %DO;W2 = J(N,1,1);%END; CALL PAVA (Y2,W2,"DOWN",tmp1 ,tmp2 ,tmp3,tmp4,tmp5 ,tmp6 ,sse1b,tmp7); IF SSE1B10 THEN SLS=GETSLS2(METH,N,&ALPHA); ); %ELSE %STR(SLS=&SLS;); /***********************************************************************************/ /********************** PERFORM BACKWARD ELIMINATION********************************/ %IF (%UPCASE(&DETAILS)=YES)%THEN %DO; DETAILS=1; %END;%ELSE %DO; DETAILS=0;%END; minsets=&minsets;maxsets=&maxsets;if maxsets<1 then maxsets=1; call reduce(y1,w1,last1,sse1,n,sls,minsets,maxsets, yhat2 ,groupid2 ,y2 ,w2 ,first2 ,last2 ,sse2 ,k2 ,f2,p2,p2lev,DETAILS,history); if ((k2>2)&(SSE2>0)) then do; p2levtmp=p2lev; call reduce(y2,w2,last2,sse2,n,0,2,maxsets, yhat3 ,groupid3 ,y3 ,w3 ,first3 ,last3 ,sse3 ,k3 ,f3,p3,p2lev,0,jumk); p2lev=min(p2levtmp,p2lev); end; xmin2=x[first2];xmax2=x[last2];resid2 = y-yhat2; tmp=J(k2,1,.); do i = 1 to k2; tmp[i]=ssq((sqrt(w)#resid2)[loc(groupid2=i)]); end; stddev2 = SQRT(tmp/w2); ************** GET ADJUSTED PVALUES ******************; padj=J(k2,1,.); IF ((METHOD ="BEST")) THEN METH="MONO"; IF ((METHOD="UP")|(METHOD="DOWN")) THEN METH="ISO"; if (k2>1)&((METH="ISO")|(METH="MONO")) then do i = 1 to k2-1; if N<11 then padj[i]=padjust1(METH,N,p2[i]); if N>10 then padj[i]=padjust2(METH,N,p2[i]); end; if N<11 then p2levadj=padjust1(METH,N,p2lev); if N>10 then p2levadj=padjust2(METH,N,p2lev); ***************************************************; /***********************************************************************************/ %IF(&METHOD=BEST) %THEN %LET METHOD2=BEST MONOTONIC; %ELSE %IF(&METHOD=UP) %THEN %LET METHOD2=ISOTONIC (INCREASING); %ELSE %IF(&METHOD=DOWN) %THEN %LET METHOD2=ISOTONIC (DECREASING); %IF (&WEIGHT=) %THEN %LET WTLABEL=[NONE]; %ELSE %LET WTLABEL=&WEIGHT; tmp={"VERSION:" "&VERSION","REGRESSION METHOD:" "&METHOD2", "EXPLANATORY VARIABLE:" "&X", "RESPONSE VARIABLE:" "&Y", "WEIGHT VARIABLE:" "&WTLABEL"}; tmp2=char(SLS); tmp3={"SIGNIFICANCE LEVEL TO STAY:"}|| tmp2; %if (&SLS EQ) %then %let alphalab=α%else %let alphalab=NA; jkj2={ "ALPHA:" "&ALPHALAB"}; tmp4=char(NTMP);tmp5={"# OF OBSERVATIONS READ"}||tmp4; tmp6=char(N); tmp7={"# OF OBSERVATIONS USED"}||tmp6; REDMON=tmp//tmp5//tmp7//tmp3//jkj2;print REDMON; /********************** CREATE SUMMARY STATISTICS **********************************/ x0 = J(n,1,1) ; yhat=x0*inv(t(w#x0)*x0)*t(x0)*(w#y); SSE0=sum(w#(y-yhat)##2); /* INTERCEPT ONLY */ x1 = orpol(x-x[:],1); yhat=x1*inv(t(w#x1)*x1)*t(x1)*(w#y); SSE3=sum(w#(y-yhat)##2);; /* LINEAR REGRESSION */ x2 = orpol(x-x[:],2); yhat=x2*inv(t(w#x2)*x2)*t(x2)*(w#y); SSE4=sum(w#(y-yhat)##2);; /* QUADRATIC REGRESSION */ **** TWO-PHASE LINEAR REGRESSION WITH DATA DRIVEN KNOT ****; uniq=unique(x);nuniq=ncol(uniq);start=uniq[2];stop=uniq[nuniq-1]; sse5=sse0; knot=.; %if (%UPCASE(&twophase)=YES) %THEN %DO; do i = 1 to &NITER; change=start+i/&NITER*(stop-start); x2=0<>(x-change); xx=x0||x||x2; tmpbeta=inv(t(w#xx)*xx)*t(xx)*(w#y); yhat=xx*tmpbeta; SSEx=sum(w#(y-yhat)##2); if ssex=N))]=.; F = (SSE0-SSE)/(Q-1)/(SSETMP/(N-Q));F[6]=.;P= 1-probf(F,Q-1,N-Q); MODEL={"&METHOD2",'REDUCED REGRESSION','PIECEWISE LINEAR REGRESSION','QUADRATIC REGRESSION','LINEAR REGRESSION','LOCAL QUADRATIC (LOESS)','INTERCEPT ONLY'}; *********** CALCULATE PVALUE FOR EBAR-SQUARE TEST ************; %LET EBAR=%UPCASE(&EBAR);EBARMETH="&EBAR"; EBAR = RSQUARE[1]; K=N; IF (((N<200)&(EBARMETH^="APPROXIMATE"))|EBARMETH="EXACT") THEN DO; /* CALCULATE EXACT PVALUE FOR EBAR-SQUARE STATISTIC */ /* FIRST CALCULATE PLK'S */ plk=J(n,n,0); plk[1,1]=1;plk[2,1]=1/2;plk[2,2]=1/2; do k=2 to n; plk[k,1]=1/k;end; do k=2 to min(n,100);plk[k,k]=1/exp(lgamma(k+1));end; do k=2 to n; do l=2 to k-1; plk[k,l]=1/k*plk[k-1,l-1]+(k-1)/k*plk[k-1,l];end;end; k = N; /* CALCULATE EXACT PVALUE */ pebar=0; do l = 2 to k-1; pebar=pebar+plk[k,l]*(1-probbeta(ebar,(l-1)/2,(n-l)/2));end; if N<100 then pebar=pebar+1/exp(lgamma(N+1)); END; ELSE DO; /* CALCULATE APPROXIMATE PVALUE FOR EBAR-SQUARE STATISTIC */ j=t(2:k);kk1=sum(1/j);kk2=sum(3/j-1/j/j); a=kk1/(N-1)/(1-1/k); b=(kk2+kk1**2)/(N-1)/(N+1)/(1-1/k); c=a*(a-b)/(b-a**2); d=(1-a)*(a-b)/(b-a**2); hcd=1-probbeta(ebar,c,d); pebar=hcd*(1-1/k); END; %if (&method=BEST) %then %do; pebar=min(1,2*pebar); ;%end; P[1]=pebar;P[2]=p2levadj; PRINT "********SUMMARY STATISTICS********"; PRINT " "; create &TEMPCHAR.STATS var{MODEL SSE NO_PARMS RSQUARE P}; append; list all; close &TEMPCHAR.STATS; /***********************************************************************************/ &tempchar.X=X;&tempchar.Y=Y; &tempchar.W=W; create &TEMPCHAR.FILE0 var{sortkey &tempchar.x &tempchar.y &tempchar.w groupid1 yhat1 resid1 groupid2 yhat2 resid2}; append; close &TEMPCHAR.FILE0; IF (N<6) then do; file print; put "WARNING: Small sample size (n=" (char(n,1)) ") may cause errors in LOESS fit"; end; %IF (%UPCASE(&TWOPHASE) ne YES) %THEN %DO; file print; PUT "NOTE: TWO-PHASE LINEAR REGRESSION WAS NOT REQUESTED"; %END; print "********ORIGINAL REGRESSION FIT********"; print " "; xmin=xmin1;xmax=xmax1;weight=w1; yhat=y1; create &TEMPCHAR.FILE1 var{xmin xmax weight yhat}; append; list all; close &TEMPCHAR.FILE1; print "********REDUCED REGRESSION FIT********"; print " "; xmin=xmin2;xmax=xmax2;weight=w2; yhat=y2; stddev=stddev2; f=f2; p=p2; create &TEMPCHAR.FILE2 var{xmin xmax weight yhat stddev f p padj}; append; list all; close &TEMPCHAR.FILE2; *** NOTES AND WARNINGS ***; file print; %if (&WEIGHT ne) %then %do; PUT "WARNING: Using WEIGHT= option may invalidate P-values, inflate Type-I error rate." ; %end; if (k2>1) then do; if any((padj[1:(k2-1)]<.001) & (padj[1:(k2-1)]>.)) then PUT "WARNING: P-value adjustment may not be accurate for P <.001"; if any((p[1:(k2-1)]=padj[1:(k2-1)])&(padj[1:(k2-1)]>.)) then put "NOTE: At least one p-value was too small to be adjusted (p = padj)"; end; %IF (%UPCASE(&TWOPHASE)=YES) %THEN %DO; print "********TWO-PHASE LINEAR REGRESSION FIT *********"; print " "; xmin=(pwl1//uniq[1]);knot=pwl2//knot;xmax=pwl3//(uniq[nuniq]); label ={"YHAT(X)", " X"}; create &TEMPCHAR.FILE3 var{label xmin knot xmax}; append; list all; close &TEMPCHAR.FILE3; if (pwl1=pwl2)&(pwl2=pwl3) then put "NOTE: The estimated two-phase linear regression model is a flat line"; if (pwl1<=pwl2)&(pwl2<=pwl3) then put "NOTE: The estimated two-phase linear regression model is monotonic increasing"; else if (pwl1>=pwl2)&(pwl2>=pwl3) then put "NOTE: The estimated two-phase linear regression model is monotonic decreasing"; else put "NOTE: The estimated two-phase linear regression model is non-monotonic"; IF (N<3) then do; file print; put "WARNING: Small sample size (n=" (char(n,1)) ") causes errors in two-phase linear fit" ; end; %END; if (details=1) then do; print "******** DETAILS OF BACKWARD ELIMINATION ********";; if ((sse1=0)|(k1=1)) then do; print history; colnames={" "};end; else do; rownames=rowcatc(J(k1-1,1,"levset_")||char(t(1:(k1-1)))||J(k1-1,1,",")||char(t(2:(k1)))); colnames=rowcatc(J(k1-k2+1,1,"iteration_")||char(t(1:(k1-k2+1)))); mattrib history rowname=rownames colname=colnames label="P-VALUES AT SUCCESSIVE ITERATIONS OF BACKWARD ELIMINATION"; print history; end; create &tempchar.file3 from history[colname=colnames];append from history;close &tempchar.file3; end; if(0) then do; end; /*QUIT;*/ /********************** PLOTS *****************************************/ %if ((&plot ne) and (%upcase(&plot) ne NO)) %then %do; %let gdevice=&sysdevic; data _NULL_; set &TEMPCHAR.FILE2 end=eof nobs=__n; call symput("numobs",left(trim(__n))); if not eof then call symput("HREF"||left(_N_),trim(left(trim(xmax)))); run; %let href=; %do i=1 %to %eval(&numobs-1); %let href=&href &&href&i; %end; goptions reset=(axis, legend, pattern, symbol, footnote) hpos=0 vpos=0 htext= ftext= ctext= target= gaccess= gsfmode=; goptions ctext=blue graphrc interpol=join rotate=landscape; GOPTIONS GUNIT=PCT HTEXT=2 FTEXT=SWISSB CTEXT=BLACK; %if (%upcase(%scan(&plot,1))=FILE) %then %str(GOPTIONS GACCESS=GSASFILE DEVICE=PS LFACTOR=5;); symbol2 c=DEFAULT i=STEPSLJ v=NONE; symbol1 c=DEFAULT i=NONE v=DOT cv=BLACK h=.1 CM; axis1 color=blue width=2.0; axis2 color=blue width=2.0; axis3 color=blue width=2.0; %let dir=%scan(&plot,2); proc gplot data=&TEMPCHAR.FILE0(rename=(&tempchar.x=&x &tempchar.y=&y)); %if (%upcase(%scan(&plot,1))=FILE) %then %str(FILENAME GSASFILE "&dir._PLOT.PS";);; title2 "Reduced Monotonic Regression"; plot (&Y YHAT2) * &X / overlay haxis=axis1 vaxis=axis2; plot (RESID2 ) * &X / overlay haxis=axis1 vaxis=axis2 vref=0 %if (&href ne) %then href=(&href);; label /*x="&X" y="&Y"*/ yhat2="Residuals of &Y"; run;quit; goptions device=&gdevice; /**********************************************************************/ %end;; /************************* REMOVE TEMPORARY DATASETS *******************/ %IF (%UPCASE(&CLEANUP) EQ YES) %THEN %DO; proc datasets nolist library=work; delete &tempchar.fit &tempchar.loess &tempchar.data &tempchar.stats &tempchar.file0 &tempchar.file1 &tempchar.file2;run; %END; /**********************************************************************/ /************************* ADD LABELS TO PERMANENT DATASETS ***********/ %ELSE %DO; proc datasets nolist library=work; modify &tempchar.file0; rename &tempchar.x = &x &tempchar.y =&y; %IF(&WEIGHT NE) %THEN %DO; rename &tempchar.W=&WEIGHT;%END; label GROUPID1="GROUP ID FOR ORIGINAL REGRESSION" YHAT1 ="PREDICTED VALUE OF &Y FROM ORIGINAL REGRESSION" RESID1 ="RESIDUAL FROM ORIGINAL REGRESSION" GROUPID2="GROUP ID FOR REDUCED REGRESSION" YHAT2 ="PREDICTED VALUE OF &Y FROM REDUCED REGRESSION" RESID2 ="RESIDUAL FROM REDUCED REGRESSION" SORTKEY ="ROW NUMBER OF OBSERVATION IN INPUT DATA SET &DATA"; modify &tempchar.file1; label XMIN ="MINIMUM VALUE OF &X" XMAX ="MAXIMUM VALUE OF &X" YHAT ="PREDICTED VALUE OF &Y"; modify &tempchar.file2; label XMIN ="MINIMUM VALUE OF &X" XMAX ="MAXIMUM VALUE OF &X" YHAT ="PREDICTED VALUE OF &Y" STDDEV ="STANDARD DEVIATION OF &Y IN GROUP"; run; %END; /**********************************************************************/ %THEEND:; %MEND;