* em07.sas xref: em05.tex, em07.c em07.r does: MLE using EM algorithm Mixture of 2 normals (with known variances) Z ~ Bern(theta) X0 ~ N(mu0, 1) X1 ~ N(mu1, 1) mu0 < mu1 Y = (1-Z) X0 + Z X1 **************************************************; title1 "MLE using EM algorithm - mixture of 2 normals"; proc iml; **************************************************; start main (dummy); truparms = parmvec(0, 2, 0.3); n = 100; estparms = 0; /* to prevent error message */ y = simulate (truparms, n); conv = estimate (y, estparms); run summary (truparms, estparms, y); finish; ***************************************************; start estimate (y, parms); /* input: data y[1:n], column vector output: parms <- estimates mu0, mu1, theta (column vector) returns 1 if converged, 0 otherwise */ tol = 1e-9; maxiter = 200; if (nrow(y) < 3) then return (0); parms = initvals (y); /* initial values */ pll = monitor (y, parms, 0); /* previous ll */ reset noname; conv=0; do i = 1 to maxiter while (^conv); esave = E_step (y, parms); parms = M_step (esave); ll = monitor (y, parms, i); conv = (abs(ll-pll) < tol); /* converged ? */ pll = ll; end; return(conv); finish; ***************************************************; start E_step (y, parms); /* returns a matrix, col1=c0, col2=c1, col3=w. w <- E[Z | Y] c0 <- E[X0 | Y] c1 <- E[X1 | Y] */ run extract(mu_0, mu_1, theta, parms); d = mu_1 - mu_0; s = 0.5*(mu_1 + mu_0); prior_odds = theta/(1.0-theta); psi = prior_odds # exp(d*(y-s)); w = psi/(1+psi); c0 = w#mu_0 + (1-w)#y; c1 = w#y + (1-w)#mu_1; return (c0 || c1 || w); finish; ***************************************************; start M_step (c0_c1_w); /* obtain new parameter estimates based on w, c0, c1 */ parms = c0_c1_w [:,]; run extract(mu0, mu1, theta, parms); * force mu0 < mu1; if (mu0 > mu1) then parms = parmvec(parms[2], parms[1], 1-parms[3]); return(parms`); finish; ***************************************************; start loglkhd (y, parms); /* the log-likelihood based on y[1:n] */ run extract(mu0, mu1, theta, parms); f = theta * phi(y-mu1) + (1-theta) * phi(y-mu0); return( sum(log(f)) - 0.5*nrow(y)*log(8*atan(1)) ); finish; **************************************************; start initvals (y); /* retruns initial values based on y[1:n] */ ybar = y[:]; sd = sqrt(ssq(y-ybar)/nrow(y)); return (parmvec(ybar-sd, ybar+sd, 0.5)); finish; ***************************************************; start monitor (y, parms, iter); /* print log-lkhd and estimates */ ll = loglkhd (y, parms); run extract(mu0, mu1, theta, parms); print "Iteration" iter "l=" ll "theta=" theta "mu0=" mu0 "mu1=" mu1 ; return(ll); finish; ***************************************************; start summary (truparms, estparms, y); n = nrow(y); print "Sample size " n; run extract(mu0, mu1, theta, truparms); print "True parameter values:" "theta=" theta "mu0=" mu0 "mu1=" mu1 ; run extract(mu0, mu1, theta, estparms); print "Parameter Estimates:" "theta=" theta "mu0=" mu0 "mu1=" mu1 ; ll = loglkhd(y, estparms); print "Log-likelihood = " ll; finish; ***************************************************; start simulate (parms, n); /* input: parms, n (ignored) output: y[1:n], column vec, fixed data set. */ y = { /* simulated with parms: 0, 2, 0.3 */ 0.0007668, -0.939475, 0.8456077, -0.493304, 0.3805769, 2.6011146, 2.08912, 0.1207658, 2.1957545, -0.959282, 0.1548358, 0.3394777, 0.4788278, 1.1161578, 0.7300012, -2.120032, -1.085473, 0.2274149, 0.8690178, -0.932938, 0.4580891, 1.2779732, -1.400311, 3.0479398, -1.200237, 2.4490362, 1.1866626, -0.693796, -1.651684, -2.222105, 0.1401175, 1.9019002, 0.6396009, -1.382346, -0.718541, -0.279965, 0.4651524, 4.3747447, 1.3001852, -1.359992, 0.2648149, 1.524874, 1.6180265, 1.7134072, -0.870176, -2.099657, 0.3258149, 1.9970998, 3.4406849, 0.5807176, -0.93498, 1.2195432, 1.0252208, 2.7270676, -1.145333, 0.9615463, -0.595377, 0.320387, 3.0322296, 0.0399386, 0.6169643, 1.0346161, -1.311096, 0.7927389, -0.37912, 1.2072907, 0.3468948, 0.5203926, -0.932413, -0.597823, -0.854034, 2.5486596, -0.502315, 3.2556189, -0.598408, 0.3365086, 0.4282481, 2.5592742, 1.9806215, -0.912323, 0.4092242, -0.365883, 0.9473954, 0.4847945, 0.0288647, 0.4406987, -1.126491, 4.0417427, -1.564552, 2.8244872, 1.8998997, 3.807505, -0.419282, 2.7611861, 2.0530032, 1.2842716, 0.8478354, -1.082433, 2.3424769, 0.2944996 }; return(y); finish; ***************************************************; start simulate2 (parms, n); /* input: parms, n output: y[1:n], column vec, simulated from the mixture model or real data */ seed = 13579; seed = j(n, 1, seed); run extract(mu0, mu1, theta, parms); z = (ranuni(seed) <= theta); mu = (1-z)*mu0 + z*mu1; return(mu + rannor(seed)); finish; ***************************************************; start parmvec(mu0, mu1, theta); return(mu0 // mu1 // theta); finish; ***************************************************; start extract(mu0, mu1, theta, parms); mu0 = parms[1]; mu1 = parms[2]; theta= parms[3]; finish; **************************************************; start phi (t); return(exp(-0.5#t#t)); finish; ***************************************************; * IML execution starts here ->; run main(0)