* em07.sas
 xref: em05.tex, em07.c em07.r
 does: MLE using EM algorithm
       Mixture of 2 normals (with known variances)
        Z  ~ Bern(theta)
        X0 ~ N(mu0, 1)
        X1 ~ N(mu1, 1)
        mu0 < mu1
        Y = (1-Z) X0 + Z X1
**************************************************;
title1 "MLE using EM algorithm - mixture of 2 normals";
proc iml;
**************************************************;

start main (dummy);

  truparms = parmvec(0, 2, 0.3);
  n     = 100;
  estparms = 0; /* to prevent error message */

  y    = simulate (truparms, n);
  conv = estimate (y, estparms);
  run summary  (truparms, estparms, y);

finish;
***************************************************;

start estimate (y, parms);
/*
input: data y[1:n], column vector
output: parms <- estimates mu0, mu1, theta  (column vector)
returns 1 if converged, 0 otherwise
*/

  tol     = 1e-9;
  maxiter = 200;

  if (nrow(y) < 3) then return (0);

  parms = initvals (y);            /* initial values */
  pll   = monitor (y, parms, 0);   /* previous ll */
  reset noname;

  conv=0;
  do  i = 1 to maxiter while (^conv);
    esave = E_step  (y, parms);
    parms = M_step  (esave);
    ll    = monitor (y, parms, i);
    conv  =  (abs(ll-pll) < tol);    /* converged ? */
    pll   = ll;
  end;

  return(conv);
finish;
***************************************************;

start E_step (y, parms);

/*
returns a matrix, col1=c0, col2=c1, col3=w.
  w  <- E[Z  | Y]
  c0 <- E[X0 | Y]
  c1 <- E[X1 | Y]
*/
  run extract(mu_0, mu_1, theta, parms);
  d = mu_1 - mu_0;
  s = 0.5*(mu_1 + mu_0);
  prior_odds = theta/(1.0-theta);

  psi = prior_odds # exp(d*(y-s));
  w   = psi/(1+psi);
  c0  = w#mu_0  + (1-w)#y;
  c1  = w#y     + (1-w)#mu_1;

  return (c0 || c1 || w);
finish;
***************************************************;

start M_step (c0_c1_w);

/* obtain new parameter estimates based on w, c0, c1 */

  parms = c0_c1_w [:,];

  run extract(mu0, mu1, theta, parms);
  * force  mu0 < mu1;
  if (mu0 > mu1) then
    parms = parmvec(parms[2], parms[1], 1-parms[3]);

  return(parms`);
finish;
***************************************************;

start loglkhd  (y, parms);

/* the log-likelihood  based on y[1:n] */

  run extract(mu0, mu1, theta, parms);
  f = theta       * phi(y-mu1) +
      (1-theta)   * phi(y-mu0);

  return( sum(log(f)) - 0.5*nrow(y)*log(8*atan(1)) );
finish;
**************************************************;

start initvals  (y);

/* retruns  initial values based on y[1:n] */

  ybar = y[:];
  sd   = sqrt(ssq(y-ybar)/nrow(y));

  return (parmvec(ybar-sd, ybar+sd, 0.5));
finish;
***************************************************;

start monitor (y, parms, iter);

 /* print log-lkhd and estimates */

  ll = loglkhd (y, parms);
  run extract(mu0, mu1, theta, parms);

  print "Iteration"  iter
        "l="      ll
        "theta="  theta
        "mu0="    mu0
        "mu1="    mu1 ;

  return(ll);
finish;
***************************************************;

start summary  (truparms, estparms, y);

  n = nrow(y);
  print "Sample size "  n;

  run extract(mu0, mu1, theta, truparms);
  print "True parameter values:"
    "theta=" theta
    "mu0="   mu0
    "mu1="   mu1   ;

  run extract(mu0, mu1, theta, estparms);
  print "Parameter Estimates:"
    "theta=" theta
    "mu0="   mu0
    "mu1="   mu1    ;

  ll = loglkhd(y, estparms);
  print "Log-likelihood = "  ll;

finish;
***************************************************;

start simulate (parms, n);
/*
 input: parms, n (ignored)
 output: y[1:n], column vec, fixed data set.
*/
  y =  { /* simulated with parms: 0, 2, 0.3 */
    0.0007668, -0.939475, 0.8456077, -0.493304, 0.3805769, 2.6011146,
    2.08912, 0.1207658, 2.1957545, -0.959282, 0.1548358, 0.3394777,
    0.4788278, 1.1161578, 0.7300012, -2.120032, -1.085473, 0.2274149,
    0.8690178, -0.932938, 0.4580891, 1.2779732, -1.400311, 3.0479398,
    -1.200237, 2.4490362, 1.1866626, -0.693796, -1.651684, -2.222105,
    0.1401175, 1.9019002, 0.6396009, -1.382346, -0.718541, -0.279965,
    0.4651524, 4.3747447, 1.3001852, -1.359992, 0.2648149, 1.524874,
    1.6180265, 1.7134072, -0.870176, -2.099657, 0.3258149, 1.9970998,
    3.4406849, 0.5807176, -0.93498, 1.2195432, 1.0252208, 2.7270676,
    -1.145333, 0.9615463, -0.595377, 0.320387, 3.0322296, 0.0399386,
    0.6169643, 1.0346161, -1.311096, 0.7927389, -0.37912, 1.2072907,
    0.3468948, 0.5203926, -0.932413, -0.597823, -0.854034, 2.5486596,
    -0.502315, 3.2556189, -0.598408, 0.3365086, 0.4282481, 2.5592742,
    1.9806215, -0.912323, 0.4092242, -0.365883, 0.9473954, 0.4847945,
    0.0288647, 0.4406987, -1.126491, 4.0417427, -1.564552, 2.8244872,
    1.8998997, 3.807505, -0.419282, 2.7611861, 2.0530032, 1.2842716,
    0.8478354, -1.082433, 2.3424769, 0.2944996
  };

  return(y);

finish;
***************************************************;

start simulate2 (parms, n);
/*
 input: parms, n
 output: y[1:n], column vec, simulated from the mixture model
         or real data
*/
  seed = 13579;
  seed = j(n, 1, seed);
  run extract(mu0, mu1, theta, parms);
  z  = (ranuni(seed) <= theta);
  mu = (1-z)*mu0 + z*mu1;

  return(mu + rannor(seed));
finish;
***************************************************;

start parmvec(mu0, mu1, theta);
  return(mu0 // mu1 // theta);
finish;
***************************************************;

start extract(mu0, mu1, theta, parms);
  mu0  = parms[1];
  mu1  = parms[2];
  theta= parms[3];
finish;
**************************************************;
start  phi (t); return(exp(-0.5#t#t)); finish;
***************************************************;
* IML execution starts here ->;
run main(0)