/* em07.cpp * xref: em07.* * input: * output: * does: Simulate mixture of 2 normals with equal known varainces. Z ~ Bern(theta) X0 ~ N(mu_0, 1) X1 ~ N(mu_1, 1) mu_0 < mu_1 - MLE using EM algorithm */ /***************************************************/ #include #include #include #include #include #include using std::cin; using std::cout; using std::endl; /****************************************************/ typedef std::valarray dvec; struct parmvec { // parameter vector double mu_0, mu_1, theta; parmvec(double mu0val=0, double mu1val=0, double thetaval=0.5) : mu_0 (mu0val), mu_1 (mu1val), theta (thetaval) { } }; /****************************************************/ double phi(double t) {return exp(-0.5*t*t);} /***************************************************/ double ssq (const dvec &x, double init_val = 0, double subtract = 0) { if (subtract == 0.0) { for (size_t i = 0; i != x.size(); ++i) { init_val += x[i] * x[i]; } } else { for (size_t i = 0; i != x.size(); ++i) { double tmp = x[i] - subtract; init_val += tmp * tmp; } } return init_val; } double mean(const dvec &x) {return x.sum() / x.size(); } double var(const dvec &x) {return ssq(x, 0.0, mean(x)) / (x.size()-1);} /***************************************************/ void E_step (dvec &w, dvec &c0, dvec &c1, const dvec &y, const parmvec &parms) /* w <- E[Z | Y] */ /* c0 <- E[X0 | Y] */ /* c1 <- E[X1 | Y] */ { double d = parms.mu_1 - parms.mu_0; double s = 0.5*(parms.mu_1 + parms.mu_0); double prior_odds = parms.theta / (1.0 - parms.theta); dvec psi = prior_odds * exp(d*(y-s)); w = psi/(1.0+psi); c0 = w*parms.mu_0 + (1.0-w)*y; c1 = w*y + (1.0-w)*parms.mu_1; } /***************************************************/ void M_step (const dvec &w, const dvec &c0, const dvec &c1, parmvec &parms) /* obtain new parameter estimates */ { parms = parmvec(mean(c0), mean(c1), mean(w)); // mu_0, mu_1, theta if (parms.mu_0 > parms.mu_1) // force mu_0 <= mu_1 parms = parmvec(parms.mu_1, parms.mu_0, 1.0 - parms.theta); } /***************************************************/ double loglkhd (const dvec &y, const parmvec &parms) /* the log-likelihood based on y[1:n] */ { dvec l = log(parms.theta*(y-parms.mu_1).apply(phi) + (1.0-parms.theta)*(y-parms.mu_0).apply(phi) ); return l.sum() - 0.5*y.size()*log(2.0*atan2(0.0, -1.0)); } /***************************************************/ parmvec initvals (const dvec &y) /* returns initial values based on y[1:n] */ { double ybar = mean(y); double sd = sqrt(var(y)); return parmvec(ybar-sd, ybar+sd, 0.5); } /***************************************************/ double monitor (const dvec &y, const parmvec &parms, int iter) /* print log-lkhd and estimates */ { double ll = loglkhd (y, parms); double mu_0, mu_1, theta; cout << "Iteration "<< iter << ": l=" << ll << ", theta=" << parms.theta << ", mu_0=" << parms.mu_0 << ", mu_1=" << parms.mu_1 << endl; return ll; } /***************************************************/ int estimate (parmvec &parms, const dvec &y) /* input: data y[1,...,n] */ /* output: parms <- estimates */ /* returns 1 if converged, 0 otherwise */ { const double tol = 1e-9; const int maxiter = 200; double ll, pll; int conv; const size_t n = y.size(); dvec w(n), c0(n), c1(n); if (n < 3) return 0; parms = initvals (y); /* initial values */ pll = monitor (y, parms, 0); /* previous ll */ for (int i=1; i <= maxiter; ++i) { E_step (w, c0, c1, y, parms); M_step (w, c0, c1, parms); ll = monitor (y, parms, i); conv = (fabs(ll-pll) < tol); /* converged ? */ if (conv) break; pll = ll; } return conv; } /***************************************************/ void summary (const parmvec tru_parms, const parmvec est_parms, const dvec &y) { parmvec parms(tru_parms); cout << "\nSample size n=" << y.size() << endl; cout << "True parameter values:" << " theta=" << parms.theta << ", mu_0=" << parms.mu_0 << ", mu_1=" << parms.mu_1 << endl; parms = est_parms; cout << "Parameter Estimates:" << " theta=" << parms.theta << ", mu_0=" << parms.mu_0 << ", mu_1=" << parms.mu_1 << endl; cout << "Log-likelihood = " << loglkhd(y, est_parms) << endl; } /***************************************************/ void simulate (const parmvec parms, dvec &y, int n) // y[] <- data // n, parms: ignored { const double a[] = { // simulated with parms: 0, 2, 0.3 0.0007668, -0.939475, 0.8456077, -0.493304, 0.3805769, 2.6011146, 2.08912, 0.1207658, 2.1957545, -0.959282, 0.1548358, 0.3394777, 0.4788278, 1.1161578, 0.7300012, -2.120032, -1.085473, 0.2274149, 0.8690178, -0.932938, 0.4580891, 1.2779732, -1.400311, 3.0479398, -1.200237, 2.4490362, 1.1866626, -0.693796, -1.651684, -2.222105, 0.1401175, 1.9019002, 0.6396009, -1.382346, -0.718541, -0.279965, 0.4651524, 4.3747447, 1.3001852, -1.359992, 0.2648149, 1.524874, 1.6180265, 1.7134072, -0.870176, -2.099657, 0.3258149, 1.9970998, 3.4406849, 0.5807176, -0.93498, 1.2195432, 1.0252208, 2.7270676, -1.145333, 0.9615463, -0.595377, 0.320387, 3.0322296, 0.0399386, 0.6169643, 1.0346161, -1.311096, 0.7927389, -0.37912, 1.2072907, 0.3468948, 0.5203926, -0.932413, -0.597823, -0.854034, 2.5486596, -0.502315, 3.2556189, -0.598408, 0.3365086, 0.4282481, 2.5592742, 1.9806215, -0.912323, 0.4092242, -0.365883, 0.9473954, 0.4847945, 0.0288647, 0.4406987, -1.126491, 4.0417427, -1.564552, 2.8244872, 1.8998997, 3.807505, -0.419282, 2.7611861, 2.0530032, 1.2842716, 0.8478354, -1.082433, 2.3424769, 0.2944996 }; y = dvec(a, sizeof a / sizeof a[0]); return; } /*************************************************/ int main () { const int n = 100; parmvec tru_parms(0.0, 2.0, 0.3); // mu_0, mu_1, theta parmvec est_parms; dvec y; simulate (tru_parms, y, n); /* get data */ estimate (est_parms, y); /* estimate parameters */ summary (tru_parms, est_parms, y); }