initialcov<-function(X,meanvec) { for (i in 1:ncol(X)) { X[is.na(X[,i]),i]=meanvec[i] } covmat=cov(X)*(nrow(X)-1)/nrow(X) return(covmat) } empredict<-function(X, emmean, emcov) { completeX=X rowmiss=sort(unique(row(X)[is.na(X)])) nrowmiss=length(rowmiss) #number of rows with missing T2=0 for (i in 1:nrowmiss) { wholerow=X[rowmiss[i],] x2=wholerow[!is.na(wholerow)] colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))] mu1=emmean[colmiss] #missing mu2=emmean[-colmiss] #available sigma11=emcov[colmiss,colmiss] sigma12=emcov[colmiss,-colmiss] sigma21=emcov[-colmiss,colmiss] sigma22=emcov[-colmiss,-colmiss] x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2) wholerow[is.na(wholerow)]=x1 completeX[rowmiss[i],]=wholerow matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],]) x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1) matprod[colmiss,colmiss]=x1x1 T2=T2+matprod } T1=apply(completeX,2,sum) rownomiss=seq(1,nrow(X))[-rowmiss] nrownomiss=length(rownomiss) for (i in 1:nrownomiss) { wholerow=X[rownomiss[i],] T2=T2+wholerow%*%t(wholerow) } return(list(T1=T1, T2=T2)) } emestimate<-function(T1, T2, n) { means=T1/n covs=T2/n-means%*%t(means) return(list(means=means, covs=covs)) } EM<-function(X, delta=0.0001) #X is a n*p matrix with missing { x=as.matrix(X) #initial estimate emmean=apply(X,2,mean,na.rm=TRUE) emcov=initialcov(X, emmean) if (min(eigen(emcov)$values)<0) { stop("the initial estimate of covariance matrix is negative definite") } n=nrow(X) #number of rows meandiff=9999999 covdiff=9999999 while(meandiff>delta & covdiff>delta) { #prediction T1=empredict(X, emmean, emcov)$T1 T2=empredict(X, emmean, emcov)$T2 #estimate tempmean=emestimate(T1, T2, n)$means tempcov=emestimate(T1, T2, n)$covs meandiff=sqrt(sum((tempmean-emmean)^2)) covdiff=sqrt(sum((tempcov-emcov)^2)) emmean=tempmean emcov=tempcov } return(list(emmean=emmean, emcov=emcov)) } > X [,1] [,2] [,3] [1,] NA 0 3 [2,] 7 2 6 [3,] 5 1 2 [4,] NA NA 5 > EM(X,delta=0.00000001) $emmean [1] 6.055589 1.115385 4.000000 $emcov [,1] [,2] [,3] [1,] 0.5974356 0.3743413 1.2152566 [2,] 0.3743413 0.6200690 0.8653846 [3,] 1.2152566 0.8653846 2.5000000