twosampleinitialcov<-function(X1, X2, meanvec1, meanvec2) { for (i in 1:ncol(X1)) { X1[is.na(X1[,i]),i]=meanvec1[i] } for (i in 1:ncol(X2)) { X2[is.na(X2[,i]),i]=meanvec2[i] } X=rbind(X1,X2) covmat=cov(X)*(nrow(X)-1)/nrow(X) return(covmat) } empredict<-function(X, emmean, emcov) { completeX=X rowmiss=sort(unique(row(X)[is.na(X)])) nrowmiss=length(rowmiss) #number of rows with missing T2=0 for (i in 1:nrowmiss) { wholerow=X[rowmiss[i],] x2=wholerow[!is.na(wholerow)] colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))] mu1=emmean[colmiss] #missing mu2=emmean[-colmiss] #available sigma11=emcov[colmiss,colmiss] sigma12=emcov[colmiss,-colmiss] sigma21=emcov[-colmiss,colmiss] sigma22=emcov[-colmiss,-colmiss] x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2) wholerow[is.na(wholerow)]=x1 completeX[rowmiss[i],]=wholerow matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],]) x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1) matprod[colmiss,colmiss]=x1x1 T2=T2+matprod } T1=apply(completeX,2,sum) rownomiss=seq(1,nrow(X))[-rowmiss] nrownomiss=length(rownomiss) for (i in 1:nrownomiss) { wholerow=X[rownomiss[i],] T2=T2+wholerow%*%t(wholerow) } return(list(T1=T1, T2=T2)) } twosampleemestimate<-function(T1.1, T1.2, T2.1, T2.2, n1, n2) { means1=T1.1/n1 means2=T1.2/n2 covs=(T2.1/n1-means1%*%t(means1))*n1/(n1+n2)+(T2.2/n2-means2%*%t(means2))*n2/(n1+n2) return(list(means1=means1, means2=means2, covs=covs)) } twosampleEM<-function(X1, X2, delta=0.0001) #X1 and X2 are two matrices with missing assume different mean and same covariance { X1=as.matrix(X1) X2=as.matrix(X2) #initial estimate emmean1=apply(X1,2,mean,na.rm=TRUE) emmean2=apply(X2,2,mean,na.rm=TRUE) emcov=twosampleinitialcov(X1, X2, emmean1, emmean2) if (min(eigen(emcov)$values)<0) { stop("the initial estimate of covariance matrix is negative definite") } n1=nrow(X1) n2=nrow(X2) meandiff1=9999999 meandiff2=9999999 covdiff=9999999 while(meandiff1>delta & meandiff2>delta & covdiff>delta) { #prediction T1.1=empredict(X1, emmean1, emcov)$T1 T2.1=empredict(X1, emmean1, emcov)$T2 T1.2=empredict(X2, emmean2, emcov)$T1 T2.2=empredict(X2, emmean2, emcov)$T2 #T2=T2.1+T2.2 #estimate tempmean1=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means1 tempmean2=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means2 tempcov=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$covs meandiff1=sqrt(sum((tempmean1-emmean1)^2)) meandiff2=sqrt(sum((tempmean2-emmean2)^2)) covdiff=sqrt(sum((tempcov-emcov)^2)) emmean1=tempmean1 emmean2=tempmean2 emcov=tempcov } return(list(emmean1=emmean1, emmean2=emmean2, emcov=emcov)) } twosamp=read.table("E:/Study/RA/2009/20090210/cochlear09.dt") sampA=twosamp[twosamp[,1]=="A",2:5] sampB=twosamp[twosamp[,1]=="B",2:5] emout=twosampleEM(sampA,sampB, delta=0.00000001) > emout $emmean1 V2 V3 V4 V5 28.51950 49.14400 55.91789 64.22370 $emmean2 V2 V3 V4 V5 19.39571 38.24143 44.24044 46.81729 $emcov V2 V3 V4 V5 [1,] 384.0486 396.9089 321.3101 294.0010 [2,] 396.9089 563.7274 488.0587 481.6738 [3,] 321.3101 488.0587 520.6385 505.2430 [4,] 294.0010 481.6738 505.2430 543.9159