Author: Joseph B. Lang, Department of Statistics and Actuarial Science, Univ of Iowa, Iowa City, IA 52242 USA
Created: 1/4/09 (Revised 5/18/09)
The examples in this document illustrate how to use the R program ci.table (versions 1.1, 1.2, and 2.0) for computing confidence intervals for contingency table parameters. The examples herein include those appearing in the first reference below.
Primary References:
Lang, J.B. (2008). "Score and Profile Likelihood Confidence Intervals for Contingency Table Parameters," Statistics in Medicine, 27, 5975-5990. DOI: 10.1002/sim.3391
Lang, J.B. (2004). "Multinomial Poisson Homogeneous Models for Contingency Tables," Annals of Statistics, 32, 340-383.
Lang, J.B. (2005). "Homogeneous Linear Predictor Models for Contingency Tables," Journal of the American Statistical Association, 100, 121-134.
Availability: To obtain a free copy of ci.table and accompanying programs, email the author, Joseph B. Lang, at joseph-lang@uiowa.edu.
Disclaimer: The ci.table code has not been thoroughly tested!
###########################################################################
#  BINOMIAL PROBABILITY:  Example 2.1 in Lang, J.B. (2008), Stat Medicine.
###########################################################################
#
#  Observed 1 success in 12 trials.
#  Model:  y = (1,11) <- Y ~ mult(12, p1,p2).
#
#  Goal:  Compute an approximate 95% CI for the
#         success probability p1.
#
y <- scan()
1 11

S.fct <- function(p) { p[1] }

a1 <- ci.table(y,S.fct,lowerbound=0,upperbound=1,save=T);
a2 <- ci.table(y,S.fct,0,1,type="lr",save=T)
a3 <- ci.table(y,S.fct,0,1,type="pd",save=T)

cbind(a1[c(1:10,20,22)],a2[c(1:10,20,22)],a3[c(1:10,20,22)])
                    [,1]        [,2]               [,3]
Shat                0.08333333  0.08333333         0.08333333
ase                 0.0797856   0.0797856          0.0797856
add.to.zero.for.ase 0.08333333  0.08333333         0.08333333
lowCI               0.01486509  0.004951072        0.01184658
highCI              0.3538799   0.3185427          0.3437542
cc                  0.95        0.95               0.95
type                "score"     "likelihood ratio" "power divergence"
pdlambda            1           0                  0.6666667
lowWCI              -0.07304355 -0.07304355        -0.07304355
highWCI             0.2397102   0.2397102          0.2397102
calls.to.mph.fit    13          13                 14
time.elapsed        0.06        0.07               0.06
> a1$version
[1] "ci.table (ver 1.1, 2/17/09)"
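# Rough cross-check (a sketch, not part of the ci.table code):  for a single
# binomial proportion, inverting the score test gives the Wilson interval,
# which base R's prop.test returns when its continuity correction is turned off.
prop.test(x=1, n=12, conf.level=0.95, correct=FALSE)$conf.int
# approximately (0.0149, 0.3539), in agreement with lowCI and highCI
# of the score interval above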
> a4 <- ci.table(y,S.fct,0,1,type="wald",save=T); as.matrix(a4)
Shat                0.08333333
ase                 0.0797856
add.to.zero.for.ase 0.08333333
type                "wald"
lowWCI              -0.07304355
highWCI             0.2397102
version             "ci.table (ver 1.2, 2/27/09)"
time.elapsed        0

#############################################################
#  CONDITIONAL PROBABILITY
#############################################################
#
#  y = (0, 39, 18, 11) <- Y ~ mult(n=68, p1,p2,p3,p4)
#  Goal:  Compute an approximate 95% CI for p1/(p1+p2).
#
y <- scan()
0 39 18 11

S.fct <- function(p) { p[1]/(p[1]+p[2]) }

ci.table(y,S.fct,lowerbound=0,upperbound=1)
                    [,1]
Shat                0
ase                 0.003107673
add.to.zero.for.ase 0.01470588
lowCI               0
highCI              0.08966646
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              -0.006090928
highWCI             0.006090928
lowCI.TestStat      Numeric,0
highCI.TestStat     3.841440
cv                  3.841459
iterlow             0
iterhigh            4
calls.to.mph.fit    9
version             "ci.table (ver 1.1, 2/17/09)"
time.elapsed        0.06
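# Aside (a quick check, not part of the example above):  the reported Shat is
# simply S.fct evaluated at the sample proportions of the data entered above.
p.hat <- c(0,39,18,11)/68
S.fct(p.hat)   # 0, matching Shat; the zero cell puts the estimate on the
               # boundary, which is why the Wald limits collapse around 0
               # while the score interval extends up to about 0.09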
#############################################################
#  CONDITIONAL PROBABILITY  (Stratified Sampling)
#############################################################
#
#  y = (0, 39 // 18, 11) <- Y ~ prod mult(39, p1, p2 // 29, p3, p4)
#  That is,
#     y <- Y ~ MP(ν, p | strata=c(1,1,2,2), fixed="all"),
#  where ν = (39, 29).
#
#  Goal:  Compute an approximate 95% CI for p1.
#
y <- scan()
0 39 18 11

strata <- c(1,1,2,2)
S.fct <- function(p) { p[1] }

ci.table(y,S.fct,lowerbound=0,upperbound=1,strata=strata)
                    [,1]
Shat                0
ase                 0.003107673
add.to.zero.for.ase 0.01470588
lowCI               0
highCI              0.08966646
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              -0.006090928
highWCI             0.006090928
lowCI.TestStat      Numeric,0
highCI.TestStat     3.841431
cv                  3.841459
iterlow             0
iterhigh            4
calls.to.mph.fit    9
version             "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.09

#############################################################
#  DIFFERENCE BETWEEN CONDITIONAL PROBABILITIES
#############################################################
#
#  y = (0, 39, 18, 11) <- Y ~ mult(n=68, p1,p2,p3,p4)
#  Goal:  Compute an approximate 95% CI for p1/(p1+p2) - p3/(p3+p4).
#
y <- scan()
0 39 18 11

S.fct <- function(p) { p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4]) }

ci.table(y,S.fct,lowerbound=-1,upperbound=1)
                    [,1]
Shat                -0.6206897
ase                 0.09015582
add.to.zero.for.ase 0.01470588
lowCI               -0.7731197
highCI              -0.4400254
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              -0.7973918
highWCI             -0.4439875
lowCI.TestStat      3.841456
highCI.TestStat     3.841459
cv                  3.841459
iterlow             3
iterhigh            3
calls.to.mph.fit    13
version             "ci.table (ver 1.1, 2/17/09)"
time.elapsed        0.17
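# Aside (a sketch of the arithmetic, using the values reported above):  the
# Wald endpoints printed here coincide with Shat -/+ qnorm(0.975)*ase, where
# ase reflects the add.to.zero.for.ase adjustment to the zero count.
-0.6206897 + c(-1,1)*qnorm(0.975)*0.09015582
# approximately (-0.7974, -0.4440), matching lowWCI and highWCI; the score
# interval (-0.7731, -0.4400) is obtained by test inversion instead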
###################################################################
#  GAMMA VARIANT:  Example 2.3 in Lang, J.B. (2008), Stat Medicine.
###################################################################
#
#  y = (25, 25, 12 // 0, 1, 3) ~ prod Mult(62, p11,p12,p13 // 4, p21,p22,p23)
#
#  Goal:  Compute an approximate 95% CI for the Gamma* parameter
#         as described in Lang (2008, Stat Medicine).
#
y <- scan()
25 25 12 0 1 3

strata <- c(1,1,1,2,2,2)

S.fct <- function(p) {
   p <- matrix(p, 2,3,byrow=T)
   P.case.gt.control  <- (p[2,2]+p[2,3])*p[1,1] + p[2,3]*p[1,2]
   P.case.lt.control  <- p[1,2]*p[2,1] + p[1,3]*(p[2,1]+p[2,2])
   P.case.neq.control <- P.case.gt.control + P.case.lt.control
   P.case.gt.control/P.case.neq.control
}

ci.table(y,S.fct,0,1,strata=strata)
                    [,1]
Shat                0.9358289
ase                 0.06654827
add.to.zero.for.ase 0.01515152
lowCI               0.5250232
highCI              0.98971
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              0.8053967
highWCI             1.066261
lowCI.TestStat      3.841451
highCI.TestStat     3.841457
cv                  3.841459
iterlow             4
iterhigh            3
calls.to.mph.fit    14
version             "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.37
ci.table(y,S.fct,0,1,strata=strata,type="wald")

Shat                0.9358289
ase                 0.06654827
add.to.zero.for.ase 0.01515152
type                "wald"
lowWCI              0.8053967
highWCI             1.066261
version             "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0
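# Aside (a quick check):  under stratified sampling the table probabilities
# are within-stratum conditional probabilities, so Shat above can be
# reproduced by evaluating S.fct at the per-stratum sample proportions
# (62 and 4 are the stratum totals from the data entered above).
p.hat <- c(25,25,12, 0,1,3) / rep(c(62,4), each=3)
S.fct(p.hat)   # approximately 0.9358, matching Shat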
### Alternative code...

gammastar.fct <- function(p) {
   nr <- nrow(p)
   nc <- ncol(p)
   probC <- 0; probD <- 0
   for (i in 1:(nr-1)) { for (j in 1:(nc-1)) {
      Aij <- 0
      for (h in (i+1):nr) { for (k in (j+1):nc) {
         Aij <- Aij + p[h,k]
      }}
      probC <- probC + p[i,j]*Aij
   }}
   for (i in 1:(nr-1)) { for (j in 2:nc) {
      Aij <- 0
      for (h in (i+1):nr) { for (k in 1:(j-1)) {
         Aij <- Aij + p[h,k]
      }}
      probD <- probD + p[i,j]*Aij
   }}
   probC/(probC+probD)
}

strata <- c(1,1,1,2,2,2)
S.fct <- function(p) {
   p <- matrix(p,2,3,byrow=T)
   gammastar.fct(p)
}

y <- scan()
25 25 12 0 1 3

a <- ci.table(y,S.fct,0,1,strata=strata,save=T)
b <- ci.table(y,S.fct,0,1,strata=strata,type="pd",save=T)

cbind(a,b)
                    a                             b
Shat                0.9358289                     0.9358289
ase                 0.06654827                    0.06654827
add.to.zero.for.ase 0.01515152                    0.01515152
lowCI               0.5250232                     0.5367156
highCI              0.98971                       0.9917275
cc                  0.95                          0.95
type                "score"                       "power divergence"
pdlambda            1                             0.6666667
lowWCI              0.8053967                     0.8053967
highWCI             1.066261                      1.066261
lowCI.TestStat      3.841451                      3.841464
highCI.TestStat     3.841457                      3.841417
cv                  3.841459                      3.841459
iterlow             4                             5
iterhigh            3                             3
gSvalues            Numeric,13                    Numeric,14
statvalues          Numeric,13                    Numeric,14
normvalues          Numeric,13                    Numeric,14
warnvalues          Character,13                  Character,14
calls.to.mph.fit    14                            15
version             "ci.table (ver 2.0, 5/18/09)" "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.48                          1.24
###################################################################
#  GLOBAL ODDS RATIO
###################################################################
#
#  y = (25, 25, 12 // 0, 1, 3) ~ prod Mult(62, p11,p12,p13 // 4, p21,p22,p23)
#
#  Goal:  Compute an approximate 95% CI for a global odds ratio parameter
#         as described in Lang (2008, Stat Medicine).
#
y <- scan()
25 25 12 0 1 3

strata <- c(1,1,1,2,2,2)

S.fct <- function(p) {
   p <- matrix(p, 2,3,byrow=T)
   p[1,1]*(p[2,2]+p[2,3])/(p[2,1]*(p[1,2]+p[1,3]))
}

ci.table(y,S.fct,0,NA,strata=strata)
                    [,1]
Shat                Inf
ase                 1452.629
add.to.zero.for.ase 0.01515152
lowCI               0.6611751
highCI              Inf
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              Inf
highWCI             Inf
lowCI.TestStat      3.84153
highCI.TestStat     Numeric,0
cv                  3.841459
iterlow             4
iterhigh            0
calls.to.mph.fit    8
version             "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.13

#######################################################
#  DIFFERENCE BETWEEN PRODUCT-MULTINOMIAL PROBABILITIES:
#  Example 2.2 in Lang, J.B. (2008), Stat Medicine.
#######################################################
#
#  Source (secondary):  Agresti 2002:65
#  Early study of the death penalty in Florida (Radelet)
#    Victim Black...
#      White Defendant:  0/9 received Death Penalty
#      Black Defendant:  6/103 received Death Penalty
#
#  Model:  y = (0, 9 // 6, 97) <- Y ~ prod mult(9, p1, p2 // 103, p3, p4)
#  Goal:   Compute an approximate 95% CI for p1 - p3.
#
y <- c(0,9,6,97)
strata <- c("white","white","black","black")
S.fct <- function(p) { p[1]-p[3] }

a1 <- ci.table(y,S.fct,-1,1,strata=strata,save=T)

# Consider the alternative full-multinomial data model...
#   y = (0, 9, 6, 97) <- Y ~ mult(112, p1,p2,p3,p4)
#   Goal:  Compute an approximate 95% CI for p1/(p1+p2) - p3/(p3+p4).
#
S.fct <- function(p) { p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4]) }

a2 <- ci.table(y,S.fct,-1,1,save=T)

cbind(a1,a2)
                    a1                            a2
Shat                -0.05825243                   -0.05825243
ase                 0.02534787                    0.02534787
add.to.zero.for.ase 0.008928571                   0.008928571
lowCI               -0.1213022                    -0.1213022
highCI              0.2427237                     0.2427237
cc                  0.95                          0.95
type                "score"                       "score"
pdlambda            1                             1
lowWCI              -0.1079333                    -0.1079333
highWCI             -0.008571523                  -0.008571523
lowCI.TestStat      3.841458                      3.841458
highCI.TestStat     3.841449                      3.841449
cv                  3.841459                      3.841459
iterlow             4                             4
iterhigh            5                             5
gSvalues            Numeric,15                    Numeric,15
statvalues          Numeric,15                    Numeric,15
normvalues          Numeric,15                    Numeric,15
warnvalues          Character,15                  Character,15
calls.to.mph.fit    16                            16
version             "ci.table (ver 2.0, 5/18/09)" "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.29                          0.25

ci.table(y,S.fct,-1,1,type="pd")
Shat                -0.05825243
ase                 0.02534787
add.to.zero.for.ase 0.008928571
lowCI               -0.1189673
highCI              0.2169931
cc                  "95%"
type                "power divergence"
pdlambda            0.6666667
lowWCI              -0.1079333
highWCI             -0.008571523
lowCI.TestStat      3.841459
highCI.TestStat     3.841456
cv                  3.841459
iterlow             4
iterhigh            5
calls.to.mph.fit    16
version             "ci.table (ver 2.0, 5/18/09)"
time.elapsed        0.27

## Alternative (artificial) data that is even more sparse...

ci.table(y=c(0,9,0,97),S.fct,-1,1)
                    [,1]
Shat                0
ase                 0.01082153
add.to.zero.for.ase 0.009433962
lowCI               -0.03809444
highCI              0.2991438
cc                  "95%"
type                "score"
pdlambda            1
lowWCI              -0.02120981
highWCI             0.02120981
lowCI.TestStat      3.8415
highCI.TestStat     3.841436
cv                  3.841459
iterlow             3
iterhigh            5
calls.to.mph.fit    15
version             "ci.table (ver 1.1, 2/17/09)"
time.elapsed        0.29

#######################################################################
#  KAPPA COEFFICIENT:  Example 2.4 in Lang, J.B. (2008), Stat Medicine.
#######################################################################
#
#  y = (4,0,0,0,1,0,0,0,15) <- Y ~ mult(20, p11,p12,...,p33)
#  Goal:  Compute an approximate 95% CI for the
#         unweighted kappa coefficient.
#
y <- c(4,0,0,0,1,0,0,0,15)

S.fct <- function(p) {
   p <- matrix(p,3,3,byrow=T)
   s1 <- p[1,1]+p[2,2]+p[3,3]
   prow <- apply(p,1,sum)
   pcol <- apply(p,2,sum)
   s2 <- prow[1]*pcol[1]+prow[2]*pcol[2]+prow[3]*pcol[3]
   (s1-s2)/(1-s2)
}

a1 <- ci.table(y,S.fct,-1,1,save=T);
a2 <- ci.table(y,S.fct,-1,1,type="lr",save=T)
a3 <- ci.table(y,S.fct,-1,1,type="pd",save=T)

cbind(a1[c(1:10,20,22)],a2[c(1:10,20,22)],a3[c(1:10,20,22)])
                    [,1]       [,2]               [,3]
Shat                1          1                  1
ase                 0.06592083 0.06592083         0.06592083
add.to.zero.for.ase 0.05       0.05               0.05
lowCI               0.635835   0.766156           0.665659
highCI              1          1                  1
cc                  0.95       0.95               0.95
type                "score"    "likelihood ratio" "power divergence"
pdlambda            1          0                  0.6666667
lowWCI              0.8707976  0.8707976          0.8707976
highWCI             1.129202   1.129202           1.129202
calls.to.mph.fit    9          8                  10
time.elapsed        4.65       4.61               5
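# Aside (a quick check using standard kappa arithmetic, independent of S.fct):
# every observation falls on the diagonal, so observed agreement is 1 and the
# sample kappa sits on its upper bound of 1; with Shat on the boundary, only
# the lower confidence limits above are informative, and the Wald interval
# (0.871, 1.129) spills past the bound.
tab <- matrix(c(4,0,0,0,1,0,0,0,15), 3, 3, byrow=TRUE)
po  <- sum(diag(tab))/sum(tab)                     # observed agreement = 1
pe  <- sum(rowSums(tab)*colSums(tab))/sum(tab)^2   # chance agreement = 0.605
(po - pe)/(1 - pe)                                 # sample kappa = 1, matching Shat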
a1$version
[1] "ci.table (ver 1.1, 2/17/09)" #######################################################################
CI.TABLE INPUT and OUTPUT: (The text below is found in ci.table.Rcode.txt)
#
# Implementation Instructions:
#
#   1. Source ci.table.Rcode.txt (this file).  This creates the two (2) R functions:
#        ci.table and PD.fct.
#      Syntax:  R> source("ci.table.Rcode.txt")
#
#   2. Source mph.Rcode.txt.  This creates the nine (9) R functions:
#        mph.fit, mph.summary, num.deriv.fct, create.U, create.Z.ZF,
#        check.homog, check.HLP, block.fct, M.fct
#      Syntax:  R> source("mph.Rcode.txt")
#
#   For examples of how to use ci.table, go to
#      www.stat.uiowa.edu/~jblang/ci.table.documentation
#
#
ci.table <- function(y,S.fct,lowerbound=NA,upperbound=NA,type="score",pdlambda=2/3,
                     cc=0.95, strata=rep(1,length(y)),fixed.strata="all",
                     add.to.zero.for.ase=NA,y.eps=0,norm.diff.conv=100,
                     norm.score.conv=1e-5,diffc=0.0001,step=1,app.low=1,app.up=1,
                     maxiter=25,max.mph.iter=300,epsm=-1,print.output=TRUE,save=FALSE)
{
# ci.table (version 2.0)
#
# Computes a score, likelihood ratio, power-divergence, or Wald confidence
# interval for a contingency table parameter, S(p), where p is a vector of
# table probabilities.  The contingency table counts y are modeled as a
# realization of a multinomial-Poisson random vector.
#
# In symbols, ci.table computes a confidence interval for S(p) based on data
#
#    y <- Y ~ MP(nu, p | strata=s, fixed.strata=F).
#
# For details on the MP distribution specification, see the mph.fit documentation.
#
# Author:  Joseph B. Lang
# Created: June 2008; last updated: 2/27/09, 5/18/09
#
# Primary References:
#
#   Lang, J.B. (2008). "Score and Profile Likelihood Confidence Intervals for
#      Contingency Table Parameters," Statistics in Medicine, 27, 5975-5990.
#      DOI: 10.1002/sim.3391
#
#   Lang, J.B. (2004). "Multinomial Poisson Homogeneous Models for Contingency
#      Tables," Annals of Statistics, 32, 340-383.
#
#   Lang, J.B. (2005). "Homogeneous Linear Predictor Models for Contingency
#      Tables," Journal of the American Statistical Association, 100, 121-134.
#
# Subroutines:  ci.table uses PD.fct and mph.fit (and its subroutines).
#      See the implementation instructions above.
#
# Input:
#
#   Required:  y     = vector of table counts.
#              S.fct = function object with single argument p, the table
#                      probabilities.  S.fct(p) is the parameter of interest.
#
#      Note:  The table counts are [cross-]classification counts.  Generically,
#                y[1] = number of observations with (C = 1),
#                y[2] = number of observations with (C = 2),
#                etc.,
#             where C is the [composite] classification variable.
#
#             The [composite] variable C has [joint] distribution characterized
#             by the probability vector P, defined as P[i] = P(C=i).
#
#             The table probabilities p are interpreted as p = D^{-1}(ZZ'P)P,
#             where P is the vector of joint probabilities and Z is the strata
#             matrix corresponding to strata=s.  Unless strata=rep(1,length(y)),
#             that is, unless a single sample is taken from a single stratum,
#             the table probabilities are conditional probabilities.
#
#   Optional:  lowerbound = smallest possible value for S(p).
#              upperbound = largest possible value for S(p).
#                 Defaults:  lowerbound=NA means no lower bound;
#                            upperbound=NA means no upper bound.
#                 !THE USER IS STRONGLY ENCOURAGED TO ENTER NUMERIC VALUES FOR
#                  LOWERBOUND AND UPPERBOUND!  (This can mitigate convergence
#                  problems.)
#              type = interval type, either "score", "lr", "pd", or "wald".
#                 Default:  type="score".
#                 If type == "lr", the likelihood-ratio (equivalently, profile
#                    likelihood) confidence interval is computed.
#                 If type == "pd", the power-divergence statistic with
#                    lambda=pdlambda is used to compute the confidence interval.
#                 If type == "wald", only the Wald confidence interval is
#                    computed.
#                 If type is not any of the four options, the score interval is
#                    computed.
#              pdlambda = the parameter used to define the power-divergence
#                 statistic.  This parameter is ignored unless type=="pd".
#                 Default:  pdlambda=2/3.
#              cc = the confidence coefficient.  Default:  cc=0.95.
#
#              strata = vector of the same length as y that gives the stratum
#                 membership identifiers.  Default:  strata=rep(1,length(y));
#                 i.e. the default is the single-stratum (non-stratified)
#                 setting.  Examples:  strata=A, or strata=c(1,1,1,2,2,2,3,3,3),
#                 or strata=paste(sep="","A=",A,",B=",B).
#              fixed.strata = the object that gives information on which strata
#                 have fixed sample sizes.  It can equal one of the keywords,
#                 fixed.strata="all" or fixed.strata="none", or it can be a
#                 vector of stratum membership identifiers, e.g.
#                 fixed.strata=c(1,3) or fixed.strata=c("pop1","pop5").
#                 Default:  fixed.strata="all".
#              add.to.zero.for.ase = positive constant added to zero counts for
#                 purposes of computing standard errors for Wald intervals.
#                 Default:  add.to.zero.for.ase=NA, which implies
#                 add.to.zero.for.ase = min(0.1, 1/sum(y)).
#              y.eps = amount to initially add to each count in y, as used in
#                 mph.fit.
#              norm.diff.conv = convergence criterion used in mph.fit.
#                 Default:  norm.diff.conv=100 (i.e. this criterion is
#                 effectively turned off).
#              norm.score.conv = convergence criterion used in mph.fit.
#              diffc = convergence criterion based on the difference between
#                 current and previous test statistic values.
#              step = step-halving parameter as used in mph.fit.
#              app.low, app.up = rate parameters that control how fast the
#                 intermediate critical values approach the ultimate critical
#                 value (which for cc=0.95 is 3.8415).
#              maxiter = maximum number of iterations for the CI root-finding
#                 algorithm.
#              max.mph.iter = maximum number of iterations as used in mph.fit.
#              epsm = parameter that controls how far to move from the
#                 empirical estimate Shat (or gShat) when deriving initial
#                 iterates for the CI root-solving algorithm.  For example,
#                 for the upper root, the initial iterates are gShat+eps,
#                 gShat+2*eps, gShat+3*eps, where, if epsm >= 0, eps=epsm,
#                 and if epsm < 0, eps=0.5*ase.  Default:  epsm=-1.
#              print.output = logical variable.  If TRUE then print output,
#                 otherwise suppress printing.
#              save = logical variable.  If save=FALSE, no output is saved.
#                 If save=TRUE and "a <- ci.table(...)" is submitted, then
#                 output is saved in "a".
#
# Output:
#   Shat = S.fct(phat), phat = sample proportions
#   ase = approx. std. error of S.fct(phat)
#   add.to.zero.for.ase = amount added to zero counts for computing ase
#   lowCI, highCI = endpoints of confidence interval
#   cc = nominal confidence coefficient
#   type = interval type, either "score", "likelihood ratio",
#          "power divergence", or "wald"
#   pdlambda = power-divergence lambda value used
#   lowWCI, highWCI = endpoints of Wald confidence interval
#   lowCI.TestStat,
#   highCI.TestStat = test statistic values when the parameter equals the
#          lower and upper endpoint of the confidence interval
#          (should be close to the critical value)
#   cv = qchisq(cc,1)
#   iterlow, iterhigh = number of iterations required to find lower and
#          upper endpoints
#   gSvalues, statvalues, normvalues, warnvalues = keep track of the g(S(p))
#          values, TestStat values, norm score values, and warning messages
#          that result from mph.fit
#   calls.to.mph.fit = total number of times the restricted MLE had to be
#          computed
#   version = version of ci.table that was used
#   time.elapsed = time in seconds that it took to compute the confidence
#          interval
#
# Note 1:  To mitigate boundary problems, if a lower and/or upper bound is
#          input, then a transformed version of the parameter that can take
#          values in -Inf to +Inf is used.  After finding confidence bounds
#          for the transformed parameter, the endpoints are transformed back
#          to the original scale.
# Note 2:  When Shat = -Inf or +Inf, setting y.eps equal to some positive
#          constant can lead to convergence of the algorithm.  (To make sure
#          the original counts are ultimately used to find the CI bounds, the
#          program should be run again with a very different y.eps value.)
# Note 3:  Fine tune with the step, app.up, app.low, y.eps, and epsm values.
#          If there are lots of 0 counts, setting step to a smaller value and
#          perhaps y.eps equal to some positive number may help with
#          convergence.  If TestStat values are sensitive to small changes,
#          setting app.low and app.up to smaller values like 0.5 or 0.25 may
#          help.
#
#
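# A minimal template call pulling together the main arguments documented above
# (shown as a commented sketch; the counts, strata labels, and S.fct below are
# hypothetical placeholders, and indexing the saved output by name is assumed
# to work as with a1$version in the examples above):
#
#    y      <- c(10, 5, 3, 2)                 # hypothetical 2x2 table counts
#    strata <- c(1, 1, 2, 2)                  # rows sampled as separate strata
#    S.fct  <- function(p) { p[1] - p[3] }    # parameter of interest, S(p)
#    out <- ci.table(y, S.fct, lowerbound=-1, upperbound=1,
#                    type="score", cc=0.95,
#                    strata=strata, fixed.strata="all", save=TRUE)
#    out$lowCI; out$highCI                    # endpoints, named as in the
#                                             # Output list described above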
Last Updated: 05/18/2009 02:13:16 PM, Joseph B. Lang