﻿ Just the Basics:  MPH.FIT for Standard Contingency Table Models

Home > Basic Numerical Examples

Just the Basics:   Using mph.fit to fit standard contingency table models

Author:  Joseph B. Lang,  Department of Statistics and Actuarial Science, University of Iowa,  Iowa City, IA USA 52245   (Last Updated:  5/22/09)

The  numerical examples below were fitted using mph.fit, VERSION 3.1.    This document only includes input, not output.

Primary References:

Lang, J.B (2004).  "Multinomial-Poisson Homogeneous Models for Contingency Tables,"   Annals of Statistics, 32, 340-383 .

Lang, J.B. (2005). "Homogeneous Linear Predictor Models for Contingency Tables,"   JASA, 100, 121-134

EXAMPLE 1.  Test whether a binomial probability equals 0.5.
EXAMPLE 2.  Test whether a multinomial probability vector...(i) is uniform, (ii) equals a specified value.
EXAMPLE 3.  Test whether a multinomial probability vector satisfies a particular constraint.
EXAMPLE 4.  Test of Independence in a 2x2 Table.
EXAMPLE 5.  Test of Independence in a 4x4 Table.  (Using Log-Linear Model.)
EXAMPLE 6.  Test Marginal Homogeneity in a 3x3 Table
EXAMPLE 7.   Log-linear Model for 2x2x2 Table
EXAMPLE 8.   Fit Linear-by-Linear Log-Linear Model
EXAMPLE 9.   Marginal Standardization of a Contingency Table
EXAMPLE 10. Fit Proportional and Non-Proportional Odds Cumulative Logit Model
```#############################################################################################
#
# EXAMPLE 1.  Test whether a binomial probability equals 0.5
#
#  y=(15, 22) <-  Y ~ MP(nu,p|strata=1,fixed="all");  i.e. Y ~ multinomial
#
#  In other symbols,
#
#     y=(15, 22) <-  Y=(Y[1], Y[2])  ~ multinomial(37, p=(p[1],p[2])).
#
#  GOAL:    Test H0:  p[1] = 0.5   vs.  H1:  not H0.

a1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5})

#Alternative specifications...
a2 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-p[2]})
a3 <- mph.fit(y=c(15, 22), constraint=function(p){log(p[1]/p[2])})
a4 <- mph.fit(y=c(15, 22), constraint=function(m){m[1]-m[2]},h.mean=T)
a5 <- mph.fit(y=c(15, 22), link=function(p){p}, X=matrix(1,2,1))

#
#  Alternatively, assume that
#
#      y=(15, 22) <- Y ~ MP(nu, p|strata=1,fixed="none"); i.e. Y ~ indep Poisson.
#
#  In other symbols,
#      y=(15, 22) <- Y = (Y[1],Y[2]),  where  Y[i] indep ~  Poisson(nu p[i]), i=1,2.
#
#  GOAL:     Test H0:  p[1] = 0.5  vs.  H1: not H0.

b1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5},fixed.strata="none")

mph.summary(a1,T)
mph.summary(b1,T)

##############################################################################################
#
#  EXAMPLE 2. Test whether a multinomial probability vector is uniform.
#             Test whether a multinomial probability vector equals a specific value.
#
#   y <- Y=(Y[1],...,Y[6]) ~ MP(nu, p|strata=1, fixed="all");  i.e. Y ~ multinomial.
#
#   In other symbols,
#
#   y <- Y ~ multinomial(15, p=(p[1],...,p[6]))
#
#   GOAL:   Test H0: p[1]=p[2]=...=p[6]   vs. H1:  not H0.
#

y <- rmultinom(1,15,rep(1,6))
a1 <- mph.fit(y,L.fct = function(p){p}, X=matrix(1,6,1), y.eps=0.1)

#Alternative specification...
a2 <- mph.fit(y,h.fct = function(p){as.matrix(p[-6]-1/6)},y.eps=0.1)

mph.summary(a1,T); mph.summary(a2,T)

#Test whether p = (1, 2, 3, 1, 2, 3)/12...
#
p0 <- c(1,2,3,1,2,3)/12
b <-  mph.fit(y,h.fct= function(p){as.matrix(p[-6]-p0[-6])},y.eps=0.1)
mph.summary(b,T)

###############################################################################################
#
#  EXAMPLE 3.  Test whether a multinomial probability vector satisfies a particular constraint.
#
#   Data Source:  Agresti 25:2002.
#
#   y=(30,63,63) <- Y ~ MP(nu, p| strata=1, fixed="all");  i.e. Y ~ multinomial.
#
#   In other symbols,
#
#   y=(30,63,63)  <- Y ~ multinomial(156, p=(p[1],p[2],p[3]))
#
#   GOAL:    Test H0: p[1]+p[2] = p[1]/(p[1]+p[2])   vs. H1: not H0.
#

y <- c(30, 63, 63)
h.fct <- function(p) {
(p[1] + p[2]) - p[1]/(p[1]+p[2])
}
a <- mph.fit(y, h.fct=h.fct)
mph.summary(a,T)

##############################################################################################
#
#  EXAMPLE 4.  Test of Independence in a 2x2 Table.
#
#  y=(y[1,1],y[1,2],y[2,1],y[2,2]) = (25,18,13,21) <- Y ~ MP(nu, p|strata=1, fixed="all");  i.e. Y ~ multinomial
#
#  In other symbols,
#  y =(y[1,1],y[1,2],y[2,1],y[2,2]) <- Y ~ multinomial(77, p=(p[1,1],p[1,2],p[2,1],p[2,2]))
#
#  GOAL:    Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1    vs. H1: not H0.

d <- scan(what=list(A="",B="",count=0))
1 1 25
1 2 18
2 1 13
2 2 21

d <- data.frame(d)

a1 <- mph.fit(y=d\$count, h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])})

# Alternative specifications of independence....
a2 <- mph.fit(y=d\$count, h.fct=function(p){p <- matrix(p,2,2,byrow=T); log(p[1,1]*p[2,2]/p[1,2]/p[2,1])})
a3 <- mph.fit(y=d\$count, h.fct=function(p){p[1]*p[4]/p[2]/p[3] - 1})
a4 <- mph.fit(y=d\$count, h.fct=function(p){p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4])})
a5 <- mph.fit(y=d\$count, L.fct="logm", X=model.matrix(~A+B,data=d))

# Suppose we wished to output observed and fitted values of log OR, OR, and P(B=1|A=1)-P(B=1|A=2)...

L.fct <- function(p) {
L <- as.matrix(c(
log(p[1]*p[4]/p[2]/p[3]),
p[1]*p[4]/p[2]/p[3],
p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4])
))
rownames(L) <- c("log OR","OR","P(B=1|A=1) - P(B=1|A=2)")
L
}

a6 <- mph.fit(y=d\$count,h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])}, L.fct=L.fct,X=diag(3))

#  Unrestricted Model...
b <- mph.fit(y=d\$count, L.fct=L.fct, X=diag(3))

mph.summary(a6,T)
mph.summary(b,T)

##############################################################################################
#
#  EXAMPLE 5.  Test of Independence in a 4x4 Table.  (Using Log-Linear Model.)
#
#  Data Source:  Table 2.8, Agresti, 57:2002.
#
#  y <- Y ~ MP(nu, p|strata=1, fixed="all");  i.e. Y ~ multinomial
#
#  In other symbols,
#  y <- Y ~ multinomial(96, p=(p[1,1],p[1,2],p[2,1],p[2,2]))
#
#  GOAL:    Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1    vs. H1: not H0.
#

d <- scan(what=list(Income="",JobSatisf="",count=0))
<15 VD 1
<15 LD 3
<15 MS 10
<15 VS 6
15-25 VD 2
15-25 LD 3
15-25 MS 10
15-25 VS 7
25-40 VD 1
25-40 LD 6
25-40 MS 14
25-40 VS 12
>40 VD 0
>40 LD 1
>40 MS 9
>40 VS 11

d <- data.frame(d)

mph.summary(a)

#Alternatively,
mph.summary(b)

###############################################################################################
#
#  EXAMPLE 6.  Test Marginal Homogeneity in a 3x3 Table
#
#  Data Source:  Table 10.16, Agresti, 445:2002.
#
#  y <- Y ~ MP(nu, p |strata=1, fixed="all");  i.e. Y ~ multinomial.
#
#  Specifically,
#  y <- Y ~ multinomial(160, p=(p[1,1],...,p[3,3]))
#
#  GOAL:    Test H0:  p[1,+] = p[+,1], p[2,+] = p[+,2], p[3,+] = p[+,3] vs.  H1: not H0.

d <- scan(what=list(Siskel="",Ebert="",count=0))
Pro Pro     64
Pro Mixed    9
Pro Con     10
Mixed Pro   11
Mixed Mixed 13
Mixed Con    8
Con Pro     13
Con Mixed    8
Con Con     24

d <- data.frame(d)

h.fct <- function(p){
p.Siskel <- M.fct(d\$Siskel)%*%p
p.Ebert  <- M.fct(d\$Ebert)%*%p
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}
a1 <- mph.fit(y=d\$count, h.fct=h.fct)
mph.summary(a1,T)

#  Suppose that we wish to report on the observed and fitted marginal probabilities
#

L.fct <- function(p) {
p.Siskel <- M.fct(d\$Siskel)%*%p
p.Ebert <- M.fct(d\$Ebert)%*%p
L <- as.matrix(c(p.Siskel,p.Ebert))
rownames(L) <- c(paste(sep="", "P(Siskel=",levels(d\$Siskel),")"),
paste(sep="", "P(Ebert=", levels(d\$Ebert),")"))
L
}
a2 <- mph.fit(y=d\$count,h.fct=h.fct,L.fct=L.fct,X=diag(6))
mph.summary(a2,T)

#  M.fct(factor)%*%p  gives the marginal probabilities corresponding to the
#  levels of 'factor'.   The  marginal probabilities are ordered by the levels of 'factor'.
#
#  Alternatively,  in this rectangular table setting, we can find the marginal
#  probabilities using the apply(...) function.  In this case, the marginal
#  probabilities are ordered as they are entered in the data set.
#

h.fct <- function(p) {
p <- matrix(p,3,3,byrow=T)
p.Siskel <- apply(p,1,sum)
p.Ebert <- apply(p,2,sum)
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}

L.fct <- function(p) {
p <- matrix(p,3,3,byrow=T)
p.Siskel <- apply(p,1,sum)
p.Ebert <- apply(p,2,sum)
L <- as.matrix(c(p.Siskel,p.Ebert))
rownames(L) <- c("P(Siskel=Pro)","P(Siskel=Mixed)","P(Siskel=Con)",
"P(Ebert=Pro)", "P(Ebert=Mixed)", "P(Ebert=Con)")
L
}
b <- mph.fit(y=d\$count,h.fct=h.fct,L.fct=L.fct,X=diag(6))

##################################################################################################
#
# EXAMPLE 7.   Log-linear Model for 2x2x2 Table
#
# Data Source:  Table 8.16, Agresti 347:2002
#
#  y <- Y ~ MP(nu, p| strata=1, fixed="all");  i.e. Y ~ multinomial.
#
#  Specifically,
#
#  y <- Y ~ multinomial(621, p).
#
# The counts in y are cross-classification counts for variables
# G=Gender,  I=Information Opinion,   H= Health Opinion.
#
# GOAL:  Fit the loglinear models [GI, GH, IH] and [G, IH].
#

d <- scan(what=list(G ="",I="",H="",count=0))
Male   Support Support  76
Male   Support Oppose  160
Male   Oppose  Support   6
Male   Oppose  Oppose   25
Female Support Support 114
Female Support Oppose  181
Female Oppose  Support  11
Female Oppose  Oppose   48

d <- data.frame(d)

#Fit loglinear model [GI, GH, IH]...

a1 <- mph.fit(y=d\$count, link="logm", X=model.matrix(~G+I+H + G:I + G:H + I:H, data=d))

#Fit loglinear model [G, IH]...

a2 <- mph.fit(y=d\$count, link="logm", X=model.matrix(~G+I+H + I:H,data=d))

#Different Sampling Distribution Assumptions:
#
#Alternatively,  assume
#   y <- Y ~ MP(nu, p| strata=1, fixed="none");    that is,  Y ~ indep Poisson.
#
#   In other symbols,
#   y <- Y,  where Y[i] indep ~  Poisson(m[i] = nu p[i]).
#                     Here,  nu is the unknown expected sample size.

b2 <- mph.fit(y=d\$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), fixed="none")

#Alternatively,  assume
#   y <- Y ~ MP(nu, p| strata=Gender, fixed="all").   That is, Y ~ prod multinomial.
#
#   In other symbols,
#   y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]),  where  (Y[i,1,1],...,Y[i,2,2]) indep ~ multinomial(n[i], p[i,,]).
#             Here,  p[i,j,k] = P(I=j,H=k|G=i) and    n[1]=267 and n[2]=354 are the a priori fixed
#             samples sizes for males and females.

c2 <- mph.fit(y=d\$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d\$G)

#Alternatively, assume
#   y <- Y ~ MP(nu, p| strata=Gender, fixed="none").  That is, Y ~ prod Poisson.
#
#   In other symbols,
#   y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]),  where Y[i,j,k] indep ~ Poisson(m[i,j,k] = nu[i] p[i,j,k]).
#             Here,  p[i,j,k] = P(I=j, H=k| G=i) and  nu[1] and nu[2] are the unknown expected
#             sample sizes for males and for females.

d2 <- mph.fit(y=d\$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d\$G, fixed="none")

cbind(a2\$m,b2\$m,c2\$m,d2\$m, sqrt(diag(a2\$covm)),sqrt(diag(b2\$covm)),sqrt(diag(c2\$covm)),sqrt(diag(d2\$covm)))
cbind(a2\$p,b2\$p,c2\$p,d2\$p, sqrt(diag(a2\$covp)),sqrt(diag(b2\$covp)),sqrt(diag(c2\$covp)),sqrt(diag(d2\$covp)))

##########################################################################################################
#
# EXAMPLE 8.  Fit Linear-by-Linear Log-Linear Model
#
# Data Source:  Table 8.15,  Agresti, 345:2002
#
# y <- Y ~ MP(nu, p|strata=1, fixed="all");  i.e. Y ~ multinomial
#
# Specifically,
# y <- Y ~ multinomial(1425, p)
#
# GOAL:  Assess the fit of the linear-by-linear log-linear model.
#

d <- scan(what=list(Schooling="",Abortion = "", count=0))
<HS  Disapprove  209
<HS  Middle      101
<HS  Approve     237
HS   Disapprove  151
HS   Middle      126
HS   Approve     426
>HS  Disapprove   16
>HS  Middle       21
>HS  Approve     138

Schooling.score <- -1*(d\$Schooling=="<HS") + 0*(d\$Schooling=="HS") + 1*(d\$Schooling==">HS")
Abortion.score  <- -1*(d\$Abortion=="Disapprove") + 0*(d\$Abortion=="Middle") +  1*(d\$Abortion=="Approve")

d <- data.frame(d,Schooling.score,Abortion.score)

a <- mph.fit(y=d\$count, link="logm",X=model.matrix(~Schooling + Abortion + Schooling.score:Abortion.score,data=d))
mph.summary(a,T)

############################################################################################################
#
#  EXAMPLE 9.  Marginal Standardization of a Contingency Table
#
#  Data Source:  Table 8.15, Agresti 345:2002.
#
#  GOAL:
#     For a two-way table,  find the standardized values of y, say y*,
#     that  satisfy  (i)  y* has the same odds ratios as y and
#                    (ii) y* has row and column totals equal to 100.
#
#  Note:  This is equivalent to the problem of finding the fitted
#         values for the following model...
#             x <- Y ~ multinomial(n, p=(p[1,1],...,p[3,3]))
#                  p[1,+] = p[2,+] = p[3,+] = p[+,1] = p[+,2] = p[+,3] = 1/3
#                  p[1,1]*p[2,2]/p[2,1]/p[1,2] = or[1,1]
#                  p[1,2]*p[2,3]/p[2,2]/p[1,3] = or[1,2]
#                  p[2,1]*p[3,2]/p[3,1]/p[2,2] = or[2,1]
#                  p[2,2]*p[3,3]/p[3,2]/p[2,3] = or[2,2],
#                  where  or[i,j] = y[i,j]*y[i+1,j+1]/y[i+1,j]/y[i,j+1] are
#                  the observed (y) odds ratios.
#         If m is the vector of fitted values, then y* = m*300/sum(m)
#         are the standardized values of y.
#         Here  x can be any vector of 9 counts.
#         Choosing x so that the sum is 300 leads to sum(m) = 300, so that
#         y* = m in this case.
#
#
d <- scan(what=list(Schooling="",Abortion = "", count=0))
<HS  Disapprove  209
<HS  Middle      101
<HS  Approve     237
HS   Disapprove  151
HS   Middle      126
HS   Approve     426
>HS  Disapprove   16
>HS  Middle       21
>HS  Approve     138

d <- data.frame(d)

h.fct <- function(p) {
p.Schooling <- M.fct(d\$Schooling)%*%p
p.Abortion  <- M.fct(d\$Abortion )%*%p
p <- matrix(p,3,3,byrow=T)
as.matrix(c(
p.Schooling[-3]-1/3,p.Abortion[-3]-1/3,
p[1,1]*p[2,2]/p[2,1]/p[1,2] - 209*126/151/101,
p[1,2]*p[2,3]/p[2,2]/p[1,3] - 101*426/126/237,
p[2,1]*p[3,2]/p[3,1]/p[2,2] - 151*21/16/126,
p[2,2]*p[3,3]/p[3,2]/p[2,3] - 126*138/21/426
))
}

b <- mph.fit(y=d\$count,h.fct=h.fct)
ystar <-   b\$m*300/sum(b\$m)
matrix(round(ystar,1),3,3,byrow=T)

x <- c(rep(33,8),36)
b <- mph.fit(y=x,h.fct=h.fct)
ystar <- b\$m
matrix(round(ystar,1),3,3,byrow=T)

################################################################################################################
#
#  EXAMPLE 10.   Cumulative Logit Model
#
#  Data Source:  Table 7.19, Agresti, 306:2002.
#
#  y <- Y ~ MP(nu, p| strata=Therapy*Gender, fixed="all");  i.e. Y ~ prod multinomial
#
#  Here,  y[i,j,k] is the cross-classification count corresponding to
#  Therapy=i, Gender=j, Response=k.
#
#  The table probabilities are defined as p[i,j,k] = P(Response=k|Therapy=i,Gender=j).
#
#  Goal:  Fit the cumulative logit proportional odds model that includes
#         the main effect of Therapy and Gender.
#

d <- scan(what=list(Therapy="",Gender="",Response="",count=0))
Sequential Male Progressive 28
Sequential Male NoChange 45
Sequential Male Partial 29
Sequential Male Complete 26
Sequential Female Progressive 4
Sequential Female NoChange 12
Sequential Female Partial 5
Sequential Female Complete 2
Alternating Male Progressive 41
Alternating Male NoChange 44
Alternating Male Partial 20
Alternating Male Complete 20
Alternating Female Progressive 12
Alternating Female NoChange 7
Alternating Female Partial 3
Alternating Female Complete 1

d <- data.frame(d)
strata <- paste(sep="",d\$Therapy,".",d\$Gender)
d <- data.frame(d,strata)

d3 <- subset(d,Response!="Complete")
levels(d3\$Response) <- c(NA,"NoChange","Partial","Progressive")

L.fct <- function(p) {
p <- matrix(p,4,4,byrow=T)
clogit <- c()
for (s in 1:4) {
clogit <- c(clogit,
log(sum(p[s,1])  /sum(p[s,2:4])),
log(sum(p[s,1:2])/sum(p[s,3:4])),
log(sum(p[s,1:3])/sum(p[s,4]))
)
}
L <- as.matrix(clogit)
rownames(L) <- c(paste(sep="","log odds(R < ",2:4,"|",d3\$strata,")"))

L
}

a <- mph.fit(d\$count,link=L.fct,X=model.matrix(~ -1 + Response + Therapy + Gender,data=d3) ,strata=strata)

#  Fit the related non-proportional odds cumulative logit model