Dichotomous Logit and Probit

The purpose of this session is to show you how to use R's "canned" procedures for doing dichotomous Logit and Probit analysis. This includes obtaining predicted probabilities, predictions of the dependent variable, coefficients and marginal effects for the variables, model diagnostics, hypothesis tests, and the heteroskedastic Probit model. In addition, we provide programs that obtain each of these outputs the "hard" way for illustrative purposes only.
# The data are a sample of 32 observations from a study by Spector and Mazzeo on pre- and
# post-tests for economic literacy. The variables are:
# GPA = grade point average
# TUCE = score on a test of understanding of college-level economics
# PSI = 1 if the student participated in the PSI (Personalized System of Instruction) program, 0 otherwise
# GRADE = response: 1 if the grade on the post-test is higher than on the pre-test, 0 otherwise.
# First read the data.
Spector <- read.table("Documents/My Teaching/Maximum Likelihood/Data/AldrichNelson.txt", header=TRUE)
attach(Spector)
summary(Spector)
# We will first estimate a Probit and a Logit using glm.
library(car)
library(stats)
library(MASS)
# Probit
probitglm.out <- glm(GRADE ~ GPA + TUCE + PSI, family = binomial(link="probit"), data = Spector)
summary(probitglm.out)
# Put the summary into an R object
probitsummary <- summary(probitglm.out)
names(probitglm.out)
names(probitsummary)
# We can do likelihood ratio, Wald, and F tests on each of the individual coefficients in the
# model as follows. Note the upper case Anova. Note that the F test is inappropriate when using
# a binomial family distribution such as this one. It is here just for illustration.
Anova(probitglm.out, test = 'LR')
Anova(probitglm.out, test = 'Wald')
Anova(probitglm.out, test = 'F')
# We can do likelihood ratio and F tests for subsets of coefficients as follows. Note the lower
# case anova. Again, the F is inappropriate with the binomial family distribution.
anova(update(probitglm.out, GRADE ~ GPA + TUCE + PSI - PSI), probitglm.out, test = 'Chisq')
anova(update(probitglm.out, GRADE ~ GPA + TUCE + PSI - PSI), probitglm.out, test = 'F')
anova(update(probitglm.out, GRADE ~ GPA + TUCE + PSI - TUCE - PSI), probitglm.out, test = 'Chisq')
anova(update(probitglm.out, GRADE ~ GPA + TUCE + PSI - TUCE - PSI), probitglm.out, test = 'F')
# Logit
logitglm.out <- glm(GRADE ~ GPA + TUCE + PSI, family = binomial, data = Spector)
summary(logitglm.out)
# Following are some interesting values that can be extracted from the model for later use.
# The value of the log-likelihood at the maximum
loglik <- -1/2*probitglm.out$deviance
loglik
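# Check: for 0/1 binomial data the saturated log-likelihood is zero, so -deviance/2
# should match the value reported by logLik().
logLik(probitglm.out)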
# Akaike's Information Criterion
aic <- probitglm.out$aic
aic
# Model Coefficients
beta <- probitglm.out$coefficients
beta
# Model Residuals
residuals <- probitglm.out$residuals
residuals
# Fitted Probabilities
fitted.probs <- probitglm.out$fitted.values
fitted.probs
# Covariance of the coefficients.
covb <- probitsummary$cov.unscaled
covb
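# The standard errors reported by summary() are the square roots of the diagonal of this
# matrix (the dispersion is fixed at 1 for the binomial family):
sqrt(diag(covb))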
# Now, get the underlying latent index, z, from some of this stuff and show how to get
# the set of probabilities for each case the hard way. Note that these correspond
# to fitted.probs above.
beta <- probitglm.out$coefficients
x <- cbind(1,as.matrix(Spector[,1:3]))
z <- x %*% beta
z
pz <- pnorm(c(z))
pz
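# Verify that the hand-computed probabilities match glm's fitted values:
all.equal(pz, unname(fitted.probs))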
# Generating the model predictions and
a prediction success table.
Predict <- recode(fitted.probs,
'lo:.499999=0 ; .5:hi=1; ; ', as.factor.result=FALSE)
Predict
table(Predict,GRADE)
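# A one-number summary of the table: the proportion of cases predicted correctly.
mean(Predict == GRADE)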
# Calculate marginal effects with all variables at their means from the probit
# coefficients and a scale factor. These can be interpreted much like slopes.
# See Table 21.1 on p. 675 of Greene.
beta <- probitglm.out$coefficients
xbar <- as.matrix(colMeans(cbind(1,Spector[1:3])))
zxbar <- t(xbar) %*% beta
scalefactor <- dnorm(zxbar)
scalefactor
margin <- scalefactor * beta[2:4]
margin
# Calculate marginal effects with all variables at their means from the logit
# coefficients and a scale factor. Again, these can be interpreted much like slopes.
# Compare the results to those on p. 675 of Greene.
beta <- logitglm.out$coefficients
xbar <- as.matrix(colMeans(cbind(1,Spector[1:3])))
zxbar <- t(xbar) %*% beta
lambda <- 1/(1+exp(-zxbar))
scalefactor <- lambda * (1-lambda)
scalefactor
margin <- scalefactor * beta[2:4]
margin
# The Zelig library is more flexible, allowing us to do the same things as are done
# in STATA with Clarify, so we switch to Zelig for the remainder of the lesson.
# You might want to visit the Zelig web site to learn more about this program.
# It is at http://gking.harvard.edu/zelig. Look especially at the documentation,
# either online or in PDF format.
# Note that the shift to logit from probit is as easy as changing 'model = "probit"'
# to 'model = "logit"'.
library(Zelig)
probit.out <- zelig(GRADE ~ GPA + TUCE + PSI, model = "probit", data = Spector)
summary(probit.out)
# Execute the following to get the auxiliary outputs from probit.out and the summary object.
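# (A sketch, mirroring the glm step above; it assumes the fitted zelig object can be
# inspected like a glm object in this version of Zelig.)
names(probit.out)
names(summary(probit.out))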
# Now for interpretation of the model.
# Simulate the predicted probability of making an A, PSI=0, rest at mean
x.out <- setx(probit.out, PSI = 0)
s.out <- sim(probit.out, x = x.out)
summary(s.out)
# Simulate the predicted probability of making an A, PSI=1, rest at mean
x.out <- setx(probit.out, PSI = 1)
s.out <- sim(probit.out, x = x.out)
summary(s.out)
# We could just take the difference between these to get a first difference. However,
# Zelig can do this in one step.
# Simulated first difference and risk ratios when shifting PSI from 0 to 1
psi.low <- setx(probit.out, PSI = 0)
psi.high <- setx(probit.out, PSI = 1)
s.out <- sim(probit.out, x = psi.low, x1 = psi.high)
summary(s.out)
# You can also plot the results from the simulations in Zelig.
plot(s.out)
# Another example
# First difference when shifting GPA from 2 to 3 with PSI=0 and TUCE at mean
gpa.low <- setx(probit.out, GPA = 2, PSI = 0)
gpa.high <- setx(probit.out, GPA = 3, PSI = 0)
s.out <- sim(probit.out, x = gpa.low, x1 = gpa.high)
summary(s.out)
plot(s.out)
# ROC plots can be used to gauge the relative fit of two models using prediction success.
# In Zelig's rocplot, the better model is the one that comes closest to the top-right corner
# of the graph; a completely uninformative model falls along the diagonal reference line.
# For example, consider the models below, where PSI has been omitted from the second model.
probit1.out <- zelig(GRADE ~ GPA + TUCE + PSI, model="probit", data=Spector)
probit2.out <- zelig(GRADE ~ GPA + TUCE, model="probit", data=Spector)
rocplot(probit1.out, probit2.out)
# Again, all that is needed to move to logit is changing the model = "probit" option
# to model = "logit". Just change the output object to logit.out for the other
# calculations. The only calculation that is different is for the marginal effects.
# Let's run the logit and calculate the marginal effects, leaving the other calculations
# above for you to do on your own.
logit.out <- zelig(GRADE ~ GPA + TUCE + PSI, model = "logit", data = Spector)
summary(logit.out)
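# The marginal effects at the means from the logit coefficients, reusing the
# scale-factor approach from the glm section above (a sketch; it assumes
# coefficients() extracts the estimates from the zelig object, as it does below).
beta <- coefficients(logit.out)
xbar <- as.matrix(colMeans(cbind(1, Spector[1:3])))
lambda <- 1/(1 + exp(-t(xbar) %*% beta))
margin <- c(lambda * (1 - lambda)) * beta[2:4]
margin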
# Now let's estimate a Probit the hard way and obtain the interesting quantities.
# First, create the data matrix, dependent vector, and other necessary quantities.
x <- cbind(1,GPA,TUCE,PSI)
y <- as.matrix(GRADE)
z <- as.matrix(PSI)
K <- ncol(x)
K
k <- ncol(z)
k
n <- nrow(x)
n
K1 <- K + 1
K1
Kk <- K + k
Kk
# Define Probit Log Likelihood
llik.probit <- function(par, X, Y){
  # X and Y are rebuilt from the global x and y, so the arguments mainly document intent.
  Y <- as.matrix(y)
  X <- as.matrix(x)
  b <- as.matrix(par[1:K])
  phi <- pnorm(X %*% b, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
  sum(Y*log(phi) + (1-Y)*log(1-phi))
}
# Fit Probit Model
values.start <- lm(GRADE ~ GPA + TUCE + PSI)$coef
mod.probit <- optim(values.start, llik.probit, Y=y, X=x, method="BFGS",
                    control=list(maxit=2000, fnscale=-1, trace=0), hessian=T)
mod.probit
# Save the log likelihood for later use; this is the restricted (homoskedastic) model
# for the LR test below.
LR <- mod.probit$value
LR
# Calculate the variance matrix from the Hessian matrix.
v <- -solve(mod.probit$hessian)
v
# Calculate the standard errors from the variance matrix.
se <- sqrt(diag(v))
se
# Calculate the z statistics from the coefficients and standard errors
b <- mod.probit$par
b
zstat <- b/se
zstat
# Calculate p values from the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together in a table.
table <- cbind(b,se,zstat,pz)
table
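# These hand-rolled estimates should agree closely with glm's (up to optimizer tolerance):
summary(probitglm.out)$coefficients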
# Obtain the underlying latent index, z.
z <- x %*% b
z
# Calculate the probabilities for each value of z
pz <- pnorm(c(z))
pz
# Now let's estimate a heteroskedastic probit model and test for heteroskedasticity using a
# likelihood ratio test. Posit that the heteroskedasticity is a function of PSI.
# Re-create z here, since z was overwritten by the latent index above.
z <- as.matrix(PSI)
# Define Heteroskedastic Probit Log-Likelihood
llik.hprobit <- function(par, X, Y, Z){
  Y <- as.matrix(y)
  X <- as.matrix(x)
  Z <- as.matrix(z)
  b <- as.matrix(par[1:K])
  g <- as.matrix(par[K1:Kk])
  phi <- pnorm((X %*% b)/exp(Z %*% g), mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
  sum(Y*log(phi) + (1-Y)*log(1-phi))
}
# Fit Heteroskedastic Probit Model
values.start <- c(coefficients(probit.out), 1)
values.start
mod.hprobit <- optim(values.start, llik.hprobit, Y=y, X=x, Z=z, method="BFGS",
                     control=list(maxit=2000, fnscale=-1, trace=0), hessian=T)
mod.hprobit
# Save the log likelihood for later use; this is the unrestricted (heteroskedastic) model
# for the LR test below.
LU <- mod.hprobit$value
LU
# Calculate the variance matrix from the Hessian matrix.
v <- -solve(mod.hprobit$hessian)
v
# Calculate the standard errors from the variance matrix.
se <- sqrt(diag(v))
se
# Calculate the z statistics from the coefficients and standard errors
b <- mod.hprobit$par
b
zstat <- b/se
zstat
# Calculate p values from the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together in a table.
table <- cbind(b,se,zstat,pz)
table
# Obtain the underlying latent index, zh.
zh <- x %*% b[1:4]
zh
# Calculate the probabilities for each value of zh. Note that the heteroskedastic model
# scales the index by exp(z*gamma), with gamma = b[5].
pz <- pnorm(c(zh)/exp(c(z)*b[5]))
pz
# Do a likelihood ratio test for the heteroskedastic model vs. the homoskedastic model
j <- Kk - K1 + 1   # number of restrictions (here, 1)
lrtest <- -2*(LR-LU)
lrtest
lrpvalue <- 1-pchisq(lrtest, df=j)
lrpvalue
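# For comparison, the Wald test of gamma = 0 implied by the z statistic in the table above:
zstat[5]^2
1-pchisq(zstat[5]^2, df=1)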
# Now estimate a logit the hard way.
# Define Logit Log Likelihood
llik.logit <- function(par, X, Y){
  Y <- as.matrix(y)
  X <- as.matrix(x)
  b <- as.matrix(par[1:K])
  Lambda <- 1/(1+exp(-X %*% b))
  sum(Y*log(Lambda) + (1-Y)*log(1-Lambda))
}
# Fit Logit Model
values.start <- lm(GRADE ~ GPA + TUCE + PSI)$coef
mod.logit <- optim(values.start, llik.logit, Y=y, X=x, method="BFGS",
                   control=list(maxit=2000, fnscale=-1, trace=0), hessian=T)
mod.logit
# Save the log likelihood for later use
LR <- mod.logit$value
LR
# Calculate the variance matrix from the Hessian matrix.
v <- -solve(mod.logit$hessian)
v
# Calculate the standard errors from the variance matrix.
se <- sqrt(diag(v))
se
# Calculate the z statistics from the coefficients and standard errors
b <- mod.logit$par
b
zstat <- b/se
zstat
# Calculate p values from the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together in a table.
table <- cbind(b,se,zstat,pz)
table
# Obtain the underlying logit latent index, z.
z <- x %*% b
z
# Calculate the logit probabilities for each value of z
pz <- 1/(1+exp(-c(z)))
pz
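# As a final check, compare the hand-computed probabilities with the fitted values from
# the glm logit fit above; they should agree up to optimizer tolerance.
cbind(pz, glm.fitted = logitglm.out$fitted.values)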