?? ch06.r
字號:
#-*- R -*-## Script from Fourth Edition of `Modern Applied Statistics with S'# Chapter 6 Linear Statistical Modelslibrary(MASS)library(lattice)options(echo = T,width=65, digits=5, height=9999)trellis.device(postscript, file="ch06.ps", width=8, height=6, pointsize=9)options(contrasts = c("contr.helmert", "contr.poly"))# 6.1 A linear regression examplexyplot(Gas ~ Temp | Insul, whiteside, panel = function(x, y, ...) { panel.xyplot(x, y, ...) panel.lmline(x, y, ...) }, xlab = "Average external temperature (deg. C)", ylab = "Gas consumption (1000 cubic feet)", aspect = "xy", strip = function(...) strip.default(..., style = 1))gasB <- lm(Gas ~ Temp, data = whiteside, subset = Insul=="Before")gasA <- update(gasB, subset = Insul=="After")summary(gasB)summary(gasA)varB <- deviance(gasB)/gasB$df.resid # direct calculationvarB <- summary(gasB)$sigma^2 # alternativegasBA <- lm(Gas ~ Insul/Temp - 1, data = whiteside)summary(gasBA)gasQ <- lm(Gas ~ Insul/(Temp + I(Temp^2)) - 1, data = whiteside)summary(gasQ)$coef# R: options(contrasts = c("contr.helmert", "contr.poly"))gasPR <- lm(Gas ~ Insul + Temp, data = whiteside)anova(gasPR, gasBA)oldcon <- options(contrasts = c("contr.treatment", "contr.poly"))gasBA1 <- lm(Gas ~ Insul*Temp, data = whiteside)summary(gasBA1)$coefoptions(oldcon)# 6.2 Model formulae and model matricesdat <- data.frame(a = factor(rep(1:3, 3)), y = rnorm(9, rep(2:4, 3), 0.1))obj <- lm(y ~ a, dat)(alf.star <- coef(obj))Ca <- contrasts(dat$a) # contrast matrix for `a'drop(Ca %*% alf.star[-1])dummy.coef(obj)N <- factor(Nlevs <- c(0,1,2,4))contrasts(N)contrasts(ordered(N))N2 <- Ncontrasts(N2, 2) <- poly(Nlevs, 2)N2 <- C(N, poly(Nlevs, 2), 2) # alternativecontrasts(N2)fractions(ginv(contr.helmert(n = 4)))Cp <- diag(-1, 4, 5); Cp[row(Cp) == col(Cp) - 1] <- 1Cpfractions(ginv(Cp))# 6.3 Regression diagnostics(hills.lm <- lm(time ~ dist + climb, data = hills))frame()par(fig = c(0, 0.6, 0, 0.55))plot(fitted(hills.lm), studres(hills.lm))abline(h = 0, lty = 2)# identify(fitted(hills.lm), studres(hills.lm), row.names(hills))par(fig = c(0.6, 1, 0, 0.55), pty = "s")qqnorm(studres(hills.lm))qqline(studres(hills.lm))par(pty = "m")hills.hat <- lm.influence(hills.lm)$hatcbind(hills, lev = hills.hat)[hills.hat > 3/35, ]cbind(hills, pred = predict(hills.lm))["Knock Hill", ](hills1.lm <- update(hills.lm, subset = -18))update(hills.lm, subset = -c(7, 18))summary(hills1.lm)summary(update(hills1.lm, weights = 1/dist^2))lm(time ~ -1 + dist + climb, hills[-18, ], weight = 1/dist^2)# hills <- hills # make a local copy (needed in S-PLUS)hills$ispeed <- hills$time/hills$disthills$grad <- hills$climb/hills$dist(hills2.lm <- lm(ispeed ~ grad, data = hills[-18, ]))frame()par(fig = c(0, 0.6, 0, 0.55))plot(hills$grad[-18], studres(hills2.lm), xlab = "grad")abline(h = 0, lty = 2)# identify(hills$grad[-18], studres(hills2.lm), row.names(hills)[-18])par(fig = c(0.6, 1, 0, 0.55), pty = "s")qqnorm(studres(hills2.lm))qqline(studres(hills2.lm))par(pty = "m")hills2.hat <- lm.influence(hills2.lm)$hatcbind(hills[-18,], lev = hills2.hat)[hills2.hat > 1.8*2/34, ]# 6.4 Safe predictionquad1 <- lm(Weight ~ Days + I(Days^2), data = wtloss)quad2 <- lm(Weight ~ poly(Days, 2), data = wtloss)new.x <- data.frame(Days = seq(250, 300, 10), row.names = seq(250, 300, 10))predict(quad1, newdata = new.x)predict(quad2, newdata = new.x)# predict.gam(quad2, newdata = new.x) # S-PLUS only# 6.5 Robust and resistant regression# library(lqs)phones.lm <- lm(calls ~ year, data = phones)attach(phones); plot(year, calls); detach()abline(phones.lm$coef)abline(rlm(calls ~ year, phones, maxit=50), lty = 2, col = 2)abline(lqs(calls ~ year, phones), lty =3, col = 3)# legend(locator(1), lty = 1:3, col = 1:3,# legend = c("least squares", "M-estimate", "LTS"))summary(lm(calls ~ year, data = phones), cor = F)summary(rlm(calls ~ year, maxit = 50, data = phones), cor = F)summary(rlm(calls ~ year, scale.est = "proposal 2", data = phones), cor = F)summary(rlm(calls ~ year, data = phones, psi = psi.bisquare), cor = F)lqs(calls ~ year, data = phones)lqs(calls ~ year, data = phones, method = "lms")lqs(calls ~ year, data = phones, method = "S")summary(rlm(calls ~ year, data = phones, method = "MM"), cor = F)# library(robust) # S-PLUS only# phones.lmr <- lmRob(calls ~ year, data = phones)# summary(phones.lmr)# plot(phones.lmr)hills.lmhills1.lm # omitting Knock Hillrlm(time ~ dist + climb, data = hills)summary(rlm(time ~ dist + climb, data = hills, weights = 1/dist^2, method = "MM"), cor = F)lqs(time ~ dist + climb, data = hills, nsamp = "exact")summary(hills2.lm) # omitting Knock Hillsummary(rlm(ispeed ~ grad, data = hills), cor = F)summary(rlm(ispeed ~ grad, data = hills, method="MM"), cor=F)# summary(lmRob(ispeed ~ grad, data = hills))lqs(ispeed ~ grad, data = hills)# 6.6 Bootstrapping linear modelslibrary(boot)fit <- lm(calls ~ year, data = phones)ph <- data.frame(phones, res = resid(fit), fitted = fitted(fit))ph.fun <- function(data, i) { d <- data d$calls <- d$fitted + d$res[i] coef(update(fit, data=d))}(ph.lm.boot <- boot(ph, ph.fun, R = 999))fit <- rlm(calls ~ year, method = "MM", data = phones)ph <- data.frame(phones, res = resid(fit), fitted = fitted(fit))(ph.rlm.boot <- boot(ph, ph.fun, R = 999))# 6.7 Factorial designs and designed experimentsoptions(contrasts=c("contr.helmert", "contr.poly"))(npk.aov <- aov(yield ~ block + N*P*K, data = npk))summary(npk.aov)alias(npk.aov)coef(npk.aov)options(contrasts=c("contr.treatment", "contr.poly"))npk.aov1 <- aov(yield ~ block + N + K, data = npk)summary.lm(npk.aov1)se.contrast(npk.aov1, list(N == "0", N == "1"), data = npk)model.tables(npk.aov1, type = "means", se = T)mp <- c("-", "+")(NPK <- expand.grid(N = mp, P = mp, K = mp))if(F) {blocks13 <- fac.design(levels = c(2, 2, 2), factor= list(N=mp, P=mp, K=mp), rep = 3, fraction = 1/2)blocks46 <- fac.design(levels = c(2, 2, 2), factor = list(N=mp, P=mp, K=mp), rep = 3, fraction = ~ -N:P:K)NPK <- design(block = factor(rep(1:6, each = 4)), rbind(blocks13, blocks46))i <- order(runif(6)[NPK$block], runif(24))NPK <- NPK[i,] # Randomizedlev <- rep(2, 7)factors <- list(S=mp, D=mp, H=mp, G=mp, R=mp, B=mp, P=mp)(Bike <- fac.design(lev, factors, fraction = ~ S:D:G + S:H:R + D:H:B + S:D:H:P))replications(~ .^2, data=Bike)}# 6.8 An unbalanced four-way layoutattach(quine)table(Lrn, Age, Sex, Eth)Means <- tapply(Days, list(Eth, Sex, Age, Lrn), mean)Vars <- tapply(Days, list(Eth, Sex, Age, Lrn), var)SD <- sqrt(Vars)par(mfrow = c(1, 2), pty="s")plot(Means, Vars, xlab = "Cell Means", ylab = "Cell Variances")plot(Means, SD, xlab = "Cell Means", ylab = "Cell Std Devn.")detach()boxcox(Days+1 ~ Eth*Sex*Age*Lrn, data = quine, singular.ok = T, lambda = seq(-0.05, 0.45, len = 20))logtrans(Days ~ Age*Sex*Eth*Lrn, data = quine, alpha = seq(0.75, 6.5, len = 20), singular.ok = T)quine.hi <- aov(log(Days + 2.5) ~ .^4, quine)quine.nxt <- update(quine.hi, . ~ . - Eth:Sex:Age:Lrn)dropterm(quine.nxt, test = "F")quine.lo <- aov(log(Days+2.5) ~ 1, quine)addterm(quine.lo, quine.hi, test = "F")quine.stp <- stepAIC(quine.nxt, scope = list(upper = ~Eth*Sex*Age*Lrn, lower = ~1), trace = F)quine.stp$anovadropterm(quine.stp, test = "F")quine.3 <- update(quine.stp, . ~ . - Eth:Age:Lrn)dropterm(quine.3, test = "F")quine.4 <- update(quine.3, . ~ . - Eth:Age)quine.5 <- update(quine.4, . ~ . - Age:Lrn)dropterm(quine.5, test = "F")# 6.9 Predicting computer performancepar(mfrow = c(1, 2), pty = "s")boxcox(perf ~ syct + mmin + mmax + cach + chmin + chmax, data = cpus, lambda = seq(0, 1, 0.1))cpus1 <- cpusattach(cpus)for(v in names(cpus)[2:7]) cpus1[[v]] <- cut(cpus[[v]], unique(quantile(cpus[[v]])), include.lowest = T)detach()boxcox(perf ~ syct + mmin + mmax + cach + chmin + chmax, data = cpus1, lambda = seq(-0.25, 1, 0.1))par(mfrow = c(1, 1), pty = "m")set.seed(123)cpus2 <- cpus[, 2:8] # excludes names, authors' predictionscpus2[, 1:3] <- log10(cpus2[, 1:3])#cpus.samp <- sample(1:209, 100)cpus.samp <-c(3, 5, 6, 7, 8, 10, 11, 16, 20, 21, 22, 23, 24, 25, 29, 33, 39, 41, 44, 45,46, 49, 57, 58, 62, 63, 65, 66, 68, 69, 73, 74, 75, 76, 78, 83, 86,88, 98, 99, 100, 103, 107, 110, 112, 113, 115, 118, 119, 120, 122,124, 125, 126, 127, 132, 136, 141, 144, 146, 147, 148, 149, 150, 151,152, 154, 156, 157, 158, 159, 160, 161, 163, 166, 167, 169, 170, 173,174, 175, 176, 177, 183, 184, 187, 188, 189, 194, 195, 196, 197, 198,199, 202, 204, 205, 206, 208, 209)cpus.lm <- lm(log10(perf) ~ ., data = cpus2[cpus.samp, ])test.cpus <- function(fit) sqrt(sum((log10(cpus2[-cpus.samp, "perf"]) - predict(fit, cpus2[-cpus.samp,]))^2)/109)test.cpus(cpus.lm)cpus.lm2 <- stepAIC(cpus.lm, trace=F)cpus.lm2$anovatest.cpus(cpus.lm2)# 6.10 Multiple comparisonsimmer.aov <- aov((Y1 + Y2)/2 ~ Var + Loc, data = immer)summary(immer.aov)model.tables(immer.aov, type = "means", se = T, cterms = "Var")if(F) {multicomp(immer.aov, plot = T)oats1 <- aov(Y ~ N + V + B, data = oats)summary(oats1)multicomp(oats1, focus = "V")multicomp(oats1, focus = "N", comparisons = "mcc", control = 1)lmat <- matrix(c(0,-1,1,rep(0, 11), 0,0,-1,1, rep(0,10), 0,0,0,-1,1,rep(0,9)),,3, dimnames = list(NULL, c("0.2cwt-0.0cwt", "0.4cwt-0.2cwt", "0.6cwt-0.4cwt")))multicomp(oats1, lmat = lmat, bounds = "lower", comparisons = "none")}(tk <- TukeyHSD(immer.aov, which = "Var"))plot(tk)oats1 <- aov(Y ~ N + V + B, data = oats)(tk <- TukeyHSD(oats1, which = "V"))plot(tk)## An alternative under R is to use package multcomp (which requires mvtnorm)## This code is for multcomp >= 0.991-1library(multcomp)## next is slow:(tk <- confint(glht(immer.aov, linfct = mcp(Var = "Tukey"))))plot(tk)confint(glht(oats1, linfct = mcp(V = "Tukey")))lmat <- matrix(c(0,-1,1,rep(0, 11), 0,0,-1,1, rep(0,10), 0,0,0,-1,1,rep(0,9)),,3, dimnames = list(NULL, c("0.2cwt-0.0cwt", "0.4cwt-0.2cwt", "0.6cwt-0.4cwt")))confint(glht(oats1, linfct = mcp(N = t(lmat[2:5, ])), alternative = "greater"))plot(tk)# End of ch06
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -