m1 <- lm(I(log(price)) ~ I(carat^(1/3)), data = diamonds)
m2 <- update(m1, ~ . + carat)
m3 <- update(m2, ~ . + cut)
m4 <- update(m3, ~ . + color)
m5 <- update(m4, ~ . + clarity)
mtable(m1, m2, m3, m4, m5)
diamondsbig$logprice <- log(diamondsbig$price)
m1 <- lm(logprice ~ I(carat^(1/3)), diamondsbig[diamondsbig$price < 10000 & diamondsbig$cert == "GIA", ])
m2 <- update(m1, ~ . + carat)
m3 <- update(m2, ~ . + cut)
m4 <- update(m3, ~ . + color)
m5 <- update(m4, ~ . + clarity)
mtable(m1, m2, m3, m4, m5, sdigits = 3)
# Predictions
thisDiamond = data.frame(carat = 1.00, cut = "V.Good", color = "I", clarity="VS1")
modelEstimate = predict(m5, newdata = thisDiamond, interval="prediction", level = .95)
dat = data.frame(m4$model, m4$residuals)
with(dat, sd(m4.residuals))
with(subset(dat, carat > .9 & carat < 1.1), sd(m4.residuals))
dat$resid <- as.numeric(dat$m4.residuals)
ggplot(aes(y = resid, x = round(carat, 2)), data = dat) +
geom_line(stat = "summary", fun.y = sd)