# R语言之Logistic回归分析

> library(RSADBE)
> data(sat)
> pass_probit <- glm(Pass~Sat,data=sat,binomial(probit))
> summary(pass_probit)
> library(pscl)
> pR2(pass_probit)
> predict(pass_probit,newdata=list(Sat=400),type = "response")
> predict(pass_probit,newdata=list(Sat=700),type = "response")

> library(RSADBE)
> data(sat)
> pass_logistic <- glm(Pass~Sat,data=sat,family = ‘binomial‘)
> summary.glm(pass_logistic)
> pR2(pass_logistic)
> with(pass_logistic, pchisq(null.deviance - deviance, df.null
+ - df.residual, lower.tail = FALSE))
> confint(pass_logistic)
> predict.glm(pass_logistic,newdata=list(Sat=400),type = "response")
> predict.glm(pass_logistic,newdata=list(Sat=700),type = "response")
> sat_x <- seq(400,700, 10)
> pred_l <- predict(pass_logistic,newdata=list(Sat=sat_x),type="response")
> plot(sat_x,pred_l,type="l",ylab="Probability",xlab="Sat_M")

1.使用分类和拟合函数对拟合值排序
2.将排序后的拟合值分为g组，g的值一般选择6-10
3.找到每组观察和预期的数
4.对这些组进行卡方拟合优度检验。

> pass_hat <- fitted(pass_logistic)
> hosmerlem <- function(y, yhat, g=10)     {
+   cutyhat <- cut(yhat,breaks = quantile(yhat, probs=seq(0,1, 1/g)), include.lowest=TRUE)
+      obs = xtabs(cbind(1 - y, y) ~ cutyhat)
+      expect = xtabs(cbind(1 - yhat, yhat) ~ cutyhat)
+      chisq = sum((obs - expect)^2/expect)
+      P = 1 - pchisq(chisq, g - 2)
+   return(list(chisq=chisq,p.value=P))
+                     }
> hosmerlem(pass_logistic\$y, pass_hat)

1.响应残差

2.异常残差

3.皮尔逊残差
4.局部残差
5.沃金残差

> library(RSADBE)
> data(sat)
> pass_logistic <- glm(Pass~Sat,data=sat,family = ‘binomial‘)
> par(mfrow=c(1,3), oma=c(0,0,3,0))
> plot(fitted(pass_logistic), residuals(pass_logistic,"response"), col="red", > xlab="Fitted Values", ylab="Response Residuals")
> points(fitted(pass_probit), residuals(pass_probit,"response"), col="green")
> abline(h=0)
> plot(fitted(pass_logistic), residuals(pass_logistic,"deviance"), col="red", > xlab="Fitted Values", ylab="Deviance Residuals")
> points(fitted(pass_probit), residuals(pass_probit,"deviance"), col="green")
> abline(h=0)
> plot(fitted(pass_logistic), residuals(pass_logistic,"pearson"), col="red",   xlab="Fitted Values", ylab="Pearson Residuals")
> points(fitted(pass_probit), residuals(pass_probit,"pearson"), col="green")
> abline(h=0)
> title(main="Response, Deviance, and Pearson Residuals Comparison for the Logistic and > Probit Models",outer=TRUE)

1.hatvalues值大于2(p+1)/2，可认为观测值有杠杆效应
2.Cooks距离比F分布的10%分位数大，可认为该观测值对参数估计有影响，如果超出50%分位数，则认为是强影响点
3.dfbetas、dffits的经验法则是，如果绝对值大于1，则认为观测值对协变量存在影响

> hatvalues(pass_logistic)
> cooks.distance(pass_logistic)
> dfbetas(pass_logistic)
> dffits(pass_logistic)
> cbind(hatvalues(pass_logistic),cooks.distance(pass_logistic),
dfbetas(pass_logistic),dffits(pass_logistic))
> hatvalues(pass_logistic)>2*(length(pass_logistic\$coefficients)-1)
/length(pass_logistic\$y)
> cooks.distance(pass_logistic)>qf(0.1,length(pass_logistic\$coefficients),
length(pass_logistic\$y)-length(pass_logistic\$coefficients))
> cooks.distance(pass_logistic)>qf(0.5,length(pass_logistic\$coefficients),
length(pass_logistic\$y)-length(pass_logistic\$coefficients))
> par(mfrow=c(1,3))
> plot(dfbetas(pass_logistic)[,1],ylab="DFBETAS - INTERCEPT")
> plot(dfbetas(pass_logistic)[,2],ylab="DFBETAS - SAT")
> plot(dffits(pass_logistic),ylab="DFFITS")

R语言之Logistic回归分析

(0)
(2)

© 2014 mamicode.com 版权所有 京ICP备13008772号-2