首页
Web开发
Windows程序
编程语言
数据库
移动开发
系统相关
微信
其他好文
会员
首页
>
Web开发
> 详细
(转)HttpURLConnection模拟登录后添加cookie读取网页
时间:
2016-05-09 14:03:11
阅读:
581
评论:
0
收藏:
0
[点我收藏+]
标签:
package fileTest;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Demo scraper: logs in with an HTTP POST, keeps the returned cookies, then
 * downloads a serialized ("lianzai") thread index, follows every link found in
 * the posts and stores the text of each linked post in a local file.
 *
 * <p>All pages are decoded as GBK. HTML is parsed with plain string searches,
 * so the markers below must match the page markup exactly.
 */
public class ConnTest {

    /** Charset used to decode every downloaded page. */
    private static final String PAGE_CHARSET = "gbk";
    /** Marker that ends the opening tag of a post body. */
    private static final String CONTENT_BEGIN = "class=\"d_post_content\">";
    /** Marker that ends a post body. */
    private static final String CONTENT_END = "</p>";
    private static final String TITLE_BEGIN = "<h1>";
    private static final String TITLE_END = "</h1>";
    private static final String HREF_BEGIN = "<a href=\"";
    private static final String HREF_END = "\"";
    /** Old-style thread link, e.g. {@code http://tieba.baidu.com/f?kz=1127473409}. */
    private static final String KZ_PREFIX = "http://tieba.baidu.com/f?kz=";
    /** New-style thread link, e.g. {@code http://tieba.baidu.com/p/1127473409}. */
    private static final String POST_PREFIX = "http://tieba.baidu.com/p/";
    /** Query suffix that switches a thread to "thread starter only" mode. */
    private static final String SEE_LZ_SUFFIX = "?see_lz=1";

    /**
     * Logs in, walks index pages 1..3, downloads every linked post and writes
     * the collected text and the list of failed URLs to two files.
     *
     * @param args unused
     * @throws Exception if login, an index page download, or file output fails
     */
    public static void main(String args[]) throws Exception {
        String lianzaiUrl = "http://tieba.baidu.com/p/1243174814?pn=";
        String loginAction = "https://passport.baidu.com/?login?";

        // Fetch the login cookies.
        String cookie = getCookie("test", "test", loginAction);
        if (!cookie.contains("USERID=")) {
            System.out.println("登录失败");
            System.exit(1);
        }

        StringBuilder result = new StringBuilder();
        StringBuilder errorList = new StringBuilder();
        for (int page = 1; page <= 3; page++) {
            String[] all = getUrl(lianzaiUrl + page).split(";");
            // Visit the address of every linked post.
            for (int x = 0; x < all.length; x++) {
                String postUrl = all[x];
                if (postUrl.length() == 0) {
                    // "".split(";") yields one empty entry when no link was found.
                    continue;
                }
                String content;
                try {
                    content = doRead(cookie, postUrl);
                } catch (IOException e) {
                    // One unreachable or malformed link must not abort the whole
                    // run; it is recorded in the error list instead.
                    content = null;
                }
                if (null != content && !"".equals(content)) {
                    result.append(content);
                } else {
                    errorList.append(postUrl + "\r\n");
                }
            }
        }

        writeFile("F:\\遮天.txt", result.toString());
        writeFile("F:\\errorList.txt", errorList.toString());
    }

    /**
     * Downloads one post page (sending the given cookie) and extracts its title
     * and the text of all post bodies.
     *
     * @param cookie value for the {@code Cookie} request header
     * @param url    address of the post page
     * @return the title, a line break, the concatenated post bodies (with
     *         {@code <br>} / {@code </br>} turned into line breaks) and a final
     *         line break; {@code null} when the page contains no
     *         {@code <h1>...</h1>} title
     * @throws IOException if the URL is malformed or the download fails
     */
    public static String doRead(String cookie, String url) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        try {
            conn.setRequestProperty("Cookie", cookie);
            return parsePost(readBody(conn), url);
        } finally {
            // Runs on every path, including the "no title" early return.
            conn.disconnect();
        }
    }

    /**
     * Collects all hyperlinks found inside the posts of a serialized thread page.
     * Every link is rewritten to "thread starter only" mode.
     *
     * @param lianzaiUrl address of the index page
     * @return the rewritten links, each followed by {@code ;}; empty when the
     *         page contains no links
     * @throws Exception if the URL is malformed or the download fails
     */
    public static String getUrl(String lianzaiUrl) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(lianzaiUrl).openConnection();
        try {
            return extractLinks(readBody(conn));
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Logs in with a form POST and returns the cookies set by the response.
     *
     * @param username    login name, sent as form field {@code username}
     * @param password    password, sent as form field {@code password}
     * @param loginAction URL the form is posted to
     * @return the {@code name=value} part of every {@code Set-Cookie} response
     *         header, each followed by {@code ;}; empty when none was sent
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String getCookie(String username, String password, String loginAction) throws Exception {
        // Form values must be URL-encoded, otherwise '&', '=', '+' or spaces in
        // the credentials corrupt the request body.
        String param = "username=" + java.net.URLEncoder.encode(username, "UTF-8")
                + "&password=" + java.net.URLEncoder.encode(password, "UTF-8");

        HttpURLConnection conn = (HttpURLConnection) new URL(loginAction).openConnection();
        try {
            conn.setDoInput(true);
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");

            OutputStream out = conn.getOutputStream();
            try {
                out.write(param.getBytes("UTF-8"));
                out.flush();
            } finally {
                out.close();
            }

            // Collect the cookies.
            StringBuilder cookies = new StringBuilder();
            for (int i = 0; ; i++) {
                String key = conn.getHeaderFieldKey(i);
                if (key == null) {
                    if (i == 0) {
                        // Header 0 may be the status line, which has no key.
                        continue;
                    }
                    break;
                }
                if (key.equalsIgnoreCase("set-cookie")) {
                    String cookieVal = conn.getHeaderField(i);
                    // Keep only "name=value"; attributes after ';' are dropped.
                    // A cookie without attributes has no ';' at all.
                    int attrStart = cookieVal.indexOf(";");
                    if (attrStart >= 0) {
                        cookieVal = cookieVal.substring(0, attrStart);
                    }
                    cookies.append(cookieVal).append(";");
                }
            }
            return cookies.toString();
        } finally {
            conn.disconnect();
        }
    }

    /** Reads the whole response body as GBK text; line terminators are dropped. */
    private static String readBody(HttpURLConnection conn) throws IOException {
        InputStream in = conn.getInputStream();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, PAGE_CHARSET));
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
            return body.toString();
        } finally {
            in.close();
        }
    }

    /** Extracts title and post bodies from a post page; {@code null} if it has no title. */
    private static String parsePost(String html, String url) {
        int titleTag = html.indexOf(TITLE_BEGIN);
        // Search for the closing tag only after the opening one, so a stray
        // "</h1>" earlier in the page cannot produce an inverted range.
        int titleEnd = titleTag < 0 ? -1 : html.indexOf(TITLE_END, titleTag + TITLE_BEGIN.length());
        if (titleTag < 0 || titleEnd < 0) {
            System.out.println("帖子不存在,url:" + url);
            return null;
        }
        String title = html.substring(titleTag + TITLE_BEGIN.length(), titleEnd);
        System.out.println("正在读取帖子:" + title + "...");

        StringBuilder content = new StringBuilder(title).append("\r\n");
        int pos = 0;
        while (true) {
            int begin = html.indexOf(CONTENT_BEGIN, pos);
            if (begin < 0) {
                break;
            }
            begin += CONTENT_BEGIN.length();
            int end = html.indexOf(CONTENT_END, begin);
            if (end < 0) {
                // Unterminated post body (e.g. truncated page): stop here.
                break;
            }
            content.append(html, begin, end);
            pos = end + CONTENT_END.length();
        }

        String text = content.toString();
        text = text.replace("<br>", "\r\n");
        text = text.replace("</br>", "\r\n");
        return text + "\r\n";
    }

    /** Extracts all links inside post bodies, rewritten to "thread starter only" mode. */
    private static String extractLinks(String html) {
        StringBuilder urlBuf = new StringBuilder();
        int pos = 0;
        while (true) {
            int begin = html.indexOf(CONTENT_BEGIN, pos);
            if (begin < 0) {
                break;
            }
            begin += CONTENT_BEGIN.length();
            int end = html.indexOf(CONTENT_END, begin);
            if (end < 0) {
                // Unterminated post body: nothing reliable left to scan.
                break;
            }
            appendLinks(html.substring(begin, end), urlBuf);
            pos = end + CONTENT_END.length();
        }
        return urlBuf.toString();
    }

    /** Appends every link of one post body to {@code urlBuf}, each followed by ';'. */
    private static void appendLinks(String content, StringBuilder urlBuf) {
        int pos = 0;
        while (true) {
            int begin = content.indexOf(HREF_BEGIN, pos);
            if (begin < 0) {
                return;
            }
            begin += HREF_BEGIN.length();
            int end = content.indexOf(HREF_END, begin);
            if (end < 0) {
                // Attribute value without closing quote: ignore the rest.
                return;
            }
            String href = content.substring(begin, end).trim();
            // Switch the link to "thread starter only" mode:
            //   http://tieba.baidu.com/p/1196506653 -> http://tieba.baidu.com/p/1196506653?see_lz=1
            //   http://tieba.baidu.com/f?kz=1127473409 -> http://tieba.baidu.com/p/1127473409?see_lz=1
            if (href.startsWith(KZ_PREFIX)) {
                String kz = href.substring(KZ_PREFIX.length());
                href = POST_PREFIX + kz.trim() + SEE_LZ_SUFFIX;
            } else {
                href += SEE_LZ_SUFFIX;
            }
            urlBuf.append(href).append(";");
            pos = end + HREF_END.length();
        }
    }

    /** Writes {@code text} to {@code path}, closing the file even when writing fails. */
    private static void writeFile(String path, String text) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(path)));
        try {
            writer.write(text);
            writer.flush();
        } finally {
            writer.close();
        }
    }
}
(转)HttpURLConnection模拟登录后添加cookie读取网页
标签:
原文地址:http://www.cnblogs.com/rwind/p/5473680.html
踩
(
0
)
赞
(
0
)
举报
评论
一句话评论(
0
)
登录后才能评论!
分享档案
更多>
2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)
周排行
更多
36.VUE — 认识 Webpack 和 安装
2021-07-28
【PHP】上传图片翻转问题
2021-07-28
php对数字进行万、亿的转化
2021-07-28
五个 .NET 性能小贴士
2021-07-28
Three.js中显示坐标轴、平面、球体、四方体
2021-07-28
.net 5+ 知新:【1】 .Net 5 基本概念和开发环境搭建
2021-07-27
1.html,css
2021-07-27
基于Docker搭建 Php-fpm + Nginx 环境
2021-07-27
nginx + http + svn
2021-07-27
Kubernetes kube-proxy的代理 iptables和ipvs
2021-07-26
友情链接
兰亭集智
国之画
百度统计
站长统计
阿里云
chrome插件
新版天听网
关于我们
-
联系我们
-
留言反馈
© 2014
mamicode.com
版权所有 联系我们:gaon5@hotmail.com
迷上了代码!