标签:style class blog code java http
//登录csdn
// String uri = "https://passport.csdn.net/account/login";
// String html = HttpUtil.DownHtml(uri);
// <input type="hidden" name="lt" value="LT-207426-moK0sGnfCa9aqijJKeLYhFDYiEe2id" />
// <input type="hidden" name="execution" value="e1s1" />
// <input type="hidden" name="_eventId" value="submit" />
// String lt = getGroup_1("name=\"lt\" value=\"(.*?)\"", html);
// String execution = getGroup_1("name=\"execution\" value=\"(.*?)\"", html);
// System.out.println(lt + "\t" + execution);
//
// //构建cookie
// Map<String, String> params = new HashMap<String,String>();
// params.put("_eventId", "submit");
// params.put("execution", execution);
// params.put("lt", lt);
// params.put("password", "biantai123");
// params.put("username", "tidal.melon@gmail.com");
//
// HttpUtil.Post(uri, params);
模拟登录流程:
1 请求host_url
2 从host_url中解析出 隐藏表单 的值 添加到POST_DATA中
3 添加账户,密码到POST_DATA中
4 编码后,发送POST请求
要点1:Java 下,登录请求和后续请求必须复用同一个 HttpClient 实例(例如做成单例),因为 cookie 由该实例保存;换一个新实例,登录状态就丢失了
要点2:POST 的 url 可能跟登录界面的 url 不同。post_url 一般可以从 host_url 的返回结果中得到(通常是登录表单 form 的 action 属性,具体情况自行分析)
5 通过firefox,chrome等相关插件验证登录完成
6 测试需要登录的采集任务
登录插件工厂设计:
设计目标(粗略):
接口设计:
public interface ILogin {
//HttpClient
public HttpClient login(String usr, String pwd, Map<k, v> metadata);
}
example:
/**
 * Example {@link ILogin} plugin for Baidu. This is a placeholder: it performs no
 * real login.
 */
public class BaiduLogin implements ILogin {
    /**
     * Prints a marker line and then reports that the login is not implemented.
     *
     * @param usr      account name (unused by this stub)
     * @param pwd      account password (unused by this stub)
     * @param metadata additional data for the login request (unused by this stub)
     * @param <K>      metadata key type
     * @param <V>      metadata value type
     * @return never returns normally
     * @throws UnsupportedOperationException always, because no login logic exists yet
     */
    @Override
    public <K, V> HttpClient login(String usr, String pwd, Map<K, V> metadata) {
        System.out.println("萤火虫在飞");
        // The sketch had no return statement, which does not compile for a
        // non-void method. Failing loudly is preferred over returning null so a
        // caller cannot mistake this stub for a successful login.
        throw new UnsupportedOperationException("BaiduLogin.login is not implemented yet");
    }
}
/**
 * Returns the shared HttpClient, creating it on the first call.
 *
 * <p>The method is {@code synchronized}, so the check-and-create step runs under
 * the class lock. Design note carried over from the original comment: whether
 * callers should receive the HttpClient itself or only its cookies is still
 * undecided.
 *
 * @return the single HttpClient instance stored in {@code mHttpClient}
 */
public static synchronized HttpClient getSaveHttpClient() {
    // Already built: hand back the existing instance.
    if (mHttpClient != null) {
        return mHttpClient;
    }

    // Register both plain HTTP and HTTPS so the client can talk to either.
    SchemeRegistry schemes = new SchemeRegistry();
    schemes.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
    schemes.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));

    // Basic protocol settings.
    HttpParams httpParams = new BasicHttpParams();
    HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
    HttpProtocolParams.setContentCharset(httpParams, CHARSET);
    HttpProtocolParams.setUseExpectContinue(httpParams, true);

    // Timeouts:
    // - waiting for a connection from the connection pool
    ConnManagerParams.setTimeout(httpParams, 1000);
    // - establishing the connection
    HttpConnectionParams.setConnectionTimeout(httpParams, 2000);
    // - socket timeout (the original note calls this the request timeout)
    HttpConnectionParams.setSoTimeout(httpParams, 4000);

    // Build the client on top of the thread-safe connection manager.
    ClientConnectionManager connectionManager =
            new ThreadSafeClientConnManager(httpParams, schemes);
    mHttpClient = new DefaultHttpClient(connectionManager, httpParams);
    return mHttpClient;
}
HttpClient(http://hc.apache.org/httpcomponents-client-ga/)自动管理了cookie信息,
只需要先传递登录信息执行登录过程,然后直接访问想要的页面,跟访问一个普通的页面没有任何区别,
因为HttpClient已经帮忙发送了Cookie信息。下面的例子实现了这样一个访问的过程。
http://xugou4-yahoo-com-cn.iteye.com/blog/1308457
http://blog.csdn.net/yanzi1225627/article/details/24937439
public class RenRen { // 配置参数 private static String userName = "邮箱地址"; private static String password = "密码"; private static String redirectURL = "http://blog.renren.com/blog/304317577/449470467"; //要抓取的网址 // 登录URL地址 private static String renRenLoginURL = "http://www.renren.com/PLogin.do"; // 用于取得重定向地址 private HttpResponse response; // 在一个会话中用到的httpclient对象 private DefaultHttpClient httpclient = new DefaultHttpClient(); //登录到页面 private boolean login() { //根据登录页面地址初始化httpost对象 HttpPost httpost = new HttpPost(renRenLoginURL); //POST给网站的所有参数 List<NameValuePair> nvps = new ArrayList<NameValuePair>(); nvps.add(new BasicNameValuePair("origURL", redirectURL)); nvps.add(new BasicNameValuePair("domain", "renren.com")); nvps.add(new BasicNameValuePair("isplogin", "true")); nvps.add(new BasicNameValuePair("formName", "")); nvps.add(new BasicNameValuePair("method", "")); nvps.add(new BasicNameValuePair("submit", "登录")); nvps.add(new BasicNameValuePair("email", userName)); nvps.add(new BasicNameValuePair("password", password)); try { httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); response = httpclient.execute(httpost); } catch (Exception e) { e.printStackTrace(); return false; } finally { httpost.abort(); } return true; } //取得重定向地址 private String getRedirectLocation() { Header locationHeader = response.getFirstHeader("Location"); if (locationHeader == null) { return null; } return locationHeader.getValue(); } //根据重定向地址返回内容 private String getText(String redirectLocation) { HttpGet httpget = new HttpGet(redirectLocation); // 创建一个响应处理器 ResponseHandler<String> responseHandler = new BasicResponseHandler(); String responseBody = ""; try { //取得网页内容 responseBody = httpclient.execute(httpget, responseHandler); } catch (Exception e) { e.printStackTrace(); responseBody = null; } finally { httpget.abort(); httpclient.getConnectionManager().shutdown();//关闭连接 } return responseBody; } public void printText() { if (login()) { String redirectLocation = getRedirectLocation(); if 
(redirectLocation != null) { System.out.println(getText(redirectLocation)); } } } public static void main(String[] args) { RenRen renRen = new RenRen(); renRen.printText(); } }
httpclient 模拟登录 及线程安全,布布扣,bubuko.com
标签:style class blog code java http
原文地址:http://www.cnblogs.com/i80386/p/3779252.html