码迷,mamicode.com
首页 > 编程语言 > 详细

<<< Java提取网页源码

时间:2014-07-07 11:02:39      阅读:193      评论:0      收藏:0      [点我收藏+]

标签:blog   http   java   os   io   line   

 

package com.sevennight;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Command-line entry point that downloads a web page through
 * {@link GetWebContent} and saves the returned text to a local file.
 */
public class zidongwenzi {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.baidu.com";

    /** Local file the page is saved to when no output path is given. */
    private static final String DEFAULT_OUTPUT_PATH = "F:/WebContent.txt";

    /**
     * Fetches a page and writes its text to a file.
     *
     * @param args optional arguments: {@code args[0]} is the URL to fetch
     *             (defaults to {@code http://www.baidu.com}) and {@code args[1]}
     *             is the output file path (defaults to {@code F:/WebContent.txt})
     * @throws IOException if the output file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        String url = args.length > 0 ? args[0] : DEFAULT_URL;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        GetWebContent con = new GetWebContent();
        // try-with-resources flushes and closes the writer even when the
        // download or the write fails, so the file handle is never leaked.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputPath))) {
            bw.write(con.getWebCon(url));
        }
    }
}
/**
 * Downloads the text behind a URL and returns it as a single string.
 */
class GetWebContent {

    /** Charset used by {@link #getWebCon(String)} to decode the response. */
    private static final String DEFAULT_CHARSET = "GBK";

    /**
     * Fetches the content at {@code domain}, decoding it as GBK.
     *
     * @param domain the URL to read, in a form accepted by {@link java.net.URL}
     * @return the lines read, each terminated by {@code '\n'}; if an error
     *         occurs, the exception's {@code toString()} is appended to
     *         whatever was read so far
     */
    public String getWebCon(String domain) {
        return getWebCon(domain, DEFAULT_CHARSET);
    }

    /**
     * Fetches the content at {@code domain}, decoding it with the given charset.
     *
     * <p>Each line read is also echoed to standard output. This method does
     * not propagate exceptions: any failure is reported on standard error and
     * its description is appended to the returned text.
     *
     * @param domain      the URL to read, in a form accepted by {@link java.net.URL}
     * @param charsetName name of the charset the page is encoded in
     * @return the lines read, each terminated by {@code '\n'}, followed by the
     *         exception text if reading failed
     */
    public String getWebCon(String domain, String charsetName) {
        StringBuilder sb = new StringBuilder();
        try {
            java.net.URL url = new java.net.URL(domain);
            // The raw stream is its own resource so it is still closed if
            // constructing the reader fails (e.g. unsupported charset name).
            try (java.io.InputStream raw = url.openStream();
                 BufferedReader in = new BufferedReader(new InputStreamReader(raw, charsetName))) {
                String line;
                while ((line = in.readLine()) != null) {
                    // readLine() strips the line terminator; normalise to '\n'.
                    sb.append(line).append('\n');
                    System.out.println(line);
                }
            }
        } catch (Exception e) {
            // Best effort: surface the failure in the result instead of throwing.
            sb.append(e.toString());
            System.err.println(e);
            System.err.println("Usage:   java   HttpClient   <URL>   [<filename>]");
        }
        return sb.toString();
    }
}

  

<<< Java提取网页源码,布布扣,bubuko.com

<<< Java提取网页源码

标签:blog   http   java   os   io   line   

原文地址:http://www.cnblogs.com/7night/p/3820419.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!