标签:des blog http java color os
| 
       1 
      2 
      3 
      4 
      5 
      6 
      7 
      8 
      9 
      10 
      11 
      12 
      13 
      14 
      15 
      16 
      17 
      18 
      19 
      20 
      21 
      22 
      23 
      24 
      25 
      26 
      27 
      28 
      29 
      30 
      31 
      32 
      33 
      34 
      35 
      36 
      37 
      38 
      39 
      40 
      41 
      42 
      43 
      44 
      45 
      46 
      47 
      48 
      49 
      50 
      51 
      52 
      53 
      54 
      55 
      56 
      57 
      58 
      59 
      60 
      61 
      62 
      63 
      64 
      65 
      66 
      67 
      68 
      69 
      70 
      71 
      72 
      73 
      74 
      75 
      76 
      77 
      78 
      79 
      80 
      81 
      82 
      83 
      84 
      85 
      86 
      87 
      88 
      89 
      90 
      91 
      92 
      93 
      94 
      95 
      96 
      97 
      98 
      99 
      100 
      101 
      102 
      103 
      104 
      105 
      106 
      107 
      108 
      109 
      110 
      111 
      112 
      113 
      114 
      115 
      116 
      117 
      118 
      119 
      120 
      121 
      122 
      123 
      124 
      125 
      126 
      127 
      128 
      129 
      130 
      131 
      132 
      133 
      134 
      135 
      136 
      137 
      138 
      139 
      140 
      141 
      142 
      143 
      144 
      145 
      146 
      147 
      148 
      149 
      150 
      151 
      152 
      153 
      154 
      155 
      156 
      157 
      158 
      159 
      160 
      161 
      162 
      163 
      164 
      165 
      166 
      167 
      168 
      169 
      170 
      171 
      172 
      173 
      174 
      175 
      176 
      177 
      178 
      179 
      180 
      181 
      182 
      183 
      184 
      185 
      186 
      187 
      188 
      189 
      190 
      191 
      192 
      193 
      194 
      195 
      196 
      197 
      198 
      199 
      200 
      201 
      202 
      203 
      204 
      205 
      206 
      207 
      208 
      209 
      210 
      211 
      212 
      213 
      214 
      215 
      216 
      217 
      218 
      219 
      220 
      221 
      222 
      223 
      224 
      225 
      226 
      227 
      228 
      229 
      230 
      231 
      232 
      233 
      234 
      235 
      236 
      237 
      238 
      239 
      240 
      241 
      242 
      243 
      244 
      245 
      246 
      247 
      248 
      249 
      250 
      251 
      252 
      253 
      254 
      255 
      256 
      257 
      258 
      259 
      260 
      261 
      262 
      263 
      264 
      265 
      266 
      267 
      268 
      269 
      270 
      271 
      272 
      273 
      274 
      275 
      276 
      277 
      278 
      279 
      280 
      281 
      282 
      283 
      284 
      285 
      286 
      287 
      288 
      289 
      290 
      291 
      292 
      293 
      294 
      295 
      296 
      297 
      298 
      299 
      300 
      301 
      302 
      303 
      304 
      305 
      306 
      307 
      308 
      309 
      310 
      311 
      312 
      313 
      314 
      315 
      316 
      317 
      318 
      319 
      320 
      321 
      322 
      323 
      324 
      325 
      326 
      327 
      328 
      329 
      330 
      331 
      332 
      333 
      334 
      335 
      336 
      337 
      338 
      339 
      340 
      341 
      342 
      343 
      344 
      345 
      346 
      347 
      348 
      349 
      350 
      351 
      352 
      353 
      354 
      355 
      356 
      357 
      358 
      359 
      360 
      361 
      362 
      363 
      364 
      365 
      366 
      367 
      368 
      369 
      370 
      371 
      372 
      373 
      374 
      375 
      376 
      377 
      378 
      379 
      380 
      381 
      382 
      383 
      384 
      385 
      386 
      387 
      388 
      389 
      390 
      391 
      392 
      393 
      394 
      395 
      396 
      397 
      398 
      399 
      400 
      401 
      402 
      403 
      404 
      405 
      406 
      407 
      408 
      409 
      410 
      411 
      412 
      413 
      414 
      415 
      416 
      417 
      418 
      419  | 
    
      import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads the images referenced by a web page.
 *
 * <p>The page is fetched as UTF-8 text, its {@code <img>} tags are located with
 * regular expressions, every absolute http/https URL found inside those tags is
 * collected, and each URL is saved to a time-stamped directory.
 *
 * <p>Three download strategies are implemented: sequential ({@code Download}),
 * one thread per chunk of URLs ({@code ThreadDownload}) and one thread per chunk
 * with connect/read timeouts ({@code TOThreadDownload}). {@link #main} uses the
 * sequential one.
 *
 * @author swinglife
 */
public class CatchImage {

    /** Character set used to decode the fetched page. */
    private static final Charset ENCODING = StandardCharsets.UTF_8;

    /** Base directory used by {@link #main} when no directory is given on the command line. */
    private static final String DEFAULT_SAVE_DIR = "E:\\Imagesave";

    /** Size of the buffer used when copying streams. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Matches one complete {@code <img ...>} tag that carries a {@code src}-like attribute.
     * {@code [^>]*} keeps the match inside a single tag, so several tags on one line are
     * reported separately.
     */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b[^>]*\\bsrc\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);

    /** Matches an absolute http/https URL up to the next quote, {@code >} or whitespace. */
    private static final Pattern IMG_URL = Pattern.compile("https?:[^\"'>\\s]+");

    /** Matches a quoted run of text that starts with "http" and ends in an image suffix. */
    private static final Pattern QUOTED_IMG_URL =
            Pattern.compile("[\"'](http.+\\.(jpg|JPG|png|PNG|gif|GIF))[\"']");

    /** Matches the scheme prefix that marks the start of a URL inside a longer string. */
    private static final Pattern URL_SCHEME = Pattern.compile("https?:");

    /** File suffixes (without the dot) that are treated as images. */
    private static final String[] PIC_SUFFIXES = { "jpg", "JPG", "gif", "GIF", "png", "PNG" };

    /** Image URLs collected by {@link #getpicsrc(String)}. */
    private static final List<String> pList = new ArrayList<>();

    /**
     * Fetches a page and downloads every image it references, one after another.
     *
     * @param args {@code args[0]} is the page URL (required); {@code args[1]} is the base
     *             save directory (optional, defaults to {@code E:\Imagesave}). A
     *             time-stamped sub-directory from {@link #saveDiff()} is appended to it.
     * @throws Exception if the page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            System.err.println("Usage: java CatchImage <page-url> [save-dir]");
            System.exit(1);
            return;
        }
        String pageUrl = args[0];
        String baseDir = args.length > 1 ? args[1] : DEFAULT_SAVE_DIR;

        CatchImage cm = new CatchImage();
        // Fetch the HTML text.
        String html = cm.getHTML(pageUrl);
        // Locate the <img> tags.
        List<String> imgTags = cm.getImageUrl(html);
        // Pull the URLs out of the tags; drop repeats so nothing is fetched twice.
        List<String> imgSrc = RemoveRepeated(cm.getImageSrc(imgTags));
        // Download the images.
        cm.Download(imgSrc, baseDir + saveDiff());
    }

    /**
     * Fetches the content of a URL and decodes it as UTF-8.
     *
     * <p>All bytes are collected before decoding so that a multi-byte character split
     * across two reads is decoded correctly and no stale buffer content is appended.
     *
     * @param url address of the page
     * @return the page content as text
     * @throws Exception if the URL is malformed or the content cannot be read
     */
    private String getHTML(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        try (InputStream in = connection.getInputStream()) {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] buf = new byte[BUFFER_SIZE];
            int length;
            while ((length = in.read(buf)) != -1) {
                bytes.write(buf, 0, length);
            }
            return new String(bytes.toByteArray(), ENCODING);
        }
    }

    /**
     * Finds the {@code <img>} tags in an HTML document.
     *
     * @param html HTML text
     * @return the full text of every matching tag, in document order
     */
    private List<String> getImageUrl(String html) {
        List<String> tags = new ArrayList<String>();
        Matcher matcher = IMG_TAG.matcher(html);
        while (matcher.find()) {
            tags.add(matcher.group());
        }
        return tags;
    }

    /**
     * Extracts the absolute http/https URLs contained in a list of {@code <img>} tags.
     *
     * @param listImageUrl tag texts as returned by {@link #getImageUrl(String)}
     * @return every URL found, in order of appearance (may contain repeats)
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> urls = new ArrayList<String>();
        for (String tag : listImageUrl) {
            Matcher matcher = IMG_URL.matcher(tag);
            while (matcher.find()) {
                urls.add(matcher.group());
            }
        }
        return urls;
    }

    /**
     * Scans raw HTML for quoted image URLs and records them via {@link #getpicsrc(String)}.
     *
     * @param html HTML text
     */
    private void getImageSrc(String html) {
        Matcher m = QUOTED_IMG_URL.matcher(html);
        while (m.find()) {
            getpicsrc(m.group(1));
        }
    }

    /**
     * Extracts the image URLs contained in a string and appends them to the shared
     * list of collected URLs.
     *
     * @param src text that may contain one or more http/https URLs
     */
    public void getpicsrc(String src) {
        pList.addAll(extractImageUrls(src));
    }

    /**
     * Splits a string at every "http:" / "https:" prefix and, for each piece, keeps the
     * part up to and including its first image suffix. Pieces without an image suffix
     * are dropped.
     *
     * <p>Package-private so that it can be exercised directly.
     *
     * @param src text that may contain several URLs run together; may be {@code null}
     * @return the image URLs found, in order of appearance; never {@code null}
     */
    static List<String> extractImageUrls(String src) {
        List<String> urls = new ArrayList<String>();
        if (src == null) {
            return urls;
        }
        // Offsets at which a URL starts.
        List<Integer> starts = new ArrayList<Integer>();
        Matcher scheme = URL_SCHEME.matcher(src);
        while (scheme.find()) {
            starts.add(scheme.start());
        }
        for (int i = 0; i < starts.size(); i++) {
            int from = starts.get(i);
            int to = i + 1 < starts.size() ? starts.get(i + 1) : src.length();
            String segment = src.substring(from, to);
            int end = endOfFirstImageSuffix(segment);
            if (end > 0) {
                urls.add(segment.substring(0, end));
            }
        }
        return urls;
    }

    /**
     * Returns the index just past the earliest ".suffix" occurrence in {@code segment},
     * or -1 when the segment contains no image suffix. The dot is part of the search so
     * that a path element such as "/jpgs/" is not mistaken for a suffix.
     */
    private static int endOfFirstImageSuffix(String segment) {
        int bestDot = -1;
        int bestEnd = -1;
        for (String suffix : PIC_SUFFIXES) {
            int dot = segment.indexOf("." + suffix);
            if (dot >= 0 && (bestDot < 0 || dot < bestDot)) {
                bestDot = dot;
                bestEnd = dot + 1 + suffix.length();
            }
        }
        return bestEnd;
    }

    /**
     * Removes repeated elements while keeping the first occurrence of each, in order.
     *
     * @param result list to de-duplicate; {@code null} is treated as empty
     * @return a new mutable list without repeats
     */
    public static List<String> RemoveRepeated(List<String> result) {
        if (result == null) {
            return new ArrayList<String>();
        }
        // LinkedHashSet keeps insertion order and makes the membership test O(1).
        return new ArrayList<String>(new LinkedHashSet<String>(result));
    }

    /**
     * Tells whether a character sequence is {@code null}, empty or whitespace only.
     *
     * @param cs sequence to test
     * @return {@code true} if there is no non-whitespace character
     */
    public static boolean isBlank(CharSequence cs) {
        if (cs == null) {
            return true;
        }
        for (int i = 0, n = cs.length(); i < n; i++) {
            if (!Character.isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Downloads images one after another on the calling thread.
     *
     * <p>Each file is named after the last path element of its URL. A failure on one
     * URL is reported and does not stop the remaining downloads.
     *
     * @param listImgSrc URLs to download
     * @param savedir    directory to save into; created if missing
     */
    private void Download(List<String> listImgSrc, String savedir) {
        File dir = new File(savedir);
        for (String url : listImgSrc) {
            try {
                ensureDirectory(dir);
                File target = new File(dir, fileNameOf(url));
                try (InputStream in = new URL(url).openStream()) {
                    copyToFile(in, target);
                }
                System.out.println("*^_^*");
            } catch (IOException | RuntimeException e) {
                // Best effort: keep going with the next image.
                System.out.println("-_-!");
                System.err.println("Failed to download " + url + ": " + e);
            }
        }
    }

    /**
     * Downloads images on several threads.
     *
     * <p>The list is cut into consecutive chunks of {@code tnum} URLs and one
     * {@link DThread} is started per chunk.
     *
     * @param listImgSrc URLs to download
     * @param savedir    directory to save into
     * @param tnum       number of URLs handled by each thread; must be positive
     * @throws IllegalArgumentException if {@code tnum} is not positive
     */
    private void ThreadDownload(List<String> listImgSrc, String savedir, int tnum) {
        requirePositiveChunk(tnum);
        for (int i = 0; i < listImgSrc.size(); i += tnum) {
            new DThread(savedir, tnum, listImgSrc, i).start();
        }
    }

    /**
     * 2014-4-3 10:52:38 — worker thread that downloads one chunk of images.
     *
     * @author: ITWANG
     */
    class DThread extends Thread {
        private final String savedir;
        private final int tnum;
        private final List<String> listImgSrc;
        private final int bnum;

        /**
         * @param savedir    directory to save into
         * @param tnum       maximum number of URLs this thread handles
         * @param listImgSrc full list of URLs
         * @param bnum       index of the first URL this thread handles
         */
        public DThread(String savedir, int tnum, List<String> listImgSrc, int bnum) {
            this.savedir = savedir;
            this.tnum = tnum;
            this.listImgSrc = listImgSrc;
            this.bnum = bnum;
        }

        @Override
        public void run() {
            // The last chunk may be shorter than tnum; never read past the list end.
            int end = chunkEnd(bnum, tnum, listImgSrc.size());
            for (int i = bnum; i < end; i++) {
                String url = listImgSrc.get(i);
                try {
                    File dir = new File(savedir);
                    ensureDirectory(dir);
                    // Random name avoids clashes between images with equal file names.
                    String imageName = UUID.randomUUID().toString() + extensionOf(url);
                    System.out.println(savedir + imageName);
                    try (InputStream in = new URL(url).openStream()) {
                        copyToFile(in, new File(dir, imageName));
                    }
                    System.out.println("*^_^*");
                } catch (IOException | RuntimeException e) {
                    System.out.println("-_-!");
                    System.err.println("Failed to download " + url + ": " + e);
                }
            }
        }
    }

    /**
     * Downloads images on several threads, with a timeout on each connection.
     *
     * <p>The list is cut into consecutive chunks of {@code tnum} URLs and one
     * {@link TODThread} is started per chunk.
     *
     * @param listImgSrc URLs to download
     * @param savedir    directory to save into
     * @param tnum       number of URLs handled by each thread; must be positive
     * @param timeout    connect and read timeout in milliseconds
     * @throws IllegalArgumentException if {@code tnum} is not positive
     */
    private void TOThreadDownload(List<String> listImgSrc, String savedir, int tnum, int timeout) {
        requirePositiveChunk(tnum);
        for (int i = 0; i < listImgSrc.size(); i += tnum) {
            new TODThread(savedir, tnum, listImgSrc, i, timeout).start();
        }
    }

    /**
     * 2014-4-3 10:52:07 — worker thread that downloads one chunk of images with a timeout.
     *
     * @author: ITWANG
     */
    class TODThread extends Thread {
        private final String savedir;
        private final int tnum;
        private final List<String> listImgSrc;
        private final int bnum;
        private final int timeout;

        /**
         * @param savedir    directory to save into
         * @param tnum       maximum number of URLs this thread handles
         * @param listImgSrc full list of URLs
         * @param bnum       index of the first URL this thread handles
         * @param timeout    connect and read timeout in milliseconds
         */
        public TODThread(String savedir, int tnum, List<String> listImgSrc, int bnum, int timeout) {
            this.savedir = savedir;
            this.tnum = tnum;
            this.listImgSrc = listImgSrc;
            this.bnum = bnum;
            this.timeout = timeout;
        }

        @Override
        public void run() {
            // The last chunk may be shorter than tnum; never read past the list end.
            int end = chunkEnd(bnum, tnum, listImgSrc.size());
            for (int i = bnum; i < end; i++) {
                String url = listImgSrc.get(i);
                try {
                    String imageName = UUID.randomUUID().toString() + extensionOf(url);
                    if (getPic(url, savedir, imageName, timeout)) {
                        System.out.println("*^_^*");
                    } else {
                        System.out.println("-_-!");
                    }
                } catch (Exception e) {
                    // Thread boundary: report and continue with the next image.
                    System.out.println("下载异常");
                    System.err.println("Failed to download " + url + ": " + e);
                }
            }
        }
    }

    /**
     * Downloads one image with an HTTP GET request.
     *
     * @param purl     URL of the image
     * @param folder   directory to save into; created if missing
     * @param filename name of the file to write inside {@code folder}
     * @param timeout  connect and read timeout in milliseconds
     * @return {@code true} if the server answered 200 and the file was written,
     *         {@code false} for any other response code
     * @throws Exception if the URL is malformed, the connection fails or times out,
     *                   or the file cannot be written
     */
    public boolean getPic(String purl, String folder, String filename, int timeout) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(purl).openConnection();
        try {
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block the thread forever.
            conn.setReadTimeout(timeout);
            conn.setRequestMethod("GET");
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return false;
            }
            File dir = new File(folder);
            ensureDirectory(dir);
            try (InputStream is = conn.getInputStream()) {
                copyToFile(is, new File(dir, filename));
            }
            return true;
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Builds a time-stamped directory fragment.
     *
     * @return the current time formatted as {@code yyyy-MM-dd-HH-mm-ss}, with one
     *         backslash in front and one behind
     */
    public static String saveDiff() {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
        return "\\" + format.format(System.currentTimeMillis()) + "\\";
    }

    /** Rejects a chunk size that would make the chunking loops spin forever. */
    private static void requirePositiveChunk(int tnum) {
        if (tnum <= 0) {
            throw new IllegalArgumentException("tnum must be positive: " + tnum);
        }
    }

    /** Returns the exclusive end index of a chunk, clamped to the list size. */
    private static int chunkEnd(int start, int count, int size) {
        // long arithmetic so that a huge count cannot overflow into a negative end.
        return (int) Math.min((long) start + count, (long) size);
    }

    /** Creates {@code dir} (and parents) if needed; fails if it still does not exist. */
    private static void ensureDirectory(File dir) throws IOException {
        // The second isDirectory() covers another thread creating it concurrently.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory: " + dir);
        }
    }

    /** Copies everything from {@code in} into {@code target}, replacing existing content. */
    private static void copyToFile(InputStream in, File target) throws IOException {
        try (OutputStream out = new FileOutputStream(target)) {
            byte[] buf = new byte[BUFFER_SIZE];
            int length;
            while ((length = in.read(buf)) != -1) {
                out.write(buf, 0, length);
            }
        }
    }

    /** Returns the URL without its query string and fragment. */
    private static String stripQueryAndFragment(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        return url.substring(0, end);
    }

    /**
     * Returns the last path element of a URL, or a random name when the URL ends in
     * a slash and therefore has none.
     */
    private static String fileNameOf(String url) {
        String path = stripQueryAndFragment(url);
        String name = path.substring(path.lastIndexOf('/') + 1);
        return name.isEmpty() ? UUID.randomUUID().toString() : name;
    }

    /**
     * Returns the extension (including the dot) of the last path element of a URL, or
     * an empty string when that element has no dot.
     */
    private static String extensionOf(String url) {
        String path = stripQueryAndFragment(url);
        String name = path.substring(path.lastIndexOf('/') + 1);
        int dot = name.lastIndexOf('.');
        return dot < 0 ? "" : name.substring(dot);
    }
}
标签:des blog http java color os
原文地址:http://www.cnblogs.com/itwang/p/3699727.html