标签:lan 使用 插入数据 list header row tab ref util
使用之前单应用的队列仓库存储抓取的url存在以下两个弊端:
我们本节实现以下功能:Redis实现高低优先级队列
在https://mvnrepository.com/搜索出redis的jar包依赖
123456
<!-- https://mvnrepository.com/artifact/redis.clients/jedis --><dependency>  <groupId>redis.clients</groupId>  <artifactId>jedis</artifactId>  <version>2.9.0</version></dependency>
实现向redis中插入数据,提取数据的工具类
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
   /** * 操作redis数据库的工具类 * @author yxm * @date 2018/11/15 23:04:20 */public class RedisUtil {	/**	 * redis中高优先级	 */	public static String highkey = "spider.highlevel";	/**	 * redis中低优先级	 */	public static String lowkey = "spider.lowlevel";	JedisPool jedisPool = null;	public RedisUtil(){		JedisPoolConfig poolConfig = new JedisPoolConfig();		poolConfig.setMaxId 大专栏  java-爬虫-14-采用Redis创建url仓库,实现分布式爬虫le(10);		poolConfig.setMaxTotal(100);		poolConfig.setMaxWaitMillis(10000);		poolConfig.setTestOnBorrow(true);		jedisPool = new JedisPool(poolConfig, "192.168.20.129", 6379);	}       	/**	 * 查询	 * @param key	 * @param start	 * @param end	 * @return	 */	public List<String> lrange(String key,int start,int end){		Jedis resource = jedisPool.getResource();				List<String> list = resource.lrange(key, start, end);		jedisPool.returnResourceObject(resource);		return list;			}       	/**	 * 添加	 * @param Key	 * @param url	 */	public void add(String Key, String url) {		Jedis resource = jedisPool.getResource();		resource.lpush(Key, url);		jedisPool.returnResourceObject(resource);	}       	/**	 * 获取	 * @param key	 * @return	 */	public String poll(String key) {		Jedis resource = jedisPool.getResource();		String result = resource.rpop(key);		jedisPool.returnResourceObject(resource);		return result;	}		public static void main(String[] args) {		RedisUtil redisUtil = new RedisUtil();		String url = "http://tv.youku.com/search/index/_page40177_1_cmodid_40177";		redisUtil.add(highkey, url);	}}
 
  
123456789101112131415161718192021222324
   /** * Redis url仓库实现类 * @author yxm * @date 2018/11/15 23:22:10 */public class RedisRepositoryService implements IRepositoryService {    RedisUtil reidsUtil =  new RedisUtil();    @Override    public String poll() {        String url = reidsUtil.poll(RedisUtil.highkey);        if(StringUtils.isBlank(url)){            url = reidsUtil.poll(RedisUtil.lowkey);        }        return url;    }    @Override    public void addHighLevel(String url) {        reidsUtil.add(RedisUtil.highkey,url);    }    @Override    public void addLowLevel(String url) {        reidsUtil.add(RedisUtil.lowkey,url);    }}

java-爬虫-14-采用Redis创建url仓库,实现分布式爬虫
标签:lan 使用 插入数据 list header row tab ref util
原文地址:https://www.cnblogs.com/dajunjun/p/11711043.html