码迷,mamicode.com
首页 > 编程语言 > 详细

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

时间:2020-05-04 23:06:28      阅读:113      评论:0      收藏:0      [点我收藏+]

标签:coding   sea   ati   形参   findall   apache   ESS   int   dom   

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

1、使用Jsoup爬取京东商城的商品数据

1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:

2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:

3)将爬取到的商品对象存储到集群中,代码截图:

4)完成对商品信息的查询、分页、删除和更新操作,代码截图:

 

applicationContext.xml

 1 <?xml version="1.0" encoding="UTF-8"?>
 2 <beans xmlns="http://www.springframework.org/schema/beans"
 3        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
 4        xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
 5        xsi:schemaLocation="http://www.springframework.org/schema/beans
 6        http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd  http://www.springframework.org/schema/data/elasticsearch http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">
 7     <!--开启包扫描-->
 8     <context:component-scan base-package="com.elasticsearch"/>
 9     <!--配置集群信息-->
10     <elasticsearch:transport-client id="esClient" cluster-name="my-cluster" cluster-nodes="127.0.0.1:9301,
11                     127.0.0.1:9302,127.0.0.1:9303"/>
12     <!--注入ESTemplate模板-->
13     <bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
14         <constructor-arg name="client" ref="esClient"/>
15     </bean>
16     <!--扫描Mapper(mybatis中直接操作数据),在对应的包下BeanMapper,Bean在ES中是类型(表)-->
17     <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>
18 </beans>

pom.xml

  1 <?xml version="1.0" encoding="UTF-8"?>
  2 
  3 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5   <modelVersion>4.0.0</modelVersion>
  6 
  7   <groupId>com.elasticsearch</groupId>
  8   <artifactId>eshm0430</artifactId>
  9   <version>1.0-SNAPSHOT</version>
 10 
 11   <name>eshm0430</name>
 12   <!-- FIXME change it to the project‘s website -->
 13   <url>http://www.example.com</url>
 14 
 15   <properties>
 16     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 17     <maven.compiler.source>1.7</maven.compiler.source>
 18     <maven.compiler.target>1.7</maven.compiler.target>
 19   </properties>
 20 
 21   <dependencies>
 22     <dependency>
 23       <groupId>junit</groupId>
 24       <artifactId>junit</artifactId>
 25       <version>4.11</version>
 26       <scope>test</scope>
 27     </dependency>
 28 
 29 
 30     <dependency>
 31       <groupId>org.springframework.data</groupId>
 32       <artifactId>spring-data-elasticsearch</artifactId>
 33       <version>3.1.9.RELEASE</version>
 34       <exclusions>
 35         <exclusion>
 36           <groupId>org.elasticsearch.plugin</groupId>
 37           <artifactId>transport‐netty4‐client</artifactId>
 38         </exclusion>
 39       </exclusions>
 40     </dependency>
 41 
 42     <dependency>
 43       <groupId>org.springframework</groupId>
 44       <artifactId>spring-test</artifactId>
 45       <version>5.1.5.RELEASE</version>
 46       <scope>test</scope>
 47     </dependency>
 48     <dependency>
 49       <groupId>junit</groupId>
 50       <artifactId>junit</artifactId>
 51       <version>4.12</version>
 52       <scope>compile</scope>
 53     </dependency>
 54     <dependency>
 55       <groupId>org.springframework</groupId>
 56       <artifactId>spring-test</artifactId>
 57       <version>5.2.5.RELEASE</version>
 58       <scope>compile</scope>
 59     </dependency>
 60     <!--// jsoup-->
 61     <dependency>
 62       <groupId>org.jsoup</groupId>
 63       <artifactId>jsoup</artifactId>
 64       <version>1.11.3</version>
 65     </dependency>
 66   </dependencies>
 67 
 68   <build>
 69     <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
 70       <plugins>
 71         <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 72         <plugin>
 73           <artifactId>maven-clean-plugin</artifactId>
 74           <version>3.1.0</version>
 75         </plugin>
 76         <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 77         <plugin>
 78           <artifactId>maven-resources-plugin</artifactId>
 79           <version>3.0.2</version>
 80         </plugin>
 81         <plugin>
 82           <artifactId>maven-compiler-plugin</artifactId>
 83           <version>3.8.0</version>
 84         </plugin>
 85         <plugin>
 86           <artifactId>maven-surefire-plugin</artifactId>
 87           <version>2.22.1</version>
 88         </plugin>
 89         <plugin>
 90           <artifactId>maven-jar-plugin</artifactId>
 91           <version>3.0.2</version>
 92         </plugin>
 93         <plugin>
 94           <artifactId>maven-install-plugin</artifactId>
 95           <version>2.5.2</version>
 96         </plugin>
 97         <plugin>
 98           <artifactId>maven-deploy-plugin</artifactId>
 99           <version>2.8.2</version>
100         </plugin>
101         <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
102         <plugin>
103           <artifactId>maven-site-plugin</artifactId>
104           <version>3.7.1</version>
105         </plugin>
106         <plugin>
107           <artifactId>maven-project-info-reports-plugin</artifactId>
108           <version>3.0.0</version>
109         </plugin>
110       </plugins>
111     </pluginManagement>
112     <plugins>
113       <plugin>
114         <groupId>org.apache.maven.plugins</groupId>
115         <artifactId>maven-compiler-plugin</artifactId>
116         <configuration>
117           <source>8</source>
118           <target>8</target>
119         </configuration>
120       </plugin>
121     </plugins>
122   </build>
123 </project>

Product

 1 package com.elasticsearch.entity;
 2 import org.springframework.data.annotation.Id;
 3 import org.springframework.data.elasticsearch.annotations.Document;
 4 import org.springframework.data.elasticsearch.annotations.Field;
 5 import org.springframework.data.elasticsearch.annotations.FieldType;
 6 
 7 @Document(indexName = "my-index3", type = "Product")
 8 public class Product {
 9     @Id
10 
11     @Field(type = FieldType.Long,index = false,store = true)
12     private Long id;
13 
14     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
15     private String pname;
16 
17     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
18     private String pprice;
19 
20     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
21     private String padress;
22 
23     public Long getId() {
24         return id;
25     }
26 
27     public void setId(Long id) {
28         this.id = id;
29     }
30 
31     public String getPname() {
32         return pname;
33     }
34 
35     public void setPname(String pname) {
36         this.pname = pname;
37     }
38 
39     public String getPprice() {
40         return pprice;
41     }
42 
43     public void setPprice(String pprice) {
44         this.pprice = pprice;
45     }
46 
47     public String getPadress() {
48         return padress;
49     }
50 
51     public void setPadress(String padress) {
52         this.padress = padress;
53     }
54 
55     @Override
56     public String toString() {
57         return "Product{" +
58                 "id=" + id +
59                 ", pname=‘" + pname + ‘\‘‘ +
60                 ", pprice=‘" + pprice + ‘\‘‘ +
61                 ", padress=‘" + padress + ‘\‘‘ +
62                 ‘}‘;
63     }
64 }

 

ProductMapper

 1 package com.elasticsearch.mapper;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import org.springframework.data.domain.Pageable;
 5 import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
 6 import org.springframework.stereotype.Repository;
 7 
 8 import java.util.List;
 9 
10 @Repository
11 public interface ProductMapper extends ElasticsearchCrudRepository <Product,Long> {
12 
13 
14     // 根据标题查询并分页
15     List<Product> findByPname(String pname , Pageable pageable);
16 
17 }

ProductService

 1 package com.elasticsearch.service;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import org.springframework.data.domain.Pageable;
 5 
 6 import java.util.List;
 7 import java.util.Optional;
 8 
 9 
10 public interface ProductService {
11 
12     // 新增文档的方法
13     void save(Product product);
14 
15     //  根据文档查询商品信息
16     Optional<Product> findById(Long id);
17 
18     // 根据id删除
19     void deleteById(Long id);
20 
21     // 根据id更新文件
22     void updateById(Product product);
23 
24     // 根据标题查询并分页
25     List<Product> findByPname(String pname ,Pageable pageable);
26 
27 
28 }

ProductServiceImp

 1 package com.elasticsearch.service.Imp;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import com.elasticsearch.mapper.ProductMapper;
 5 import com.elasticsearch.service.ProductService;
 6 import org.springframework.beans.factory.annotation.Autowired;
 7 import org.springframework.data.domain.Pageable;
 8 import org.springframework.stereotype.Service;
 9 
10 import java.util.List;
11 import java.util.Optional;
12 
13 @Service("ProductService")
14 public class ProductServiceImp implements ProductService {
15     @Autowired
16     private ProductMapper productMapper;
17 
18     @Override
19     public void save(Product product) {
20         productMapper.save(product);
21     }
22 
23     @Override
24     public Optional<Product> findById(Long id) {
25         return productMapper.findById(id);
26     }
27 
28     @Override
29     public void deleteById(Long id) {
30         productMapper.deleteById(id);
31     }
32 
33     @Override
34     public void updateById(Product product) {
35         productMapper.save(product);
36     }
37 
38     @Override
39     public List<Product> findByPname(String pname, Pageable pageable) {
40         return productMapper.findByPname(pname,pageable);
41     }
42 }

SpringDataESTest

  1 package com.elasticsearch;
  2 import com.elasticsearch.entity.Product;
  3 import com.elasticsearch.service.ProductService;
  4 import org.jsoup.Jsoup;
  5 import org.jsoup.nodes.Document;
  6 import org.jsoup.nodes.Element;
  7 import org.jsoup.select.Elements;
  8 import org.junit.Test;
  9 import org.junit.runner.RunWith;
 10 import org.springframework.beans.factory.annotation.Autowired;
 11 import org.springframework.data.domain.PageRequest;
 12 import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
 13 import org.springframework.test.context.ContextConfiguration;
 14 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 15 
 16 import java.io.IOException;
 17 import java.util.List;
 18 import java.util.Optional;
 19 
 20 @RunWith(SpringJUnit4ClassRunner.class)
 21 @ContextConfiguration(locations = "classpath:applicationContext.xml")
 22 public class SpringDataESTest {
 23 
 24     @Autowired
 25     private ElasticsearchTemplate elasticsearchTemplate;
 26 
 27     @Autowired
 28     private ProductService productService;
 29 
 30     @Test   //import org.junit.Test;   不要自己创建一个名称为Test类
 31     public void createIndex() {
 32         //创建空的索引库
 33         elasticsearchTemplate.createIndex(Product.class);
 34         //添加映射
 35         elasticsearchTemplate.putMapping(Product.class);
 36     }
 37 
 38     // 创建
 39     @Test
 40     public void createDocument(){
 41         Document doc = null;
 42         String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&psort=3&click=0";
 43         // String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=4cbce742a5634b66996fa09045840c0e";
 44         try {
 45             doc = Jsoup.connect(url).get();
 46             //Element:页面中的所有 ul > li , li特点是  class = gl-item,使用类选择器
 47             Elements liLists = doc.select(".gl-item");
 48             long i=0;
 49             for (Element li : liLists) {
 50                 //分析 li  结构
 51                 //1)获取图片地址 class= p-img ,查找img标签,获取 img 的src 属性的值
 52                 //String pimgsrc = li.select(".p-img").select("img").attr("src");
 53                 //System.out.println(pimgsrc);
 54 
 55                 //2)获取商品价格: class = p-price ,查找  i 标签,获取 i 标签包含的内容 <i>12324</i>
 56                 String pprice = li.select(".p-price").select("i").text();
 57                 System.out.println(pprice);
 58 
 59                 //3)获取商品名称: class= p-name p-name-type-2,查找 em 标签,获取 em 标签的内容
 60                 String pname = li.select(".p-name").select(".p-name-type-2").select("em").text();
 61                 String pname2 = li.select("div[class=‘p-name p-name-type-2‘]").select("em").text();
 62 
 63                 System.out.println(pname);
 64                 System.out.println(pname2);
 65 
 66                 //4)获取商品地址
 67                 String padress = li.select(".p-img").select("a").attr("href");
 68                 System.out.println(padress);
 69                 i++;
 70                 Product product = new Product();
 71                 product.setId(i);
 72                 product.setPname(pname);
 73                 product.setPprice(pprice);
 74                 product.setPadress(padress);
 75 
 76                 productService.save(product);
 77             }
 78         } catch (IOException e) {
 79             e.printStackTrace();
 80         }
 81     }
 82 
 83     @Test
 84     public void getDocumentById(){
 85         Optional<Product> byId = productService.findById(1L);
 86         Product product = byId.get();
 87         System.out.println("根据id查询"+product);
 88 
 89     }
 90 
 91     // 根据id删除文件
 92     @Test
 93     public void deleteDocumentById(){
 94         productService.deleteById(30L);
 95 
 96     }
 97 
 98     @Test
 99     // 根据id更新文件
100     public void updateDocumentById(){
101         Product product = new Product();
102         product.setId(29L);
103         product.setPprice("2");
104         product.setPname("根据id更新的名字");
105         product.setPadress("更新的");
106         productService.updateById(product);
107         System.out.println("更新后的文件"+product);
108     }
109 
110     // 根据title查询 并且分页
111     @Test
112     public void getDocumentByPnameAndPage(){
113         List<Product> byPnameAndPage = productService.findByPname("华为", PageRequest.of(0, 10));
114         System.out.println(byPnameAndPage);
115     }
116 
117 
118 
119 
120     // 创建
121 //    @Test
122 //    public void createDocument(){
123 //        for (Long i = 1L;i <= 10L; i++){
124 //            // 批量创建Hello对象
125 //            Hello hello = new Hello();
126 //            hello.setId(i);
127 //            hello.setTitle("新增的title"+i);
128 //            hello.setContent("新增的content"+i);
129 //            helloService.save(hello);
130 //        }
131 //
132 //    }
133 //
134 //    // 根据id查询
135 //    @Test
136 //    public void getDocumentById(){
137 //        Optional<Hello> helloOptional = helloService.findById(1L);
138 //        Hello hello = helloOptional.get();
139 //        System.out.println("根据id查询hello:"+hello);
140 //    }
141 //
142 //    // 查询所有hello
143 //    @Test
144 //    public void getAllDocument(){
145 //        Iterable<Hello> all = helloService.findAll();
146 //
147 //        //方法一
148 ////        Iterator<Hello> iterator = all.iterator();// 10个
149 ////        while (iterator.next() != null){
150 ////            Hello hello = iterator.next();
151 ////            System.out.println("查询所有hello"+hello);
152 ////        }
153 //
154 //        // 方法二
155 //        // forEach(Consumer),Consumer接口通过@FunctionallInterface修饰
156 //        // 表示他是一个函数式
157 //        // 如果一个方法是形参是函数接口,传递形参时可以使用Lambda表达式,特点是使用箭头符号
158 //        // void accept(T t)
159 //        all.forEach(item-> System.out.println("查询所有hello"+item));
160 //
161 //
162 //    }
163 //
164 //    // 根据id更新
165 //    @Test
166 //    public void updateDocumentById(){
167 //        Hello hello = new Hello();
168 //        hello.setId(1L);
169 //        hello.setTitle("更新修改的title");
170 //        hello.setContent("更新修改的Content");
171 //        helloService.save(hello);
172 //        System.out.println("更新后的为"+hello);
173 //    }
174 //
175 //    // 根据id删除文档
176 //    @Test
177 //    public void deleteDocumentById(){
178 //        helloService.deleteById(10L);
179 //    }
180 //
181 //    // 删除所有文档
182 ////    @Test
183 ////    public void deleteAllDocument(){
184 ////        helloService.deleteAll();
185 ////    }
186 //
187 //    // 根据title查询
188 //    @Test
189 //    public void getDocumentByTitle(){
190 //        List<Hello> hs = helloService.findByTitle("新增");
191 //        System.out.println(hs);
192 //    }
193 //
194 //    @Test
195 //    public void getDocumentByTitleAndPage(){
196 //        List<Hello> hs = helloService.findByTitle("新增");
197 //        System.out.println(hs);
198 //
199 //        List<Hello> hs1 =  helloService.findByTitle("新增",  PageRequest.of(1,3));
200 //        System.out.println("---------"+hs1);
201 //    }
202 
203 
204 
205 }

 

 

1

 

 

 

 

 

//        System.out.println(hs);

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

标签:coding   sea   ati   形参   findall   apache   ESS   int   dom   

原文地址:https://www.cnblogs.com/caoxinfang/p/12828662.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!