使用Spring Data ElasticSearch+Jsoup操作集群数据存储
阅读原文时间:2023年07月08日阅读:1

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

1、使用Jsoup爬取京东商城的商品数据

1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:

2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:

3)将爬取到的商品对象存储到集群中,代码截图:

4)完成对商品信息的查询、分页、删除和更新操作,代码截图:

applicationContext.xml

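<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
       xsi:schemaLocation="
        http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
        http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd
        http://www.springframework.org/schema/data/elasticsearch http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">

    <!-- 注意:原文此处的 bean 定义在页面提取时丢失,以下内容根据本文 Java 代码的依赖关系重建;
         cluster-nodes 与 cluster-name 为占位值,请替换为实际的集群地址和集群名称。 -->

    <!-- 扫描 Service 实现类 -->
    <context:component-scan base-package="com.elasticsearch.service"/>

    <!-- 扫描 Spring Data ElasticSearch 的 Repository 接口 -->
    <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>

    <!-- 连接 ElasticSearch 集群的客户端 -->
    <elasticsearch:transport-client id="esClient" cluster-nodes="127.0.0.1:9300" cluster-name="elasticsearch"/>

    <!-- 测试类中注入的 ElasticsearchTemplate -->
    <bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
        <constructor-arg name="client" ref="esClient"/>
    </bean>
</beans>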

pom.xml

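<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.elasticsearch</groupId>
  <artifactId>eshm0430</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>eshm0430</name>
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-elasticsearch</artifactId>
      <version>3.1.9.RELEASE</version>
      <exclusions>
        <exclusion>
          <groupId>org.elasticsearch.plugin</groupId>
          <artifactId>transport-netty4-client</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>5.1.5.RELEASE</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>5.2.5.RELEASE</version>
      <scope>compile</scope>
    </dependency>

    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>

  </dependencies>

  <build>
    <pluginManagement>
      <plugins>
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>8</source>
          <target>8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>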

Product

1 package com.elasticsearch.entity;
2 import org.springframework.data.annotation.Id;
3 import org.springframework.data.elasticsearch.annotations.Document;
4 import org.springframework.data.elasticsearch.annotations.Field;
5 import org.springframework.data.elasticsearch.annotations.FieldType;
6
7 @Document(indexName = "my-index3", type = "Product")
8 public class Product {
9 @Id
10
11 @Field(type = FieldType.Long,index = false,store = true)
12 private Long id;
13
14 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
15 private String pname;
16
17 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
18 private String pprice;
19
20 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
21 private String padress;
22
23 public Long getId() {
24 return id;
25 }
26
27 public void setId(Long id) {
28 this.id = id;
29 }
30
31 public String getPname() {
32 return pname;
33 }
34
35 public void setPname(String pname) {
36 this.pname = pname;
37 }
38
39 public String getPprice() {
40 return pprice;
41 }
42
43 public void setPprice(String pprice) {
44 this.pprice = pprice;
45 }
46
47 public String getPadress() {
48 return padress;
49 }
50
51 public void setPadress(String padress) {
52 this.padress = padress;
53 }
54
55 @Override
56 public String toString() {
57 return "Product{" +
58 "id=" + id +
59 ", pname='" + pname + '\'' +
60 ", pprice='" + pprice + '\'' +
61 ", padress='" + padress + '\'' +
62 '}';
63 }
64 }

ProductMapper

1 package com.elasticsearch.mapper;
2
3 import com.elasticsearch.entity.Product;
4 import org.springframework.data.domain.Pageable;
5 import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
6 import org.springframework.stereotype.Repository;
7
8 import java.util.List;
9
10 @Repository
11 public interface ProductMapper extends ElasticsearchCrudRepository {
12
13
14 // 根据标题查询并分页
15 List findByPname(String pname , Pageable pageable);
16
17 }

ProductService

1 package com.elasticsearch.service;
2
3 import com.elasticsearch.entity.Product;
4 import org.springframework.data.domain.Pageable;
5
6 import java.util.List;
7 import java.util.Optional;
8
9
10 public interface ProductService {
11
12 // 新增文档的方法
13 void save(Product product);
14
15 // 根据文档查询商品信息
16 Optional findById(Long id);
17
18 // 根据id删除
19 void deleteById(Long id);
20
21 // 根据id更新文件
22 void updateById(Product product);
23
24 // 根据标题查询并分页
25 List findByPname(String pname ,Pageable pageable);
26
27
28 }

ProductServiceImp

1 package com.elasticsearch.service.Imp;
2
3 import com.elasticsearch.entity.Product;
4 import com.elasticsearch.mapper.ProductMapper;
5 import com.elasticsearch.service.ProductService;
6 import org.springframework.beans.factory.annotation.Autowired;
7 import org.springframework.data.domain.Pageable;
8 import org.springframework.stereotype.Service;
9
10 import java.util.List;
11 import java.util.Optional;
12
13 @Service("ProductService")
14 public class ProductServiceImp implements ProductService {
15 @Autowired
16 private ProductMapper productMapper;
17
18 @Override
19 public void save(Product product) {
20 productMapper.save(product);
21 }
22
23 @Override
24 public Optional findById(Long id) {
25 return productMapper.findById(id);
26 }
27
28 @Override
29 public void deleteById(Long id) {
30 productMapper.deleteById(id);
31 }
32
33 @Override
34 public void updateById(Product product) {
35 productMapper.save(product);
36 }
37
38 @Override
39 public List findByPname(String pname, Pageable pageable) {
40 return productMapper.findByPname(pname,pageable);
41 }
42 }

SpringDataESTest

1 package com.elasticsearch;
2 import com.elasticsearch.entity.Product;
3 import com.elasticsearch.service.ProductService;
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.nodes.Element;
7 import org.jsoup.select.Elements;
8 import org.junit.Test;
9 import org.junit.runner.RunWith;
10 import org.springframework.beans.factory.annotation.Autowired;
11 import org.springframework.data.domain.PageRequest;
12 import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
13 import org.springframework.test.context.ContextConfiguration;
14 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
15
16 import java.io.IOException;
17 import java.util.List;
18 import java.util.Optional;
19
20 @RunWith(SpringJUnit4ClassRunner.class)
21 @ContextConfiguration(locations = "classpath:applicationContext.xml")
22 public class SpringDataESTest {
23
24 @Autowired
25 private ElasticsearchTemplate elasticsearchTemplate;
26
27 @Autowired
28 private ProductService productService;
29
30 @Test //import org.junit.Test; 不要自己创建一个名称为Test类
31 public void createIndex() {
32 //创建空的索引库
33 elasticsearchTemplate.createIndex(Product.class);
34 //添加映射
35 elasticsearchTemplate.putMapping(Product.class);
36 }
37
38 // 创建
39 @Test
40 public void createDocument(){
41 Document doc = null;
42 String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&psort=3&click=0";
43 // String url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=4cbce742a5634b66996fa09045840c0e";
44 try {
45 doc = Jsoup.connect(url).get();
46 //Element:页面中的所有 ul > li , li特点是 class = gl-item,使用类选择器
47 Elements liLists = doc.select(".gl-item");
48 long i=0;
49 for (Element li : liLists) {
50 //分析 li 结构
51 //1)获取图片地址 class= p-img ,查找img标签,获取 img 的src 属性的值
52 //String pimgsrc = li.select(".p-img").select("img").attr("src");
53 //System.out.println(pimgsrc);
54
55 //2)获取商品价格: class = p-price ,查找 i 标签,获取 i 标签包含的内容 12324
56 String pprice = li.select(".p-price").select("i").text();
57 System.out.println(pprice);
58
59 //3)获取商品名称: class= p-name p-name-type-2,查找 em 标签,获取 em 标签的内容
60 String pname = li.select(".p-name").select(".p-name-type-2").select("em").text();
61 String pname2 = li.select("div[class='p-name p-name-type-2']").select("em").text();
62
63 System.out.println(pname);
64 System.out.println(pname2);
65
66 //4)获取商品地址
67 String padress = li.select(".p-img").select("a").attr("href");
68 System.out.println(padress);
69 i++;
70 Product product = new Product();
71 product.setId(i);
72 product.setPname(pname);
73 product.setPprice(pprice);
74 product.setPadress(padress);
75
76 productService.save(product);
77 }
78 } catch (IOException e) {
79 e.printStackTrace();
80 }
81 }
82
83 @Test
84 public void getDocumentById(){
85 Optional byId = productService.findById(1L);
86 Product product = byId.get();
87 System.out.println("根据id查询"+product);
88
89 }
90
91 // 根据id删除文件
92 @Test
93 public void deleteDocumentById(){
94 productService.deleteById(30L);
95
96 }
97
98 @Test
99 // 根据id更新文件
100 public void updateDocumentById(){
101 Product product = new Product();
102 product.setId(29L);
103 product.setPprice("2");
104 product.setPname("根据id更新的名字");
105 product.setPadress("更新的");
106 productService.updateById(product);
107 System.out.println("更新后的文件"+product);
108 }
109
110 // 根据title查询 并且分页
111 @Test
112 public void getDocumentByPnameAndPage(){
113 List byPnameAndPage = productService.findByPname("华为", PageRequest.of(0, 10));
114 System.out.println(byPnameAndPage);
115 }
116
117
118
119
120 // 创建
121 // @Test
122 // public void createDocument(){
123 // for (Long i = 1L;i <= 10L; i++){ 124 // // 批量创建Hello对象 125 // Hello hello = new Hello(); 126 // hello.setId(i); 127 // hello.setTitle("新增的title"+i); 128 // hello.setContent("新增的content"+i); 129 // helloService.save(hello); 130 // } 131 // 132 // } 133 // 134 // // 根据id查询 135 // @Test 136 // public void getDocumentById(){ 137 // Optional helloOptional = helloService.findById(1L);
138 // Hello hello = helloOptional.get();
139 // System.out.println("根据id查询hello:"+hello);
140 // }
141 //
142 // // 查询所有hello
143 // @Test
144 // public void getAllDocument(){
145 // Iterable all = helloService.findAll();
146 //
147 // //方法一
148 //// Iterator iterator = all.iterator();// 10个
149 //// while (iterator.next() != null){
150 //// Hello hello = iterator.next();
151 //// System.out.println("查询所有hello"+hello);
152 //// }
153 //
154 // // 方法二
155 // // forEach(Consumer),Consumer接口通过@FunctionallInterface修饰
156 // // 表示他是一个函数式
157 // // 如果一个方法是形参是函数接口,传递形参时可以使用Lambda表达式,特点是使用箭头符号
158 // // void accept(T t)
159 // all.forEach(item-> System.out.println("查询所有hello"+item));
160 //
161 //
162 // }
163 //
164 // // 根据id更新
165 // @Test
166 // public void updateDocumentById(){
167 // Hello hello = new Hello();
168 // hello.setId(1L);
169 // hello.setTitle("更新修改的title");
170 // hello.setContent("更新修改的Content");
171 // helloService.save(hello);
172 // System.out.println("更新后的为"+hello);
173 // }
174 //
175 // // 根据id删除文档
176 // @Test
177 // public void deleteDocumentById(){
178 // helloService.deleteById(10L);
179 // }
180 //
181 // // 删除所有文档
182 //// @Test
183 //// public void deleteAllDocument(){
184 //// helloService.deleteAll();
185 //// }
186 //
187 // // 根据title查询
188 // @Test
189 // public void getDocumentByTitle(){
190 // List hs = helloService.findByTitle("新增");
191 // System.out.println(hs);
192 // }
193 //
194 // @Test
195 // public void getDocumentByTitleAndPage(){
196 // List hs = helloService.findByTitle("新增");
197 // System.out.println(hs);
198 //
199 // List hs1 = helloService.findByTitle("新增", PageRequest.of(1,3));
200 // System.out.println("---------"+hs1);
201 // }
202
203
204
205 }

1

// System.out.println(hs);