es之得分（加权）

阅读原文时间：2023年07月09日阅读：1

随着应用程序的增长，提高搜索质量的需求也进一步增大。我们把它叫做搜索体验。我们需要知道什么对用户更重要，关注用户如何使用搜索功能。这导致不同的结论，例如，有些文档比其他的更重要，或特定查询需强调一个字段而弱化其他字段。这就是可以用到加权的地方。

进一步说搜索体验，我们更希望检索出来的数据是最想得到的数据；

这个其实就是关于文档的【相关性得分】

更具体地说：我们查询到的所有文档，都会在内部计算一次相关性评分 _score，然后按照 _score 从大到小排序，依次返回给客户端。

如何计算评分？

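Elasticsearch 使用的评分公式是基于 TF-IDF 的 Lucene 实用评分函数（practical scoring function），公式如下：
$$\mathrm{score}(q,d) = \mathrm{coord}(q,d) \cdot \mathrm{queryNorm}(q) \cdot \sum_{t \in q} \Big( \mathrm{tf}(t \in d) \cdot \mathrm{idf}(t)^{2} \cdot \mathrm{boost}(t) \cdot \mathrm{norm}(t,d) \Big)$$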

TF：词频，词在文档中出现的频度是多少？频度越高，权重越高

IDF：逆向文档频率，词在集合所有文档里出现的频率是多少？频率越高，权重越低

在我们实际的工作中，我们经常会控制boost来调整score（boost默认值是1）

创建索引和映射：

1）：创建索引

/**
 * Creates the {@code blog} index without passing any explicit settings or mappings.
 */
@Test
public void createIndex() {
    client.admin()
            .indices()
            .prepareCreate("blog")
            .get();
}

2）：创建映射

/**
 * Puts the mapping for type {@code document} on the {@code blog} index.
 *
 * <p>The mapping body built below has this shape:
 * <pre>
 * {
 *   "document" : {
 *     "properties" : {
 *       "id"      : { "type" : "integer", "store" : "yes" },
 *       "title"   : { "type" : "string", "store" : "yes", "analyzer" : "ik_max_word" },
 *       "content" : { "type" : "string", "store" : "yes", "analyzer" : "ik_max_word" },
 *       "comment" : { "type" : "string", "store" : "yes", "analyzer" : "ik_max_word" }
 *     }
 *   }
 * }
 * </pre>
 *
 * <p>No field-level boost is configured in this mapping.
 *
 * @throws Exception propagated from building the JSON or from the put-mapping call
 */
@Test
public void testCreateIndexMapping_boost() throws Exception {
    XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
    mappingBuilder.startObject();
    mappingBuilder.startObject("document");
    mappingBuilder.startObject("properties");

    // The id is the only integer field and carries no analyzer.
    mappingBuilder.startObject("id");
    mappingBuilder.field("type", "integer");
    mappingBuilder.field("store", "yes");
    mappingBuilder.endObject();

    // The three text fields share one definition; the array keeps their emission order.
    String[] analyzedFields = {"title", "content", "comment"};
    for (String fieldName : analyzedFields) {
        mappingBuilder.startObject(fieldName);
        mappingBuilder.field("type", "string");
        mappingBuilder.field("store", "yes");
        mappingBuilder.field("analyzer", "ik_max_word");
        mappingBuilder.endObject();
    }

    mappingBuilder.endObject(); // properties
    mappingBuilder.endObject(); // document
    mappingBuilder.endObject(); // root object

    PutMappingRequest request = Requests.putMappingRequest("blog");
    request.type("document");
    request.source(mappingBuilder);
    client.admin().indices().putMapping(request).get();
}

3）：创建Document实体类

package com.elasticsearch.bean;

/**
 * Plain mutable bean with the four properties of a blog document:
 * {@code id}, {@code title}, {@code content} and {@code comment}.
 *
 * <p>Created by angel.
 */
public class Document {
    private Integer id;
    private String title;
    private String content;
    private String comment;

    /** @return the document id, or {@code null} if it has not been set */
    public Integer getId() {
        return this.id;
    }

    /** @param id the document id to store */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if it has not been set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content, or {@code null} if it has not been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the comment, or {@code null} if it has not been set */
    public String getComment() {
        return this.comment;
    }

    /** @param comment the comment to store */
    public void setComment(String comment) {
        this.comment = comment;
    }
}

4）：重新创建索引和映射，创建文档

/**
 * Serializes one {@link Document} to JSON, indexes it into index {@code blog} / type
 * {@code document} using the bean's id as the document id, and prints the response details.
 *
 * <p>Only document 3 is active. The commented-out setters hold the data for documents 1 and 2;
 * swap them in and re-run to index those documents.
 *
 * <p>The client is closed in a {@code finally} block so it is released even when
 * serialization or indexing throws.
 *
 * @throws JsonProcessingException if the bean cannot be serialized to JSON
 */
@Test
public void createDocument() throws JsonProcessingException {
    try {
        Document document = new Document();

        // document.setId(1);
        // document.setTitle("搜索引擎服务器");
        // document.setContent("基于restful的数据风格");
        // document.setComment("我们学习Elasticsearch搜索引擎服务器");
        //
        // document.setId(2);
        // document.setTitle("什么是Elasticsearch");
        // document.setContent("Elasticsearch搜索引擎服务器");
        // document.setComment("Elasticsearch封装了lucene");
        //
        document.setId(3);
        document.setTitle("Elasticsearch的用途");
        document.setContent("Elasticsearch可以用来进行海量数据的检索");
        document.setComment("Elasticsearch真NB");

        ObjectMapper objectMapper = new ObjectMapper();
        String source = objectMapper.writeValueAsString(document);
        System.out.println("source:"+source);

        IndexResponse indexResponse = client
                .prepareIndex("blog", "document", document.getId().toString())
                .setSource(source)
                .get();

        // Print the details of the index response.
        System.out.println("索引名称："+indexResponse.getIndex());
        System.out.println("文档类型："+indexResponse.getType());
        System.out.println("ID："+indexResponse.getId());
        System.out.println("版本："+indexResponse.getVersion());
        System.out.println("是否创建成功："+indexResponse.status());
    } finally {
        // Always release the client, including on failure paths.
        client.close();
    }
}

5）：测试：

/**
 * Searches index {@code blog} / type {@code document} with a bool query whose three
 * {@code should} clauses are term queries for {@code "搜索"} on {@code title},
 * {@code content} and {@code comment}, then prints the hits.
 *
 * <p>No boost is set on any clause.
 */
// TODO: work out how to rank the document with id 2 ahead of the document with id 1.
@Test
public void BoolQuery_boost() {
    // Take the hits straight off the response; nothing else from it is used.
    SearchHits hits = client.prepareSearch("blog")
            .setTypes("document")
            .setQuery(
                    QueryBuilders.boolQuery()
                            .should(QueryBuilders.termQuery("title", "搜索"))
                            .should(QueryBuilders.termQuery("content", "搜索"))
                            .should(QueryBuilders.termQuery("comment", "搜索")))
            .get()
            .getHits();

    printSearch(hits);
}

/**
 * Prints a search result to standard output: the total hit count and maximum score,
 * then for every hit its source JSON, its score, the {@code id}, {@code title} and
 * {@code content} values from the source, and the values of each hit field.
 *
 * @param hits the search hits to print
 */
public void printSearch(SearchHits hits) {
    System.out.println("查询的结果数量有"+hits.getTotalHits()+"条");
    System.out.println("结果中最高分："+hits.getMaxScore());

    // Typed for-each instead of a raw Iterator: a raw next() returns Object, which
    // cannot be assigned to SearchHit without a cast.
    for (SearchHit searchHit : hits) {
        System.out.println("所有的数据JSON的数据格式："+searchHit.getSourceAsString());
        System.out.println("每条得分："+searchHit.getScore());

        // Individual values read from the hit's source.
        System.out.println("id:"+searchHit.getSource().get("id"));
        System.out.println("title:"+searchHit.getSource().get("title"));
        System.out.println("content:"+searchHit.getSource().get("content"));
        System.out.println("**********************************************");

        // Same reasoning for the fields attached to the hit.
        for (SearchHitField field : searchHit) {
            System.out.println(field.getValues());
        }
    }
}

手机扫一扫

移动阅读更方便

你可能感兴趣的文章

Day11_基本搜索