受欢迎的博客标签

ElasticSearch使用completion实现自动补全功能

Published

文章目录

文章目录

最终效果

completion使用

Java实现搜索补全代码

热门搜索推荐


 

最终效果

最终效果与百度搜索的输入提示类似(见下图):输入词语时会返回补全提示,输入拼音时也会有相同的提示效果。

completion使用

安装ik中文分词器:https://github.com/medcl/elasticsearch-analysis-ik

安装拼音分词器:https://github.com/medcl/elasticsearch-analysis-pinyin

定义关键词索引,并自定义ik+pinyin分词器。要实现补全搜索,必须使用特殊的数据类型completion;要让汉字和拼音都能补全,还必须使用自定义的ik+pinyin分词器。

  1.  
    PUT suggest
  2.  
    {
  3.  
    "settings": {
  4.  
    "number_of_replicas": 0,
  5.  
    "number_of_shards": 1,
  6.  
    "analysis": {
  7.  
    "analyzer": {
  8.  
    "ik_pinyin_analyzer": {
  9.  
    "type": "custom",
  10.  
    "tokenizer": "ik_max_word",
  11.  
    "filter": ["my_pinyin", "word_delimiter"]
  12.  
    }
  13.  
    },
  14.  
    "filter": {
  15.  
    "my_pinyin": {
  16.  
    "type": "pinyin",
  17.  
    "first_letter": "prefix",
  18.  
    "padding_char": " "
  19.  
    }
  20.  
    }
  21.  
    }
  22.  
    },
  23.  
    "mappings": {
  24.  
    "suggest": {
  25.  
    "properties": {
  26.  
    "keyword": {
  27.  
    "type": "completion",
  28.  
    "analyzer": "ik_pinyin_analyzer",
  29.  
    "fields": {
  30.  
    "key": {
  31.  
    "type": "keyword"
  32.  
    }
  33.  
    }
  34.  
    },
  35.  
    "id": {
  36.  
    "type": "keyword"
  37.  
    },
  38.  
    "createDate": {
  39.  
    "type": "date",
  40.  
    "format": "yyyy-MM-dd HH:mm:ss"
  41.  
    }
  42.  
    }
  43.  
    }
  44.  
    }
  45.  
    }
 

初始化部分数据

  1.  
    POST _bulk/?refresh=true
  2.  
    { "index": { "_index": "suggest", "_type": "suggest" }}
  3.  
    { "keyword": "项目"}
  4.  
    { "index": { "_index": "suggest", "_type": "suggest" }}
  5.  
    { "keyword": "项目进度"}
  6.  
    { "index": { "_index": "suggest", "_type": "suggest" }}
  7.  
    { "keyword": "项目管理"}
  8.  
    { "index": { "_index": "suggest", "_type": "suggest" }}
  9.  
    { "keyword": "项目进度及调整 汇总.doc_文档"}
  10.  
    { "index": { "_index": "suggest", "_type": "suggest" }}
  11.  
    { "keyword": "项目"}
 

使用suggest获取搜索补全建议,并对同一词语去重。 

  1.  
    GET /suggest/_search
  2.  
    {
  3.  
    "suggest": {
  4.  
    "my-suggest": {
  5.  
    "prefix": "项目",
  6.  
    "completion": {
  7.  
    "field": "keyword",
  8.  
    "size": 20,
  9.  
    "skip_duplicates": true
  10.  
    }
  11.  
    }
  12.  
    }
  13.  
    }
 

Java实现搜索补全代码

  1.  
    /**
  2.  
    * 获取相关搜索,最多返回9条
  3.  
    * @param key
  4.  
    * @return
  5.  
    */
  6.  
    public JSONObject getSearchSuggest(String key) {
  7.  
    CompletionSuggestionBuilder suggestion = SuggestBuilders
  8.  
    .completionSuggestion("keyword").prefix(key).size(20).skipDuplicates(true);
  9.  
    SuggestBuilder suggestBuilder = new SuggestBuilder();
  10.  
    suggestBuilder.addSuggestion("suggest", suggestion);
  11.  
    SearchResponse response = template.suggest(suggestBuilder, EsConstants.SUGGEST);
  12.  
    Suggest suggest = response.getSuggest();
  13.  
     
  14.  
    Set<String> keywords = null;
  15.  
    if (suggest != null) {
  16.  
    keywords = new HashSet<>();
  17.  
    List<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> entries = suggest.getSuggestion("suggest").getEntries();
  18.  
     
  19.  
    for (Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option> entry: entries) {
  20.  
    for (Suggest.Suggestion.Entry.Option option: entry.getOptions()) {
  21.  
    /** 最多返回9个推荐,每个长度最大为20 */
  22.  
    String keyword = option.getText().string();
  23.  
    if (!StringUtils.isEmpty(keyword) && keyword.length() <= 20) {
  24.  
    /** 去除输入字段 */
  25.  
    if (keyword.equals(key)) continue;
  26.  
    keywords.add(keyword);
  27.  
    if (keywords.size() >= 9) {
  28.  
    break;
  29.  
    }
  30.  
    }
  31.  
    }
  32.  
    }
  33.  
    }
  34.  
    return ApiResult.OK(keywords, "获取推荐词组成功");
  35.  
    }
  36.  
     
 

热门搜索推荐

上面自定义的索引中,通过fields属性专门存储了类型为keyword的子字段keyword.key。可以通过对keyword.key做聚合统计,来获取搜索次数最多的句子。下面是Java实现方式:

  1.  
    public JSONObject searchHot(Map<String, Object> map) {
  2.  
    Integer size = 10;
  3.  
    if (!StringUtils.isEmpty(map.get("size"))) {
  4.  
    size = (Integer)map.get("size");
  5.  
    }
  6.  
     
  7.  
    /** 获取最近一个月时间 */
  8.  
    String preMonth = LocalDateTime.now().minusMonths(1).format(EsConstants.fomatter);
  9.  
    String now = LocalDateTime.now().format(EsConstants.fomatter);
  10.  
     
  11.  
    /** 统计最近一个月的热门搜索,长度最大10,方便显示 */
  12.  
    SearchRequestBuilder requestBuilder = template.getClient().prepareSearch(EsConstants.SUGGEST)
  13.  
    .setQuery(QueryBuilders.rangeQuery("createDate").get(preMonth).lte(now));
  14.  
     
  15.  
    SearchResponse searchResponse = requestBuilder.addAggregation(AggregationBuilders
  16.  
    .terms("hotSearch").field("keyword.key").size(size)).execute().actionGet();
  17.  
     
  18.  
    Aggregations aggregations = searchResponse.getAggregations();
  19.  
    Set<String> keywords = null;
  20.  
    if (aggregations != null) {
  21.  
    keywords = new HashSet<>();
  22.  
    Terms hotSearch = aggregations.get("hotSearch");
  23.  
    List<? extends Terms.Bucket> buckets = hotSearch.getBuckets();
  24.  
    for (Terms.Bucket bucket: buckets) {
  25.  
    if (bucket.getKey().toString().length() <= 10) {
  26.  
    keywords.add((String)bucket.getKey());
  27.  
    }
  28.  
    }
  29.  
    }
  30.  
    return ApiResult.OK(keywords, "热门搜索获取成功");
  31.  
    }