受欢迎的博客标签

elasticsearch2.x+IK分词器+Java实现配置近义词功能教程

Published
http://blog.csdn.net/tianzhaixing2013/article/details/51506496

同义词配置

step 1
elasticsearch.yml 最后一行添加:

index.analysis.analyzer.default.type: ik

step 2
在elasticsearch-2.3.1/config目录下面,存放synonyms.txt
其中,synonyms.txt 编码格式为'utf-8',内容为:

#Example:
ipod, i-pod, i pod
foozball , foosball
universe , cosmos
西红柿, 番茄
马铃薯, 土豆
aa, bb

step 3
新建立索引类型设置

curl -XPUT localhost:9200/test/_mapping?pretty -d '
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "jt_cn": {
            "type": "custom",
            "use_smart": "true",
            "tokenizer": "ik_smart",
            "filter": ["jt_tfr","jt_sfr"],
            "char_filter": ["jt_cfr"]
          },
          "ik_smart": {
            "type": "ik",
            "use_smart": "true"
          },
          "ik_max_word": {
            "type": "ik",
            "use_smart": "false"
          }
        },
        "filter": {
          "jt_tfr": {
            "type": "stop",
            "stopwords": [" "]
          },
          "jt_sfr": {
            "type": "synonym",
            "synonyms_path": "synonyms.txt"
          }
        },
        "char_filter": {
          "jt_cfr": {
            "type": "mapping",
            "mappings": [
              "| => \\|"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "solution": {
      "properties": {
        "title": {
          "include_in_all": true,
          "analyzer": "jt_cn",
          "term_vector": "with_positions_offsets",
          "boost": 8,
          "store": true,
          "type": "string"
        }
      }
    }
  }
}
'

step 4

curl -XPUT localhost:9200/test/solution/1 -d '
{
  "title": "番茄"
}
'
curl -XPUT localhost:9200/test/solution/2 -d '
{
  "title": "西红柿"
}
'

step 5

curl -XPOST 'localhost:9200/test/solution/_search?pretty' -d '
{
  "query": {
    "query_string": {
      "title": {
        "query": "西红柿",
        "analyzer": "jt_cn"
      }
    }
  },
  "highlight": {
    "pre_tags": [
      "<tag1>",
      "<tag2>"
    ],
    "post_tags": [
      "</tag1>",
      "</tag2>"
    ],
    "fields": {
      "title": {}
    }
  }
}
'

step 6

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.4500804,
    "hits": [
      {
        "_index": "test",
        "_type": "solution",
        "_id": "1",
        "_score": 0.4500804,
        "_source": {
          "title": "西红柿"
        }
      },
      {
        "_index": "test",
        "_type": "solution",
        "_id": "2",
        "_score": 0.36006433,
        "_source": {
          "title": "番茄"
        }
      }
    ]
  }
}

动态更新同义词文件