2020-04-19 框架 Elasticsearch,笔记 1,887
因业务需要,学习使用搜索引擎 Elasticsearch。
中文分词
找到了一个基于 HanLP 的分词插件,更新比较频繁,分词效果比 IK 要好。
我做了个镜像并发布到了 Docker Hub:https://hub.docker.com/r/xalanq/elasticsearch-hanlp
$ docker pull xalanq/elasticsearch-hanlp:7.5.1
配置可以这样写(下面的片段放在索引的 settings 中):
"analysis": {
"analyzer": {
"zh_Hans": {
"tokenizer": "zh_Hans"
},
"zh_Hans_index": {
"tokenizer": "zh_Hans_index"
},
"zh_Hant": {
"tokenizer": "zh_Hant"
},
"zh_Hant_index": {
"tokenizer": "zh_Hant_index"
}
},
"tokenizer": {
"zh_Hans": {
"type": "hanlp",
"enable_offset": false,
"enable_normalization": true,
"enable_stop_dictionary": true,
"enable_part_of_speech_tagging": false,
"enable_number_quantifier_recognize": true,
"enable_traditional_chinese_mode": false,
"enable_index_mode": false,
"enable_custom_config": true
},
"zh_Hans_index": {
"type": "hanlp_index",
"enable_offset": false,
"enable_normalization": true,
"enable_stop_dictionary": true,
"enable_part_of_speech_tagging": false,
"enable_number_quantifier_recognize": true,
"enable_traditional_chinese_mode": false,
"enable_index_mode": true,
"enable_custom_config": true
},
"zh_Hant": {
"type": "hanlp",
"enable_offset": false,
"enable_normalization": true,
"enable_stop_dictionary": true,
"enable_part_of_speech_tagging": false,
"enable_number_quantifier_recognize": true,
"enable_traditional_chinese_mode": true,
"enable_index_mode": false,
"enable_custom_config": true
},
"zh_Hant_index": {
"type": "hanlp_index",
"enable_offset": false,
"enable_normalization": true,
"enable_stop_dictionary": true,
"enable_part_of_speech_tagging": false,
"enable_number_quantifier_recognize": true,
"enable_traditional_chinese_mode": true,
"enable_index_mode": true,
"enable_custom_config": true
}
}
}