业务需要,学习使用搜索引擎

中文分词

找到了 HanLP 分词插件来给 Elasticsearch 做中文分词,它更新比较频繁,效果比 IK 要好。

我做了个镜像丢到了 Docker Hub:https://hub.docker.com/r/xalanq/elasticsearch-hanlp

$ docker pull xalanq/elasticsearch-hanlp:7.5.1

创建索引时,可以在 settings 中这样配置(下面只是 settings 里的 analysis 部分,不是完整的 JSON):

"analysis": {
    "analyzer": {
        "zh_Hans": {
            "tokenizer": "zh_Hans"
        },
        "zh_Hans_index": {
            "tokenizer": "zh_Hans_index"
        },
        "zh_Hant": {
            "tokenizer": "zh_Hant"
        },
        "zh_Hant_index": {
            "tokenizer": "zh_Hant_index"
        }
    },
    "tokenizer": {
        "zh_Hans": {
            "type": "hanlp",
            "enable_offset": false,
            "enable_normalization": true,
            "enable_stop_dictionary": true,
            "enable_part_of_speech_tagging": false,
            "enable_number_quantifier_recognize": true,
            "enable_traditional_chinese_mode": false,
            "enable_index_mode": false,
            "enable_custom_config": true
        },
        "zh_Hans_index": {
            "type": "hanlp_index",
            "enable_offset": false,
            "enable_normalization": true,
            "enable_stop_dictionary": true,
            "enable_part_of_speech_tagging": false,
            "enable_number_quantifier_recognize": true,
            "enable_traditional_chinese_mode": false,
            "enable_index_mode": true,
            "enable_custom_config": true
        },
        "zh_Hant": {
            "type": "hanlp",
            "enable_offset": false,
            "enable_normalization": true,
            "enable_stop_dictionary": true,
            "enable_part_of_speech_tagging": false,
            "enable_number_quantifier_recognize": true,
            "enable_traditional_chinese_mode": true,
            "enable_index_mode": false,
            "enable_custom_config": true
        },
        "zh_Hant_index": {
            "type": "hanlp_index",
            "enable_offset": false,
            "enable_normalization": true,
            "enable_stop_dictionary": true,
            "enable_part_of_speech_tagging": false,
            "enable_number_quantifier_recognize": true,
            "enable_traditional_chinese_mode": true,
            "enable_index_mode": true,
            "enable_custom_config": true
        }
    }
}

欢迎留言>_<

此站点使用 Akismet 来减少垃圾评论。了解我们如何处理您的评论数据。