From f31af836d990d08645ab48d462ad502a9f821750 Mon Sep 17 00:00:00 2001 From: TheAMM Date: Fri, 13 Apr 2018 14:37:01 +0300 Subject: [PATCH] Optimize Elasticsearch fullword field Since the main display_name field ngrams words up to 15 characters, any word of 15 characters or fewer will already be indexed - the fullword field (which we have for words longer than 15 characters) needs to index only words longer than that. --- es_mapping.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/es_mapping.yml b/es_mapping.yml index 4d4e39b..6001305 100644 --- a/es_mapping.yml +++ b/es_mapping.yml @@ -32,13 +32,19 @@ settings: filter: - lowercase - word_delimit - # These should be enough, as my_index_analyzer will match the rest + # Skip tokens shorter than N characters, + # since they're already indexed in the main field + - fullword_min filter: my_ngram: type: edgeNGram min_gram: 1 max_gram: 15 + fullword_min: + type: length + # Remember to change this if you change my_ngram's max_gram above! + min: 16 resolution: type: pattern_capture patterns: ["(\\d+)[xX](\\d+)"]