From 2d0cf7cbb42442de02022c2a453f3c5ceee52a8e Mon Sep 17 00:00:00 2001 From: TheAMM Date: Mon, 5 Jun 2017 17:29:00 +0300 Subject: [PATCH] [ES Schema change] Multi-field search display_name to match words over ngram limit This fixes searching for "Machiavellianism" (16 chars); "Machiavellianis" (15 chars) already worked previously. It does not seem to break anything, but it requires a re-indexing of ES. --- es_mapping.yml | 19 ++++++++++++++++++- nyaa/search.py | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/es_mapping.yml b/es_mapping.yml index 036db5b..4d4e39b 100644 --- a/es_mapping.yml +++ b/es_mapping.yml @@ -23,6 +23,17 @@ settings: - my_ngram - word_delimit - trim_zero + # For matching full words longer than the ngram limit (15 chars) + my_fullword_index_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - lowercase + - word_delimit + # These should be enough, as my_index_analyzer will match the rest + filter: my_ngram: type: edgeNGram @@ -67,7 +78,13 @@ mappings: # the scene convention of stuff in brackets, plus stuff like k-on type: text analyzer: my_index_analyzer - fielddata: true + fielddata: true # Is this required? + fields: + # Multi-field for full-word matching (when going over ngram limits) + # Note: will have to be queried for, not automatic + fullword: + type: text + analyzer: my_fullword_index_analyzer created_time: type: date # Only in the ES index for generating magnet links diff --git a/nyaa/search.py b/nyaa/search.py index 83b9ebd..2614e24 100644 --- a/nyaa/search.py +++ b/nyaa/search.py @@ -109,6 +109,8 @@ def search_elastic(term='', user=None, sort='id', order='desc', # Apply search term if term: s = s.query('simple_query_string', + # Query both fields, latter for words with >15 chars + fields=['display_name', 'display_name.fullword'], analyzer='my_search_analyzer', default_operator="AND", query=term)