From bc1901baa5a2b7d08de470a4a80413862fefadf1 Mon Sep 17 00:00:00 2001 From: Anna-Maria Meriniemi Date: Fri, 8 Jun 2018 10:59:19 +0300 Subject: [PATCH] ES: implement real substring matching (#500) ...by splitting input into characters instead of whitespace-delimited words. This means you can now match partial words, i.e. real substrings from anywhere: "foo ba" will match "Foo Bar Baz", while previously you had to have full words ("foo bar") to match anything. My dev setup incurred an 8% increase in storage usage, from ~13MB to ~14MB (for ~40k torrents). Small change, big improvement. I wonder why I didn't do this in the first place. --- es_mapping.yml | 11 +++++++++-- nyaa/templates/help.html | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/es_mapping.yml b/es_mapping.yml index 1086fb2..14983d5 100644 --- a/es_mapping.yml +++ b/es_mapping.yml @@ -24,9 +24,9 @@ settings: - my_ngram - trim_zero - unique - # For exact matching - simple lowercase + whitespace delimiter + # For exact matching - separate each character for substring matching + lowercase exact_analyzer: - tokenizer: whitespace + tokenizer: exact_tokenizer filter: - lowercase # For matching full words longer than the ngram limit (15 chars) @@ -43,6 +43,13 @@ settings: - fullword_min - unique + tokenizer: + # Splits input into characters, for exact substring matching + exact_tokenizer: + type: pattern + pattern: "(.)" + group: 1 + filter: my_ngram: type: edgeNGram diff --git a/nyaa/templates/help.html b/nyaa/templates/help.html index f7de873..47cbbf5 100644 --- a/nyaa/templates/help.html +++ b/nyaa/templates/help.html @@ -46,7 +46,7 @@ name, but not those which have bar in the name as well.
- If you want to search for a several-word expression in its entirety, you can + If you want to search for a several-word expression (substring) in its entirety, you can surround searches with " (double quotes), such as "foo bar", which would match torrents named foo bar but not those named bar foo. You may also use the aforementioned | to group