mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-22 19:30:00 +00:00
Extend ES term preprocessing for OR groups
Implements handling of "foo"|"bar" literal OR groups in the Elasticsearch term preprocessor. Groups can be negated with -, but don't mesh with precedence (like plain literals). This is a partial hack; the real solution would be to parse the entire search terms ourselves, with AND and OR groups, negations, etc. But having that work neatly with the simple_query_string would be a bit of a hassle.
This commit is contained in:
parent
0b78428abc
commit
87502978c3
|
@ -70,7 +70,18 @@ def _generate_query_string(term, category, filter, user):
|
||||||
|
|
||||||
|
|
||||||
# For preprocessing ES search terms in _parse_es_search_terms
|
# For preprocessing ES search terms in _parse_es_search_terms
|
||||||
QUOTED_LITERAL_REGEX = re.compile(r'(?i)(-)?"(.*?)"')
|
QUOTED_LITERAL_REGEX = re.compile(r'(?i)(-)?"(.+?)"')
|
||||||
|
QUOTED_LITERAL_GROUP_REGEX = re.compile(r'''
|
||||||
|
(?i)
|
||||||
|
(-)? # Negate entire group at once
|
||||||
|
(
|
||||||
|
".+?" # First literal
|
||||||
|
(?:
|
||||||
|
\| # OR
|
||||||
|
".+?" # Second literal
|
||||||
|
)+ # repeating
|
||||||
|
)
|
||||||
|
''', re.X)
|
||||||
|
|
||||||
|
|
||||||
def _es_name_exact_phrase(literal):
|
def _es_name_exact_phrase(literal):
|
||||||
|
@ -98,7 +109,30 @@ def _parse_es_search_terms(search, search_terms):
|
||||||
must_set = set()
|
must_set = set()
|
||||||
must_not_set = set()
|
must_not_set = set()
|
||||||
|
|
||||||
def literal_matcher(match):
|
must_or_groups = []
|
||||||
|
must_not_or_groups = []
|
||||||
|
|
||||||
|
def must_group_matcher(match):
|
||||||
|
''' Grabs [-]"foo"|"bar"[|"baz"...] groups from the search terms '''
|
||||||
|
negated = bool(match.group(1))
|
||||||
|
literal_group = match.group(2)
|
||||||
|
|
||||||
|
literals = QUOTED_LITERAL_REGEX.findall(literal_group)
|
||||||
|
group_query = Q(
|
||||||
|
'bool',
|
||||||
|
should=[_es_name_exact_phrase(lit_m[1]) for lit_m in literals]
|
||||||
|
)
|
||||||
|
|
||||||
|
if negated:
|
||||||
|
must_not_or_groups.append(group_query)
|
||||||
|
else:
|
||||||
|
must_or_groups.append(group_query)
|
||||||
|
|
||||||
|
# Remove the parsed group from search terms
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def must_matcher(match):
|
||||||
|
''' Grabs [-]"foo" literals from the search terms '''
|
||||||
negated = bool(match.group(1))
|
negated = bool(match.group(1))
|
||||||
literal = match.group(2)
|
literal = match.group(2)
|
||||||
|
|
||||||
|
@ -111,11 +145,12 @@ def _parse_es_search_terms(search, search_terms):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
# Remove quoted parts (optionally prepended with -) and store them in the sets
|
# Remove quoted parts (optionally prepended with -) and store them in the sets
|
||||||
parsed_search_terms = QUOTED_LITERAL_REGEX.sub(literal_matcher, search_terms).strip()
|
parsed_search_terms = QUOTED_LITERAL_GROUP_REGEX.sub(must_group_matcher, search_terms).strip()
|
||||||
|
parsed_search_terms = QUOTED_LITERAL_REGEX.sub(must_matcher, parsed_search_terms).strip()
|
||||||
|
|
||||||
# Create phrase matches (if any)
|
# Create phrase matches (if any)
|
||||||
must_queries = [_es_name_exact_phrase(lit) for lit in must_set]
|
must_queries = [_es_name_exact_phrase(lit) for lit in must_set] + must_or_groups
|
||||||
must_not_queries = [_es_name_exact_phrase(lit) for lit in must_not_set]
|
must_not_queries = [_es_name_exact_phrase(lit) for lit in must_not_set] + must_not_or_groups
|
||||||
|
|
||||||
if parsed_search_terms:
|
if parsed_search_terms:
|
||||||
# Normal text search without the quoted parts
|
# Normal text search without the quoted parts
|
||||||
|
|
Loading…
Reference in a new issue