
elasticsearch 7.x compatibility (#576)

* es_mapping: update how dynamic mappings are turned off

they changed it in 6.x

https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html
https://github.com/elastic/elasticsearch/pull/25734
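
For reference, a rough sketch of where this lives on 6.x/7.x, using the elasticsearch-py client against a local node (the client setup and the "dynamic-test" index name are made up for illustration):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# "dynamic" now sits at the top level of "mappings" instead of the old
# index.mapper.dynamic setting; false means unmapped fields are silently
# ignored rather than auto-added ("strict" would reject them outright)
es.indices.create(index="dynamic-test", body={
    "mappings": {
        "dynamic": False,
        "properties": {
            "display_name": {"type": "text"},
        },
    },
})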

* es_mapping: remove _all field

deprecated in 6.0 anyway
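
Without _all, unqualified searches fall back to index.query.default_field, which we already point at display_name. A sketch of what that looks like from Python (assumes the client setup below and an existing "nyaa" index; the search terms are arbitrary):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# no _all to search anymore; an unqualified query_string goes against
# index.query.default_field (display_name for us) instead
res = es.search(index="nyaa", body={
    "query": {"query_string": {"query": "some show 1080p"}},
})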

* es_mapping.yml: fix deprecated mapping type

https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html#_schedule_for_removal_of_mapping_types

it gives a really unhelpful error otherwise, oof.
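
Concretely, on 7.x a 5.x-style typed mapping fails at index creation with something like "Root mapping definition has unsupported parameters: [torrent : ...]", which says nothing about types having been removed. A sketch of the old vs. new shape (the "typeless-test" index name is made up):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# old 5.x shape, with a "torrent" type wrapping everything -- 7.x rejects
# this, reading "torrent" as an unknown mapping parameter:
#   {"mappings": {"torrent": {"properties": {...}}}}

# 7.x shape, typeless:
es.indices.create(index="typeless-test", body={
    "mappings": {"properties": {"display_name": {"type": "text"}}},
})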

* es: fix remaining 7.x-isms

the enabled: false apparently only applies to
"object" fields now; other fields need index: false instead

and the _type got removed everywhere. Seems to work now.
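
In other words, for a scalar field that should stay in _source but not be searchable, it's now a concrete type plus index: false, since enabled: false only works on object fields. A sketch (the "index-false-test" name is made up):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

es.indices.create(index="index-false-test", body={
    "mappings": {"properties": {
        # kept in _source for building magnet links, but not queryable
        "info_hash": {"type": "keyword", "index": False},
    }},
})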

* Fix weird offset error with word_delimiter_graph

yet another es7-ism i guess
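
word_delimiter_graph adjusts token offsets by default, and a downstream filter like trim that shortens tokens can then hand Lucene offsets that go backwards, which it refuses to index. adjust_offsets: false avoids that. A quick way to poke at it via the _analyze API (sketch; the sample text is arbitrary):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

res = es.indices.analyze(body={
    "tokenizer": "whitespace",
    "filter": [{
        "type": "word_delimiter_graph",
        "preserve_original": True,
        "split_on_numerics": False,
        "adjust_offsets": False,
    }, "trim"],
    "text": "[SubGroup] Show_Name - 01",
})
print([(t["token"], t["start_offset"], t["end_offset"]) for t in res["tokens"]])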

* Fix warning and some app stuff for ES 7.x
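
In 7.x hits.total is an object rather than a bare count (and is capped at 10,000 by default), hence the ['total']['value'] changes in the diff below. A sketch of reading it from Python:

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

res = es.search(index="nyaa", body={"query": {"match_all": {}}})
total = res["hits"]["total"]["value"]        # int; may be a lower bound
relation = res["hits"]["total"]["relation"]  # "eq", or "gte" when capped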

Co-authored-by: Arylide <Arylide@users.noreply.github.com>
Authored by queue on 2020-07-12 01:10:47 -06:00; committed by GitHub
parent 72087ddaaf
commit 4fcef92b94
6 changed files with 68 additions and 73 deletions

--- a/create_es.sh
+++ b/create_es.sh

@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+set -e
 # create indices named "nyaa" and "sukebei", these are hardcoded
 curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

--- a/es_mapping.yml
+++ b/es_mapping.yml

@@ -10,7 +10,6 @@ settings:
         char_filter:
           - my_char_filter
         filter:
-          - standard
           - lowercase
       my_index_analyzer:
         type: custom
@@ -52,7 +51,7 @@ settings:
     filter:
       my_ngram:
-        type: edgeNGram
+        type: edge_ngram
         min_gram: 1
         max_gram: 15
       fullword_min:
@@ -66,9 +65,13 @@ settings:
         type: pattern_capture
         patterns: ["0*([0-9]*)"]
       word_delimit:
-        type: word_delimiter
+        type: word_delimiter_graph
         preserve_original: true
         split_on_numerics: false
+        # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms
+        # since we're using "trim" filters downstream, otherwise
+        # you get weird lucene errors about startOffset
+        adjust_offsets: false
     char_filter:
       my_char_filter:
         type: mapping
@@ -78,70 +81,65 @@ settings:
     # plus replicas don't really help either.
     number_of_shards: 1
     number_of_replicas : 0
-    mapper:
-      # disable elasticsearch's "helpful" autoschema
-      dynamic: false
-    # since we disabled the _all field, default query the
-    # name of the torrent.
     query:
       default_field: display_name
 mappings:
-  torrent:
-    # don't want everything concatenated
-    _all:
-      enabled: false
-    properties:
-      id:
-        type: long
-      display_name:
-        # TODO could do a fancier tokenizer here to parse out the
-        # the scene convention of stuff in brackets, plus stuff like k-on
-        type: text
-        analyzer: my_index_analyzer
-        fielddata: true # Is this required?
-        fields:
-          # Multi-field for full-word matching (when going over ngram limits)
-          # Note: will have to be queried for, not automatic
-          fullword:
-            type: text
-            analyzer: my_fullword_index_analyzer
-          # Stored for exact phrase matching
-          exact:
-            type: text
-            analyzer: exact_analyzer
-      created_time:
-        type: date
-      # Only in the ES index for generating magnet links
-      info_hash:
-        enabled: false
-      filesize:
-        type: long
-      anonymous:
-        type: boolean
-      trusted:
-        type: boolean
-      remake:
-        type: boolean
-      complete:
-        type: boolean
-      hidden:
-        type: boolean
-      deleted:
-        type: boolean
-      has_torrent:
-        type: boolean
-      download_count:
-        type: long
-      leech_count:
-        type: long
-      seed_count:
-        type: long
-      comment_count:
-        type: long
-      # these ids are really only for filtering, thus keyword
-      uploader_id:
-        type: keyword
-      main_category_id:
-        type: keyword
-      sub_category_id:
-        type: keyword
+  # disable elasticsearch's "helpful" autoschema
+  dynamic: false
+  properties:
+    id:
+      type: long
+    display_name:
+      # TODO could do a fancier tokenizer here to parse out the
+      # the scene convention of stuff in brackets, plus stuff like k-on
+      type: text
+      analyzer: my_index_analyzer
+      fielddata: true # Is this required?
+      fields:
+        # Multi-field for full-word matching (when going over ngram limits)
+        # Note: will have to be queried for, not automatic
+        fullword:
+          type: text
+          analyzer: my_fullword_index_analyzer
+        # Stored for exact phrase matching
+        exact:
+          type: text
+          analyzer: exact_analyzer
+    created_time:
+      type: date
+    #
+    # Only in the ES index for generating magnet links
+    info_hash:
+      type: keyword
+      index: false
+    filesize:
+      type: long
+    anonymous:
+      type: boolean
+    trusted:
+      type: boolean
+    remake:
+      type: boolean
+    complete:
+      type: boolean
+    hidden:
+      type: boolean
+    deleted:
+      type: boolean
+    has_torrent:
+      type: boolean
+    download_count:
+      type: long
+    leech_count:
+      type: long
+    seed_count:
+      type: long
+    comment_count:
+      type: long
+    # these ids are really only for filtering, thus keyword
+    uploader_id:
+      type: keyword
+    main_category_id:
+      type: keyword
+    sub_category_id:
+      type: keyword

--- a/import_to_es.py
+++ b/import_to_es.py

@@ -34,7 +34,6 @@ def pad_bytes(in_bytes, size):
 def mk_es(t, index_name):
     return {
         "_id": t.id,
-        "_type": "torrent",
         "_index": index_name,
         "_source": {
             # we're also indexing the id as a number so you can

--- a/nyaa/templates/search_results.html
+++ b/nyaa/templates/search_results.html

@@ -17,7 +17,7 @@
 {% endif %}
 {% endif %}
-{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
+{% if (use_elastic and torrent_query.hits.total.value > 0) or (torrent_query.items) %}
 <div class="table-responsive">
 <table class="table table-bordered table-hover table-striped torrent-list">
 <thead>

--- a/nyaa/views/main.py
+++ b/nyaa/views/main.py

@@ -167,7 +167,7 @@ def home(rss):
         else:
             rss_query_string = _generate_query_string(
                 search_term, category, quality_filter, user_name)
-            max_results = min(max_search_results, query_results['hits']['total'])
+            max_results = min(max_search_results, query_results['hits']['total']['value'])
             # change p= argument to whatever you change page_parameter to or pagination breaks
             pagination = Pagination(p=query_args['page'], per_page=results_per_page,
                                     total=max_results, bs_version=3, page_parameter='p',

--- a/sync_es.py
+++ b/sync_es.py

@@ -114,7 +114,6 @@ def reindex_torrent(t, index_name):
     return {
         '_op_type': 'update',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(t['id']),
         "doc": doc,
         "doc_as_upsert": True
@@ -128,7 +127,6 @@ def reindex_stats(s, index_name):
     return {
         '_op_type': 'update',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(s['torrent_id']),
         "doc": {
             "stats_last_updated": s["last_updated"],
@@ -141,7 +139,6 @@ def delet_this(row, index_name):
     return {
         "_op_type": 'delete',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(row['values']['id'])}
 
 # we could try to make this script robust to errors from es or mysql, but since