mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2025-01-26 06:55:14 +00:00
elasticsearch 7.x compatibility (#576)
* es_mapping: update how dynamic mappings are turned off; they changed it in 6.x https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html https://github.com/elastic/elasticsearch/pull/25734 * es_mapping: remove the _all field, which was deprecated in 6.0 anyway * es_mapping.yml: fix deprecated mapping type https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html#_schedule_for_removal_of_mapping_types It gives a really unhelpful error otherwise, oof. * es: fix remaining 7.x-isms: the "enabled: false" setting apparently only applies to "object" fields now, so we need "index: false", and the _type got removed everywhere. Seems to work now. * Fix weird offset error with word_delimiter_graph; yet another ES7-ism, I guess * Fix warning and some app stuff for ES 7.x Co-authored-by: Arylide <Arylide@users.noreply.github.com>
This commit is contained in:
parent
72087ddaaf
commit
4fcef92b94
|
@ -1,4 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
# create indices named "nyaa" and "sukebei", these are hardcoded
|
||||
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
|
||||
|
|
132
es_mapping.yml
132
es_mapping.yml
|
@ -10,7 +10,6 @@ settings:
|
|||
char_filter:
|
||||
- my_char_filter
|
||||
filter:
|
||||
- standard
|
||||
- lowercase
|
||||
my_index_analyzer:
|
||||
type: custom
|
||||
|
@ -52,7 +51,7 @@ settings:
|
|||
|
||||
filter:
|
||||
my_ngram:
|
||||
type: edgeNGram
|
||||
type: edge_ngram
|
||||
min_gram: 1
|
||||
max_gram: 15
|
||||
fullword_min:
|
||||
|
@ -66,9 +65,13 @@ settings:
|
|||
type: pattern_capture
|
||||
patterns: ["0*([0-9]*)"]
|
||||
word_delimit:
|
||||
type: word_delimiter
|
||||
type: word_delimiter_graph
|
||||
preserve_original: true
|
||||
split_on_numerics: false
|
||||
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms
|
||||
# since we're using "trim" filters downstream, otherwise
|
||||
# you get weird lucene errors about startOffset
|
||||
adjust_offsets: false
|
||||
char_filter:
|
||||
my_char_filter:
|
||||
type: mapping
|
||||
|
@ -78,70 +81,65 @@ settings:
|
|||
# plus replicas don't really help either.
|
||||
number_of_shards: 1
|
||||
number_of_replicas : 0
|
||||
mapper:
|
||||
# disable elasticsearch's "helpful" autoschema
|
||||
dynamic: false
|
||||
# since we disabled the _all field, default query the
|
||||
# name of the torrent.
|
||||
query:
|
||||
default_field: display_name
|
||||
mappings:
|
||||
torrent:
|
||||
# don't want everything concatenated
|
||||
_all:
|
||||
enabled: false
|
||||
properties:
|
||||
id:
|
||||
type: long
|
||||
display_name:
|
||||
# TODO could do a fancier tokenizer here to parse out the
|
||||
# scene convention of stuff in brackets, plus stuff like k-on
|
||||
type: text
|
||||
analyzer: my_index_analyzer
|
||||
fielddata: true # Is this required?
|
||||
fields:
|
||||
# Multi-field for full-word matching (when going over ngram limits)
|
||||
# Note: will have to be queried for, not automatic
|
||||
fullword:
|
||||
type: text
|
||||
analyzer: my_fullword_index_analyzer
|
||||
# Stored for exact phrase matching
|
||||
exact:
|
||||
type: text
|
||||
analyzer: exact_analyzer
|
||||
created_time:
|
||||
type: date
|
||||
# Only in the ES index for generating magnet links
|
||||
info_hash:
|
||||
enabled: false
|
||||
filesize:
|
||||
type: long
|
||||
anonymous:
|
||||
type: boolean
|
||||
trusted:
|
||||
type: boolean
|
||||
remake:
|
||||
type: boolean
|
||||
complete:
|
||||
type: boolean
|
||||
hidden:
|
||||
type: boolean
|
||||
deleted:
|
||||
type: boolean
|
||||
has_torrent:
|
||||
type: boolean
|
||||
download_count:
|
||||
type: long
|
||||
leech_count:
|
||||
type: long
|
||||
seed_count:
|
||||
type: long
|
||||
comment_count:
|
||||
type: long
|
||||
# these ids are really only for filtering, thus keyword
|
||||
uploader_id:
|
||||
type: keyword
|
||||
main_category_id:
|
||||
type: keyword
|
||||
sub_category_id:
|
||||
type: keyword
|
||||
# disable elasticsearch's "helpful" autoschema
|
||||
dynamic: false
|
||||
properties:
|
||||
id:
|
||||
type: long
|
||||
display_name:
|
||||
# TODO could do a fancier tokenizer here to parse out the
|
||||
# scene convention of stuff in brackets, plus stuff like k-on
|
||||
type: text
|
||||
analyzer: my_index_analyzer
|
||||
fielddata: true # Is this required?
|
||||
fields:
|
||||
# Multi-field for full-word matching (when going over ngram limits)
|
||||
# Note: will have to be queried for, not automatic
|
||||
fullword:
|
||||
type: text
|
||||
analyzer: my_fullword_index_analyzer
|
||||
# Stored for exact phrase matching
|
||||
exact:
|
||||
type: text
|
||||
analyzer: exact_analyzer
|
||||
created_time:
|
||||
type: date
|
||||
#
|
||||
# Only in the ES index for generating magnet links
|
||||
info_hash:
|
||||
type: keyword
|
||||
index: false
|
||||
filesize:
|
||||
type: long
|
||||
anonymous:
|
||||
type: boolean
|
||||
trusted:
|
||||
type: boolean
|
||||
remake:
|
||||
type: boolean
|
||||
complete:
|
||||
type: boolean
|
||||
hidden:
|
||||
type: boolean
|
||||
deleted:
|
||||
type: boolean
|
||||
has_torrent:
|
||||
type: boolean
|
||||
download_count:
|
||||
type: long
|
||||
leech_count:
|
||||
type: long
|
||||
seed_count:
|
||||
type: long
|
||||
comment_count:
|
||||
type: long
|
||||
# these ids are really only for filtering, thus keyword
|
||||
uploader_id:
|
||||
type: keyword
|
||||
main_category_id:
|
||||
type: keyword
|
||||
sub_category_id:
|
||||
type: keyword
|
||||
|
|
|
@ -34,7 +34,6 @@ def pad_bytes(in_bytes, size):
|
|||
def mk_es(t, index_name):
|
||||
return {
|
||||
"_id": t.id,
|
||||
"_type": "torrent",
|
||||
"_index": index_name,
|
||||
"_source": {
|
||||
# we're also indexing the id as a number so you can
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
|
||||
{% if (use_elastic and torrent_query.hits.total.value > 0) or (torrent_query.items) %}
|
||||
<div class="table-responsive">
|
||||
<table class="table table-bordered table-hover table-striped torrent-list">
|
||||
<thead>
|
||||
|
|
|
@ -167,7 +167,7 @@ def home(rss):
|
|||
else:
|
||||
rss_query_string = _generate_query_string(
|
||||
search_term, category, quality_filter, user_name)
|
||||
max_results = min(max_search_results, query_results['hits']['total'])
|
||||
max_results = min(max_search_results, query_results['hits']['total']['value'])
|
||||
# change p= argument to whatever you change page_parameter to or pagination breaks
|
||||
pagination = Pagination(p=query_args['page'], per_page=results_per_page,
|
||||
total=max_results, bs_version=3, page_parameter='p',
|
||||
|
|
|
@ -114,7 +114,6 @@ def reindex_torrent(t, index_name):
|
|||
return {
|
||||
'_op_type': 'update',
|
||||
'_index': index_name,
|
||||
'_type': 'torrent',
|
||||
'_id': str(t['id']),
|
||||
"doc": doc,
|
||||
"doc_as_upsert": True
|
||||
|
@ -128,7 +127,6 @@ def reindex_stats(s, index_name):
|
|||
return {
|
||||
'_op_type': 'update',
|
||||
'_index': index_name,
|
||||
'_type': 'torrent',
|
||||
'_id': str(s['torrent_id']),
|
||||
"doc": {
|
||||
"stats_last_updated": s["last_updated"],
|
||||
|
@ -141,7 +139,6 @@ def delet_this(row, index_name):
|
|||
return {
|
||||
"_op_type": 'delete',
|
||||
'_index': index_name,
|
||||
'_type': 'torrent',
|
||||
'_id': str(row['values']['id'])}
|
||||
|
||||
# we could try to make this script robust to errors from es or mysql, but since
|
||||
|
|
Loading…
Reference in a new issue