1
0
Fork 0
mirror of https://gitlab.com/SIGBUS/nyaa.git synced 2024-12-22 03:19:59 +00:00

elasticsearch 7.x compatibility (#576)

* es_mapping: update turning off dynamic mappings

they changed it in 6.x

https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html
https://github.com/elastic/elasticsearch/pull/25734

* es_mapping: remove _all field

deprecated in 6.0 anyway

* es_mapping.yml: fix deprecated mapping type

https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html#_schedule_for_removal_of_mapping_types

it gives a really unhelpful error otherwise, oof.

* es: fix remaining 7.xisms

the enabled: false apparently only applies to
"object" fields now, need index: false

and the _type got removed everywhere. Seems to work now.

* Fix weird offset error with word_delimiter_graph

yet another es7-ism i guess

* Fix warning and some app stuff for ES 7.x

Co-authored-by: Arylide <Arylide@users.noreply.github.com>
This commit is contained in:
queue 2020-07-12 01:10:47 -06:00 committed by GitHub
parent 72087ddaaf
commit 4fcef92b94
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 68 additions and 73 deletions

View file

@@ -1,4 +1,5 @@
#!/usr/bin/env bash
set -e
# create indices named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

View file

@@ -10,7 +10,6 @@ settings:
char_filter:
- my_char_filter
filter:
- standard
- lowercase
my_index_analyzer:
type: custom
@@ -52,7 +51,7 @@ settings:
filter:
my_ngram:
type: edgeNGram
type: edge_ngram
min_gram: 1
max_gram: 15
fullword_min:
@@ -66,9 +65,13 @@ settings:
type: pattern_capture
patterns: ["0*([0-9]*)"]
word_delimit:
type: word_delimiter
type: word_delimiter_graph
preserve_original: true
split_on_numerics: false
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms
# since we're using "trim" filters downstream, otherwise
# you get weird lucene errors about startOffset
adjust_offsets: false
char_filter:
my_char_filter:
type: mapping
@@ -78,18 +81,11 @@ settings:
# plus replicas don't really help either.
number_of_shards: 1
number_of_replicas : 0
mapper:
# disable elasticsearch's "helpful" autoschema
dynamic: false
# since we disabled the _all field, default query the
# name of the torrent.
query:
default_field: display_name
mappings:
torrent:
# don't want everything concatenated
_all:
enabled: false
# disable elasticsearch's "helpful" autoschema
dynamic: false
properties:
id:
type: long
@@ -111,9 +107,11 @@ mappings:
analyzer: exact_analyzer
created_time:
type: date
#
# Only in the ES index for generating magnet links
info_hash:
enabled: false
type: keyword
index: false
filesize:
type: long
anonymous:

View file

@@ -34,7 +34,6 @@ def pad_bytes(in_bytes, size):
def mk_es(t, index_name):
return {
"_id": t.id,
"_type": "torrent",
"_index": index_name,
"_source": {
# we're also indexing the id as a number so you can

View file

@@ -17,7 +17,7 @@
{% endif %}
{% endif %}
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
{% if (use_elastic and torrent_query.hits.total.value > 0) or (torrent_query.items) %}
<div class="table-responsive">
<table class="table table-bordered table-hover table-striped torrent-list">
<thead>

View file

@@ -167,7 +167,7 @@ def home(rss):
else:
rss_query_string = _generate_query_string(
search_term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total'])
max_results = min(max_search_results, query_results['hits']['total']['value'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=results_per_page,
total=max_results, bs_version=3, page_parameter='p',

View file

@@ -114,7 +114,6 @@ def reindex_torrent(t, index_name):
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(t['id']),
"doc": doc,
"doc_as_upsert": True
@@ -128,7 +127,6 @@ def reindex_stats(s, index_name):
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(s['torrent_id']),
"doc": {
"stats_last_updated": s["last_updated"],
@@ -141,7 +139,6 @@ def delet_this(row, index_name):
return {
"_op_type": 'delete',
'_index': index_name,
'_type': 'torrent',
'_id': str(row['values']['id'])}
# we could try to make this script robust to errors from es or mysql, but since