2017-05-15 18:14:01 +00:00
|
|
|
---
|
|
|
|
# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml
|
|
|
|
# for inline comments.
|
|
|
|
settings:
|
|
|
|
analysis:
|
|
|
|
analyzer:
|
|
|
|
my_search_analyzer:
|
|
|
|
type: custom
|
|
|
|
tokenizer: standard
|
|
|
|
char_filter:
|
|
|
|
- my_char_filter
|
|
|
|
filter:
|
|
|
|
- standard
|
|
|
|
- lowercase
|
|
|
|
my_index_analyzer:
|
|
|
|
type: custom
|
|
|
|
tokenizer: standard
|
|
|
|
char_filter:
|
|
|
|
- my_char_filter
|
|
|
|
filter:
|
2017-05-18 08:58:08 +00:00
|
|
|
- resolution
|
2017-05-15 18:14:01 +00:00
|
|
|
- lowercase
|
|
|
|
- my_ngram
|
2017-05-25 06:19:08 +00:00
|
|
|
- word_delimit
|
2017-06-05 06:03:32 +00:00
|
|
|
- trim_zero
|
2018-04-14 00:06:25 +00:00
|
|
|
# For exact matching - simple lowercase + whitespace delimiter
|
|
|
|
exact_analyzer:
|
|
|
|
tokenizer: whitespace
|
|
|
|
filter:
|
|
|
|
- lowercase
|
2017-06-05 14:29:00 +00:00
|
|
|
# For matching full words longer than the ngram limit (15 chars)
|
|
|
|
my_fullword_index_analyzer:
|
|
|
|
type: custom
|
|
|
|
tokenizer: standard
|
|
|
|
char_filter:
|
|
|
|
- my_char_filter
|
|
|
|
filter:
|
|
|
|
- lowercase
|
|
|
|
- word_delimit
|
2018-04-14 00:06:25 +00:00
|
|
|
# Skip tokens shorter than N characters,
|
|
|
|
# since they're already indexed in the main field
|
|
|
|
- fullword_min
|
2017-06-05 14:29:00 +00:00
|
|
|
|
2017-05-15 18:14:01 +00:00
|
|
|
filter:
|
|
|
|
my_ngram:
|
|
|
|
type: edgeNGram
|
|
|
|
min_gram: 1
|
|
|
|
max_gram: 15
|
2018-04-14 00:06:25 +00:00
|
|
|
fullword_min:
|
|
|
|
type: length
|
|
|
|
# Remember to change this (max_gram + 1) if you change the my_ngram max_gram above!
|
|
|
|
min: 16
|
2017-05-18 08:58:08 +00:00
|
|
|
resolution:
|
|
|
|
type: pattern_capture
|
2017-05-25 06:19:08 +00:00
|
|
|
patterns: ["(\\d+)[xX](\\d+)"]
|
2017-06-05 06:03:32 +00:00
|
|
|
trim_zero:
|
|
|
|
type: pattern_capture
|
|
|
|
patterns: ["0*([0-9]*)"]
|
2017-05-25 06:19:08 +00:00
|
|
|
word_delimit:
|
|
|
|
type: word_delimiter
|
|
|
|
preserve_original: true
|
|
|
|
split_on_numerics: false
|
2017-05-15 18:14:01 +00:00
|
|
|
char_filter:
|
|
|
|
my_char_filter:
|
|
|
|
type: mapping
|
2017-05-18 08:58:08 +00:00
|
|
|
mappings: ["-=>_", "!=>_", "_=>\\u0020"]
|
2017-05-15 18:14:01 +00:00
|
|
|
index:
|
|
|
|
# we're running a single es node, so no sharding necessary,
|
|
|
|
# plus replicas don't really help either.
|
|
|
|
number_of_shards: 1
|
|
|
|
number_of_replicas: 0  # single-node setup, so no replicas are configured
|
|
|
|
mapper:
|
|
|
|
# disable elasticsearch's "helpful" autoschema
|
|
|
|
dynamic: false
|
|
|
|
# since we disabled the _all field, default query the
|
|
|
|
# name of the torrent.
|
|
|
|
query:
|
|
|
|
default_field: display_name
|
|
|
|
mappings:
|
|
|
|
torrent:
|
|
|
|
# don't want everything concatenated
|
|
|
|
_all:
|
|
|
|
enabled: false
|
|
|
|
properties:
|
|
|
|
id:
|
|
|
|
type: long
|
|
|
|
display_name:
|
|
|
|
# TODO could do a fancier tokenizer here to parse out the
|
|
|
|
# scene convention of stuff in brackets, plus stuff like k-on
|
|
|
|
type: text
|
|
|
|
analyzer: my_index_analyzer
|
2017-06-05 14:29:00 +00:00
|
|
|
fielddata: true # Is this required?
|
|
|
|
fields:
|
|
|
|
# Multi-field for full-word matching (when going over ngram limits)
|
|
|
|
# Note: will have to be queried for, not automatic
|
|
|
|
fullword:
|
|
|
|
type: text
|
|
|
|
analyzer: my_fullword_index_analyzer
|
2018-04-14 00:06:25 +00:00
|
|
|
# Stored for exact phrase matching
|
|
|
|
exact:
|
|
|
|
type: text
|
|
|
|
analyzer: exact_analyzer
|
2017-05-15 18:14:01 +00:00
|
|
|
created_time:
|
|
|
|
type: date
|
|
|
|
# Only in the ES index for generating magnet links
|
|
|
|
info_hash:
|
|
|
|
enabled: false
|
|
|
|
filesize:
|
|
|
|
type: long
|
|
|
|
anonymous:
|
|
|
|
type: boolean
|
|
|
|
trusted:
|
|
|
|
type: boolean
|
|
|
|
remake:
|
|
|
|
type: boolean
|
|
|
|
complete:
|
|
|
|
type: boolean
|
|
|
|
hidden:
|
|
|
|
type: boolean
|
|
|
|
deleted:
|
|
|
|
type: boolean
|
|
|
|
has_torrent:
|
|
|
|
type: boolean
|
|
|
|
download_count:
|
|
|
|
type: long
|
|
|
|
leech_count:
|
|
|
|
type: long
|
|
|
|
seed_count:
|
|
|
|
type: long
|
2017-05-26 13:12:47 +00:00
|
|
|
comment_count:
|
|
|
|
type: long
|
2017-05-15 18:14:01 +00:00
|
|
|
# these ids are really only for filtering, thus keyword
|
|
|
|
uploader_id:
|
|
|
|
type: keyword
|
|
|
|
main_category_id:
|
|
|
|
type: keyword
|
|
|
|
sub_category_id:
|
2017-05-17 05:53:03 +00:00
|
|
|
type: keyword
|