---
# CREATE DATABASE/TABLE equivalent for elasticsearch, written in yaml
# so that we can have inline comments.
#
# Layout:
#   settings.analysis - custom analyzers, token filters and char filters
#   settings.index    - shard/replica counts, dynamic mapping, default field
#   mappings.torrent  - the field schema of a torrent document
#
# NOTE(review): the `standard` token filter, the `edgeNGram` type name,
# `_all` and `index.mapper.dynamic` look tied to an older Elasticsearch
# major version - confirm against the target version before upgrading.
settings:
  analysis:
    analyzer:
      # Query-side analyzer: no ngram filter, only lowercasing.
      my_search_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - standard
          - lowercase

      # Index-side analyzer for display_name (see mappings below).
      my_index_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - resolution
          - lowercase
          - my_ngram
          - word_delimit
          - trim_zero

      # For exact matching - simple lowercase + whitespace delimiter
      exact_analyzer:
        tokenizer: whitespace
        filter:
          - lowercase

      # For matching full words longer than the ngram limit (15 chars)
      my_fullword_index_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - lowercase
          - word_delimit
          # Skip tokens shorter than N characters,
          # since they're already indexed in the main field
          - fullword_min

    filter:
      # Edge ngrams of 1 to 15 characters.
      my_ngram:
        type: edgeNGram
        min_gram: 1
        max_gram: 15

      fullword_min:
        type: length
        # Keep this at my_ngram's max_gram + 1; remember to change it
        # if you change the max_gram above!
        min: 16

      # Captures the two numbers on either side of an x/X
      # (e.g. a 1920x1080-style token).
      resolution:
        type: pattern_capture
        patterns: ["(\\d+)[xX](\\d+)"]

      # Captures the digits that follow any leading zeros.
      trim_zero:
        type: pattern_capture
        patterns: ["0*([0-9]*)"]

      word_delimit:
        type: word_delimiter
        preserve_original: true
        split_on_numerics: false

    char_filter:
      # Rules as written: "-" and "!" map to "_", "_" maps to \u0020.
      my_char_filter:
        type: mapping
        mappings: ["-=>_", "!=>_", "_=>\\u0020"]

  index:
    # we're running a single es node, so no sharding necessary,
    # plus replicas don't really help either.
    number_of_shards: 1
    number_of_replicas: 0
    mapper:
      # disable elasticsearch's "helpful" autoschema
      dynamic: false
    # since we disabled the _all field, default query the
    # name of the torrent.
    query:
      default_field: display_name

mappings:
  torrent:
    # don't want everything concatenated
    _all:
      enabled: false
    properties:
      id:
        type: long
      display_name:
        # TODO could do a fancier tokenizer here to parse out the
        # scene convention of stuff in brackets, plus stuff like k-on
        type: text
        analyzer: my_index_analyzer
        fielddata: true  # Is this required?
        fields:
          # Multi-field for full-word matching (when going over ngram
          # limits)
          # Note: will have to be queried for, not automatic
          fullword:
            type: text
            analyzer: my_fullword_index_analyzer
          # Stored for exact phrase matching
          exact:
            type: text
            analyzer: exact_analyzer
      created_time:
        type: date
      # Only in the ES index for generating magnet links
      info_hash:
        enabled: false
      filesize:
        type: long
      anonymous:
        type: boolean
      trusted:
        type: boolean
      remake:
        type: boolean
      complete:
        type: boolean
      hidden:
        type: boolean
      deleted:
        type: boolean
      has_torrent:
        type: boolean
      download_count:
        type: long
      leech_count:
        type: long
      seed_count:
        type: long
      comment_count:
        type: long
      # these ids are really only for filtering, thus keyword
      uploader_id:
        type: keyword
      main_category_id:
        type: keyword
      sub_category_id:
        type: keyword