some more elasticsearch work, including index mapping and analyzer

This commit is contained in:
aldacron 2017-05-15 11:14:01 -07:00
parent 32b9170a81
commit c2c547e786
5 changed files with 97 additions and 24 deletions

3
create_es.sh Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# PUT the contents of es_mapping.yml to the nyaav2 index endpoint of the
# Elasticsearch node on localhost:9200, with verbose curl output and a
# pretty-printed response.
curl --verbose --request PUT \
     --header 'Content-Type: application/yaml' \
     --data-binary @es_mapping.yml \
     'localhost:9200/nyaav2?pretty'

91
es_mapping.yml Normal file
View File

@ -0,0 +1,91 @@
---
# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml
# for inline comments.
settings:
analysis:
analyzer:
my_search_analyzer:
type: custom
tokenizer: standard
char_filter:
- my_char_filter
filter:
- standard
- lowercase
my_index_analyzer:
type: custom
tokenizer: standard
char_filter:
- my_char_filter
filter:
- lowercase
- my_ngram
filter:
my_ngram:
type: edgeNGram
min_gram: 1
max_gram: 15
char_filter:
my_char_filter:
type: mapping
mappings: ["-=>_", "!=>_"]
index:
# we're running a single es node, so no sharding necessary,
# plus replicas don't really help either.
number_of_shards: 1
number_of_replicas : 0
mapper:
# disable elasticsearch's "helpful" autoschema
dynamic: false
# since we disabled the _all field, query the name of the
# torrent by default.
query:
default_field: display_name
mappings:
torrent:
# don't want everything concatenated
_all:
enabled: false
properties:
id:
type: long
display_name:
# TODO could do a fancier tokenizer here to parse out
# the scene convention of stuff in brackets, plus stuff like k-on
type: text
analyzer: my_index_analyzer
fielddata: true
created_time:
type: date
# Only in the ES index for generating magnet links
info_hash:
enabled: false
filesize:
type: long
anonymous:
type: boolean
trusted:
type: boolean
remake:
type: boolean
complete:
type: boolean
hidden:
type: boolean
deleted:
type: boolean
has_torrent:
type: boolean
download_count:
type: long
leech_count:
type: long
seed_count:
type: long
# these ids are really only for filtering, thus keyword
uploader_id:
type: keyword
main_category_id:
type: keyword
sub_category_id:
type: keyword

View File

@ -41,8 +41,6 @@ def mk_es(t):
"id": t.id,
"display_name": t.display_name,
"created_time": t.created_time,
"updated_time": t.updated_time,
"description": t.description,
# not analyzed but included so we can render magnet links
# without querying sql again.
"info_hash": t.info_hash.hex(),
@ -61,8 +59,7 @@ def mk_es(t):
"hidden": bool(t.hidden),
"deleted": bool(t.deleted),
"has_torrent": t.has_torrent,
# XXX last_updated isn't initialized
"stats_last_updated": t.stats.last_updated or t.created_time,
# Stats
"download_count": t.stats.download_count,
"leech_count": t.stats.leech_count,
"seed_count": t.stats.seed_count,

View File

@ -148,7 +148,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_
s = Search(using=es_client, index='nyaav2')
if term:
query = db.session.query(models.TorrentNameSearch)
s = s.query("query_string", default_field="display_name", default_operator="AND", query=term)
s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term)
else:
query = models.Torrent.query

View File

@ -33,24 +33,6 @@ webassets==0.12.1
Werkzeug==0.12.1
WTForms==2.1
## The following requirements were added by pip freeze:
decorator==4.0.11
elasticsearch==5.3.0
elasticsearch-dsl==5.2.0
ipython==6.0.0
ipython-genutils==0.2.0
jedi==0.10.2
mysql-replication==0.13
pexpect==4.2.1
pickleshare==0.7.4
pkg-resources==0.0.0
progressbar2==3.20.0
prompt-toolkit==1.0.14
ptyprocess==0.5.1
Pygments==2.2.0
PyMySQL==0.7.11
python-dateutil==2.6.0
python-utils==2.1.0
simplegeneric==0.8.1
traitlets==4.3.2
urllib3==1.21.1
wcwidth==0.1.7
progressbar2==3.20.0