mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-22 10:50:07 +00:00
some more elasticsearch work, including index mapping and analyzer
This commit is contained in:
parent
32b9170a81
commit
c2c547e786
3
create_es.sh
Executable file
3
create_es.sh
Executable file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
|
91
es_mapping.yml
Normal file
91
es_mapping.yml
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
---
|
||||||
|
# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml
|
||||||
|
# for inline comments.
|
||||||
|
settings:
|
||||||
|
analysis:
|
||||||
|
analyzer:
|
||||||
|
my_search_analyzer:
|
||||||
|
type: custom
|
||||||
|
tokenizer: standard
|
||||||
|
char_filter:
|
||||||
|
- my_char_filter
|
||||||
|
filter:
|
||||||
|
- standard
|
||||||
|
- lowercase
|
||||||
|
my_index_analyzer:
|
||||||
|
type: custom
|
||||||
|
tokenizer: standard
|
||||||
|
char_filter:
|
||||||
|
- my_char_filter
|
||||||
|
filter:
|
||||||
|
- lowercase
|
||||||
|
- my_ngram
|
||||||
|
filter:
|
||||||
|
my_ngram:
|
||||||
|
type: edgeNGram
|
||||||
|
min_gram: 1
|
||||||
|
max_gram: 15
|
||||||
|
char_filter:
|
||||||
|
my_char_filter:
|
||||||
|
type: mapping
|
||||||
|
mappings: ["-=>_", "!=>_"]
|
||||||
|
index:
|
||||||
|
# we're running a single es node, so no sharding necessary,
|
||||||
|
# plus replicas don't really help either.
|
||||||
|
number_of_shards: 1
|
||||||
|
number_of_replicas : 0
|
||||||
|
mapper:
|
||||||
|
# disable elasticsearch's "helpful" autoschema
|
||||||
|
dynamic: false
|
||||||
|
# since we disabled the _all field, default query the
|
||||||
|
# name of the torrent.
|
||||||
|
query:
|
||||||
|
default_field: display_name
|
||||||
|
mappings:
|
||||||
|
torrent:
|
||||||
|
# don't want everything concatenated
|
||||||
|
_all:
|
||||||
|
enabled: false
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: long
|
||||||
|
display_name:
|
||||||
|
# TODO could do a fancier tokenizer here to parse out the
|
||||||
|
# scene convention of stuff in brackets, plus stuff like k-on
|
||||||
|
type: text
|
||||||
|
analyzer: my_index_analyzer
|
||||||
|
fielddata: true
|
||||||
|
created_time:
|
||||||
|
type: date
|
||||||
|
# Only in the ES index for generating magnet links
|
||||||
|
info_hash:
|
||||||
|
enabled: false
|
||||||
|
filesize:
|
||||||
|
type: long
|
||||||
|
anonymous:
|
||||||
|
type: boolean
|
||||||
|
trusted:
|
||||||
|
type: boolean
|
||||||
|
remake:
|
||||||
|
type: boolean
|
||||||
|
complete:
|
||||||
|
type: boolean
|
||||||
|
hidden:
|
||||||
|
type: boolean
|
||||||
|
deleted:
|
||||||
|
type: boolean
|
||||||
|
has_torrent:
|
||||||
|
type: boolean
|
||||||
|
download_count:
|
||||||
|
type: long
|
||||||
|
leech_count:
|
||||||
|
type: long
|
||||||
|
seed_count:
|
||||||
|
type: long
|
||||||
|
# these ids are really only for filtering, thus keyword
|
||||||
|
uploader_id:
|
||||||
|
type: keyword
|
||||||
|
main_category_id:
|
||||||
|
type: keyword
|
||||||
|
sub_category_id:
|
||||||
|
type: keyword
|
|
@ -41,8 +41,6 @@ def mk_es(t):
|
||||||
"id": t.id,
|
"id": t.id,
|
||||||
"display_name": t.display_name,
|
"display_name": t.display_name,
|
||||||
"created_time": t.created_time,
|
"created_time": t.created_time,
|
||||||
"updated_time": t.updated_time,
|
|
||||||
"description": t.description,
|
|
||||||
# not analyzed but included so we can render magnet links
|
# not analyzed but included so we can render magnet links
|
||||||
# without querying sql again.
|
# without querying sql again.
|
||||||
"info_hash": t.info_hash.hex(),
|
"info_hash": t.info_hash.hex(),
|
||||||
|
@ -61,8 +59,7 @@ def mk_es(t):
|
||||||
"hidden": bool(t.hidden),
|
"hidden": bool(t.hidden),
|
||||||
"deleted": bool(t.deleted),
|
"deleted": bool(t.deleted),
|
||||||
"has_torrent": t.has_torrent,
|
"has_torrent": t.has_torrent,
|
||||||
# XXX last_updated isn't initialized
|
# Stats
|
||||||
"stats_last_updated": t.stats.last_updated or t.created_time,
|
|
||||||
"download_count": t.stats.download_count,
|
"download_count": t.stats.download_count,
|
||||||
"leech_count": t.stats.leech_count,
|
"leech_count": t.stats.leech_count,
|
||||||
"seed_count": t.stats.seed_count,
|
"seed_count": t.stats.seed_count,
|
||||||
|
|
|
@ -148,7 +148,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_
|
||||||
s = Search(using=es_client, index='nyaav2')
|
s = Search(using=es_client, index='nyaav2')
|
||||||
if term:
|
if term:
|
||||||
query = db.session.query(models.TorrentNameSearch)
|
query = db.session.query(models.TorrentNameSearch)
|
||||||
s = s.query("query_string", default_field="display_name", default_operator="AND", query=term)
|
s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term)
|
||||||
else:
|
else:
|
||||||
query = models.Torrent.query
|
query = models.Torrent.query
|
||||||
|
|
||||||
|
|
|
@ -33,24 +33,6 @@ webassets==0.12.1
|
||||||
Werkzeug==0.12.1
|
Werkzeug==0.12.1
|
||||||
WTForms==2.1
|
WTForms==2.1
|
||||||
## The following requirements were added by pip freeze:
|
## The following requirements were added by pip freeze:
|
||||||
decorator==4.0.11
|
|
||||||
elasticsearch==5.3.0
|
elasticsearch==5.3.0
|
||||||
elasticsearch-dsl==5.2.0
|
elasticsearch-dsl==5.2.0
|
||||||
ipython==6.0.0
|
|
||||||
ipython-genutils==0.2.0
|
|
||||||
jedi==0.10.2
|
|
||||||
mysql-replication==0.13
|
|
||||||
pexpect==4.2.1
|
|
||||||
pickleshare==0.7.4
|
|
||||||
pkg-resources==0.0.0
|
|
||||||
progressbar2==3.20.0
|
progressbar2==3.20.0
|
||||||
prompt-toolkit==1.0.14
|
|
||||||
ptyprocess==0.5.1
|
|
||||||
Pygments==2.2.0
|
|
||||||
PyMySQL==0.7.11
|
|
||||||
python-dateutil==2.6.0
|
|
||||||
python-utils==2.1.0
|
|
||||||
simplegeneric==0.8.1
|
|
||||||
traitlets==4.3.2
|
|
||||||
urllib3==1.21.1
|
|
||||||
wcwidth==0.1.7
|
|
||||||
|
|
Loading…
Reference in a new issue