mirror of https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-22 05:10:00 +00:00

commit 00c768c722: merged elasticsearch, let's hope this doesn't break shit
38	README.md

@@ -44,5 +44,43 @@
- Start the dev server with `python run.py`
- Deactivate `source deactivate`
# Enabling ElasticSearch

## Basics

- Install the JDK: `sudo apt-get install openjdk-8-jdk`
- Install elasticsearch: https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html
- `sudo systemctl enable elasticsearch.service`
- `sudo systemctl start elasticsearch.service`
- Run `curl -XGET 'localhost:9200'` and make sure ES is running; see the sample output below
- Optional: install Kibana as a search frontend for ES
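If ES is up, that curl check returns a JSON summary roughly like the following (names and numbers will differ per install):

```json
{
  "name" : "node-1",
  "cluster_name" : "elasticsearch",
  "version" : { "number" : "5.3.0" },
  "tagline" : "You Know, for Search"
}
```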

## Enable MySQL Binlogging

- Add the `[mariadb]` bin-log section to `my.cnf` (quoted below; the full diff is further down) and reload the mysql server
- Connect to mysql
- `SHOW VARIABLES LIKE 'binlog_format';`
- Make sure it shows ROW
- Connect as the root user
- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';`, where `test` is the user you will be running `sync_es.py` as
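For reference, the `[mariadb]` section this commit adds to `my.cnf`:

```
[mariadb]
log-bin
server_id=1
log-basename=master1
binlog-format = row
```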

## Setting up ES

- Run `./create_es.sh`; this creates two indices: `nyaa` and `sukebei`
- The output should show `acknowledged: true` twice
- The safest bet is to disable the webapp here to ensure there are no database writes
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa`
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei`
- These will take some time to run, since they index everything
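A sketch of those two runs, assuming `SITE_FLAVOR` is picked up from the environment by `config.py` (if your config hardcodes it, edit it there instead):

```
SITE_FLAVOR=nyaa python import_to_es.py
SITE_FLAVOR=sukebei python import_to_es.py
```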

## Setting up sync_es.py

- `sync_es.py` keeps the ElasticSearch index updated by reading the MySQL binlog
- Configure the MySQL options with the user you granted the REPLICATION permissions to
- Connect to MySQL and run `SHOW MASTER STATUS;`
- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}`, replacing FILE with the File column of the SQL output (something like master1-bin.000002) and POSITION with the Position column (something like 892528513)
- Set up `sync_es.py` as a service and run it, preferably as the system/root user
- Make sure `sync_es.py` runs within the venv with the right dependencies
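For example, if `SHOW MASTER STATUS` reports File `master1-bin.000002` and Position `892528513`, the position file would read:

```json
{"log_file": "master1-bin.000002", "log_pos": 892528513}
```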

## Good to go!

- After that, enable the `USE_ELASTIC_SEARCH` flag, restart the webapp, and you're good to go

## Code Quality:

- Remember to follow PEP8 style guidelines and run `./lint.sh` before committing.
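Concretely, that last step is flipping the new flag added to `config.py` in this commit (diff below):

```
USE_ELASTIC_SEARCH = True
```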
config.py

@@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***'
 SMTP_USERNAME = '***'
 SMTP_PASSWORD = '***'

-RESULTS_PER_PAGE = 75
-
 # What the site identifies itself as.
 SITE_NAME = 'Nyaa'

@@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False
 MAIN_ANNOUNCE_URL = ''

 BACKUP_TORRENT_FOLDER = 'torrents'
+
+#
+# Search Options
+#
+# Max ES search results, do not set over 10000
+RESULTS_PER_PAGE = 75
+
+USE_ELASTIC_SEARCH = False
+ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False
+ES_MAX_SEARCH_RESULT = 1000
+ES_INDEX_NAME = SITE_FLAVOR  # we create indices named nyaa or sukebei
5	create_es.sh (new executable file)

@@ -0,0 +1,5 @@
#!/usr/bin/env bash

# create indices named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
91	es_mapping.yml (new file)

@@ -0,0 +1,91 @@
---
# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml
# for inline comments.
settings:
  analysis:
    analyzer:
      my_search_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - standard
          - lowercase
      my_index_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - lowercase
          - my_ngram
    filter:
      my_ngram:
        type: edgeNGram
        min_gram: 1
        max_gram: 15
    char_filter:
      my_char_filter:
        type: mapping
        mappings: ["-=>_", "!=>_"]
  index:
    # we're running a single es node, so no sharding necessary,
    # plus replicas don't really help either.
    number_of_shards: 1
    number_of_replicas: 0
    mapper:
      # disable elasticsearch's "helpful" autoschema
      dynamic: false
    # since we disabled the _all field, default query the
    # name of the torrent.
    query:
      default_field: display_name
mappings:
  torrent:
    # don't want everything concatenated
    _all:
      enabled: false
    properties:
      id:
        type: long
      display_name:
        # TODO could do a fancier tokenizer here to parse out the
        # scene convention of stuff in brackets, plus stuff like k-on
        type: text
        analyzer: my_index_analyzer
        fielddata: true
      created_time:
        type: date
      # Only in the ES index for generating magnet links
      info_hash:
        enabled: false
      filesize:
        type: long
      anonymous:
        type: boolean
      trusted:
        type: boolean
      remake:
        type: boolean
      complete:
        type: boolean
      hidden:
        type: boolean
      deleted:
        type: boolean
      has_torrent:
        type: boolean
      download_count:
        type: long
      leech_count:
        type: long
      seed_count:
        type: long
      # these ids are really only for filtering, thus keyword
      uploader_id:
        type: keyword
      main_category_id:
        type: keyword
      sub_category_id:
        type: keyword
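A quick way to sanity-check the custom analyzer (a sketch; assumes a local node and the `nyaa` index created from this mapping):

```python
from elasticsearch import Elasticsearch

es = Elasticsearch()
# my_char_filter maps '-' and '!' to '_', and my_ngram emits edge n-grams,
# which is what makes prefix matching on titles like "k-on" work
resp = es.indices.analyze(index='nyaa', body={
    'analyzer': 'my_index_analyzer',
    'text': 'K-ON!'
})
print([t['token'] for t in resp['tokens']])
# expect edge n-grams of 'k_on_': ['k', 'k_', 'k_o', 'k_on', 'k_on_']
```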
92	import_to_es.py (new file)

@@ -0,0 +1,92 @@
#!/usr/bin/env python
"""
Bulk load torrents from mysql into elasticsearch `nyaav2` index,
which is assumed to already exist.
This is a one-shot deal, so you'd either need to complement it
with a cron job or some binlog-reading thing (TODO)
"""
from nyaa import app
from nyaa.models import Torrent
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import progressbar
import sys

bar = progressbar.ProgressBar(
    max_value=Torrent.query.count(),
    widgets=[
        progressbar.SimpleProgress(),
        ' [', progressbar.Timer(), '] ',
        progressbar.Bar(),
        ' (', progressbar.ETA(), ') ',
    ])

es = Elasticsearch()

# turn into thing that elasticsearch indexes. We flatten in
# the stats (seeders/leechers) so we can order by them in es naturally.
# we _don't_ dereference uploader_id to the user's display name however,
# instead doing that at query time. I _think_ this is right because
# we don't want to reindex all the user's torrents just because they
# changed their name, and we don't really want to FTS search on the user anyway.
# Maybe it's more convenient to dereference though.
def mk_es(t):
    return {
        "_id": t.id,
        "_type": "torrent",
        "_index": app.config['ES_INDEX_NAME'],
        "_source": {
            # we're also indexing the id as a number so you can
            # order by it. seems like this is just equivalent to
            # order by created_time, but oh well
            "id": t.id,
            "display_name": t.display_name,
            "created_time": t.created_time,
            # not analyzed but included so we can render magnet links
            # without querying sql again.
            "info_hash": t.info_hash.hex(),
            "filesize": t.filesize,
            "uploader_id": t.uploader_id,
            "main_category_id": t.main_category_id,
            "sub_category_id": t.sub_category_id,
            # XXX all the bitflags are numbers
            "anonymous": bool(t.anonymous),
            "trusted": bool(t.trusted),
            "remake": bool(t.remake),
            "complete": bool(t.complete),
            # TODO instead of indexing and filtering later
            # could delete from es entirely. Probably won't matter
            # for at least a few months.
            "hidden": bool(t.hidden),
            "deleted": bool(t.deleted),
            "has_torrent": t.has_torrent,
            # Stats
            "download_count": t.stats.download_count,
            "leech_count": t.stats.leech_count,
            "seed_count": t.stats.seed_count,
        }
    }

# page through an sqlalchemy query, like the per_fetch but
# doesn't break the eager joins its doing against the stats table.
# annoying that this isn't built in somehow.
def page_query(query, limit=sys.maxsize, batch_size=10000):
    start = 0
    while True:
        # XXX very inelegant way to do this, i'm confus
        stop = min(limit, start + batch_size)
        if stop == start:
            break
        things = query.slice(start, stop)
        if not things:
            break
        had_things = False
        for thing in things:
            had_things = True
            yield thing
        if not had_things or stop == limit:
            break
        bar.update(start)
        start = min(limit, start + batch_size)

helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000)
6	my.cnf

@@ -4,3 +4,9 @@ ft_min_word_len=2
 innodb_ft_cache_size = 80000000
 innodb_ft_total_cache_size = 1600000000
 max_allowed_packet = 100M
+
+[mariadb]
+log-bin
+server_id=1
+log-basename=master1
+binlog-format = row
279	nyaa/routes.py

@@ -6,18 +6,16 @@ from nyaa import bencode, utils
 from nyaa import torrents
 from nyaa import backend
 from nyaa import api_handler
+from nyaa.search import search_elastic, search_db
 import config

 import json
-import re
 from datetime import datetime, timedelta
 import ipaddress
 import os.path
 import base64
 from urllib.parse import quote
-import sqlalchemy_fulltext.modes as FullTextMode
-from sqlalchemy_fulltext import FullTextSearch
-import shlex
+import math

 from werkzeug import url_encode

 from itsdangerous import URLSafeSerializer, BadSignature

@@ -27,7 +25,14 @@ from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.utils import formatdate

+from flask_paginate import Pagination
+
+
 DEBUG_API = False
+DEFAULT_MAX_SEARCH_RESULT = 1000
+DEFAULT_PER_PAGE = 75
+SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results {start}-{end} out of {total} results.<br>
+Please refine your search results if you can't find what you were looking for.'''


 def redirect_url():

@@ -48,144 +53,13 @@ def modify_query(**new_values):

     return '{}?{}'.format(flask.request.path, url_encode(args))


 @app.template_global()
 def filter_truthy(input_list):
     ''' Jinja2 can't into list comprehension so this is for
     the search_results.html template '''
     return [item for item in input_list if item]


-def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False):
-    sort_keys = {
-        'id': models.Torrent.id,
-        'size': models.Torrent.filesize,
-        'name': models.Torrent.display_name,
-        'seeders': models.Statistic.seed_count,
-        'leechers': models.Statistic.leech_count,
-        'downloads': models.Statistic.download_count
-    }
-
-    sort_ = sort.lower()
-    if sort_ not in sort_keys:
-        flask.abort(400)
-    sort = sort_keys[sort]
-
-    order_keys = {
-        'desc': 'desc',
-        'asc': 'asc'
-    }
-
-    order_ = order.lower()
-    if order_ not in order_keys:
-        flask.abort(400)
-
-    filter_keys = {
-        '0': None,
-        '1': (models.TorrentFlags.REMAKE, False),
-        '2': (models.TorrentFlags.TRUSTED, True),
-        '3': (models.TorrentFlags.COMPLETE, True)
-    }
-
-    sentinel = object()
-    filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
-    if filter_tuple is sentinel:
-        flask.abort(400)
-
-    if user:
-        user = models.User.by_id(user)
-        if not user:
-            flask.abort(404)
-        user = user.id
-
-    main_category = None
-    sub_category = None
-    main_cat_id = 0
-    sub_cat_id = 0
-    if category:
-        cat_match = re.match(r'^(\d+)_(\d+)$', category)
-        if not cat_match:
-            flask.abort(400)
-
-        main_cat_id = int(cat_match.group(1))
-        sub_cat_id = int(cat_match.group(2))
-
-        if main_cat_id > 0:
-            if sub_cat_id > 0:
-                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
-            else:
-                main_category = models.MainCategory.by_id(main_cat_id)
-
-        if not category:
-            flask.abort(400)
-
-    # Force sort by id desc if rss
-    if rss:
-        sort = sort_keys['id']
-        order = 'desc'
-
-    same_user = False
-    if flask.g.user:
-        same_user = flask.g.user.id == user
-
-    if term:
-        query = db.session.query(models.TorrentNameSearch)
-    else:
-        query = models.Torrent.query
-
-    # User view (/user/username)
-    if user:
-        query = query.filter(models.Torrent.uploader_id == user)
-
-        if not admin:
-            # Hide all DELETED torrents if regular user
-            query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
-            # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous
-            # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones
-            # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what
-            if not same_user or rss:
-                query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN |
-                                                                      models.TorrentFlags.ANONYMOUS)).is_(False))
-    # General view (homepage, general search view)
-    else:
-        if not admin:
-            # Hide all DELETED torrents if regular user
-            query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
-            # If logged in, show all torrents that aren't hidden unless they belong to you
-            # On RSS pages, show all public torrents and nothing more.
-            if flask.g.user and not rss:
-                query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
-                                     (models.Torrent.uploader_id == flask.g.user.id))
-            # Otherwise, show all torrents that aren't hidden
-            else:
-                query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False))
-
-    if main_category:
-        query = query.filter(models.Torrent.main_category_id == main_cat_id)
-    elif sub_category:
-        query = query.filter((models.Torrent.main_category_id == main_cat_id) &
-                             (models.Torrent.sub_category_id == sub_cat_id))
-
-    if filter_tuple:
-        query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
-
-    if term:
-        for item in shlex.split(term, posix=False):
-            if len(item) >= 2:
-                query = query.filter(FullTextSearch(
-                    item, models.TorrentNameSearch, FullTextMode.NATURAL))
-
-    # Sort and order
-    if sort.class_ != models.Torrent:
-        query = query.join(sort.class_)
-
-    query = query.order_by(getattr(sort, order)())
-
-    if rss:
-        query = query.limit(app.config['RESULTS_PER_PAGE'])
-    else:
-        query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5)
-
-    return query


 @app.errorhandler(404)
 def not_found(error):

@@ -203,7 +77,6 @@ def before_request():
         flask.g.user = user

         if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now():
-            print("hio")
             flask.session['timeout'] = datetime.now() + timedelta(days=7)
             flask.session.permanent = True
             flask.session.modified = True

@@ -225,6 +98,18 @@ def _generate_query_string(term, category, filter, user):
     return params


+@app.template_filter('utc_time')
+def get_utc_timestamp(datetime_str):
+    ''' Returns a UTC POSIX timestamp, as seconds '''
+    UTC_EPOCH = datetime.utcfromtimestamp(0)
+    return int((datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') - UTC_EPOCH).total_seconds())
+
+
+@app.template_filter('display_time')
+def get_display_time(datetime_str):
+    return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d %H:%M')
+
+
 @app.route('/rss', defaults={'rss': True})
 @app.route('/', defaults={'rss': False})
 def home(rss):

@@ -241,6 +126,10 @@ def home(rss):
     if page:
         page = int(page)

+    per_page = app.config.get('RESULTS_PER_PAGE')
+    if not per_page:
+        per_page = DEFAULT_PER_PAGE
+
     user_id = None
     if user_name:
         user = models.User.by_username(user_name)

@@ -249,27 +138,69 @@ def home(rss):
         user_id = user.id

     query_args = {
-        'term': term or '',
         'user': user_id,
         'sort': sort or 'id',
         'order': order or 'desc',
         'category': category or '0_0',
         'quality_filter': quality_filter or '0',
         'page': page or 1,
-        'rss': rss
+        'rss': rss,
+        'per_page': per_page
     }

-    # God mode
-    if flask.g.user and flask.g.user.is_admin:
-        query_args['admin'] = True
+    if flask.g.user:
+        query_args['logged_in_user'] = flask.g.user
+        if flask.g.user.is_admin:  # God mode
+            query_args['admin'] = True

-    query = search(**query_args)
-    if rss:
-        return render_rss('/', query)
+    # If searching, we get results from elastic search
+    use_elastic = app.config.get('USE_ELASTIC_SEARCH')
+    if use_elastic and term:
+        query_args['term'] = term
+
+        max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
+        if not max_search_results:
+            max_search_results = DEFAULT_MAX_SEARCH_RESULT
+
+        # Only allow up to (max_search_results / page) pages
+        max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page))))
+
+        query_args['page'] = max_page
+        query_args['max_search_results'] = max_search_results
+
+        query_results = search_elastic(**query_args)
+
+        if rss:
+            return render_rss('/', query_results, use_elastic=True)
+        else:
+            rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
+            max_results = min(max_search_results, query_results['hits']['total'])
+            # change p= argument to whatever you change page_parameter to or pagination breaks
+            pagination = Pagination(p=query_args['page'], per_page=per_page,
+                                    total=max_results, bs_version=3, page_parameter='p',
+                                    display_msg=SERACH_PAGINATE_DISPLAY_MSG)
+            return flask.render_template('home.html',
+                                         use_elastic=True,
+                                         pagination=pagination,
+                                         torrent_query=query_results,
+                                         search=query_args,
+                                         rss_filter=rss_query_string)
     else:
-        rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
-        return flask.render_template('home.html',
-                                     torrent_query=query,
-                                     search=query_args,
-                                     rss_filter=rss_query_string)
+        # If ES is enabled, default to db search for browsing
+        if use_elastic:
+            query_args['term'] = ''
+        else:  # Otherwise, use db search for everything
+            query_args['term'] = term or ''
+
+        query = search_db(**query_args)
+        if rss:
+            return render_rss('/', query, use_elastic=False)
+        else:
+            rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
+            # Use elastic is always false here because we only hit this section
+            # if we're browsing without a search term (which means we default to DB)
+            # or if ES is disabled
+            return flask.render_template('home.html',
+                                         use_elastic=False,
+                                         torrent_query=query,
+                                         search=query_args,
+                                         rss_filter=rss_query_string)

@@ -291,6 +222,10 @@ def view_user(user_name):
     if page:
         page = int(page)

+    per_page = app.config.get('RESULTS_PER_PAGE')
+    if not per_page:
+        per_page = DEFAULT_PER_PAGE
+
     query_args = {
         'term': term or '',
         'user': user.id,

@@ -299,17 +234,54 @@ def view_user(user_name):
         'category': category or '0_0',
         'quality_filter': quality_filter or '0',
         'page': page or 1,
-        'rss': False
+        'rss': False,
+        'per_page': per_page
     }

-    # God mode
-    if flask.g.user and flask.g.user.is_admin:
-        query_args['admin'] = True
+    if flask.g.user:
+        query_args['logged_in_user'] = flask.g.user
+        if flask.g.user.is_admin:  # God mode
+            query_args['admin'] = True

-    query = search(**query_args)
+    # Use elastic search for term searching
     rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
+    use_elastic = app.config.get('USE_ELASTIC_SEARCH')
+    if use_elastic and term:
+        query_args['term'] = term
+
+        max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
+        if not max_search_results:
+            max_search_results = DEFAULT_MAX_SEARCH_RESULT
+
+        # Only allow up to (max_search_results / page) pages
+        max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page))))
+
+        query_args['page'] = max_page
+        query_args['max_search_results'] = max_search_results
+
+        query_results = search_elastic(**query_args)
+
+        max_results = min(max_search_results, query_results['hits']['total'])
+        # change p= argument to whatever you change page_parameter to or pagination breaks
+        pagination = Pagination(p=query_args['page'], per_page=per_page,
+                                total=max_results, bs_version=3, page_parameter='p',
+                                display_msg=SERACH_PAGINATE_DISPLAY_MSG)
+        return flask.render_template('user.html',
+                                     use_elastic=True,
+                                     pagination=pagination,
+                                     torrent_query=query_results,
+                                     search=query_args,
+                                     user=user,
+                                     user_page=True,
+                                     rss_filter=rss_query_string)
+    # Similar logic as home page
+    else:
+        if use_elastic:
+            query_args['term'] = ''
+        else:
+            query_args['term'] = term or ''
+
+        query = search_db(**query_args)
+        return flask.render_template('user.html',
+                                     use_elastic=False,
                                      torrent_query=query,
                                      search=query_args,
                                      user=user,

@@ -321,18 +293,23 @@ def view_user(user_name):
 def _jinja2_filter_rfc822(date, fmt=None):
     return formatdate(float(date.strftime('%s')))

+@app.template_filter('rfc822_es')
+def _jinja2_filter_rfc822(datestr, fmt=None):
+    return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s')))
+

-def render_rss(label, query):
+def render_rss(label, query, use_elastic):
     rss_xml = flask.render_template('rss.xml',
+                                    use_elastic=use_elastic,
                                     term=label,
                                     site_url=flask.request.url_root,
-                                    query=query)
+                                    torrent_query=query)
     response = flask.make_response(rss_xml)
     response.headers['Content-Type'] = 'application/xml'
     return response


-#@app.route('/about', methods=['GET'])
+# @app.route('/about', methods=['GET'])
 # def about():
 #     return flask.render_template('about.html')
317	nyaa/search.py (new file)

@@ -0,0 +1,317 @@
import flask
import re
import math
import json
import shlex

from nyaa import app, db
from nyaa import models

import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q


def search_elastic(term='', user=None, sort='id', order='desc',
                   category='0_0', quality_filter='0', page=1,
                   rss=False, admin=False, logged_in_user=None,
                   per_page=75, max_search_results=1000):
    # This function can easily be memcached now

    es_client = Elasticsearch()

    es_sort_keys = {
        'id': 'id',
        'size': 'filesize',
        # 'name': 'display_name',  # This is slow and buggy
        'seeders': 'seed_count',
        'leechers': 'leech_count',
        'downloads': 'download_count'
    }

    sort_ = sort.lower()
    if sort_ not in es_sort_keys:
        flask.abort(400)

    es_sort = es_sort_keys[sort]

    order_keys = {
        'desc': 'desc',
        'asc': 'asc'
    }

    order_ = order.lower()
    if order_ not in order_keys:
        flask.abort(400)

    # Only allow ID, desc if RSS
    if rss:
        sort = es_sort_keys['id']
        order = 'desc'

    # funky, es sort is default asc, prefixed by '-' if desc
    if 'desc' == order:
        es_sort = '-' + es_sort

    # Quality filter
    quality_keys = [
        '0',  # Show all
        '1',  # No remakes
        '2',  # Only trusted
        '3'   # Only completed
    ]

    if quality_filter.lower() not in quality_keys:
        flask.abort(400)

    quality_filter = int(quality_filter)

    # Category filter
    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
                if not sub_category:
                    flask.abort(400)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)
                if not main_category:
                    flask.abort(400)

    # This might be useless since we validate users
    # before coming into this method, but just to be safe...
    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    same_user = False
    if logged_in_user:
        same_user = user == logged_in_user.id

    s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME'))  # todo, sukebei prefix

    # Apply search term
    if term:
        s = s.query('simple_query_string',
                    analyzer='my_search_analyzer',
                    default_operator="AND",
                    query=term)

    # User view (/user/username)
    if user:
        s = s.filter('term', uploader_id=user)

        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            #
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            #
            # On RSS pages in user view, show only torrents that
            # aren't hidden or anonymous no matter what
            if not same_user or rss:
                s = s.filter('term', hidden=False)
                s = s.filter('term', anonymous=False)
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                hiddenFilter = Q('term', hidden=False)
                userFilter = Q('term', uploader_id=logged_in_user.id)
                combinedFilter = hiddenFilter | userFilter
                s = s.filter('bool', filter=[combinedFilter])
            else:
                s = s.filter('term', hidden=False)

    if main_category:
        s = s.filter('term', main_category_id=main_cat_id)
    elif sub_category:
        s = s.filter('term', main_category_id=main_cat_id)
        s = s.filter('term', sub_category_id=sub_cat_id)

    if quality_filter == 0:
        pass
    elif quality_filter == 1:
        s = s.filter('term', remake=False)
    elif quality_filter == 2:
        s = s.filter('term', trusted=True)
    elif quality_filter == 3:
        s = s.filter('term', complete=True)

    # Apply sort
    s = s.sort(es_sort)

    # Only show first RESULTS_PER_PAGE items for RSS
    if rss:
        s = s[0:per_page]
    else:
        max_page = min(page, int(math.ceil(max_search_results / float(per_page))))
        from_idx = (max_page - 1) * per_page
        to_idx = min(max_search_results, max_page * per_page)
        s = s[from_idx:to_idx]

    highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
    if highlight:
        s = s.highlight_options(tags_schema='styled')
        s = s.highlight("display_name")

    # Return query, uncomment print line to debug query
    # from pprint import pprint
    # print(json.dumps(s.to_dict()))
    return s.execute()


def search_db(term='', user=None, sort='id', order='desc', category='0_0',
              quality_filter='0', page=1, rss=False, admin=False,
              logged_in_user=None, per_page=75):
    sort_keys = {
        'id': models.Torrent.id,
        'size': models.Torrent.filesize,
        # 'name': models.Torrent.display_name,  # Disabled because it's disabled in search_elastic, for the sake of consistency
        'seeders': models.Statistic.seed_count,
        'leechers': models.Statistic.leech_count,
        'downloads': models.Statistic.download_count
    }

    sort_ = sort.lower()
    if sort_ not in sort_keys:
        flask.abort(400)
    sort = sort_keys[sort]

    order_keys = {
        'desc': 'desc',
        'asc': 'asc'
    }

    order_ = order.lower()
    if order_ not in order_keys:
        flask.abort(400)

    filter_keys = {
        '0': None,
        '1': (models.TorrentFlags.REMAKE, False),
        '2': (models.TorrentFlags.TRUSTED, True),
        '3': (models.TorrentFlags.COMPLETE, True)
    }

    sentinel = object()
    filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
    if filter_tuple is sentinel:
        flask.abort(400)

    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)

        if not category:
            flask.abort(400)

    # Force sort by id desc if rss
    if rss:
        sort = sort_keys['id']
        order = 'desc'

    same_user = False
    if logged_in_user:
        same_user = logged_in_user.id == user

    if term:
        query = db.session.query(models.TorrentNameSearch)
    else:
        query = models.Torrent.query

    # User view (/user/username)
    if user:
        query = query.filter(models.Torrent.uploader_id == user)

        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous
            # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones
            # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what
            if not same_user or rss:
                query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN |
                                                                      models.TorrentFlags.ANONYMOUS)).is_(False))
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
                                     (models.Torrent.uploader_id == logged_in_user.id))
            # Otherwise, show all torrents that aren't hidden
            else:
                query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False))

    if main_category:
        query = query.filter(models.Torrent.main_category_id == main_cat_id)
    elif sub_category:
        query = query.filter((models.Torrent.main_category_id == main_cat_id) &
                             (models.Torrent.sub_category_id == sub_cat_id))

    if filter_tuple:
        query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))

    if term:
        for item in shlex.split(term, posix=False):
            if len(item) >= 2:
                query = query.filter(FullTextSearch(
                    item, models.TorrentNameSearch, FullTextMode.NATURAL))

    # Sort and order
    if sort.class_ != models.Torrent:
        query = query.join(sort.class_)

    query = query.order_by(getattr(sort, order)())

    if rss:
        query = query.limit(per_page)
    else:
        query = query.paginate_faste(page, per_page=per_page, step=5)

    return query
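A minimal sketch of calling the new elastic path (assumes an app context and a populated index; the dict-style `['hits']['total']` access and `meta.id` usage mirror what routes.py and the templates do):

```python
from nyaa.search import search_elastic

# hypothetical search term, just for illustration
results = search_elastic(term='utawarerumono', sort='seeders', per_page=10)
print(results['hits']['total'])   # total hit count, used to cap pagination
for hit in results:               # iterating the response yields the ES hits
    print(hit.meta.id, hit.display_name)
```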
(stylesheet)

@@ -98,3 +98,13 @@ table.torrent-list thead th.sorting_desc:after {
   margin-bottom: 10px;
 }
 }
+
+/* elasticsearch term highlight */
+.hlt1 {
+  font-style: normal;
+  display: inline-block;
+  padding: 0 3px;
+  border-radius: 3px;
+  border: 1px solid rgba(100, 56, 0, 0.8);
+  background: rgba(200, 127, 0, 0.3);
+}
rss.xml template

@@ -4,20 +4,32 @@
 <description>RSS Feed for {{ term }}</description>
 <link>{{ url_for('home', _external=True) }}</link>
 <atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
-{% for torrent in query %}
+{% for torrent in torrent_query %}
 {% if torrent.has_torrent %}
 <item>
   <title>{{ torrent.display_name }}</title>
+  {% if use_elastic %}
+  <link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
+  <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
+  <pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
+  {% else %}
   <link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
   <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
   <pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
+  {% endif %}
 </item>
 {% else %}
 <item>
   <title>{{ torrent.display_name }}</title>
+  {% if use_elastic %}
+  <link>{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}</link>
+  <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
+  <pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
+  {% else %}
   <link>{{ torrent.magnet_uri }}</link>
   <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
   <pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
+  {% endif %}
 </item>
 {% endif %}
 {% endfor %}
search_results.html template

@@ -8,7 +8,7 @@
   {{ caller() }}
   </th>
 {% endmacro %}
-{% if torrent_query.items %}
+{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
 <div class="table-responsive">
   <table class="table table-bordered table-hover table-striped torrent-list">
     <thead>

@@ -16,7 +16,7 @@
       {% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
         <div>Category</div>
       {% endcall %}
-      {% call render_column_header("hdr-name", "width:auto;", sort_key="name") %}
+      {% call render_column_header("hdr-name", "width:auto;") %}
         <div>Name</div>
       {% endcall %}
       {% call render_column_header("hdr-link", "width:70px;", center_text=True) %}

@@ -45,27 +45,51 @@
       </tr>
     </thead>
     <tbody>
-      {% for torrent in torrent_query.items %}
+      {% set torrents = torrent_query if use_elastic else torrent_query.items %}
+      {% for torrent in torrents %}
       <tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}">
-        {% set cat_id = (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
+        {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
         {% set icon_dir = config.SITE_FLAVOR %}
         <td style="padding:0 4px;">
+          {% if use_elastic %}
+          <a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}">
+          {% else %}
           <a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}">
+          {% endif %}
             <img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png">
           </a>
         </td>
+        {% if use_elastic %}
+        <td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}">{% if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{ torrent.display_name }}{% endif %}</a></td>
+        {% else %}
        <td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td>
+        {% endif %}
        <td style="white-space: nowrap;text-align: center;">
          {% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %}
+          {% if use_elastic %}
+          <a href="{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}"><i class="fa fa-fw fa-magnet"></i></a>
+          {% else %}
          <a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a>
+          {% endif %}
        </td>
        <td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td>
+        {% if use_elastic %}
+        <td class="text-center" data-timestamp="{{ torrent.created_time | utc_time }}">{{ torrent.created_time | display_time }}</td>
+        {% else %}
        <td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td>
+        {% endif %}

        {% if config.ENABLE_SHOW_STATS %}
+          {% if use_elastic %}
+          <td class="text-center" style="color: green;">{{ torrent.seed_count }}</td>
+          <td class="text-center" style="color: red;">{{ torrent.leech_count }}</td>
+          <td class="text-center">{{ torrent.download_count }}</td>
+          {% else %}
          <td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td>
          <td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td>
          <td class="text-center">{{ torrent.stats.download_count }}</td>
+          {% endif %}
        {% endif %}
      </tr>
      {% endfor %}
    </tbody>

@@ -76,6 +100,11 @@
 {% endif %}

 <center>
+  {% if use_elastic %}
+  {{ pagination.info }}
+  {{ pagination.links }}
+  {% else %}
   {% from "bootstrap/pagination.html" import render_pagination %}
   {{ render_pagination(torrent_query) }}
+  {% endif %}
 </center>
nyaa/torrents.py

@@ -3,6 +3,7 @@ import base64
 import time
 from urllib.parse import urlencode
 from orderedset import OrderedSet
+from nyaa import app

 from nyaa import bencode
 from nyaa import app

@@ -53,10 +54,23 @@ def get_trackers(torrent):

     return list(trackers)

+def get_trackers_magnet():
+    trackers = OrderedSet()
+
+    # Our main one first
+    main_announce_url = app.config.get('MAIN_ANNOUNCE_URL')
+    if main_announce_url:
+        trackers.add(main_announce_url)
+
+    # and finally our tracker list
+    trackers.update(default_trackers())
+
+    return list(trackers)
+
+
 def create_magnet(torrent, max_trackers=5, trackers=None):
     if trackers is None:
-        trackers = get_trackers(torrent)
+        trackers = get_trackers_magnet()

     magnet_parts = [
         ('dn', torrent.display_name)

@@ -68,6 +82,24 @@ def create_magnet(torrent, max_trackers=5, trackers=None):
     return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts)


+# For processing ES links
+@app.context_processor
+def create_magnet_from_info():
+    def _create_magnet_from_info(display_name, info_hash, max_trackers=5, trackers=None):
+        if trackers is None:
+            trackers = get_trackers_magnet()
+
+        magnet_parts = [
+            ('dn', display_name)
+        ]
+        for tracker in trackers[:max_trackers]:
+            magnet_parts.append(('tr', tracker))
+
+        b32_info_hash = base64.b32encode(bytes.fromhex(info_hash)).decode('utf-8')
+        return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts)
+    return dict(create_magnet_from_info=_create_magnet_from_info)
+
+
 def create_default_metadata_base(torrent, trackers=None):
     if trackers is None:
         trackers = get_trackers(torrent)
requirements.txt

@@ -24,11 +24,17 @@ pycodestyle==2.3.1
 pycparser==2.17
 pyparsing==2.2.0
 six==1.10.0
-SQLAlchemy>=1.1.9
+SQLAlchemy==1.1.9
 SQLAlchemy-FullText-Search==0.2.3
-SQLAlchemy-Utils>=0.32.14
+SQLAlchemy-Utils==0.32.14
 uWSGI==2.0.15
 visitor==0.1.3
 webassets==0.12.1
 Werkzeug==0.12.1
 WTForms==2.1
+## elasticsearch dependencies
+elasticsearch==5.3.0
+elasticsearch-dsl==5.2.0
+progressbar2==3.20.0
+mysql-replication==0.13
+flask-paginate==0.4.5
168	sync_es.py (new file)

@@ -0,0 +1,168 @@
#!/usr/bin/env python
"""
stream changes in mysql (on the torrents and statistics table) into
elasticsearch as they happen on the binlog. This keeps elasticsearch in sync
with whatever you do to the database, including stuff like admin queries. Also,
because mysql keeps the binlog around for N days before deleting old stuff, you
can survive a hiccup of elasticsearch or this script dying and pick up where
you left off.

For that "picking up" part, this script depends on one piece of external state:
its last known binlog filename and position. This is saved off as a JSON file
to a configurable location on the filesystem periodically. If the file is not
present then you can initialize it with the values from `SHOW MASTER STATUS`
from the mysql repl, which will start the sync from current state.

In the case of catastrophic elasticsearch meltdown where you need to
reconstruct the index, you'll want to be a bit careful with coordinating
the sync_es and import_to_es scripts. If you run import_to_es first, then run
sync_es against SHOW MASTER STATUS, anything that changed the database between
when import_to_es ran and when sync_es started will be lost. Instead, you can
run SHOW MASTER STATUS _before_ you run import_to_es. That way you'll
definitely pick up any changes that happen while the import_to_es script is
dumping stuff from the database into es, at the expense of redoing a (small)
amount of indexing.
"""
from elasticsearch import Elasticsearch
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
from datetime import datetime
from nyaa.models import TorrentFlags
import sys
import json
import time
import logging

logging.basicConfig()

log = logging.getLogger('sync_es')
log.setLevel(logging.INFO)

#logging.getLogger('elasticsearch').setLevel(logging.DEBUG)

# in prod want in /var/lib somewhere probably
SAVE_LOC = "/var/lib/sync_es_position.json"
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306
MYSQL_USER = 'test'
MYSQL_PW = 'test123'
NT_DB = 'nyaav2'

with open(SAVE_LOC) as f:
    pos = json.load(f)

es = Elasticsearch()

stream = BinLogStreamReader(
    # TODO parse out from config.py or something
    connection_settings={
        'host': MYSQL_HOST,
        'port': MYSQL_PORT,
        'user': MYSQL_USER,
        'passwd': MYSQL_PW
    },
    server_id=10,  # arbitrary
    # only care about this database currently
    only_schemas=[NT_DB],
    # these tables in the database
    only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
    # from our save file
    resume_stream=True,
    log_file=pos['log_file'],
    log_pos=pos['log_pos'],
    # skip the other stuff like table mapping
    only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent],
    # if we're at the head of the log, block until something happens
    # note it'd be nice to block async-style instead, but the mainline
    # binlogreader is synchronous. there is an (unmaintained?) fork
    # using aiomysql if anybody wants to revive that.
    blocking=True)

def reindex_torrent(t, index_name):
    # XXX annoyingly different from import_to_es, and
    # you need to keep them in sync manually.
    f = t['flags']
    doc = {
        "id": t['id'],
        "display_name": t['display_name'],
        "created_time": t['created_time'],
        "updated_time": t['updated_time'],
        "description": t['description'],
        # not analyzed but included so we can render magnet links
        # without querying sql again.
        "info_hash": t['info_hash'].hex(),
        "filesize": t['filesize'],
        "uploader_id": t['uploader_id'],
        "main_category_id": t['main_category_id'],
        "sub_category_id": t['sub_category_id'],
        # XXX all the bitflags are numbers
        "anonymous": bool(f & TorrentFlags.ANONYMOUS),
        "trusted": bool(f & TorrentFlags.TRUSTED),
        "remake": bool(f & TorrentFlags.REMAKE),
        "complete": bool(f & TorrentFlags.COMPLETE),
        # TODO instead of indexing and filtering later
        # could delete from es entirely. Probably won't matter
        # for at least a few months.
        "hidden": bool(f & TorrentFlags.HIDDEN),
        "deleted": bool(f & TorrentFlags.DELETED),
        "has_torrent": bool(t['has_torrent']),
    }
    # update, so we don't delete the stats if present
    es.update(
        index=index_name,
        doc_type='torrent',
        id=t['id'],
        body={"doc": doc, "doc_as_upsert": True})

def reindex_stats(s, index_name):
    es.update(
        index=index_name,
        doc_type='torrent',
        id=s['torrent_id'],
        body={
            "doc": {
                "stats_last_updated": s["last_updated"],
                "download_count": s["download_count"],
                "leech_count": s['leech_count'],
                "seed_count": s['seed_count'],
            }})

n = 0
last_save = time.time()

for event in stream:
    for row in event.rows:
        if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
            if event.table == "nyaa_torrents":
                index_name = "nyaa"
            else:
                index_name = "sukebei"
            if type(event) is WriteRowsEvent:
                reindex_torrent(row['values'], index_name)
            elif type(event) is UpdateRowsEvent:
                reindex_torrent(row['after_values'], index_name)
            elif type(event) is DeleteRowsEvent:
                # just delete it
                es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
            else:
                raise Exception(f"unknown event {type(event)}")
        elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
            if event.table == "nyaa_statistics":
                index_name = "nyaa"
            else:
                index_name = "sukebei"
            if type(event) is WriteRowsEvent:
                reindex_stats(row['values'], index_name)
            elif type(event) is UpdateRowsEvent:
                reindex_stats(row['after_values'], index_name)
            elif type(event) is DeleteRowsEvent:
                # uh ok. assume that the torrent row will get deleted later.
                pass
            else:
                raise Exception(f"unknown event {type(event)}")
        else:
            raise Exception(f"unknown table {event.table}")
        n += 1
        if n % 100 == 0 or time.time() - last_save > 30:
            log.info(f"saving position {stream.log_file}/{stream.log_pos}")
            with open(SAVE_LOC, 'w') as f:
                json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f)