Introduce baked queries (#592)
SQLAlchemy's baked queries prepare the queries in advance, caching the query construction and string compilation. Makes things a bit faster. Also included is a bigger speedup: a shoddy cache for the total torrent count (currently only applied to baked queries). Caching the value for a few dozen seconds shaves off some wasted time, as it's mostly just used for pagination.
This commit is contained in:
parent 4fcef92b94
commit e545f8ae19
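For background, a minimal self-contained sketch of the prepare-once/execute-many pattern that sqlalchemy.ext.baked provides (the toy Torrent model, SQLite session and torrents_by_uploader helper below are illustrative only, not part of this commit): the query structure is built inside lambdas that the bakery caches, and only the bound parameter values change per call.

# Illustrative sketch only -- toy model and in-memory SQLite, just to show the bakery flow.
import sqlalchemy as sa
from sqlalchemy.ext import baked
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Torrent(Base):
    __tablename__ = 'torrents'
    id = sa.Column(sa.Integer, primary_key=True)
    uploader_id = sa.Column(sa.Integer)

engine = sa.create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

bakery = baked.bakery()

def torrents_by_uploader(session, uploader_id):
    # The lambdas below are only invoked the first time this exact chain of
    # steps is seen; afterwards the bakery reuses the cached, pre-compiled query.
    bq = bakery(lambda s: s.query(Torrent))
    bq += lambda q: q.filter(Torrent.uploader_id == sa.bindparam('uploader_id'))
    bq += lambda q: q.order_by(Torrent.id.desc())
    # Only the bound parameter values change from call to call.
    return bq(session).params(uploader_id=uploader_id).all()

print(torrents_by_uploader(session, 1))  # -> [] on the empty toy database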
@@ -156,6 +156,13 @@ RESULTS_PER_PAGE = 75
 # How many pages we'll return at most
 MAX_PAGES = 100
 
+# How long and how many entries to cache for count queries
+COUNT_CACHE_SIZE = 256
+COUNT_CACHE_DURATION = 30
+
+# Use baked queries for database search
+USE_BAKED_SEARCH = False
+
 # Use better searching with ElasticSearch
 # See README.MD on setup!
 USE_ELASTIC_SEARCH = False
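To actually exercise the new code path, a deployment's config would presumably flip the flag added above; an illustrative snippet (values are examples, not recommendations):

# Illustrative config values for the new settings introduced above.
COUNT_CACHE_SIZE = 256      # per the comment above: how many count results to keep cached
COUNT_CACHE_DURATION = 30   # seconds before a cached count expires (used as the LRU entry expiry)
USE_BAKED_SEARCH = True     # route search_db() callers through search_db_baked() instead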
nyaa/search.py (307 additions)
@@ -1,13 +1,17 @@
 import math
 import re
 import shlex
+import threading
+import time
 
 import flask
+from flask_sqlalchemy import Pagination
 
 import sqlalchemy
 import sqlalchemy_fulltext.modes as FullTextMode
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import Q, Search
+from sqlalchemy.ext import baked
 from sqlalchemy_fulltext import FullTextSearch
 
 from nyaa import models
@@ -531,3 +535,306 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
                                max_page=MAX_PAGES)
 
     return query
+
+
+# Baked queries follow
+
+class BakedPair(object):
+    def __init__(self, *items):
+        self.items = list(items)
+
+    def __iadd__(self, other):
+        for item in self.items:
+            item += other
+
+        return self
+
+
+bakery = baked.bakery()
+
+
+BAKED_SORT_KEYS = {
+    'id': models.Torrent.id,
+    'size': models.Torrent.filesize,
+    'comments': models.Torrent.comment_count,
+    'seeders': models.Statistic.seed_count,
+    'leechers': models.Statistic.leech_count,
+    'downloads': models.Statistic.download_count
+}
+
+BAKED_SORT_LAMBDAS = {
+    'id-asc': lambda q: q.order_by(models.Torrent.id.asc()),
+    'id-desc': lambda q: q.order_by(models.Torrent.id.desc()),
+
+    'size-asc': lambda q: q.order_by(models.Torrent.filesize.asc()),
+    'size-desc': lambda q: q.order_by(models.Torrent.filesize.desc()),
+
+    'comments-asc': lambda q: q.order_by(models.Torrent.comment_count.asc()),
+    'comments-desc': lambda q: q.order_by(models.Torrent.comment_count.desc()),
+
+    # This is a bit stupid, but programmatically generating these mixed up the baked keys, so deal.
+    'seeders-asc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_seed_count)'
+    ).order_by(models.Statistic.seed_count.asc(), models.Torrent.id.asc()),
+    'seeders-desc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_seed_count)'
+    ).order_by(models.Statistic.seed_count.desc(), models.Torrent.id.desc()),
+
+    'leechers-asc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_leech_count)'
+    ).order_by(models.Statistic.leech_count.asc(), models.Torrent.id.asc()),
+    'leechers-desc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_leech_count)'
+    ).order_by(models.Statistic.leech_count.desc(), models.Torrent.id.desc()),
+
+    'downloads-asc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_download_count)'
+    ).order_by(models.Statistic.download_count.asc(), models.Torrent.id.asc()),
+    'downloads-desc': lambda q: q.join(models.Statistic).with_hint(
+        models.Statistic, 'USE INDEX (idx_nyaa_statistics_download_count)'
+    ).order_by(models.Statistic.download_count.desc(), models.Torrent.id.desc()),
+}
+
+
+BAKED_FILTER_LAMBDAS = {
+    '0': None,
+    '1': lambda q: (
+        q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.REMAKE.value).is_(False))
+    ),
+    '2': lambda q: (
+        q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.TRUSTED.value).is_(True))
+    ),
+    '3': lambda q: (
+        q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.COMPLETE.value).is_(True))
+    ),
+}
+
+
+def search_db_baked(term='', user=None, sort='id', order='desc', category='0_0',
+                    quality_filter='0', page=1, rss=False, admin=False,
+                    logged_in_user=None, per_page=75):
+    if page > 4294967295:
+        flask.abort(404)
+
+    MAX_PAGES = app.config.get("MAX_PAGES", 0)
+
+    if MAX_PAGES and page > MAX_PAGES:
+        flask.abort(flask.Response("You've exceeded the maximum number of pages. Please "
+                                   "make your search query less broad.", 403))
+
+    sort_lambda = BAKED_SORT_LAMBDAS.get('{}-{}'.format(sort, order).lower())
+    if not sort_lambda:
+        flask.abort(400)
+
+    sentinel = object()
+    filter_lambda = BAKED_FILTER_LAMBDAS.get(quality_filter.lower(), sentinel)
+    if filter_lambda is sentinel:
+        flask.abort(400)
+
+    if user:
+        user = models.User.by_id(user)
+        if not user:
+            flask.abort(404)
+        user = user.id
+
+    main_cat_id = 0
+    sub_cat_id = 0
+
+    if category:
+        cat_match = re.match(r'^(\d+)_(\d+)$', category)
+        if not cat_match:
+            flask.abort(400)
+
+        main_cat_id = int(cat_match.group(1))
+        sub_cat_id = int(cat_match.group(2))
+
+        if main_cat_id > 0:
+            if sub_cat_id > 0:
+                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
+                if not sub_category:
+                    flask.abort(400)
+            else:
+                main_category = models.MainCategory.by_id(main_cat_id)
+                if not main_category:
+                    flask.abort(400)
+
+    # Force sort by id desc if rss
+    if rss:
+        sort_lambda = BAKED_SORT_LAMBDAS['id-desc']
+
+    same_user = False
+    if logged_in_user:
+        same_user = logged_in_user.id == user
+
+    if term:
+        query = bakery(lambda session: session.query(models.TorrentNameSearch))
+        count_query = bakery(lambda session: session.query(
+            sqlalchemy.func.count(models.TorrentNameSearch.id)))
+    else:
+        query = bakery(lambda session: session.query(models.Torrent))
+        # This is... eh. Optimize the COUNT() query since MySQL is bad at that.
+        # See http://docs.sqlalchemy.org/en/rel_1_1/orm/query.html#sqlalchemy.orm.query.Query.count
+        # Wrap the queries into the helper class to deduplicate code and
+        # apply filters to both in one go
+        count_query = bakery(lambda session: session.query(
+            sqlalchemy.func.count(models.Torrent.id)))
+
+    qpc = BakedPair(query, count_query)
+    bp = sqlalchemy.bindparam
+
+    baked_params = {}
+
+    # User view (/user/username)
+    if user:
+        qpc += lambda q: q.filter(models.Torrent.uploader_id == bp('user'))
+        baked_params['user'] = user
+
+        if not admin:
+            # Hide all DELETED torrents if regular user
+            qpc += lambda q: q.filter(models.Torrent.flags.op('&')
+                                      (int(models.TorrentFlags.DELETED)).is_(False))
+            # If logged in user is not the same as the user being viewed,
+            # show only torrents that aren't hidden or anonymous
+            #
+            # If logged in user is the same as the user being viewed,
+            # show all torrents including hidden and anonymous ones
+            #
+            # On RSS pages in user view,
+            # show only torrents that aren't hidden or anonymous no matter what
+            if not same_user or rss:
+                qpc += lambda q: (
+                    q.filter(
+                        models.Torrent.flags.op('&')(
+                            int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)
+                        ).is_(False)
+                    )
+                )
+    # General view (homepage, general search view)
+    else:
+        if not admin:
+            # Hide all DELETED torrents if regular user
+            qpc += lambda q: q.filter(models.Torrent.flags.op('&')
+                                      (int(models.TorrentFlags.DELETED)).is_(False))
+            # If logged in, show all torrents that aren't hidden unless they belong to you
+            # On RSS pages, show all public torrents and nothing more.
+            if logged_in_user and not rss:
+                qpc += lambda q: q.filter(
+                    (models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
+                    (models.Torrent.uploader_id == bp('logged_in_user'))
+                )
+                baked_params['logged_in_user'] = logged_in_user
+            # Otherwise, show all torrents that aren't hidden
+            else:
+                qpc += lambda q: q.filter(models.Torrent.flags.op('&')
+                                          (int(models.TorrentFlags.HIDDEN)).is_(False))
+
+    if sub_cat_id:
+        qpc += lambda q: q.filter(
+            (models.Torrent.main_category_id == bp('main_cat_id')),
+            (models.Torrent.sub_category_id == bp('sub_cat_id'))
+        )
+        baked_params['main_cat_id'] = main_cat_id
+        baked_params['sub_cat_id'] = sub_cat_id
+    elif main_cat_id:
+        qpc += lambda q: q.filter(models.Torrent.main_category_id == bp('main_cat_id'))
+        baked_params['main_cat_id'] = main_cat_id
+
+    if filter_lambda:
+        qpc += filter_lambda
+
+    if term:
+        raise Exception('Baked search does not support search terms')
+
+    # Sort and order
+    query += sort_lambda
+
+    if rss:
+        query += lambda q: q.limit(bp('per_page'))
+        baked_params['per_page'] = per_page
+
+        return query(db.session()).params(**baked_params).all()
+
+    return baked_paginate(query, count_query, baked_params,
+                          page, per_page=per_page, step=5, max_page=MAX_PAGES)
+
+
+class ShoddyLRU(object):
+    def __init__(self, max_entries=128, expiry=60):
+        self.max_entries = max_entries
+        self.expiry = expiry
+
+        # Contains [value, last_used, expires_at]
+        self.entries = {}
+        self._lock = threading.Lock()
+
+        self._sentinel = object()
+
+    def get(self, key, default=None):
+        entry = self.entries.get(key)
+        if entry is None:
+            return default
+
+        now = time.time()
+        if now > entry[2]:
+            with self._lock:
+                del self.entries[key]
+            return default
+
+        entry[1] = now
+        return entry[0]
+
+    def put(self, key, value, expiry=None):
+        with self._lock:
+            overflow = len(self.entries) - self.max_entries
+            if overflow > 0:
+                # Pick the least recently used keys
+                removed_keys = [key for key, value in sorted(
+                    self.entries.items(), key=lambda t: t[1][1])][:overflow]
+                for key in removed_keys:
+                    del self.entries[key]
+
+            now = time.time()
+            self.entries[key] = [value, now, now + (expiry or self.expiry)]
+
+
+LRU_CACHE = ShoddyLRU(256, 60)
+
+
+def baked_paginate(query, count_query, params, page=1, per_page=50, max_page=None, step=5):
+    if page < 1:
+        flask.abort(404)
+
+    if max_page and page > max_page:
+        flask.abort(404)
+    bp = sqlalchemy.bindparam
+
+    ses = db.session()
+
+    # Count all items, use cache
+    if app.config['COUNT_CACHE_DURATION']:
+        query_key = (count_query._effective_key(ses), tuple(sorted(params.items())))
+        total_query_count = LRU_CACHE.get(query_key)
+        if total_query_count is None:
+            total_query_count = count_query(ses).params(**params).scalar()
+            LRU_CACHE.put(query_key, total_query_count, expiry=app.config['COUNT_CACHE_DURATION'])
+    else:
+        total_query_count = count_query(ses).params(**params).scalar()
+
+    # Grab items on current page
+    query += lambda q: q.limit(bp('limit')).offset(bp('offset'))
+    params['limit'] = per_page
+    params['offset'] = (page - 1) * per_page
+
+    res = query(ses).params(**params)
+    items = res.all()
+
+    if max_page:
+        total_query_count = min(total_query_count, max_page * per_page)
+
+    # Handle case where we've had no results but then have some while in cache
+    total_query_count = max(total_query_count, len(items))
+
+    if not items and page != 1:
+        flask.abort(404)
+
+    return Pagination(None, page, per_page, total_query_count, items)
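One non-obvious detail behind the "programmatically generating these mixed up the baked keys" comment in BAKED_SORT_LAMBDAS: as I understand sqlalchemy.ext.baked, each step's cache key is derived in part from the lambda's code object, and lambdas stamped out in a loop or comprehension all share a single code object, so they would collide on one cache key. A tiny standalone illustration of that sharing (hypothetical names, not from this commit):

# Two lambdas built by one comprehension share a single __code__ object,
# which (as I understand it) the bakery folds into its cache key --
# hence the hand-written, statically defined sort lambdas in this commit.
columns = {'id': 'torrents.id', 'size': 'torrents.filesize'}
generated = {name: (lambda q, col=col: q.order_by(col)) for name, col in columns.items()}

print(len(generated))                                    # 2 distinct functions...
print(len({fn.__code__ for fn in generated.values()}))   # ...but 1 shared code object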
@@ -10,7 +10,7 @@ from flask_paginate import Pagination
 from nyaa import models
 from nyaa.extensions import db
 from nyaa.search import (DEFAULT_MAX_SEARCH_RESULT, DEFAULT_PER_PAGE, SERACH_PAGINATE_DISPLAY_MSG,
-                         _generate_query_string, search_db, search_elastic)
+                         _generate_query_string, search_db, search_db_baked, search_elastic)
 from nyaa.utils import chain_get
 from nyaa.views.account import logout
 
@@ -186,7 +186,11 @@ def home(rss):
     else:  # Otherwise, use db search for everything
         query_args['term'] = search_term or ''
-        query = search_db(**query_args)
+
+        if app.config['USE_BAKED_SEARCH']:
+            query = search_db_baked(**query_args)
+        else:
+            query = search_db(**query_args)
 
     if render_as_rss:
         return render_rss('Home', query, use_elastic=False, magnet_links=use_magnet_links)
     else:
@@ -12,7 +12,7 @@ from itsdangerous import BadSignature, URLSafeSerializer
 from nyaa import forms, models
 from nyaa.extensions import db
 from nyaa.search import (DEFAULT_MAX_SEARCH_RESULT, DEFAULT_PER_PAGE, SERACH_PAGINATE_DISPLAY_MSG,
-                         _generate_query_string, search_db, search_elastic)
+                         _generate_query_string, search_db, search_db_baked, search_elastic)
 from nyaa.utils import admin_only, chain_get, sha1_hash
 
 app = flask.current_app
@@ -185,6 +185,9 @@ def view_user(user_name):
         query_args['term'] = ''
     else:
         query_args['term'] = search_term or ''
-    query = search_db(**query_args)
+    if app.config['USE_BAKED_SEARCH']:
+        query = search_db_baked(**query_args)
+    else:
+        query = search_db(**query_args)
     return flask.render_template('user.html',
                                  use_elastic=False,