mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-21 17:00:00 +00:00
Introduce baked queries (#592)
SQA's baked queries prepares the queries in advance, caching yada yada. Makes thing a bit faster. Also bigger speedup included is a shoddy cache for the total torrent count (only applied to baked queries currently). Caching the value for a few dozen seconds shaves off some wasted time, as it's mostly just used for pagination.
This commit is contained in:
parent
4fcef92b94
commit
e545f8ae19
|
@ -156,6 +156,13 @@ RESULTS_PER_PAGE = 75
|
|||
# How many pages we'll return at most
|
||||
MAX_PAGES = 100
|
||||
|
||||
# How long and how many entries to cache for count queries
|
||||
COUNT_CACHE_SIZE = 256
|
||||
COUNT_CACHE_DURATION = 30
|
||||
|
||||
# Use baked queries for database search
|
||||
USE_BAKED_SEARCH = False
|
||||
|
||||
# Use better searching with ElasticSearch
|
||||
# See README.MD on setup!
|
||||
USE_ELASTIC_SEARCH = False
|
||||
|
|
307
nyaa/search.py
307
nyaa/search.py
|
@ -1,13 +1,17 @@
|
|||
import math
|
||||
import re
|
||||
import shlex
|
||||
import threading
|
||||
import time
|
||||
|
||||
import flask
|
||||
from flask_sqlalchemy import Pagination
|
||||
|
||||
import sqlalchemy
|
||||
import sqlalchemy_fulltext.modes as FullTextMode
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch_dsl import Q, Search
|
||||
from sqlalchemy.ext import baked
|
||||
from sqlalchemy_fulltext import FullTextSearch
|
||||
|
||||
from nyaa import models
|
||||
|
@ -531,3 +535,306 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
|
|||
max_page=MAX_PAGES)
|
||||
|
||||
return query
|
||||
|
||||
|
||||
# Baked queries follow
|
||||
|
||||
class BakedPair(object):
|
||||
def __init__(self, *items):
|
||||
self.items = list(items)
|
||||
|
||||
def __iadd__(self, other):
|
||||
for item in self.items:
|
||||
item += other
|
||||
|
||||
return self
|
||||
|
||||
|
||||
bakery = baked.bakery()
|
||||
|
||||
|
||||
BAKED_SORT_KEYS = {
|
||||
'id': models.Torrent.id,
|
||||
'size': models.Torrent.filesize,
|
||||
'comments': models.Torrent.comment_count,
|
||||
'seeders': models.Statistic.seed_count,
|
||||
'leechers': models.Statistic.leech_count,
|
||||
'downloads': models.Statistic.download_count
|
||||
}
|
||||
|
||||
BAKED_SORT_LAMBDAS = {
|
||||
'id-asc': lambda q: q.order_by(models.Torrent.id.asc()),
|
||||
'id-desc': lambda q: q.order_by(models.Torrent.id.desc()),
|
||||
|
||||
'size-asc': lambda q: q.order_by(models.Torrent.filesize.asc()),
|
||||
'size-desc': lambda q: q.order_by(models.Torrent.filesize.desc()),
|
||||
|
||||
'comments-asc': lambda q: q.order_by(models.Torrent.comment_count.asc()),
|
||||
'comments-desc': lambda q: q.order_by(models.Torrent.comment_count.desc()),
|
||||
|
||||
# This is a bit stupid, but programmatically generating these mixed up the baked keys, so deal.
|
||||
'seeders-asc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_seed_count)'
|
||||
).order_by(models.Statistic.seed_count.asc(), models.Torrent.id.asc()),
|
||||
'seeders-desc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_seed_count)'
|
||||
).order_by(models.Statistic.seed_count.desc(), models.Torrent.id.desc()),
|
||||
|
||||
'leechers-asc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_leech_count)'
|
||||
).order_by(models.Statistic.leech_count.asc(), models.Torrent.id.asc()),
|
||||
'leechers-desc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_leech_count)'
|
||||
).order_by(models.Statistic.leech_count.desc(), models.Torrent.id.desc()),
|
||||
|
||||
'downloads-asc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_download_count)'
|
||||
).order_by(models.Statistic.download_count.asc(), models.Torrent.id.asc()),
|
||||
'downloads-desc': lambda q: q.join(models.Statistic).with_hint(
|
||||
models.Statistic, 'USE INDEX (idx_nyaa_statistics_download_count)'
|
||||
).order_by(models.Statistic.download_count.desc(), models.Torrent.id.desc()),
|
||||
}
|
||||
|
||||
|
||||
BAKED_FILTER_LAMBDAS = {
|
||||
'0': None,
|
||||
'1': lambda q: (
|
||||
q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.REMAKE.value).is_(False))
|
||||
),
|
||||
'2': lambda q: (
|
||||
q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.TRUSTED.value).is_(True))
|
||||
),
|
||||
'3': lambda q: (
|
||||
q.filter(models.Torrent.flags.op('&')(models.TorrentFlags.COMPLETE.value).is_(True))
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def search_db_baked(term='', user=None, sort='id', order='desc', category='0_0',
|
||||
quality_filter='0', page=1, rss=False, admin=False,
|
||||
logged_in_user=None, per_page=75):
|
||||
if page > 4294967295:
|
||||
flask.abort(404)
|
||||
|
||||
MAX_PAGES = app.config.get("MAX_PAGES", 0)
|
||||
|
||||
if MAX_PAGES and page > MAX_PAGES:
|
||||
flask.abort(flask.Response("You've exceeded the maximum number of pages. Please "
|
||||
"make your search query less broad.", 403))
|
||||
|
||||
sort_lambda = BAKED_SORT_LAMBDAS.get('{}-{}'.format(sort, order).lower())
|
||||
if not sort_lambda:
|
||||
flask.abort(400)
|
||||
|
||||
sentinel = object()
|
||||
filter_lambda = BAKED_FILTER_LAMBDAS.get(quality_filter.lower(), sentinel)
|
||||
if filter_lambda is sentinel:
|
||||
flask.abort(400)
|
||||
|
||||
if user:
|
||||
user = models.User.by_id(user)
|
||||
if not user:
|
||||
flask.abort(404)
|
||||
user = user.id
|
||||
|
||||
main_cat_id = 0
|
||||
sub_cat_id = 0
|
||||
|
||||
if category:
|
||||
cat_match = re.match(r'^(\d+)_(\d+)$', category)
|
||||
if not cat_match:
|
||||
flask.abort(400)
|
||||
|
||||
main_cat_id = int(cat_match.group(1))
|
||||
sub_cat_id = int(cat_match.group(2))
|
||||
|
||||
if main_cat_id > 0:
|
||||
if sub_cat_id > 0:
|
||||
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
|
||||
if not sub_category:
|
||||
flask.abort(400)
|
||||
else:
|
||||
main_category = models.MainCategory.by_id(main_cat_id)
|
||||
if not main_category:
|
||||
flask.abort(400)
|
||||
|
||||
# Force sort by id desc if rss
|
||||
if rss:
|
||||
sort_lambda = BAKED_SORT_LAMBDAS['id-desc']
|
||||
|
||||
same_user = False
|
||||
if logged_in_user:
|
||||
same_user = logged_in_user.id == user
|
||||
|
||||
if term:
|
||||
query = bakery(lambda session: session.query(models.TorrentNameSearch))
|
||||
count_query = bakery(lambda session: session.query(
|
||||
sqlalchemy.func.count(models.TorrentNameSearch.id)))
|
||||
else:
|
||||
query = bakery(lambda session: session.query(models.Torrent))
|
||||
# This is... eh. Optimize the COUNT() query since MySQL is bad at that.
|
||||
# See http://docs.sqlalchemy.org/en/rel_1_1/orm/query.html#sqlalchemy.orm.query.Query.count
|
||||
# Wrap the queries into the helper class to deduplicate code and
|
||||
# apply filters to both in one go
|
||||
count_query = bakery(lambda session: session.query(
|
||||
sqlalchemy.func.count(models.Torrent.id)))
|
||||
|
||||
qpc = BakedPair(query, count_query)
|
||||
bp = sqlalchemy.bindparam
|
||||
|
||||
baked_params = {}
|
||||
|
||||
# User view (/user/username)
|
||||
if user:
|
||||
qpc += lambda q: q.filter(models.Torrent.uploader_id == bp('user'))
|
||||
baked_params['user'] = user
|
||||
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
qpc += lambda q: q.filter(models.Torrent.flags.op('&')
|
||||
(int(models.TorrentFlags.DELETED)).is_(False))
|
||||
# If logged in user is not the same as the user being viewed,
|
||||
# show only torrents that aren't hidden or anonymous
|
||||
#
|
||||
# If logged in user is the same as the user being viewed,
|
||||
# show all torrents including hidden and anonymous ones
|
||||
#
|
||||
# On RSS pages in user view,
|
||||
# show only torrents that aren't hidden or anonymous no matter what
|
||||
if not same_user or rss:
|
||||
qpc += lambda q: (
|
||||
q.filter(
|
||||
models.Torrent.flags.op('&')(
|
||||
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)
|
||||
).is_(False)
|
||||
)
|
||||
)
|
||||
# General view (homepage, general search view)
|
||||
else:
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
qpc += lambda q: q.filter(models.Torrent.flags.op('&')
|
||||
(int(models.TorrentFlags.DELETED)).is_(False))
|
||||
# If logged in, show all torrents that aren't hidden unless they belong to you
|
||||
# On RSS pages, show all public torrents and nothing more.
|
||||
if logged_in_user and not rss:
|
||||
qpc += lambda q: q.filter(
|
||||
(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
|
||||
(models.Torrent.uploader_id == bp('logged_in_user'))
|
||||
)
|
||||
baked_params['logged_in_user'] = logged_in_user
|
||||
# Otherwise, show all torrents that aren't hidden
|
||||
else:
|
||||
qpc += lambda q: q.filter(models.Torrent.flags.op('&')
|
||||
(int(models.TorrentFlags.HIDDEN)).is_(False))
|
||||
|
||||
if sub_cat_id:
|
||||
qpc += lambda q: q.filter(
|
||||
(models.Torrent.main_category_id == bp('main_cat_id')),
|
||||
(models.Torrent.sub_category_id == bp('sub_cat_id'))
|
||||
)
|
||||
baked_params['main_cat_id'] = main_cat_id
|
||||
baked_params['sub_cat_id'] = sub_cat_id
|
||||
elif main_cat_id:
|
||||
qpc += lambda q: q.filter(models.Torrent.main_category_id == bp('main_cat_id'))
|
||||
baked_params['main_cat_id'] = main_cat_id
|
||||
|
||||
if filter_lambda:
|
||||
qpc += filter_lambda
|
||||
|
||||
if term:
|
||||
raise Exception('Baked search does not support search terms')
|
||||
|
||||
# Sort and order
|
||||
query += sort_lambda
|
||||
|
||||
if rss:
|
||||
query += lambda q: q.limit(bp('per_page'))
|
||||
baked_params['per_page'] = per_page
|
||||
|
||||
return query(db.session()).params(**baked_params).all()
|
||||
|
||||
return baked_paginate(query, count_query, baked_params,
|
||||
page, per_page=per_page, step=5, max_page=MAX_PAGES)
|
||||
|
||||
|
||||
class ShoddyLRU(object):
|
||||
def __init__(self, max_entries=128, expiry=60):
|
||||
self.max_entries = max_entries
|
||||
self.expiry = expiry
|
||||
|
||||
# Contains [value, last_used, expires_at]
|
||||
self.entries = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
self._sentinel = object()
|
||||
|
||||
def get(self, key, default=None):
|
||||
entry = self.entries.get(key)
|
||||
if entry is None:
|
||||
return default
|
||||
|
||||
now = time.time()
|
||||
if now > entry[2]:
|
||||
with self._lock:
|
||||
del self.entries[key]
|
||||
return default
|
||||
|
||||
entry[1] = now
|
||||
return entry[0]
|
||||
|
||||
def put(self, key, value, expiry=None):
|
||||
with self._lock:
|
||||
overflow = len(self.entries) - self.max_entries
|
||||
if overflow > 0:
|
||||
# Pick the least recently used keys
|
||||
removed_keys = [key for key, value in sorted(
|
||||
self.entries.items(), key=lambda t:t[1][1])][:overflow]
|
||||
for key in removed_keys:
|
||||
del self.entries[key]
|
||||
|
||||
now = time.time()
|
||||
self.entries[key] = [value, now, now + (expiry or self.expiry)]
|
||||
|
||||
|
||||
LRU_CACHE = ShoddyLRU(256, 60)
|
||||
|
||||
|
||||
def baked_paginate(query, count_query, params, page=1, per_page=50, max_page=None, step=5):
|
||||
if page < 1:
|
||||
flask.abort(404)
|
||||
|
||||
if max_page and page > max_page:
|
||||
flask.abort(404)
|
||||
bp = sqlalchemy.bindparam
|
||||
|
||||
ses = db.session()
|
||||
|
||||
# Count all items, use cache
|
||||
if app.config['COUNT_CACHE_DURATION']:
|
||||
query_key = (count_query._effective_key(ses), tuple(sorted(params.items())))
|
||||
total_query_count = LRU_CACHE.get(query_key)
|
||||
if total_query_count is None:
|
||||
total_query_count = count_query(ses).params(**params).scalar()
|
||||
LRU_CACHE.put(query_key, total_query_count, expiry=app.config['COUNT_CACHE_DURATION'])
|
||||
else:
|
||||
total_query_count = count_query(ses).params(**params).scalar()
|
||||
|
||||
# Grab items on current page
|
||||
query += lambda q: q.limit(bp('limit')).offset(bp('offset'))
|
||||
params['limit'] = per_page
|
||||
params['offset'] = (page - 1) * per_page
|
||||
|
||||
res = query(ses).params(**params)
|
||||
items = res.all()
|
||||
|
||||
if max_page:
|
||||
total_query_count = min(total_query_count, max_page * per_page)
|
||||
|
||||
# Handle case where we've had no results but then have some while in cache
|
||||
total_query_count = max(total_query_count, len(items))
|
||||
|
||||
if not items and page != 1:
|
||||
flask.abort(404)
|
||||
|
||||
return Pagination(None, page, per_page, total_query_count, items)
|
||||
|
|
|
@ -10,7 +10,7 @@ from flask_paginate import Pagination
|
|||
from nyaa import models
|
||||
from nyaa.extensions import db
|
||||
from nyaa.search import (DEFAULT_MAX_SEARCH_RESULT, DEFAULT_PER_PAGE, SERACH_PAGINATE_DISPLAY_MSG,
|
||||
_generate_query_string, search_db, search_elastic)
|
||||
_generate_query_string, search_db, search_db_baked, search_elastic)
|
||||
from nyaa.utils import chain_get
|
||||
from nyaa.views.account import logout
|
||||
|
||||
|
@ -186,7 +186,11 @@ def home(rss):
|
|||
else: # Otherwise, use db search for everything
|
||||
query_args['term'] = search_term or ''
|
||||
|
||||
query = search_db(**query_args)
|
||||
if app.config['USE_BAKED_SEARCH']:
|
||||
query = search_db_baked(**query_args)
|
||||
else:
|
||||
query = search_db(**query_args)
|
||||
|
||||
if render_as_rss:
|
||||
return render_rss('Home', query, use_elastic=False, magnet_links=use_magnet_links)
|
||||
else:
|
||||
|
|
|
@ -12,7 +12,7 @@ from itsdangerous import BadSignature, URLSafeSerializer
|
|||
from nyaa import forms, models
|
||||
from nyaa.extensions import db
|
||||
from nyaa.search import (DEFAULT_MAX_SEARCH_RESULT, DEFAULT_PER_PAGE, SERACH_PAGINATE_DISPLAY_MSG,
|
||||
_generate_query_string, search_db, search_elastic)
|
||||
_generate_query_string, search_db, search_db_baked, search_elastic)
|
||||
from nyaa.utils import admin_only, chain_get, sha1_hash
|
||||
|
||||
app = flask.current_app
|
||||
|
@ -185,7 +185,10 @@ def view_user(user_name):
|
|||
query_args['term'] = ''
|
||||
else:
|
||||
query_args['term'] = search_term or ''
|
||||
query = search_db(**query_args)
|
||||
if app.config['USE_BAKED_SEARCH']:
|
||||
query = search_db_baked(**query_args)
|
||||
else:
|
||||
query = search_db(**query_args)
|
||||
return flask.render_template('user.html',
|
||||
use_elastic=False,
|
||||
torrent_query=query,
|
||||
|
|
Loading…
Reference in a new issue