[Schema change] Torrents flags bitflag column to indexed columns (#471)

* convert torrent table flags column from bitflag to independent indexed columns

* elasticsearch integration (untested)

* improve performance
This commit is contained in:
A nyaa developer 2018-04-08 07:44:53 +02:00 committed by Arylide
parent c786bd20f8
commit 41a2a32f66
4 changed files with 134 additions and 75 deletions

View File

@ -0,0 +1,98 @@
"""Convert bitflags to seperate indexed columns
Revision ID: ecb0b3b88142
Revises: 6cc823948c5a
Create Date: 2018-04-08 02:52:44.178958
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
import sys
# revision identifiers, used by Alembic.
revision = 'ecb0b3b88142'
down_revision = '6cc823948c5a'
branch_labels = None
depends_on = None
def upgrade():
    """Split the ``flags`` bitfield into one indexed boolean column per flag.

    The same statements are issued for ``nyaa_torrents`` and then for
    ``sukebei_torrents``:

    1. add the eight ``BOOL NOT NULL`` flag columns in a single ALTER TABLE,
    2. create a single-column index on each new column,
    3. drop the old ``flags`` indexes, index ``uploader_id`` and create the
       wide ``ix_<prefix>_super`` index,
    4. set each new column to TRUE on rows where the matching bit of
       ``flags`` is set.
    """
    # New columns, in the order they are added and individually indexed.
    flag_columns = (
        'anonymous', 'banned', 'comment_locked', 'complete',
        'deleted', 'hidden', 'remake', 'trusted',
    )
    # (column, bit tested in the legacy ``flags`` integer), in backfill order.
    flag_bits = (
        ('anonymous', 1),
        ('hidden', 2),
        ('trusted', 4),
        ('remake', 8),
        ('complete', 16),
        ('deleted', 32),
        ('banned', 64),
        ('comment_locked', 128),
    )
    # Column list of the composite "super" index.
    super_index_columns = (
        'id', 'uploader_id', 'main_category_id', 'sub_category_id',
        'anonymous', 'hidden', 'deleted', 'banned', 'trusted', 'remake', 'complete',
    )

    for prefix in ('nyaa', 'sukebei'):
        table = '{0}_torrents'.format(prefix)

        add_clauses = ', '.join(
            'ADD COLUMN {0} BOOL NOT NULL'.format(column) for column in flag_columns
        )
        op.execute('ALTER TABLE {0} {1};'.format(table, add_clauses))

        for column in flag_columns:
            index_name = op.f('ix_{0}_{1}'.format(table, column))
            op.create_index(index_name, table, [column], unique=False)

        op.drop_index('ix_{0}_flags'.format(table), table_name=table)
        op.create_index(op.f('ix_{0}_uploader_id'.format(table)), table,
                        ['uploader_id'], unique=False)
        op.drop_index('uploader_flag_idx', table_name=table)
        op.create_index('ix_{0}_super'.format(prefix), table,
                        list(super_index_columns), unique=False)

        for column, bit in flag_bits:
            op.execute(
                'UPDATE {0} SET {1} = TRUE WHERE flags & {2} IS TRUE;'.format(
                    table, column, bit)
            )

        # The legacy ``flags`` column is intentionally not dropped here.
def downgrade():
    """Refuse to revert this migration.

    Writes a short message to standard error and terminates the process
    with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    # Diagnostics belong on stderr so they are not mixed into regular output.
    print("downgrade not supported", file=sys.stderr)
    sys.exit(1)

View File

@ -67,42 +67,6 @@ class DeclarativeHelperBase(object):
return cls._table_prefix(cls.__tablename_base__)
class FlagProperty(object):
    """Descriptor exposing one bit of an integer flags attribute as a boolean.

    Reading the property returns whether ``flag`` is set in the instance's
    flags attribute; assigning a truthy or falsy value sets or clears that
    bit and leaves the other bits untouched.

    Args:
        flag: bit mask this property controls.
        flags_attr: name of the instance attribute holding the combined
            flags value (defaults to ``'flags'``).
    """

    def __init__(self, flag, flags_attr='flags'):
        self._flag = flag
        self._flags_attr_name = flags_attr

    def _get_flags(self, instance):
        """Return the current combined flags value of ``instance``."""
        return getattr(instance, self._flags_attr_name)

    def _set_flags(self, instance, value):
        """Store ``value`` as the combined flags value of ``instance``."""
        return setattr(instance, self._flags_attr_name, value)

    def __get__(self, instance, owner_class):
        """Return True if the flag bit is set on ``instance``.

        Raises:
            AttributeError: when accessed on the class instead of an instance.
        """
        if instance is None:
            raise AttributeError(
                'flag properties can only be read from an instance')
        return bool(self._get_flags(instance) & self._flag)

    def __set__(self, instance, value):
        """Set the flag bit when ``value`` is truthy, clear it otherwise."""
        cleared = self._get_flags(instance) & ~self._flag
        # Explicit conditional instead of ``bool(value) and flag`` so that a
        # bool is never folded into the bit arithmetic.
        new_flags = (cleared | self._flag) if value else cleared
        self._set_flags(instance, new_flags)
class TorrentFlags(IntEnum):
    """Bit masks for the individual torrent flags.

    ``NONE`` is zero; every other member is a distinct power of two, so
    members can be combined and tested with bitwise operators.
    """

    NONE = 0
    ANONYMOUS = 1 << 0
    HIDDEN = 1 << 1
    TRUSTED = 1 << 2
    REMAKE = 1 << 3
    COMPLETE = 1 << 4
    DELETED = 1 << 5
    BANNED = 1 << 6
    COMMENT_LOCKED = 1 << 7
class TorrentBase(DeclarativeHelperBase):
__tablename_base__ = 'torrents'
@ -116,12 +80,11 @@ class TorrentBase(DeclarativeHelperBase):
filesize = db.Column(db.BIGINT, default=0, nullable=False, index=True)
encoding = db.Column(db.String(length=32), nullable=False)
flags = db.Column(db.Integer, default=0, nullable=False, index=True)
@declarative.declared_attr
def uploader_id(cls):
# Even though this is same for both tables, declarative requires this
return db.Column(db.Integer, db.ForeignKey('users.id'), nullable=True)
return db.Column(db.Integer, db.ForeignKey('users.id'), nullable=True, index=True)
uploader_ip = db.Column(db.Binary(length=16), default=None, nullable=True)
has_torrent = db.Column(db.Boolean, nullable=False, default=False)
@ -132,6 +95,15 @@ class TorrentBase(DeclarativeHelperBase):
updated_time = db.Column(db.DateTime(timezone=False), default=datetime.utcnow,
onupdate=datetime.utcnow, nullable=False)
anonymous = db.Column(db.Boolean, nullable=False, default=False, index=True)
hidden = db.Column(db.Boolean, nullable=False, default=False, index=True)
deleted = db.Column(db.Boolean, nullable=False, default=False, index=True)
banned = db.Column(db.Boolean, nullable=False, default=False, index=True)
trusted = db.Column(db.Boolean, nullable=False, default=False, index=True)
remake = db.Column(db.Boolean, nullable=False, default=False, index=True)
complete = db.Column(db.Boolean, nullable=False, default=False, index=True)
comment_locked = db.Column(db.Boolean, nullable=False, default=False, index=True)
@declarative.declared_attr
def main_category_id(cls):
fk = db.ForeignKey(cls._table_prefix('main_categories.id'))
@ -147,7 +119,9 @@ class TorrentBase(DeclarativeHelperBase):
@declarative.declared_attr
def __table_args__(cls):
return (
Index(cls._table_prefix('uploader_flag_idx'), 'uploader_id', 'flags'),
Index('ix_' + cls._table_prefix('super'), 'id', 'uploader_id',
'main_category_id', 'sub_category_id',
'anonymous', 'hidden', 'deleted', 'banned', 'trusted', 'remake', 'complete'),
ForeignKeyConstraint(
['main_category_id', 'sub_category_id'],
[cls._table_prefix('sub_categories.main_category_id'),
@ -251,17 +225,6 @@ class TorrentBase(DeclarativeHelperBase):
if self.uploader_ip:
return str(ip_address(self.uploader_ip))
# Flag properties below
anonymous = FlagProperty(TorrentFlags.ANONYMOUS)
hidden = FlagProperty(TorrentFlags.HIDDEN)
deleted = FlagProperty(TorrentFlags.DELETED)
banned = FlagProperty(TorrentFlags.BANNED)
trusted = FlagProperty(TorrentFlags.TRUSTED)
remake = FlagProperty(TorrentFlags.REMAKE)
complete = FlagProperty(TorrentFlags.COMPLETE)
comment_locked = FlagProperty(TorrentFlags.COMMENT_LOCKED)
# Class methods
@classmethod

View File

@ -8,6 +8,7 @@ import sqlalchemy
import sqlalchemy_fulltext.modes as FullTextMode
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q, Search
from sqlalchemy.sql.expression import false
from sqlalchemy_fulltext import FullTextSearch
from nyaa import models
@ -294,9 +295,9 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
filter_keys = {
'0': None,
'1': (models.TorrentFlags.REMAKE, False),
'2': (models.TorrentFlags.TRUSTED, True),
'3': (models.TorrentFlags.COMPLETE, True)
'1': (models.Torrent.remake, False),
'2': (models.Torrent.trusted, True),
'3': (models.Torrent.complete, True)
}
sentinel = object()
@ -356,8 +357,7 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
if not admin:
# Hide all DELETED torrents if regular user
qpc.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.DELETED)).is_(False))
qpc.filter(models.Torrent.deleted == false())
# If logged in user is not the same as the user being viewed,
# show only torrents that aren't hidden or anonymous
#
@ -367,24 +367,21 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
# On RSS pages in user view,
# show only torrents that aren't hidden or anonymous no matter what
if not same_user or rss:
qpc.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)).is_(False))
qpc.filter((models.Torrent.hidden == false()) &
(models.Torrent.anonymous == false()))
# General view (homepage, general search view)
else:
if not admin:
# Hide all DELETED torrents if regular user
qpc.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.DELETED)).is_(False))
qpc.filter(models.Torrent.deleted == false())
# If logged in, show all torrents that aren't hidden unless they belong to you
# On RSS pages, show all public torrents and nothing more.
if logged_in_user and not rss:
qpc.filter(
(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
(models.Torrent.uploader_id == logged_in_user.id))
qpc.filter((models.Torrent.hidden == false()) |
(models.Torrent.uploader_id == logged_in_user.id))
# Otherwise, show all torrents that aren't hidden
else:
qpc.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN)).is_(False))
qpc.filter(models.Torrent.hidden == false())
if main_category:
qpc.filter(models.Torrent.main_category_id == main_cat_id)
@ -393,8 +390,7 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
(models.Torrent.sub_category_id == sub_cat_id))
if filter_tuple:
qpc.filter(models.Torrent.flags.op('&')(
int(filter_tuple[0])).is_(filter_tuple[1]))
qpc.filter(filter_tuple[0] == filter_tuple[1])
if term:
for item in shlex.split(term, posix=False):
@ -403,11 +399,16 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0',
item, models.TorrentNameSearch, FullTextMode.NATURAL))
query, count_query = qpc.items
super_index = 'ix_' + models.Torrent._table_prefix('super')
# Sort and order
if sort_column.class_ != models.Torrent:
index_name = _get_index_name(sort_column)
query = query.join(sort_column.class_)
query = query.with_hint(sort_column.class_, 'USE INDEX ({0})'.format(index_name))
else:
query = query.with_hint(models.Torrent, 'USE INDEX ({0})'.format(super_index))
count_query = count_query.with_hint(models.Torrent, 'USE INDEX ({0})'.format(super_index))
query = query.order_by(getattr(sort_column, order)())

View File

@ -34,7 +34,6 @@ from elasticsearch.helpers import bulk, BulkIndexError
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
from datetime import datetime
from nyaa.models import TorrentFlags
import sys
import json
import time
@ -79,7 +78,6 @@ def pad_bytes(in_bytes, size):
def reindex_torrent(t, index_name):
# XXX annoyingly different from import_to_es, and
# you need to keep them in sync manually.
f = t['flags']
doc = {
"id": t['id'],
"display_name": t['display_name'],
@ -94,16 +92,15 @@ def reindex_torrent(t, index_name):
"main_category_id": t['main_category_id'],
"sub_category_id": t['sub_category_id'],
"comment_count": t['comment_count'],
# XXX all the bitflags are numbers
"anonymous": bool(f & TorrentFlags.ANONYMOUS),
"trusted": bool(f & TorrentFlags.TRUSTED),
"remake": bool(f & TorrentFlags.REMAKE),
"complete": bool(f & TorrentFlags.COMPLETE),
"anonymous": bool(t['anonymous']),
"trusted": bool(t['trusted']),
"remake": bool(t['remake']),
"complete": bool(t['complete']),
# TODO instead of indexing and filtering later
# could delete from es entirely. Probably won't matter
# for at least a few months.
"hidden": bool(f & TorrentFlags.HIDDEN),
"deleted": bool(f & TorrentFlags.DELETED),
"hidden": bool(t['hidden']),
"deleted": bool(t['deleted']),
"has_torrent": bool(t['has_torrent']),
}
# update, so we don't delete the stats if present