1
0
Fork 0
mirror of https://gitlab.com/SIGBUS/nyaa.git synced 2025-01-11 16:57:03 +00:00

Merge branch 'master' into reports

This commit is contained in:
nyaazi 2017-05-21 18:48:45 +03:00 committed by GitHub
commit 92a6074fa2
15 changed files with 528 additions and 332 deletions

11
config_es_sync.json Normal file
View file

@ -0,0 +1,11 @@
{
"save_loc": "/tmp/pos.json",
"mysql_host": "127.0.0.1",
"mysql_port": 13306,
"mysql_user": "root",
"mysql_password": "dunnolol",
"database": "nyaav2",
"internal_queue_depth": 10000,
"es_chunk_size": 10000,
"flush_interval": 5
}

View file

@ -130,7 +130,8 @@ UPLOAD_API_FORM_KEYMAP = {
'is_anonymous': 'anonymous', 'is_anonymous': 'anonymous',
'is_hidden': 'hidden', 'is_hidden': 'hidden',
'is_complete': 'complete', 'is_complete': 'complete',
'is_remake': 'remake' 'is_remake': 'remake',
'is_trusted': 'trusted'
} }
UPLOAD_API_FORM_KEYMAP_REVERSE = {v: k for k, v in UPLOAD_API_FORM_KEYMAP.items()} UPLOAD_API_FORM_KEYMAP_REVERSE = {v: k for k, v in UPLOAD_API_FORM_KEYMAP.items()}
UPLOAD_API_KEYS = [ UPLOAD_API_KEYS = [
@ -140,6 +141,7 @@ UPLOAD_API_KEYS = [
'hidden', 'hidden',
'complete', 'complete',
'remake', 'remake',
'trusted',
'information', 'information',
'description' 'description'
] ]
@ -161,7 +163,7 @@ def v2_api_upload():
# Map api keys to upload form fields # Map api keys to upload form fields
for key in UPLOAD_API_KEYS: for key in UPLOAD_API_KEYS:
mapped_key = UPLOAD_API_FORM_KEYMAP_REVERSE.get(key, key) mapped_key = UPLOAD_API_FORM_KEYMAP_REVERSE.get(key, key)
mapped_dict[mapped_key] = request_data.get(key) mapped_dict[mapped_key] = request_data.get(key) or ''
# Flask-WTF (very helpfully!!) automatically grabs the request form, so force a None formdata # Flask-WTF (very helpfully!!) automatically grabs the request form, so force a None formdata
upload_form = forms.UploadForm(None, data=mapped_dict) upload_form = forms.UploadForm(None, data=mapped_dict)

View file

@ -68,8 +68,8 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
torrent.remake = upload_form.is_remake.data torrent.remake = upload_form.is_remake.data
torrent.complete = upload_form.is_complete.data torrent.complete = upload_form.is_complete.data
# Copy trusted status from user if possible # Copy trusted status from user if possible
torrent.trusted = (uploading_user.level >= can_mark_trusted = uploading_user and uploading_user.is_trusted
models.UserLevelType.TRUSTED) if uploading_user else False torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False
# Set category ids # Set category ids
torrent.main_category_id, torrent.sub_category_id = \ torrent.main_category_id, torrent.sub_category_id = \
upload_form.category.parsed_data.get_category_ids() upload_form.category.parsed_data.get_category_ids()
@ -100,7 +100,9 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
for directory in path_parts: for directory in path_parts:
current_directory = current_directory.setdefault(directory, {}) current_directory = current_directory.setdefault(directory, {})
current_directory[filename] = file_dict['length'] # Don't add empty filenames (BitComet directory)
if filename:
current_directory[filename] = file_dict['length']
parsed_file_tree = utils.sorted_pathdict(parsed_file_tree) parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

View file

@ -153,6 +153,7 @@ class EditForm(FlaskForm):
is_remake = BooleanField('Remake') is_remake = BooleanField('Remake')
is_anonymous = BooleanField('Anonymous') is_anonymous = BooleanField('Anonymous')
is_complete = BooleanField('Complete') is_complete = BooleanField('Complete')
is_trusted = BooleanField('Trusted')
information = StringField('Information', [ information = StringField('Information', [
Length(max=255, message='Information must be at most %(max)d characters long.') Length(max=255, message='Information must be at most %(max)d characters long.')
@ -200,6 +201,7 @@ class UploadForm(FlaskForm):
is_remake = BooleanField('Remake') is_remake = BooleanField('Remake')
is_anonymous = BooleanField('Anonymous') is_anonymous = BooleanField('Anonymous')
is_complete = BooleanField('Complete') is_complete = BooleanField('Complete')
is_trusted = BooleanField('Trusted')
information = StringField('Information', [ information = StringField('Information', [
Length(max=255, message='Information must be at most %(max)d characters long.') Length(max=255, message='Information must be at most %(max)d characters long.')
@ -295,7 +297,7 @@ class ReportActionForm(FlaskForm):
def _validate_trackers(torrent_dict, tracker_to_check_for=None): def _validate_trackers(torrent_dict, tracker_to_check_for=None):
announce = torrent_dict.get('announce') announce = torrent_dict.get('announce')
announce_string = _validate_bytes(announce, 'announce', 'utf-8') announce_string = _validate_bytes(announce, 'announce', test_decode='utf-8')
tracker_found = tracker_to_check_for and ( tracker_found = tracker_to_check_for and (
announce_string.lower() == tracker_to_check_for.lower()) or False announce_string.lower() == tracker_to_check_for.lower()) or False
@ -307,7 +309,7 @@ def _validate_trackers(torrent_dict, tracker_to_check_for=None):
for announce in announce_list: for announce in announce_list:
_validate_list(announce, 'announce-list item') _validate_list(announce, 'announce-list item')
announce_string = _validate_bytes(announce[0], 'announce-list item url', 'utf-8') announce_string = _validate_bytes(announce[0], 'announce-list item url', test_decode='utf-8')
if tracker_to_check_for and announce_string.lower() == tracker_to_check_for.lower(): if tracker_to_check_for and announce_string.lower() == tracker_to_check_for.lower():
tracker_found = True tracker_found = True
@ -323,7 +325,7 @@ def _validate_torrent_metadata(torrent_dict):
assert isinstance(info_dict, dict), 'info is not a dict' assert isinstance(info_dict, dict), 'info is not a dict'
encoding_bytes = torrent_dict.get('encoding', b'utf-8') encoding_bytes = torrent_dict.get('encoding', b'utf-8')
encoding = _validate_bytes(encoding_bytes, 'encoding', 'utf-8').lower() encoding = _validate_bytes(encoding_bytes, 'encoding', test_decode='utf-8').lower()
name = info_dict.get('name') name = info_dict.get('name')
_validate_bytes(name, 'name', test_decode=encoding) _validate_bytes(name, 'name', test_decode=encoding)
@ -345,17 +347,21 @@ def _validate_torrent_metadata(torrent_dict):
path_list = file_dict.get('path') path_list = file_dict.get('path')
_validate_list(path_list, 'path') _validate_list(path_list, 'path')
for path_part in path_list: # Validate possible directory names
for path_part in path_list[:-1]:
_validate_bytes(path_part, 'path part', test_decode=encoding) _validate_bytes(path_part, 'path part', test_decode=encoding)
# Validate actual filename, allow b'' to specify an empty directory
_validate_bytes(path_list[-1], 'filename', check_empty=False, test_decode=encoding)
else: else:
length = info_dict.get('length') length = info_dict.get('length')
_validate_number(length, 'length', check_positive=True) _validate_number(length, 'length', check_positive=True)
def _validate_bytes(value, name='value', test_decode=None): def _validate_bytes(value, name='value', check_empty=True, test_decode=None):
assert isinstance(value, bytes), name + ' is not bytes' assert isinstance(value, bytes), name + ' is not bytes'
assert len(value) > 0, name + ' is empty' if check_empty:
assert len(value) > 0, name + ' is empty'
if test_decode: if test_decode:
try: try:
return value.decode(test_decode) return value.decode(test_decode)

View file

@ -8,6 +8,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
from sqlalchemy_fulltext import FullText from sqlalchemy_fulltext import FullText
import re import re
import base64
from markupsafe import escape as escape_markup from markupsafe import escape as escape_markup
from urllib.parse import unquote as unquote_url from urllib.parse import unquote as unquote_url
@ -88,10 +89,14 @@ class Torrent(db.Model):
primaryjoin=( primaryjoin=(
"and_(SubCategory.id == foreign(Torrent.sub_category_id), " "and_(SubCategory.id == foreign(Torrent.sub_category_id), "
"SubCategory.main_category_id == Torrent.main_category_id)")) "SubCategory.main_category_id == Torrent.main_category_id)"))
info = db.relationship('TorrentInfo', uselist=False, back_populates='torrent') info = db.relationship('TorrentInfo', uselist=False,
filelist = db.relationship('TorrentFilelist', uselist=False, back_populates='torrent') cascade="all, delete-orphan", back_populates='torrent')
stats = db.relationship('Statistic', uselist=False, back_populates='torrent', lazy='joined') filelist = db.relationship('TorrentFilelist', uselist=False,
trackers = db.relationship('TorrentTrackers', uselist=True, lazy='joined') cascade="all, delete-orphan", back_populates='torrent')
stats = db.relationship('Statistic', uselist=False,
cascade="all, delete-orphan", back_populates='torrent', lazy='joined')
trackers = db.relationship('TorrentTrackers', uselist=True,
cascade="all, delete-orphan", lazy='joined')
def __repr__(self): def __repr__(self):
return '<{0} #{1.id} \'{1.display_name}\' {1.filesize}b>'.format(type(self).__name__, self) return '<{0} #{1.id} \'{1.display_name}\' {1.filesize}b>'.format(type(self).__name__, self)
@ -121,6 +126,14 @@ class Torrent(db.Model):
# Escaped # Escaped
return escape_markup(self.information) return escape_markup(self.information)
@property
def info_hash_as_b32(self):
return base64.b32encode(self.info_hash).decode('utf-8')
@property
def info_hash_as_hex(self):
return self.info_hash.hex()
@property @property
def magnet_uri(self): def magnet_uri(self):
return create_magnet(self) return create_magnet(self)
@ -370,15 +383,15 @@ class User(db.Model):
@property @property
def is_admin(self): def is_admin(self):
return self.level is UserLevelType.ADMIN or self.level is UserLevelType.SUPERADMIN return self.level >= UserLevelType.ADMIN
@property @property
def is_superadmin(self): def is_superadmin(self):
return self.level is UserLevelType.SUPERADMIN return self.level == UserLevelType.SUPERADMIN
@property @property
def is_trusted(self): def is_trusted(self):
return self.level is UserLevelType.TRUSTED return self.level >= UserLevelType.TRUSTED
class ReportStatus(IntEnum): class ReportStatus(IntEnum):

View file

@ -134,24 +134,44 @@ def get_category_id_map():
app.register_blueprint(api_handler.api_blueprint, url_prefix='/api') app.register_blueprint(api_handler.api_blueprint, url_prefix='/api')
def chain_get(source, *args):
''' Tries to return values from source by the given keys.
Returns None if none match.
Note: can return a None from the source. '''
sentinel = object()
for key in args:
value = source.get(key, sentinel)
if value is not sentinel:
return value
return None
@app.route('/rss', defaults={'rss': True}) @app.route('/rss', defaults={'rss': True})
@app.route('/', defaults={'rss': False}) @app.route('/', defaults={'rss': False})
def home(rss): def home(rss):
if flask.request.args.get('page') == 'rss': render_as_rss = rss
rss = True req_args = flask.request.args
if req_args.get('page') == 'rss':
render_as_rss = True
term = flask.request.args.get('q', flask.request.args.get('term')) search_term = chain_get(req_args, 'q', 'term')
sort = flask.request.args.get('s')
order = flask.request.args.get('o')
category = flask.request.args.get('c', flask.request.args.get('cats'))
quality_filter = flask.request.args.get('f', flask.request.args.get('filter'))
user_name = flask.request.args.get('u', flask.request.args.get('user'))
page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int)
per_page = app.config.get('RESULTS_PER_PAGE') sort_key = req_args.get('s')
if not per_page: sort_order = req_args.get('o')
per_page = DEFAULT_PER_PAGE
category = chain_get(req_args, 'c', 'cats')
quality_filter = chain_get(req_args, 'f', 'filter')
user_name = chain_get(req_args, 'u', 'user')
page_number = chain_get(req_args, 'p', 'page', 'offset')
try:
page_number = max(1, int(page_number))
except (ValueError, TypeError):
page_number = 1
# Check simply if the key exists
use_magnet_links = 'magnets' in req_args or 'm' in req_args
results_per_page = app.config.get('RESULTS_PER_PAGE', DEFAULT_PER_PAGE)
user_id = None user_id = None
if user_name: if user_name:
@ -162,13 +182,13 @@ def home(rss):
query_args = { query_args = {
'user': user_id, 'user': user_id,
'sort': sort or 'id', 'sort': sort_key or 'id',
'order': order or 'desc', 'order': sort_order or 'desc',
'category': category or '0_0', 'category': category or '0_0',
'quality_filter': quality_filter or '0', 'quality_filter': quality_filter or '0',
'page': page, 'page': page_number,
'rss': rss, 'rss': render_as_rss,
'per_page': per_page 'per_page': results_per_page
} }
if flask.g.user: if flask.g.user:
@ -178,28 +198,26 @@ def home(rss):
# If searching, we get results from elastic search # If searching, we get results from elastic search
use_elastic = app.config.get('USE_ELASTIC_SEARCH') use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term: if use_elastic and search_term:
query_args['term'] = term query_args['term'] = search_term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') max_search_results = app.config.get('ES_MAX_SEARCH_RESULT', DEFAULT_MAX_SEARCH_RESULT)
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
# Only allow up to (max_search_results / page) pages # Only allow up to (max_search_results / page) pages
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) max_page = min(query_args['page'], int(math.ceil(max_search_results / results_per_page)))
query_args['page'] = max_page query_args['page'] = max_page
query_args['max_search_results'] = max_search_results query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args) query_results = search_elastic(**query_args)
if rss: if render_as_rss:
return render_rss('/', query_results, use_elastic=True) return render_rss('"{}"'.format(search_term), query_results, use_elastic=True, magnet_links=use_magnet_links)
else: else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name) rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total']) max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks # change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page, pagination = Pagination(p=query_args['page'], per_page=results_per_page,
total=max_results, bs_version=3, page_parameter='p', total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG) display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('home.html', return flask.render_template('home.html',
@ -213,13 +231,13 @@ def home(rss):
if use_elastic: if use_elastic:
query_args['term'] = '' query_args['term'] = ''
else: # Otherwise, use db search for everything else: # Otherwise, use db search for everything
query_args['term'] = term or '' query_args['term'] = search_term or ''
query = search_db(**query_args) query = search_db(**query_args)
if rss: if render_as_rss:
return render_rss('/', query, use_elastic=False) return render_rss('Home', query, use_elastic=False, magnet_links=use_magnet_links)
else: else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name) rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name)
# Use elastic is always false here because we only hit this section # Use elastic is always false here because we only hit this section
# if we're browsing without a search term (which means we default to DB) # if we're browsing without a search term (which means we default to DB)
# or if ES is disabled # or if ES is disabled
@ -256,39 +274,38 @@ def view_user(user_name):
db.session.add(user) db.session.add(user)
db.session.commit() db.session.commit()
return flask.redirect('/user/' + user.username) return flask.redirect(flask.url_for('view_user', user_name=user.username))
level = 'Regular' user_level = ['Regular', 'Trusted', 'Moderator', 'Administrator'][user.level]
if user.is_admin:
level = 'Moderator'
if user.is_superadmin: # check this second because user can be admin AND superadmin
level = 'Administrator'
elif user.is_trusted:
level = 'Trusted'
term = flask.request.args.get('q') req_args = flask.request.args
sort = flask.request.args.get('s')
order = flask.request.args.get('o')
category = flask.request.args.get('c')
quality_filter = flask.request.args.get('f')
page = flask.request.args.get('p')
if page:
page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE') search_term = chain_get(req_args, 'q', 'term')
if not per_page:
per_page = DEFAULT_PER_PAGE sort_key = req_args.get('s')
sort_order = req_args.get('o')
category = chain_get(req_args, 'c', 'cats')
quality_filter = chain_get(req_args, 'f', 'filter')
page_number = chain_get(req_args, 'p', 'page', 'offset')
try:
page_number = max(1, int(page_number))
except (ValueError, TypeError):
page_number = 1
results_per_page = app.config.get('RESULTS_PER_PAGE', DEFAULT_PER_PAGE)
query_args = { query_args = {
'term': term or '', 'term': search_term or '',
'user': user.id, 'user': user.id,
'sort': sort or 'id', 'sort': sort_key or 'id',
'order': order or 'desc', 'order': sort_order or 'desc',
'category': category or '0_0', 'category': category or '0_0',
'quality_filter': quality_filter or '0', 'quality_filter': quality_filter or '0',
'page': page or 1, 'page': page_number,
'rss': False, 'rss': False,
'per_page': per_page 'per_page': results_per_page
} }
if flask.g.user: if flask.g.user:
@ -297,17 +314,15 @@ def view_user(user_name):
query_args['admin'] = True query_args['admin'] = True
# Use elastic search for term searching # Use elastic search for term searching
rss_query_string = _generate_query_string(term, category, quality_filter, user_name) rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name)
use_elastic = app.config.get('USE_ELASTIC_SEARCH') use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term: if use_elastic and search_term:
query_args['term'] = term query_args['term'] = search_term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') max_search_results = app.config.get('ES_MAX_SEARCH_RESULT', DEFAULT_MAX_SEARCH_RESULT)
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
# Only allow up to (max_search_results / page) pages # Only allow up to (max_search_results / page) pages
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) max_page = min(query_args['page'], int(math.ceil(max_search_results / results_per_page)))
query_args['page'] = max_page query_args['page'] = max_page
query_args['max_search_results'] = max_search_results query_args['max_search_results'] = max_search_results
@ -316,7 +331,7 @@ def view_user(user_name):
max_results = min(max_search_results, query_results['hits']['total']) max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks # change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page, pagination = Pagination(p=query_args['page'], per_page=results_per_page,
total=max_results, bs_version=3, page_parameter='p', total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG) display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('user.html', return flask.render_template('user.html',
@ -327,7 +342,7 @@ def view_user(user_name):
user=user, user=user,
user_page=True, user_page=True,
rss_filter=rss_query_string, rss_filter=rss_query_string,
level=level, level=user_level,
admin=admin, admin=admin,
superadmin=superadmin, superadmin=superadmin,
form=form) form=form)
@ -336,7 +351,7 @@ def view_user(user_name):
if use_elastic: if use_elastic:
query_args['term'] = '' query_args['term'] = ''
else: else:
query_args['term'] = term or '' query_args['term'] = search_term or ''
query = search_db(**query_args) query = search_db(**query_args)
return flask.render_template('user.html', return flask.render_template('user.html',
use_elastic=False, use_elastic=False,
@ -345,7 +360,7 @@ def view_user(user_name):
user=user, user=user,
user_page=True, user_page=True,
rss_filter=rss_query_string, rss_filter=rss_query_string,
level=level, level=user_level,
admin=admin, admin=admin,
superadmin=superadmin, superadmin=superadmin,
form=form) form=form)
@ -361,9 +376,10 @@ def _jinja2_filter_rfc822(datestr, fmt=None):
return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s')))
def render_rss(label, query, use_elastic): def render_rss(label, query, use_elastic, magnet_links=False):
rss_xml = flask.render_template('rss.xml', rss_xml = flask.render_template('rss.xml',
use_elastic=use_elastic, use_elastic=use_elastic,
magnet_links=magnet_links,
term=label, term=label,
site_url=flask.request.url_root, site_url=flask.request.url_root,
torrent_query=query) torrent_query=query)
@ -538,7 +554,8 @@ def _create_upload_category_choices():
cat_names = id_map[key] cat_names = id_map[key]
is_main_cat = key.endswith('_0') is_main_cat = key.endswith('_0')
cat_name = is_main_cat and cat_names[0] or (' - ' + cat_names[1]) # cat_name = is_main_cat and cat_names[0] or (' - ' + cat_names[1])
cat_name = ' - '.join(cat_names)
choices.append((key, cat_name, is_main_cat)) choices.append((key, cat_name, is_main_cat))
return choices return choices
@ -562,16 +579,17 @@ def upload():
def view_torrent(torrent_id): def view_torrent(torrent_id):
torrent = models.Torrent.by_id(torrent_id) torrent = models.Torrent.by_id(torrent_id)
viewer = flask.g.user
if not torrent: if not torrent:
flask.abort(404) flask.abort(404)
if torrent.deleted and (not flask.g.user or not flask.g.user.is_admin): # Only allow admins see deleted torrents
if torrent.deleted and not (viewer and viewer.is_admin):
flask.abort(404) flask.abort(404)
if flask.g.user: # Only allow owners and admins to edit torrents
can_edit = flask.g.user is torrent.user or flask.g.user.is_admin can_edit = viewer and (viewer is torrent.user or viewer.is_admin)
else:
can_edit = False
files = None files = None
if torrent.filelist: if torrent.filelist:
@ -580,6 +598,7 @@ def view_torrent(torrent_id):
report_form = forms.ReportForm() report_form = forms.ReportForm()
return flask.render_template('view.html', torrent=torrent, return flask.render_template('view.html', torrent=torrent,
files=files, files=files,
viewer=viewer,
can_edit=can_edit, can_edit=can_edit,
report_form=report_form) report_form=report_form)
@ -589,15 +608,18 @@ def edit_torrent(torrent_id):
torrent = models.Torrent.by_id(torrent_id) torrent = models.Torrent.by_id(torrent_id)
form = forms.EditForm(flask.request.form) form = forms.EditForm(flask.request.form)
form.category.choices = _create_upload_category_choices() form.category.choices = _create_upload_category_choices()
category = str(torrent.main_category_id) + "_" + str(torrent.sub_category_id)
editor = flask.g.user
if not torrent: if not torrent:
flask.abort(404) flask.abort(404)
if torrent.deleted and (not flask.g.user or not flask.g.user.is_admin): # Only allow admins edit deleted torrents
if torrent.deleted and not (editor and editor.is_admin):
flask.abort(404) flask.abort(404)
if not flask.g.user or (flask.g.user is not torrent.user and not flask.g.user.is_admin): # Only allow torrent owners or admins edit torrents
if not editor or not (editor is torrent.user or editor.is_admin):
flask.abort(403) flask.abort(403)
if flask.request.method == 'POST' and form.validate(): if flask.request.method == 'POST' and form.validate():
@ -607,36 +629,43 @@ def edit_torrent(torrent_id):
torrent.display_name = (form.display_name.data or '').strip() torrent.display_name = (form.display_name.data or '').strip()
torrent.information = (form.information.data or '').strip() torrent.information = (form.information.data or '').strip()
torrent.description = (form.description.data or '').strip() torrent.description = (form.description.data or '').strip()
if flask.g.user.is_admin:
torrent.deleted = form.is_deleted.data
torrent.hidden = form.is_hidden.data torrent.hidden = form.is_hidden.data
torrent.remake = form.is_remake.data torrent.remake = form.is_remake.data
torrent.complete = form.is_complete.data torrent.complete = form.is_complete.data
torrent.anonymous = form.is_anonymous.data torrent.anonymous = form.is_anonymous.data
if editor.is_trusted:
torrent.trusted = form.is_trusted.data
if editor.is_admin:
torrent.deleted = form.is_deleted.data
db.session.commit() db.session.commit()
flask.flash(flask.Markup( flask.flash(flask.Markup(
'Torrent has been successfully edited! Changes might take a few minutes to show up.'), 'info') 'Torrent has been successfully edited! Changes might take a few minutes to show up.'), 'info')
return flask.redirect('/view/' + str(torrent_id)) return flask.redirect(flask.url_for('view_torrent', torrent_id=torrent.id))
else: else:
# Setup form with pre-formatted form. if flask.request.method != 'POST':
form.category.data = category # Fill form data only if the POST didn't fail
form.display_name.data = torrent.display_name form.category.data = torrent.sub_category.id_as_string
form.information.data = torrent.information form.display_name.data = torrent.display_name
form.description.data = torrent.description form.information.data = torrent.information
form.is_hidden.data = torrent.hidden form.description.data = torrent.description
if flask.g.user.is_admin:
form.is_hidden.data = torrent.hidden
form.is_remake.data = torrent.remake
form.is_complete.data = torrent.complete
form.is_anonymous.data = torrent.anonymous
form.is_trusted.data = torrent.trusted
form.is_deleted.data = torrent.deleted form.is_deleted.data = torrent.deleted
form.is_remake.data = torrent.remake
form.is_complete.data = torrent.complete
form.is_anonymous.data = torrent.anonymous
return flask.render_template('edit.html', return flask.render_template('edit.html',
form=form, form=form,
torrent=torrent, torrent=torrent,
admin=flask.g.user.is_admin) editor=editor)
@app.route('/view/<int:torrent_id>/magnet') @app.route('/view/<int:torrent_id>/magnet')

View file

@ -58,6 +58,10 @@ table.torrent-list thead th.sorting_desc:after {
content: "\f0dd"; content: "\f0dd";
} }
table.torrent-list tbody tr td a:visited {
color: #1d4568;
}
#torrent-description img { #torrent-description img {
max-width: 100%; max-width: 100%;
} }

View file

@ -1,10 +1,12 @@
{% macro render_field(field) %} {% macro render_field(field, render_label=True) %}
{% if field.errors %} {% if field.errors %}
<div class="form-group has-error"> <div class="form-group has-error">
{% else %} {% else %}
<div class="form-group"> <div class="form-group">
{% endif %} {% endif %}
{% if render_label %}
{{ field.label(class='control-label') }} {{ field.label(class='control-label') }}
{% endif %}
{{ field(title=field.description,**kwargs) | safe }} {{ field(title=field.description,**kwargs) | safe }}
{% if field.errors %} {% if field.errors %}
<div class="help-block"> <div class="help-block">
@ -27,33 +29,33 @@
{% macro render_markdown_editor(field, field_name='') %} {% macro render_markdown_editor(field, field_name='') %}
{% if field.errors %} {% if field.errors %}
<div class="form-group has-error"> <div class="form-group has-error">
{% else %} {% else %}
<div class="form-group"> <div class="form-group">
{% endif %} {% endif %}
<div class="markdown-editor" id="{{ field_name }}-markdown-editor" data-field-name="{{ field_name }}"> <div class="markdown-editor" id="{{ field_name }}-markdown-editor" data-field-name="{{ field_name }}">
<ul class="nav nav-tabs" role="tablist"> {{ field.label(class='control-label') }}
<li role="presentation" class="active"> <ul class="nav nav-tabs" role="tablist">
<a href="#{{ field_name }}-tab" aria-controls="" role="tab" data-toggle="tab"> <li role="presentation" class="active">
Write <a href="#{{ field_name }}-tab" aria-controls="" role="tab" data-toggle="tab">
</a> Write
</li> </a>
<li role="presentation"> </li>
<a href="#{{ field_name }}-preview" id="{{ field_name }}-preview-tab" aria-controls="preview" role="tab" data-toggle="tab"> <li role="presentation">
Preview <a href="#{{ field_name }}-preview" id="{{ field_name }}-preview-tab" aria-controls="preview" role="tab" data-toggle="tab">
</a> Preview
</li> </a>
</ul> </li>
<div class="tab-content"> </ul>
<div role="tabpanel" class="tab-pane active" id="{{ field_name }}-tab" data-markdown-target="#{{ field_name }}-markdown-target"> <div class="tab-content">
{{ render_field(field, class_='form-control markdown-source') }} <div role="tabpanel" class="tab-pane active" id="{{ field_name }}-tab" data-markdown-target="#{{ field_name }}-markdown-target">
</div> {{ render_field(field, False, class_='form-control markdown-source') }}
<div role="tabpanel" class="tab-pane" id="{{ field_name }}-preview"> </div>
{{ field.label(class='control-label') }} <div role="tabpanel" class="tab-pane" id="{{ field_name }}-preview">
<div class="well" id="{{ field_name }}-markdown-target"></div> <div class="well" id="{{ field_name }}-markdown-target"></div>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
{% endmacro %} {% endmacro %}

View file

@ -4,79 +4,73 @@
{% from "_formhelpers.html" import render_field %} {% from "_formhelpers.html" import render_field %}
{% from "_formhelpers.html" import render_markdown_editor %} {% from "_formhelpers.html" import render_markdown_editor %}
<h1>Edit Torrent</h1> {% set torrent_url = url_for('view_torrent', torrent_id=torrent.id) %}
<h1>
Edit Torrent <a href="{{ torrent_url }}">#{{torrent.id}}</a>
{% if (torrent.user != None) and (torrent.user != editor) %}
(by <a href="{{ url_for('view_user', user_name=torrent.user.username) }}">{{ torrent.user.username }}</a>)
{% endif %}
</h1>
<form method="POST" enctype="multipart/form-data"> <form method="POST" enctype="multipart/form-data">
{{ form.csrf_token }} {{ form.csrf_token }}
<div class="row"> <div class="row">
<div class="form-group col-md-6"> <div class="col-md-6">
{{ render_field(form.category, class_='form-control')}} {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }}
</div>
<div class="col-md-4">
{{ render_field(form.category, class_='form-control')}}
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="form-group col-md-6"> <div class="col-md-6">
{{ render_field(form.display_name, class_='form-control', placeholder='Display name') }} {{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }}
</div> </div>
</div> <div class="col-md-6">
<label class="control-label">Torrent flags</label>
<div>
{% if editor.is_admin %}
<label class="btn btn-primary">
{{ form.is_deleted }}
Deleted
</label>
{% endif %}
<div class="row"> <label class="btn btn-default" style="background-color: darkgray; border-color: #ccc;" title="Hide torrent from listing">
<div class="form-group col-md-6">
{{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-6">
{{ render_markdown_editor(form.description, field_name='description') }}
</div>
</div>
{% if admin %}
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_deleted }}
Deleted
</label>
</div>
</div>
{% endif %}
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_hidden }} {{ form.is_hidden }}
Hidden Hidden
</label> </label>
</div> <label class="btn btn-danger" title="This torrent is derived from another release">
</div>
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_remake }} {{ form.is_remake }}
Remake Remake
</label> </label>
</div> <label class="btn btn-primary" title="This torrent is a complete batch (eg. season)">
</div>
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_complete }} {{ form.is_complete }}
Complete Complete
</label> </label>
{# Only allow changing anonymous status when an uploader exists #}
{% if torrent.uploader_id %}
<label class="btn btn-primary" title="Upload torrent anonymously (don't display your username)">
{{ form.is_anonymous }}
Anonymous
</label>
{% endif %}
{% if editor.is_trusted %}
<label class="btn btn-success" title="Mark torrent trusted">
{{ form.is_trusted }}
Trusted
</label>
{% endif %}
</div>
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="form-group col-md-6"> <div class="col-md-12">
<label> {{ render_markdown_editor(form.description, field_name='description') }}
{{ form.is_anonymous }}
Anonymous
</label>
</div> </div>
</div> </div>

View file

@ -1,37 +1,45 @@
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"> <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel> <channel>
<title>{{ config.SITE_NAME }} Torrent File RSS (No magnets)</title> <title>{{ config.SITE_NAME }} Torrent File RSS</title>
<description>RSS Feed for {{ term }}</description> <description>RSS Feed for {{ term }}</description>
<link>{{ url_for('home', _external=True) }}</link> <link>{{ url_for('home', _external=True) }}</link>
<atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" /> <atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
{% for torrent in torrent_query %} {% for torrent in torrent_query %}
{% if torrent.has_torrent %}
<item> <item>
<title>{{ torrent.display_name }}</title> <title>{{ torrent.display_name }}</title>
{# <description><![CDATA[{{ torrent.description }}]]></description> #}
{% if use_elastic %} {% if use_elastic %}
<link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link> {% if torrent.has_torrent and not magnet_links %}
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid> <link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate> {% else %}
<link>{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}</link>
{% endif %}
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
<seeders> {{- torrent.seed_count }}</seeders>
<leechers> {{- torrent.leech_count }}</leechers>
<downloads>{{- torrent.download_count }}</downloads>
<infoHash> {{- torrent.info_hash }}</infoHash>
{% else %} {% else %}
<link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link> {% if torrent.has_torrent and not magnet_links %}
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid> <link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate> {% else %}
<link>{{ torrent.magnet_uri }}</link>
{% endif %}
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
<seeders> {{- torrent.stats.seed_count }}</seeders>
<leechers> {{- torrent.stats.leech_count }}</leechers>
<downloads>{{- torrent.stats.download_count }}</downloads>
<infoHash> {{- torrent.info_hash_as_hex }}</infoHash>
{% endif %} {% endif %}
{% set cat_id = use_elastic and ((torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string)) or torrent.sub_category.id_as_string %}
<categoryId>{{- cat_id }}</categoryId>
<category> {{- category_name(cat_id) }}</category>
<size> {{- torrent.filesize | filesizeformat(True) }}</size>
</item> </item>
{% else %}
<item>
<title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ torrent.magnet_uri }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item>
{% endif %}
{% endfor %} {% endfor %}
</channel> </channel>
</rss> </rss>

View file

@ -16,68 +16,57 @@
<form method="POST" enctype="multipart/form-data"> <form method="POST" enctype="multipart/form-data">
{% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <kbd>{{config.MAIN_ANNOUNCE_URL}}</kbd> in your trackers</p>{% endif %} {% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <kbd>{{config.MAIN_ANNOUNCE_URL}}</kbd> in your trackers</p>{% endif %}
<div class="row"> <div class="row">
<div class="form-group col-md-6"> <div class="col-md-6">
{{ render_upload(form.torrent_file, accept=".torrent") }} {{ render_upload(form.torrent_file, accept=".torrent") }}
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="form-group col-md-6"> <div class="col-md-6">
{{ render_field(form.category, class_='form-control')}} {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }}
</div>
<div class="col-md-4">
{{ render_field(form.category, class_='form-control')}}
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="form-group col-md-6">
{{ render_field(form.display_name, class_='form-control', placeholder='Display name') }}
</div>
</div> </div>
<div class="row form-group">
<div class="row"> <div class="col-md-6">
<div class="form-group col-md-6">
{{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }} {{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }}
</div> </div>
</div> <div class="col-md-6">
<label class="control-label">Torrent flags</label>
<div class="row"> <div>
<div class="form-group col-md-6"> <label class="btn btn-primary" title="Upload torrent anonymously (don't display your username)">
{{ render_markdown_editor(form.description, field_name='description') }} {{ form.is_anonymous(disabled=(False if user else ""), checked=(False if user else "")) }}
</div> Anonymous
</div> </label>
<label class="btn btn-default" style="background-color: darkgray; border-color: #ccc;" title="Hide torrent from listing">
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_hidden }} {{ form.is_hidden }}
Hidden Hidden
</label> </label>
</div> <label class="btn btn-danger" title="This torrent is derived from another release">
</div>
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_remake }} {{ form.is_remake }}
Remake Remake
</label> </label>
</div> <label class="btn btn-primary" title="This torrent is a complete batch (eg. season)">
</div>
<div class="row">
<div class="form-group col-md-6">
<label>
{{ form.is_complete }} {{ form.is_complete }}
Complete Complete
</label> </label>
</div> {% if user.is_trusted %}
</div> <label class="btn btn-success" title="Mark torrent trusted">
{{ form.is_trusted(checked="") }}
<div class="row"> Trusted
<div class="form-group col-md-6">
<label>
{{ form.is_anonymous }}
Anonymous
</label> </label>
{% endif %}
</div>
</div>
</div>
<div class="row">
<div class="col-md-12">
{{ render_markdown_editor(form.description, field_name='description') }}
</div> </div>
</div> </div>

View file

@ -6,7 +6,7 @@
<div class="panel-heading"{% if torrent.hidden %} style="background-color: darkgray;"{% endif %}> <div class="panel-heading"{% if torrent.hidden %} style="background-color: darkgray;"{% endif %}>
<h3 class="panel-title"> <h3 class="panel-title">
{% if can_edit %} {% if can_edit %}
<a href="{{ request.url }}/edit"><i class="fa fa-fw fa-pencil"></i></a> <a href="{{ request.url }}/edit" title="Edit torrent"><i class="fa fa-fw fa-pencil"></i></a>
{% endif %} {% endif %}
{{ torrent.display_name }} {{ torrent.display_name }}
</h3> </h3>
@ -24,7 +24,14 @@
<div class="row"> <div class="row">
<div class="col-md-1">Submitter:</div> <div class="col-md-1">Submitter:</div>
<div class="col-md-5">{% if not torrent.anonymous and torrent.user %}<a href="{{ url_for('view_user', user_name=torrent.user.username) }}">{{ torrent.user.username }}</a>{% else %}Anonymous{% endif %}</div> <div class="col-md-5">
{% set user_url = torrent.user and url_for('view_user', user_name=torrent.user.username) %}
{%- if not torrent.anonymous and torrent.user -%}
<a href="{{ user_url }}">{{ torrent.user.username }}</a>
{%- else -%}
Anonymous {% if torrent.user and (viewer == torrent.user or viewer.is_admin) %}(<a href="{{ user_url }}">{{ torrent.user.username }}</a>){% endif %}
{%- endif -%}
</div>
<div class="col-md-1">Seeders:</div> <div class="col-md-1">Seeders:</div>
<div class="col-md-5"><span style="color: green;">{% if config.ENABLE_SHOW_STATS %}{{ torrent.stats.seed_count }}{% else %}Coming soon{% endif %}</span></div> <div class="col-md-5"><span style="color: green;">{% if config.ENABLE_SHOW_STATS %}{{ torrent.stats.seed_count }}{% else %}Coming soon{% endif %}</span></div>

View file

@ -38,3 +38,4 @@ elasticsearch-dsl==5.2.0
progressbar2==3.20.0 progressbar2==3.20.0
mysql-replication==0.13 mysql-replication==0.13
flask-paginate==0.4.5 flask-paginate==0.4.5
statsd==3.2.1

View file

@ -21,9 +21,12 @@ when import_to_es and sync_es will be lost. Instead, you can run SHOW MASTER
STATUS _before_ you run import_to_es. That way you'll definitely pick up any STATUS _before_ you run import_to_es. That way you'll definitely pick up any
changes that happen while the import_to_es script is dumping stuff from the changes that happen while the import_to_es script is dumping stuff from the
database into es, at the expense of redoing a (small) amount of indexing. database into es, at the expense of redoing a (small) amount of indexing.
This uses multithreading so we don't have to block on socket io (both binlog
reading and es POSTing). asyncio soon
""" """
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk from elasticsearch.helpers import bulk, BulkIndexError
from pymysqlreplication import BinLogStreamReader from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
from datetime import datetime from datetime import datetime
@ -32,51 +35,39 @@ import sys
import json import json
import time import time
import logging import logging
from statsd import StatsClient
from threading import Thread
from queue import Queue, Empty
logging.basicConfig() logging.basicConfig(format='%(asctime)s %(levelname)s %(name)s - %(message)s')
log = logging.getLogger('sync_es') log = logging.getLogger('sync_es')
log.setLevel(logging.INFO) log.setLevel(logging.INFO)
# config in json, 2lazy to argparse
if len(sys.argv) != 2:
print("need config.json location", file=sys.stderr)
sys.exit(-1)
with open(sys.argv[1]) as f:
config = json.load(f)
# goes to netdata or other statsd listener
stats = StatsClient('localhost', 8125, prefix="sync_es")
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG) #logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
# in prod want in /var/lib somewhere probably # in prod want in /var/lib somewhere probably
SAVE_LOC = "/var/lib/sync_es_position.json" SAVE_LOC = config.get('save_loc', "/tmp/pos.json")
MYSQL_HOST = '127.0.0.1' MYSQL_HOST = config.get('mysql_host', '127.0.0.1')
MYSQL_PORT = 3306 MYSQL_PORT = config.get('mysql_port', 3306)
MYSQL_USER = 'test' MYSQL_USER = config.get('mysql_user', 'root')
MYSQL_PW = 'test123' MYSQL_PW = config.get('mysql_password', 'dunnolol')
NT_DB = 'nyaav2' NT_DB = config.get('database', 'nyaav2')
INTERNAL_QUEUE_DEPTH = config.get('internal_queue_depth', 10000)
with open(SAVE_LOC) as f: ES_CHUNK_SIZE = config.get('es_chunk_size', 10000)
pos = json.load(f) # seconds since no events happening to flush to es. remember this also
# interacts with es' refresh_interval setting.
es = Elasticsearch(timeout=30) FLUSH_INTERVAL = config.get('flush_interval', 5)
stream = BinLogStreamReader(
# TODO parse out from config.py or something
connection_settings = {
'host': MYSQL_HOST,
'port': MYSQL_PORT,
'user': MYSQL_USER,
'passwd': MYSQL_PW
},
server_id=10, # arbitrary
# only care about this database currently
only_schemas=[NT_DB],
# these tables in the database
only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
# from our save file
resume_stream=True,
log_file=pos['log_file'],
log_pos=pos['log_pos'],
# skip the other stuff like table mapping
only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent],
# if we're at the head of the log, block until something happens
# note it'd be nice to block async-style instead, but the mainline
# binlogreader is synchronous. there is an (unmaintained?) fork
# using aiomysql if anybody wants to revive that.
blocking=True)
def reindex_torrent(t, index_name): def reindex_torrent(t, index_name):
# XXX annoyingly different from import_to_es, and # XXX annoyingly different from import_to_es, and
@ -120,8 +111,8 @@ def reindex_torrent(t, index_name):
def reindex_stats(s, index_name): def reindex_stats(s, index_name):
# update the torrent at torrent_id, assumed to exist; # update the torrent at torrent_id, assumed to exist;
# this will always be the case if you're reading the binlog # this will always be the case if you're reading the binlog
# in order; the foreign key constraint on torrrent_id prevents # in order; the foreign key constraint on torrent_id prevents
# the stats row rom existing if the torrent isn't around. # the stats row from existing if the torrent isn't around.
return { return {
'_op_type': 'update', '_op_type': 'update',
'_index': index_name, '_index': index_name,
@ -141,45 +132,176 @@ def delet_this(row, index_name):
'_type': 'torrent', '_type': 'torrent',
'_id': str(row['values']['id'])} '_id': str(row['values']['id'])}
n = 0
last_save = time.time()
for event in stream: class BinlogReader(Thread):
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": # write_buf is the Queue we communicate with
if event.table == "nyaa_torrents": def __init__(self, write_buf):
index_name = "nyaa" Thread.__init__(self)
else: self.write_buf = write_buf
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
# UpdateRowsEvent includes the old values too, but we don't care
bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# ok, bye
bulk(es, (delet_this(row, index_name) for row in event.rows))
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
if event.table == "nyaa_statistics":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later,
# which will clean up the entire es "torrent" document
pass
else:
raise Exception(f"unknown event {type(event)}")
else:
raise Exception(f"unknown table {s.table}")
n += 1 def run(self):
if n % 100 == 0 or time.time() - last_save > 30: with open(SAVE_LOC) as f:
log.info(f"saving position {stream.log_file}/{stream.log_pos}") pos = json.load(f)
with open(SAVE_LOC, 'w') as f:
json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f) stream = BinLogStreamReader(
# TODO parse out from config.py or something
connection_settings = {
'host': MYSQL_HOST,
'port': MYSQL_PORT,
'user': MYSQL_USER,
'passwd': MYSQL_PW
},
server_id=10, # arbitrary
# only care about this database currently
only_schemas=[NT_DB],
# these tables in the database
only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
# from our save file
resume_stream=True,
log_file=pos['log_file'],
log_pos=pos['log_pos'],
# skip the other stuff like table mapping
only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent],
# if we're at the head of the log, block until something happens
# note it'd be nice to block async-style instead, but the mainline
# binlogreader is synchronous. there is an (unmaintained?) fork
# using aiomysql if anybody wants to revive that.
blocking=True)
log.info(f"reading binlog from {stream.log_file}/{stream.log_pos}")
for event in stream:
# save the pos of the stream and timestamp with each message, so we
# can commit in the other thread. and keep track of process latency
pos = (stream.log_file, stream.log_pos, event.timestamp)
with stats.pipeline() as s:
s.incr('total_events')
s.incr(f"event.{event.table}.{type(event).__name__}")
s.incr('total_rows', len(event.rows))
s.incr(f"rows.{event.table}.{type(event).__name__}", len(event.rows))
# XXX not a "timer", but we get a histogram out of it
s.timing(f"rows_per_event.{event.table}.{type(event).__name__}", len(event.rows))
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
for row in event.rows:
self.write_buf.put(
(pos, reindex_torrent(row['values'], index_name)),
block=True)
elif type(event) is UpdateRowsEvent:
# UpdateRowsEvent includes the old values too, but we don't care
for row in event.rows:
self.write_buf.put(
(pos, reindex_torrent(row['after_values'], index_name)),
block=True)
elif type(event) is DeleteRowsEvent:
# ok, bye
for row in event.rows:
self.write_buf.put((pos, delet_this(row, index_name)), block=True)
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
if event.table == "nyaa_statistics":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
for row in event.rows:
self.write_buf.put(
(pos, reindex_stats(row['values'], index_name)),
block=True)
elif type(event) is UpdateRowsEvent:
for row in event.rows:
self.write_buf.put(
(pos, reindex_stats(row['after_values'], index_name)),
block=True)
elif type(event) is DeleteRowsEvent:
# uh ok. Assume that the torrent row will get deleted later,
# which will clean up the entire es "torrent" document
pass
else:
raise Exception(f"unknown event {type(event)}")
else:
raise Exception(f"unknown table {s.table}")
class EsPoster(Thread):
# read_buf is the queue of stuff to bulk post
def __init__(self, read_buf, chunk_size=1000, flush_interval=5):
Thread.__init__(self)
self.read_buf = read_buf
self.chunk_size = chunk_size
self.flush_interval = flush_interval
def run(self):
es = Elasticsearch(timeout=30)
last_save = time.time()
since_last = 0
while True:
actions = []
while len(actions) < self.chunk_size:
try:
# grab next event from queue with metadata that creepily
# updates, surviving outside the scope of the loop
((log_file, log_pos, timestamp), action) = \
self.read_buf.get(block=True, timeout=self.flush_interval)
actions.append(action)
except Empty:
# nothing new for the whole interval
break
if not actions:
# nothing to post
log.debug("no changes...")
continue
# XXX "time" to get histogram of no events per bulk
stats.timing('actions_per_bulk', len(actions))
try:
with stats.timer('post_bulk'):
bulk(es, actions, chunk_size=self.chunk_size)
except BulkIndexError as bie:
# in certain cases where we're really out of sync, we update a
# stat when the torrent doc is, causing a "document missing"
# error from es, with no way to suppress that server-side.
# Thus ignore that type of error if it's the only problem
for e in bie.errors:
try:
if e['update']['error']['type'] != 'document_missing_exception':
raise bie
except KeyError:
raise bie
# how far we're behind, wall clock
stats.gauge('process_latency', int((time.time() - timestamp) * 1000))
since_last += len(actions)
if since_last >= 10000 or (time.time() - last_save) > 10:
log.info(f"saving position {log_file}/{log_pos}, {time.time() - timestamp:,.3f} seconds behind")
with stats.timer('save_pos'):
with open(SAVE_LOC, 'w') as f:
json.dump({"log_file": log_file, "log_pos": log_pos}, f)
last_save = time.time()
since_last = 0
# in-memory queue between binlog and es. The bigger it is, the more events we
# can parse in memory while waiting for es to catch up, at the expense of heap.
buf = Queue(maxsize=INTERNAL_QUEUE_DEPTH)
reader = BinlogReader(buf)
reader.daemon = True
writer = EsPoster(buf, chunk_size=ES_CHUNK_SIZE, flush_interval=FLUSH_INTERVAL)
writer.daemon = True
reader.start()
writer.start()
# on the main thread, poll the queue size for monitoring
while True:
stats.gauge('queue_depth', buf.qsize())
time.sleep(1)

View file

@ -87,6 +87,11 @@ tor_group.add_argument('-H', '--hidden', default=False, action='store_true', hel
tor_group.add_argument('-C', '--complete', default=False, action='store_true', help='Mark torrent as complete (eg. season batch)') tor_group.add_argument('-C', '--complete', default=False, action='store_true', help='Mark torrent as complete (eg. season batch)')
tor_group.add_argument('-R', '--remake', default=False, action='store_true', help='Mark torrent as remake (derivative work from another release)') tor_group.add_argument('-R', '--remake', default=False, action='store_true', help='Mark torrent as remake (derivative work from another release)')
trusted_group = tor_group.add_mutually_exclusive_group(required=False)
trusted_group.add_argument('-T', '--trusted', dest='trusted', action='store_true', help='Mark torrent as trusted, if possible. Defaults to true')
trusted_group.add_argument('--no-trusted', dest='trusted', action='store_false', help='Do not mark torrent as trusted')
parser.set_defaults(trusted=True)
tor_group.add_argument('torrent', metavar='TORRENT_FILE', help='The .torrent file to upload') tor_group.add_argument('torrent', metavar='TORRENT_FILE', help='The .torrent file to upload')
@ -145,6 +150,7 @@ if __name__ == "__main__":
'hidden' : args.hidden, 'hidden' : args.hidden,
'complete' : args.complete, 'complete' : args.complete,
'remake' : args.remake, 'remake' : args.remake,
'trusted' : args.trusted,
} }
encoded_data = { encoded_data = {
'torrent_data' : json.dumps(data) 'torrent_data' : json.dumps(data)