diff --git a/config_es_sync.json b/config_es_sync.json new file mode 100644 index 0000000..d2cb889 --- /dev/null +++ b/config_es_sync.json @@ -0,0 +1,11 @@ +{ +"save_loc": "/tmp/pos.json", +"mysql_host": "127.0.0.1", +"mysql_port": 13306, +"mysql_user": "root", +"mysql_password": "dunnolol", +"database": "nyaav2", +"internal_queue_depth": 10000, +"es_chunk_size": 10000, +"flush_interval": 5 +} diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 81bb595..6638af7 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -130,7 +130,8 @@ UPLOAD_API_FORM_KEYMAP = { 'is_anonymous': 'anonymous', 'is_hidden': 'hidden', 'is_complete': 'complete', - 'is_remake': 'remake' + 'is_remake': 'remake', + 'is_trusted': 'trusted' } UPLOAD_API_FORM_KEYMAP_REVERSE = {v: k for k, v in UPLOAD_API_FORM_KEYMAP.items()} UPLOAD_API_KEYS = [ @@ -140,6 +141,7 @@ UPLOAD_API_KEYS = [ 'hidden', 'complete', 'remake', + 'trusted', 'information', 'description' ] @@ -161,7 +163,7 @@ def v2_api_upload(): # Map api keys to upload form fields for key in UPLOAD_API_KEYS: mapped_key = UPLOAD_API_FORM_KEYMAP_REVERSE.get(key, key) - mapped_dict[mapped_key] = request_data.get(key) + mapped_dict[mapped_key] = request_data.get(key) or '' # Flask-WTF (very helpfully!!) 
automatically grabs the request form, so force a None formdata upload_form = forms.UploadForm(None, data=mapped_dict) diff --git a/nyaa/backend.py b/nyaa/backend.py index 5257f28..6be9b5d 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -68,8 +68,8 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): torrent.remake = upload_form.is_remake.data torrent.complete = upload_form.is_complete.data # Copy trusted status from user if possible - torrent.trusted = (uploading_user.level >= - models.UserLevelType.TRUSTED) if uploading_user else False + can_mark_trusted = uploading_user and uploading_user.is_trusted + torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False # Set category ids torrent.main_category_id, torrent.sub_category_id = \ upload_form.category.parsed_data.get_category_ids() @@ -100,7 +100,9 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): for directory in path_parts: current_directory = current_directory.setdefault(directory, {}) - current_directory[filename] = file_dict['length'] + # Don't add empty filenames (BitComet directory) + if filename: + current_directory[filename] = file_dict['length'] parsed_file_tree = utils.sorted_pathdict(parsed_file_tree) diff --git a/nyaa/forms.py b/nyaa/forms.py index 603f4c3..7197a61 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -153,6 +153,7 @@ class EditForm(FlaskForm): is_remake = BooleanField('Remake') is_anonymous = BooleanField('Anonymous') is_complete = BooleanField('Complete') + is_trusted = BooleanField('Trusted') information = StringField('Information', [ Length(max=255, message='Information must be at most %(max)d characters long.') @@ -200,6 +201,7 @@ class UploadForm(FlaskForm): is_remake = BooleanField('Remake') is_anonymous = BooleanField('Anonymous') is_complete = BooleanField('Complete') + is_trusted = BooleanField('Trusted') information = StringField('Information', [ Length(max=255, message='Information must be at most 
%(max)d characters long.') @@ -295,7 +297,7 @@ class ReportActionForm(FlaskForm): def _validate_trackers(torrent_dict, tracker_to_check_for=None): announce = torrent_dict.get('announce') - announce_string = _validate_bytes(announce, 'announce', 'utf-8') + announce_string = _validate_bytes(announce, 'announce', test_decode='utf-8') tracker_found = tracker_to_check_for and ( announce_string.lower() == tracker_to_check_for.lower()) or False @@ -307,7 +309,7 @@ def _validate_trackers(torrent_dict, tracker_to_check_for=None): for announce in announce_list: _validate_list(announce, 'announce-list item') - announce_string = _validate_bytes(announce[0], 'announce-list item url', 'utf-8') + announce_string = _validate_bytes(announce[0], 'announce-list item url', test_decode='utf-8') if tracker_to_check_for and announce_string.lower() == tracker_to_check_for.lower(): tracker_found = True @@ -323,7 +325,7 @@ def _validate_torrent_metadata(torrent_dict): assert isinstance(info_dict, dict), 'info is not a dict' encoding_bytes = torrent_dict.get('encoding', b'utf-8') - encoding = _validate_bytes(encoding_bytes, 'encoding', 'utf-8').lower() + encoding = _validate_bytes(encoding_bytes, 'encoding', test_decode='utf-8').lower() name = info_dict.get('name') _validate_bytes(name, 'name', test_decode=encoding) @@ -345,17 +347,21 @@ def _validate_torrent_metadata(torrent_dict): path_list = file_dict.get('path') _validate_list(path_list, 'path') - for path_part in path_list: + # Validate possible directory names + for path_part in path_list[:-1]: _validate_bytes(path_part, 'path part', test_decode=encoding) + # Validate actual filename, allow b'' to specify an empty directory + _validate_bytes(path_list[-1], 'filename', check_empty=False, test_decode=encoding) else: length = info_dict.get('length') _validate_number(length, 'length', check_positive=True) -def _validate_bytes(value, name='value', test_decode=None): +def _validate_bytes(value, name='value', check_empty=True, 
test_decode=None): assert isinstance(value, bytes), name + ' is not bytes' - assert len(value) > 0, name + ' is empty' + if check_empty: + assert len(value) > 0, name + ' is empty' if test_decode: try: return value.decode(test_decode) diff --git a/nyaa/models.py b/nyaa/models.py index d106763..609786c 100644 --- a/nyaa/models.py +++ b/nyaa/models.py @@ -8,6 +8,7 @@ from werkzeug.security import generate_password_hash, check_password_hash from sqlalchemy_fulltext import FullText import re +import base64 from markupsafe import escape as escape_markup from urllib.parse import unquote as unquote_url @@ -88,10 +89,14 @@ class Torrent(db.Model): primaryjoin=( "and_(SubCategory.id == foreign(Torrent.sub_category_id), " "SubCategory.main_category_id == Torrent.main_category_id)")) - info = db.relationship('TorrentInfo', uselist=False, back_populates='torrent') - filelist = db.relationship('TorrentFilelist', uselist=False, back_populates='torrent') - stats = db.relationship('Statistic', uselist=False, back_populates='torrent', lazy='joined') - trackers = db.relationship('TorrentTrackers', uselist=True, lazy='joined') + info = db.relationship('TorrentInfo', uselist=False, + cascade="all, delete-orphan", back_populates='torrent') + filelist = db.relationship('TorrentFilelist', uselist=False, + cascade="all, delete-orphan", back_populates='torrent') + stats = db.relationship('Statistic', uselist=False, + cascade="all, delete-orphan", back_populates='torrent', lazy='joined') + trackers = db.relationship('TorrentTrackers', uselist=True, + cascade="all, delete-orphan", lazy='joined') def __repr__(self): return '<{0} #{1.id} \'{1.display_name}\' {1.filesize}b>'.format(type(self).__name__, self) @@ -121,6 +126,14 @@ class Torrent(db.Model): # Escaped return escape_markup(self.information) + @property + def info_hash_as_b32(self): + return base64.b32encode(self.info_hash).decode('utf-8') + + @property + def info_hash_as_hex(self): + return self.info_hash.hex() + @property def 
magnet_uri(self): return create_magnet(self) @@ -370,15 +383,15 @@ class User(db.Model): @property def is_admin(self): - return self.level is UserLevelType.ADMIN or self.level is UserLevelType.SUPERADMIN + return self.level >= UserLevelType.ADMIN @property def is_superadmin(self): - return self.level is UserLevelType.SUPERADMIN + return self.level == UserLevelType.SUPERADMIN @property def is_trusted(self): - return self.level is UserLevelType.TRUSTED + return self.level >= UserLevelType.TRUSTED class ReportStatus(IntEnum): diff --git a/nyaa/routes.py b/nyaa/routes.py index 6fd06e4..b7a34e6 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -134,24 +134,44 @@ def get_category_id_map(): app.register_blueprint(api_handler.api_blueprint, url_prefix='/api') +def chain_get(source, *args): + ''' Tries to return values from source by the given keys. + Returns None if none match. + Note: can return a None from the source. ''' + sentinel = object() + for key in args: + value = source.get(key, sentinel) + if value is not sentinel: + return value + return None @app.route('/rss', defaults={'rss': True}) @app.route('/', defaults={'rss': False}) def home(rss): - if flask.request.args.get('page') == 'rss': - rss = True + render_as_rss = rss + req_args = flask.request.args + if req_args.get('page') == 'rss': + render_as_rss = True - term = flask.request.args.get('q', flask.request.args.get('term')) - sort = flask.request.args.get('s') - order = flask.request.args.get('o') - category = flask.request.args.get('c', flask.request.args.get('cats')) - quality_filter = flask.request.args.get('f', flask.request.args.get('filter')) - user_name = flask.request.args.get('u', flask.request.args.get('user')) - page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int) + search_term = chain_get(req_args, 'q', 'term') - per_page = app.config.get('RESULTS_PER_PAGE') - if not per_page: - per_page = DEFAULT_PER_PAGE + sort_key = req_args.get('s') + sort_order = 
req_args.get('o') + + category = chain_get(req_args, 'c', 'cats') + quality_filter = chain_get(req_args, 'f', 'filter') + + user_name = chain_get(req_args, 'u', 'user') + page_number = chain_get(req_args, 'p', 'page', 'offset') + try: + page_number = max(1, int(page_number)) + except (ValueError, TypeError): + page_number = 1 + + # Check simply if the key exists + use_magnet_links = 'magnets' in req_args or 'm' in req_args + + results_per_page = app.config.get('RESULTS_PER_PAGE', DEFAULT_PER_PAGE) user_id = None if user_name: @@ -162,13 +182,13 @@ def home(rss): query_args = { 'user': user_id, - 'sort': sort or 'id', - 'order': order or 'desc', + 'sort': sort_key or 'id', + 'order': sort_order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', - 'page': page, - 'rss': rss, - 'per_page': per_page + 'page': page_number, + 'rss': render_as_rss, + 'per_page': results_per_page } if flask.g.user: @@ -178,28 +198,26 @@ def home(rss): # If searching, we get results from elastic search use_elastic = app.config.get('USE_ELASTIC_SEARCH') - if use_elastic and term: - query_args['term'] = term + if use_elastic and search_term: + query_args['term'] = search_term - max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') - if not max_search_results: - max_search_results = DEFAULT_MAX_SEARCH_RESULT + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT', DEFAULT_MAX_SEARCH_RESULT) # Only allow up to (max_search_results / page) pages - max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) + max_page = min(query_args['page'], int(math.ceil(max_search_results / results_per_page))) query_args['page'] = max_page query_args['max_search_results'] = max_search_results query_results = search_elastic(**query_args) - if rss: - return render_rss('/', query_results, use_elastic=True) + if render_as_rss: + return render_rss('"{}"'.format(search_term), query_results, use_elastic=True, magnet_links=use_magnet_links) else: 
- rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name) max_results = min(max_search_results, query_results['hits']['total']) # change p= argument to whatever you change page_parameter to or pagination breaks - pagination = Pagination(p=query_args['page'], per_page=per_page, + pagination = Pagination(p=query_args['page'], per_page=results_per_page, total=max_results, bs_version=3, page_parameter='p', display_msg=SERACH_PAGINATE_DISPLAY_MSG) return flask.render_template('home.html', @@ -213,13 +231,13 @@ def home(rss): if use_elastic: query_args['term'] = '' else: # Otherwise, use db search for everything - query_args['term'] = term or '' + query_args['term'] = search_term or '' query = search_db(**query_args) - if rss: - return render_rss('/', query, use_elastic=False) + if render_as_rss: + return render_rss('Home', query, use_elastic=False, magnet_links=use_magnet_links) else: - rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name) # Use elastic is always false here because we only hit this section # if we're browsing without a search term (which means we default to DB) # or if ES is disabled @@ -256,39 +274,38 @@ def view_user(user_name): db.session.add(user) db.session.commit() - return flask.redirect('/user/' + user.username) + return flask.redirect(flask.url_for('view_user', user_name=user.username)) - level = 'Regular' - if user.is_admin: - level = 'Moderator' - if user.is_superadmin: # check this second because user can be admin AND superadmin - level = 'Administrator' - elif user.is_trusted: - level = 'Trusted' + user_level = ['Regular', 'Trusted', 'Moderator', 'Administrator'][user.level] - term = flask.request.args.get('q') - sort = flask.request.args.get('s') - order = flask.request.args.get('o') - category = 
flask.request.args.get('c') - quality_filter = flask.request.args.get('f') - page = flask.request.args.get('p') - if page: - page = int(page) + req_args = flask.request.args - per_page = app.config.get('RESULTS_PER_PAGE') - if not per_page: - per_page = DEFAULT_PER_PAGE + search_term = chain_get(req_args, 'q', 'term') + + sort_key = req_args.get('s') + sort_order = req_args.get('o') + + category = chain_get(req_args, 'c', 'cats') + quality_filter = chain_get(req_args, 'f', 'filter') + + page_number = chain_get(req_args, 'p', 'page', 'offset') + try: + page_number = max(1, int(page_number)) + except (ValueError, TypeError): + page_number = 1 + + results_per_page = app.config.get('RESULTS_PER_PAGE', DEFAULT_PER_PAGE) query_args = { - 'term': term or '', + 'term': search_term or '', 'user': user.id, - 'sort': sort or 'id', - 'order': order or 'desc', + 'sort': sort_key or 'id', + 'order': sort_order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', - 'page': page or 1, + 'page': page_number, 'rss': False, - 'per_page': per_page + 'per_page': results_per_page } if flask.g.user: @@ -297,17 +314,15 @@ def view_user(user_name): query_args['admin'] = True # Use elastic search for term searching - rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + rss_query_string = _generate_query_string(search_term, category, quality_filter, user_name) use_elastic = app.config.get('USE_ELASTIC_SEARCH') - if use_elastic and term: - query_args['term'] = term + if use_elastic and search_term: + query_args['term'] = search_term - max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') - if not max_search_results: - max_search_results = DEFAULT_MAX_SEARCH_RESULT + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT', DEFAULT_MAX_SEARCH_RESULT) # Only allow up to (max_search_results / page) pages - max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) + max_page = 
min(query_args['page'], int(math.ceil(max_search_results / results_per_page))) query_args['page'] = max_page query_args['max_search_results'] = max_search_results @@ -316,7 +331,7 @@ def view_user(user_name): max_results = min(max_search_results, query_results['hits']['total']) # change p= argument to whatever you change page_parameter to or pagination breaks - pagination = Pagination(p=query_args['page'], per_page=per_page, + pagination = Pagination(p=query_args['page'], per_page=results_per_page, total=max_results, bs_version=3, page_parameter='p', display_msg=SERACH_PAGINATE_DISPLAY_MSG) return flask.render_template('user.html', @@ -327,7 +342,7 @@ def view_user(user_name): user=user, user_page=True, rss_filter=rss_query_string, - level=level, + level=user_level, admin=admin, superadmin=superadmin, form=form) @@ -336,7 +351,7 @@ def view_user(user_name): if use_elastic: query_args['term'] = '' else: - query_args['term'] = term or '' + query_args['term'] = search_term or '' query = search_db(**query_args) return flask.render_template('user.html', use_elastic=False, @@ -345,7 +360,7 @@ def view_user(user_name): user=user, user_page=True, rss_filter=rss_query_string, - level=level, + level=user_level, admin=admin, superadmin=superadmin, form=form) @@ -361,9 +376,10 @@ def _jinja2_filter_rfc822(datestr, fmt=None): return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) -def render_rss(label, query, use_elastic): +def render_rss(label, query, use_elastic, magnet_links=False): rss_xml = flask.render_template('rss.xml', use_elastic=use_elastic, + magnet_links=magnet_links, term=label, site_url=flask.request.url_root, torrent_query=query) @@ -538,7 +554,8 @@ def _create_upload_category_choices(): cat_names = id_map[key] is_main_cat = key.endswith('_0') - cat_name = is_main_cat and cat_names[0] or (' - ' + cat_names[1]) + # cat_name = is_main_cat and cat_names[0] or (' - ' + cat_names[1]) + cat_name = ' - '.join(cat_names) 
choices.append((key, cat_name, is_main_cat)) return choices @@ -562,16 +579,17 @@ def upload(): def view_torrent(torrent_id): torrent = models.Torrent.by_id(torrent_id) + viewer = flask.g.user + if not torrent: flask.abort(404) - if torrent.deleted and (not flask.g.user or not flask.g.user.is_admin): + # Only allow admins to see deleted torrents + if torrent.deleted and not (viewer and viewer.is_admin): flask.abort(404) - if flask.g.user: - can_edit = flask.g.user is torrent.user or flask.g.user.is_admin - else: - can_edit = False + # Only allow owners and admins to edit torrents + can_edit = viewer and (viewer is torrent.user or viewer.is_admin) files = None if torrent.filelist: @@ -580,6 +598,7 @@ def view_torrent(torrent_id): report_form = forms.ReportForm() return flask.render_template('view.html', torrent=torrent, files=files, + viewer=viewer, can_edit=can_edit, report_form=report_form) @@ -589,15 +608,18 @@ def edit_torrent(torrent_id): torrent = models.Torrent.by_id(torrent_id) form = forms.EditForm(flask.request.form) form.category.choices = _create_upload_category_choices() - category = str(torrent.main_category_id) + "_" + str(torrent.sub_category_id) + + editor = flask.g.user if not torrent: flask.abort(404) - if torrent.deleted and (not flask.g.user or not flask.g.user.is_admin): + # Only allow admins to edit deleted torrents + if torrent.deleted and not (editor and editor.is_admin): flask.abort(404) - if not flask.g.user or (flask.g.user is not torrent.user and not flask.g.user.is_admin): + # Only allow torrent owners or admins to edit torrents + if not editor or not (editor is torrent.user or editor.is_admin): flask.abort(403) if flask.request.method == 'POST' and form.validate(): @@ -607,36 +629,43 @@ def edit_torrent(torrent_id): torrent.display_name = (form.display_name.data or '').strip() torrent.information = (form.information.data or '').strip() torrent.description = (form.description.data or '').strip() - if flask.g.user.is_admin: - torrent.deleted = 
form.is_deleted.data + torrent.hidden = form.is_hidden.data torrent.remake = form.is_remake.data torrent.complete = form.is_complete.data torrent.anonymous = form.is_anonymous.data + if editor.is_trusted: + torrent.trusted = form.is_trusted.data + if editor.is_admin: + torrent.deleted = form.is_deleted.data + db.session.commit() flask.flash(flask.Markup( 'Torrent has been successfully edited! Changes might take a few minutes to show up.'), 'info') - return flask.redirect('/view/' + str(torrent_id)) + return flask.redirect(flask.url_for('view_torrent', torrent_id=torrent.id)) else: - # Setup form with pre-formatted form. - form.category.data = category - form.display_name.data = torrent.display_name - form.information.data = torrent.information - form.description.data = torrent.description - form.is_hidden.data = torrent.hidden - if flask.g.user.is_admin: + if flask.request.method != 'POST': + # Fill form data only if the POST didn't fail + form.category.data = torrent.sub_category.id_as_string + form.display_name.data = torrent.display_name + form.information.data = torrent.information + form.description.data = torrent.description + + form.is_hidden.data = torrent.hidden + form.is_remake.data = torrent.remake + form.is_complete.data = torrent.complete + form.is_anonymous.data = torrent.anonymous + + form.is_trusted.data = torrent.trusted form.is_deleted.data = torrent.deleted - form.is_remake.data = torrent.remake - form.is_complete.data = torrent.complete - form.is_anonymous.data = torrent.anonymous return flask.render_template('edit.html', form=form, torrent=torrent, - admin=flask.g.user.is_admin) + editor=editor) @app.route('/view//magnet') diff --git a/nyaa/static/css/main.css b/nyaa/static/css/main.css index c1367ff..07e5a6a 100644 --- a/nyaa/static/css/main.css +++ b/nyaa/static/css/main.css @@ -58,6 +58,10 @@ table.torrent-list thead th.sorting_desc:after { content: "\f0dd"; } +table.torrent-list tbody tr td a:visited { + color: #1d4568; +} + 
#torrent-description img { max-width: 100%; } diff --git a/nyaa/templates/_formhelpers.html b/nyaa/templates/_formhelpers.html index a84581e..2588d04 100644 --- a/nyaa/templates/_formhelpers.html +++ b/nyaa/templates/_formhelpers.html @@ -1,10 +1,12 @@ -{% macro render_field(field) %} +{% macro render_field(field, render_label=True) %} {% if field.errors %}
{% else %}
{% endif %} + {% if render_label %} {{ field.label(class='control-label') }} + {% endif %} {{ field(title=field.description,**kwargs) | safe }} {% if field.errors %}
@@ -27,33 +29,33 @@ {% macro render_markdown_editor(field, field_name='') %} {% if field.errors %} -
+
{% else %} -
+
{% endif %} -
- -
-
- {{ render_field(field, class_='form-control markdown-source') }} -
-
- {{ field.label(class='control-label') }} -
-
-
-
+
+ {{ field.label(class='control-label') }} + +
+
+ {{ render_field(field, False, class_='form-control markdown-source') }} +
+
+
+
+
+
{% endmacro %} diff --git a/nyaa/templates/edit.html b/nyaa/templates/edit.html index 6ea01d4..44dfd51 100644 --- a/nyaa/templates/edit.html +++ b/nyaa/templates/edit.html @@ -4,79 +4,73 @@ {% from "_formhelpers.html" import render_field %} {% from "_formhelpers.html" import render_markdown_editor %} -

Edit Torrent

+{% set torrent_url = url_for('view_torrent', torrent_id=torrent.id) %} +

+ Edit Torrent #{{torrent.id}} + {% if (torrent.user != None) and (torrent.user != editor) %} + (by {{ torrent.user.username }}) + {% endif %} +

{{ form.csrf_token }} +
-
- {{ render_field(form.category, class_='form-control')}} +
+ {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }} +
+
+ {{ render_field(form.category, class_='form-control')}}
-
-
- {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }} +
+ {{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }}
-
+
+ +
+ {% if editor.is_admin %} + + {% endif %} -
-
- {{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }} -
-
- -
-
- {{ render_markdown_editor(form.description, field_name='description') }} -
-
- - {% if admin %} -
-
- -
-
- {% endif %} - -
-
-
-
- -
-
-
-
- -
-
-
-
- +
+ {{ render_markdown_editor(form.description, field_name='description') }}
diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index e1787d2..7664da9 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -1,37 +1,45 @@ - {{ config.SITE_NAME }} Torrent File RSS (No magnets) + {{ config.SITE_NAME }} Torrent File RSS RSS Feed for {{ term }} {{ url_for('home', _external=True) }} {% for torrent in torrent_query %} - {% if torrent.has_torrent %} {{ torrent.display_name }} + {# #} {% if use_elastic %} - {{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }} - {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} - {{ torrent.created_time|rfc822_es }} + {% if torrent.has_torrent and not magnet_links %} + {{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }} + {% else %} + {{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }} + {% endif %} + {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ torrent.created_time|rfc822_es }} + + {{- torrent.seed_count }} + {{- torrent.leech_count }} + {{- torrent.download_count }} + {{- torrent.info_hash }} {% else %} - {{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }} - {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} - {{ torrent.created_time|rfc822 }} - {% endif %} - - {% else %} - - {{ torrent.display_name }} - {% if use_elastic %} - {{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }} - {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} - {{ torrent.created_time|rfc822_es }} - {% else %} - {{ torrent.magnet_uri }} - {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} - {{ torrent.created_time|rfc822 }} + {% if torrent.has_torrent and not magnet_links %} + {{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }} + {% else %} + {{ torrent.magnet_uri }} + {% endif %} + {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} + {{ 
torrent.created_time|rfc822 }} + + {{- torrent.stats.seed_count }} + {{- torrent.stats.leech_count }} + {{- torrent.stats.download_count }} + {{- torrent.info_hash_as_hex }} {% endif %} + {% set cat_id = use_elastic and ((torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string)) or torrent.sub_category.id_as_string %} + {{- cat_id }} + {{- category_name(cat_id) }} + {{- torrent.filesize | filesizeformat(True) }} - {% endif %} {% endfor %} diff --git a/nyaa/templates/upload.html b/nyaa/templates/upload.html index 0a49282..778a37d 100644 --- a/nyaa/templates/upload.html +++ b/nyaa/templates/upload.html @@ -16,68 +16,57 @@ {% if config.ENFORCE_MAIN_ANNOUNCE_URL %}

Important: Please include {{config.MAIN_ANNOUNCE_URL}} in your trackers

{% endif %}
-
- {{ render_upload(form.torrent_file, accept=".torrent") }} +
+ {{ render_upload(form.torrent_file, accept=".torrent") }}
-
-
- {{ render_field(form.category, class_='form-control')}} +
+ {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }} +
+
+ {{ render_field(form.category, class_='form-control')}}
-
-
- {{ render_field(form.display_name, class_='form-control', placeholder='Display name') }} -
+
- -
-
+
+
{{ render_field(form.information, class_='form-control', placeholder='Your website or IRC channel') }}
-
- -
-
- {{ render_markdown_editor(form.description, field_name='description') }} -
-
- -
-
-
+
+
+ {{ render_markdown_editor(form.description, field_name='description') }}
diff --git a/nyaa/templates/view.html b/nyaa/templates/view.html index 21d40d7..61512e9 100644 --- a/nyaa/templates/view.html +++ b/nyaa/templates/view.html @@ -6,7 +6,7 @@

{% if can_edit %} - + {% endif %} {{ torrent.display_name }}

@@ -24,7 +24,14 @@
Submitter:
-
{% if not torrent.anonymous and torrent.user %}{{ torrent.user.username }}{% else %}Anonymous{% endif %}
+
+ {% set user_url = torrent.user and url_for('view_user', user_name=torrent.user.username) %} + {%- if not torrent.anonymous and torrent.user -%} + {{ torrent.user.username }} + {%- else -%} + Anonymous {% if torrent.user and (viewer == torrent.user or viewer.is_admin) %}({{ torrent.user.username }}){% endif %} + {%- endif -%} +
Seeders:
{% if config.ENABLE_SHOW_STATS %}{{ torrent.stats.seed_count }}{% else %}Coming soon{% endif %}
diff --git a/requirements.txt b/requirements.txt index 843b935..6e23eca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,4 +37,5 @@ elasticsearch==5.3.0 elasticsearch-dsl==5.2.0 progressbar2==3.20.0 mysql-replication==0.13 -flask-paginate==0.4.5 \ No newline at end of file +flask-paginate==0.4.5 +statsd==3.2.1 diff --git a/sync_es.py b/sync_es.py index 4cbd9f2..cafafb5 100644 --- a/sync_es.py +++ b/sync_es.py @@ -21,9 +21,12 @@ when import_to_es and sync_es will be lost. Instead, you can run SHOW MASTER STATUS _before_ you run import_to_es. That way you'll definitely pick up any changes that happen while the import_to_es script is dumping stuff from the database into es, at the expense of redoing a (small) amount of indexing. + +This uses multithreading so we don't have to block on socket io (both binlog +reading and es POSTing). asyncio soon™ """ from elasticsearch import Elasticsearch -from elasticsearch.helpers import bulk +from elasticsearch.helpers import bulk, BulkIndexError from pymysqlreplication import BinLogStreamReader from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent from datetime import datetime @@ -32,51 +35,39 @@ import sys import json import time import logging +from statsd import StatsClient +from threading import Thread +from queue import Queue, Empty -logging.basicConfig() +logging.basicConfig(format='%(asctime)s %(levelname)s %(name)s - %(message)s') log = logging.getLogger('sync_es') log.setLevel(logging.INFO) +# config in json, 2lazy to argparse +if len(sys.argv) != 2: + print("need config.json location", file=sys.stderr) + sys.exit(-1) +with open(sys.argv[1]) as f: + config = json.load(f) + +# goes to netdata or other statsd listener +stats = StatsClient('localhost', 8125, prefix="sync_es") + #logging.getLogger('elasticsearch').setLevel(logging.DEBUG) # in prod want in /var/lib somewhere probably -SAVE_LOC = "/var/lib/sync_es_position.json" -MYSQL_HOST = '127.0.0.1' -MYSQL_PORT = 3306 
-MYSQL_USER = 'test' -MYSQL_PW = 'test123' -NT_DB = 'nyaav2' - -with open(SAVE_LOC) as f: - pos = json.load(f) - -es = Elasticsearch(timeout=30) - -stream = BinLogStreamReader( - # TODO parse out from config.py or something - connection_settings = { - 'host': MYSQL_HOST, - 'port': MYSQL_PORT, - 'user': MYSQL_USER, - 'passwd': MYSQL_PW - }, - server_id=10, # arbitrary - # only care about this database currently - only_schemas=[NT_DB], - # these tables in the database - only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"], - # from our save file - resume_stream=True, - log_file=pos['log_file'], - log_pos=pos['log_pos'], - # skip the other stuff like table mapping - only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent], - # if we're at the head of the log, block until something happens - # note it'd be nice to block async-style instead, but the mainline - # binlogreader is synchronous. there is an (unmaintained?) fork - # using aiomysql if anybody wants to revive that. - blocking=True) +SAVE_LOC = config.get('save_loc', "/tmp/pos.json") +MYSQL_HOST = config.get('mysql_host', '127.0.0.1') +MYSQL_PORT = config.get('mysql_port', 3306) +MYSQL_USER = config.get('mysql_user', 'root') +MYSQL_PW = config.get('mysql_password', 'dunnolol') +NT_DB = config.get('database', 'nyaav2') +INTERNAL_QUEUE_DEPTH = config.get('internal_queue_depth', 10000) +ES_CHUNK_SIZE = config.get('es_chunk_size', 10000) +# seconds since no events happening to flush to es. remember this also +# interacts with es' refresh_interval setting. 
+FLUSH_INTERVAL = config.get('flush_interval', 5) def reindex_torrent(t, index_name): # XXX annoyingly different from import_to_es, and @@ -120,8 +111,8 @@ def reindex_torrent(t, index_name): def reindex_stats(s, index_name): # update the torrent at torrent_id, assumed to exist; # this will always be the case if you're reading the binlog - # in order; the foreign key constraint on torrrent_id prevents - # the stats row rom existing if the torrent isn't around. + # in order; the foreign key constraint on torrent_id prevents + # the stats row from existing if the torrent isn't around. return { '_op_type': 'update', '_index': index_name, @@ -141,45 +132,176 @@ def delet_this(row, index_name): '_type': 'torrent', '_id': str(row['values']['id'])} -n = 0 -last_save = time.time() -for event in stream: - if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": - if event.table == "nyaa_torrents": - index_name = "nyaa" - else: - index_name = "sukebei" - if type(event) is WriteRowsEvent: - bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows)) - elif type(event) is UpdateRowsEvent: - # UpdateRowsEvent includes the old values too, but we don't care - bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows)) - elif type(event) is DeleteRowsEvent: - # ok, bye - bulk(es, (delet_this(row, index_name) for row in event.rows)) - else: - raise Exception(f"unknown event {type(event)}") - elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics": - if event.table == "nyaa_statistics": - index_name = "nyaa" - else: - index_name = "sukebei" - if type(event) is WriteRowsEvent: - bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows)) - elif type(event) is UpdateRowsEvent: - bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows)) - elif type(event) is DeleteRowsEvent: - # uh ok. 
assume that the torrent row will get deleted later, - # which will clean up the entire es "torrent" document - pass - else: - raise Exception(f"unknown event {type(event)}") - else: - raise Exception(f"unknown table {s.table}") +class BinlogReader(Thread): + # write_buf is the Queue we communicate with + def __init__(self, write_buf): + Thread.__init__(self) + self.write_buf = write_buf - n += 1 - if n % 100 == 0 or time.time() - last_save > 30: - log.info(f"saving position {stream.log_file}/{stream.log_pos}") - with open(SAVE_LOC, 'w') as f: - json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f) + def run(self): + with open(SAVE_LOC) as f: + pos = json.load(f) + + stream = BinLogStreamReader( + # TODO parse out from config.py or something + connection_settings = { + 'host': MYSQL_HOST, + 'port': MYSQL_PORT, + 'user': MYSQL_USER, + 'passwd': MYSQL_PW + }, + server_id=10, # arbitrary + # only care about this database currently + only_schemas=[NT_DB], + # these tables in the database + only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"], + # from our save file + resume_stream=True, + log_file=pos['log_file'], + log_pos=pos['log_pos'], + # skip the other stuff like table mapping + only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent], + # if we're at the head of the log, block until something happens + # note it'd be nice to block async-style instead, but the mainline + # binlogreader is synchronous. there is an (unmaintained?) fork + # using aiomysql if anybody wants to revive that. + blocking=True) + + log.info(f"reading binlog from {stream.log_file}/{stream.log_pos}") + + for event in stream: + # save the pos of the stream and timestamp with each message, so we + # can commit in the other thread. 
and keep track of process latency + pos = (stream.log_file, stream.log_pos, event.timestamp) + with stats.pipeline() as s: + s.incr('total_events') + s.incr(f"event.{event.table}.{type(event).__name__}") + s.incr('total_rows', len(event.rows)) + s.incr(f"rows.{event.table}.{type(event).__name__}", len(event.rows)) + # XXX not a "timer", but we get a histogram out of it + s.timing(f"rows_per_event.{event.table}.{type(event).__name__}", len(event.rows)) + + if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" + if type(event) is WriteRowsEvent: + for row in event.rows: + self.write_buf.put( + (pos, reindex_torrent(row['values'], index_name)), + block=True) + elif type(event) is UpdateRowsEvent: + # UpdateRowsEvent includes the old values too, but we don't care + for row in event.rows: + self.write_buf.put( + (pos, reindex_torrent(row['after_values'], index_name)), + block=True) + elif type(event) is DeleteRowsEvent: + # ok, bye + for row in event.rows: + self.write_buf.put((pos, delet_this(row, index_name)), block=True) + else: + raise Exception(f"unknown event {type(event)}") + elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics": + if event.table == "nyaa_statistics": + index_name = "nyaa" + else: + index_name = "sukebei" + if type(event) is WriteRowsEvent: + for row in event.rows: + self.write_buf.put( + (pos, reindex_stats(row['values'], index_name)), + block=True) + elif type(event) is UpdateRowsEvent: + for row in event.rows: + self.write_buf.put( + (pos, reindex_stats(row['after_values'], index_name)), + block=True) + elif type(event) is DeleteRowsEvent: + # uh ok. 
Assume that the torrent row will get deleted later, + # which will clean up the entire es "torrent" document + pass + else: + raise Exception(f"unknown event {type(event)}") + else: + raise Exception(f"unknown table {s.table}") + +class EsPoster(Thread): + # read_buf is the queue of stuff to bulk post + def __init__(self, read_buf, chunk_size=1000, flush_interval=5): + Thread.__init__(self) + self.read_buf = read_buf + self.chunk_size = chunk_size + self.flush_interval = flush_interval + + def run(self): + es = Elasticsearch(timeout=30) + + last_save = time.time() + since_last = 0 + + while True: + actions = [] + while len(actions) < self.chunk_size: + try: + # grab next event from queue with metadata that creepily + # updates, surviving outside the scope of the loop + ((log_file, log_pos, timestamp), action) = \ + self.read_buf.get(block=True, timeout=self.flush_interval) + actions.append(action) + except Empty: + # nothing new for the whole interval + break + + if not actions: + # nothing to post + log.debug("no changes...") + continue + + # XXX "time" to get histogram of no events per bulk + stats.timing('actions_per_bulk', len(actions)) + + try: + with stats.timer('post_bulk'): + bulk(es, actions, chunk_size=self.chunk_size) + except BulkIndexError as bie: + # in certain cases where we're really out of sync, we update a + # stat when the torrent doc is, causing a "document missing" + # error from es, with no way to suppress that server-side. 
+ # Thus ignore that type of error if it's the only problem + for e in bie.errors: + try: + if e['update']['error']['type'] != 'document_missing_exception': + raise bie + except KeyError: + raise bie + + # how far we're behind, wall clock + stats.gauge('process_latency', int((time.time() - timestamp) * 1000)) + + since_last += len(actions) + if since_last >= 10000 or (time.time() - last_save) > 10: + log.info(f"saving position {log_file}/{log_pos}, {time.time() - timestamp:,.3f} seconds behind") + with stats.timer('save_pos'): + with open(SAVE_LOC, 'w') as f: + json.dump({"log_file": log_file, "log_pos": log_pos}, f) + last_save = time.time() + since_last = 0 + +# in-memory queue between binlog and es. The bigger it is, the more events we +# can parse in memory while waiting for es to catch up, at the expense of heap. +buf = Queue(maxsize=INTERNAL_QUEUE_DEPTH) + +reader = BinlogReader(buf) +reader.daemon = True +writer = EsPoster(buf, chunk_size=ES_CHUNK_SIZE, flush_interval=FLUSH_INTERVAL) +writer.daemon = True +reader.start() +writer.start() + +# on the main thread, poll the queue size for monitoring +while True: + stats.gauge('queue_depth', buf.qsize()) + time.sleep(1) diff --git a/utils/api_uploader_v2.py b/utils/api_uploader_v2.py index 70d6371..60d3c36 100755 --- a/utils/api_uploader_v2.py +++ b/utils/api_uploader_v2.py @@ -87,6 +87,11 @@ tor_group.add_argument('-H', '--hidden', default=False, action='store_true', hel tor_group.add_argument('-C', '--complete', default=False, action='store_true', help='Mark torrent as complete (eg. season batch)') tor_group.add_argument('-R', '--remake', default=False, action='store_true', help='Mark torrent as remake (derivative work from another release)') +trusted_group = tor_group.add_mutually_exclusive_group(required=False) +trusted_group.add_argument('-T', '--trusted', dest='trusted', action='store_true', help='Mark torrent as trusted, if possible. 
Defaults to true') +trusted_group.add_argument('--no-trusted', dest='trusted', action='store_false', help='Do not mark torrent as trusted') +parser.set_defaults(trusted=True) + tor_group.add_argument('torrent', metavar='TORRENT_FILE', help='The .torrent file to upload') @@ -145,6 +150,7 @@ if __name__ == "__main__": 'hidden' : args.hidden, 'complete' : args.complete, 'remake' : args.remake, + 'trusted' : args.trusted, } encoded_data = { 'torrent_data' : json.dumps(data)