diff --git a/README.md b/README.md index 5ec0077..632ce73 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,43 @@ - Start the dev server with `python run.py` - Deactivate `source deactivate` +# Enabling ElasticSearch + +## Basics +- Install jdk `sudo apt-get install openjdk-8-jdk` +- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html +- `sudo systemctl enable elasticsearch.service` +- `sudo systemctl start elasticsearch.service` +- Run `curl -XGET 'localhost:9200'` and make sure ES is running +- Optional: install Kibana as a search frontend for ES + +## Enable MySQL Binlogging +- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server +- Connect to mysql +- `SHOW VARIABLES LIKE 'binlog_format';` + - Make sure it shows ROW +- Connect to root user +- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with + +## Setting up ES +- Run `./create_es.sh` and this creates two indices: `nyaa` and `sukebei` +- The output should show `acknowledged: true` twice +- The safest bet is to disable the webapp here to ensure there are no database writes +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa` +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei` +- These will take some time to run as it's indexing + +## Setting up sync_es.py +- Sync_es.py keeps the ElasticSearch index updated by reading the BinLog +- Configure the MySQL options with the user where you granted the REPLICATION permissions +- Connect to MySQL, run `SHOW MASTER STATUS;`. 
+- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}`, replacing FILE with the File value (something like master1-bin.000002) from the SQL output and POSITION with the Position value (something like 892528513) +- Set up `sync_es.py` as a service and run it, preferably as the system/root user +- Make sure `sync_es.py` runs within venv with the right dependencies + +## Good to go! +- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go + + ## Code Quality: - Remember to follow PEP8 style guidelines and run `./lint.sh` before committing. diff --git a/config.example.py b/config.example.py index f34c554..73702b9 100644 --- a/config.example.py +++ b/config.example.py @@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***' SMTP_USERNAME = '***' SMTP_PASSWORD = '***' -RESULTS_PER_PAGE = 75 - # What the site identifies itself as. SITE_NAME = 'Nyaa' @@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False MAIN_ANNOUNCE_URL = '' BACKUP_TORRENT_FOLDER = 'torrents' + +# +# Search Options +# +# Max ES search results, do not set over 10000 +RESULTS_PER_PAGE = 75 + +USE_ELASTIC_SEARCH = False +ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False +ES_MAX_SEARCH_RESULT = 1000 +ES_INDEX_NAME = SITE_FLAVOR # we create indices named nyaa or sukebei \ No newline at end of file diff --git a/my.cnf b/configs/my.cnf similarity index 67% rename from my.cnf rename to configs/my.cnf index 657a8f6..d586484 100644 --- a/my.cnf +++ b/configs/my.cnf @@ -4,3 +4,9 @@ ft_min_word_len=2 innodb_ft_cache_size = 80000000 innodb_ft_total_cache_size = 1600000000 max_allowed_packet = 100M + +[mariadb] +log-bin +server_id=1 +log-basename=master1 +binlog-format = row diff --git a/create_es.sh b/create_es.sh new file mode 100755 index 0000000..5b0c564 --- /dev/null +++ b/create_es.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +# create indices named "nyaa" and "sukebei", these are hardcoded +curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: 
application/yaml" --data-binary @es_mapping.yml +curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/es_mapping.yml b/es_mapping.yml new file mode 100644 index 0000000..9085ec2 --- /dev/null +++ b/es_mapping.yml @@ -0,0 +1,91 @@ +--- +# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml +# for inline comments. +settings: + analysis: + analyzer: + my_search_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - standard + - lowercase + my_index_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - lowercase + - my_ngram + filter: + my_ngram: + type: edgeNGram + min_gram: 1 + max_gram: 15 + char_filter: + my_char_filter: + type: mapping + mappings: ["-=>_", "!=>_"] + index: + # we're running a single es node, so no sharding necessary, + # plus replicas don't really help either. + number_of_shards: 1 + number_of_replicas : 0 + mapper: + # disable elasticsearch's "helpful" autoschema + dynamic: false + # since we disabled the _all field, default query the + # name of the torrent. 
+ query: + default_field: display_name +mappings: + torrent: + # don't want everything concatenated + _all: + enabled: false + properties: + id: + type: long + display_name: + # TODO could do a fancier tokenizer here to parse out + # the scene convention of stuff in brackets, plus stuff like k-on + type: text + analyzer: my_index_analyzer + fielddata: true + created_time: + type: date + # Only in the ES index for generating magnet links + info_hash: + enabled: false + filesize: + type: long + anonymous: + type: boolean + trusted: + type: boolean + remake: + type: boolean + complete: + type: boolean + hidden: + type: boolean + deleted: + type: boolean + has_torrent: + type: boolean + download_count: + type: long + leech_count: + type: long + seed_count: + type: long + # these ids are really only for filtering, thus keyword + uploader_id: + type: keyword + main_category_id: + type: keyword + sub_category_id: + type: keyword \ No newline at end of file diff --git a/import_to_es.py b/import_to_es.py new file mode 100644 index 0000000..106cbc1 --- /dev/null +++ b/import_to_es.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +""" +Bulk load torrents from mysql into the elasticsearch index named by `ES_INDEX_NAME`, +which is assumed to already exist. +This is a one-shot deal, so you'd either need to complement it +with a cron job or some binlog-reading thing (TODO) +""" +from nyaa import app +from nyaa.models import Torrent +from elasticsearch import Elasticsearch +from elasticsearch.client import IndicesClient +from elasticsearch import helpers +import progressbar +import sys + +bar = progressbar.ProgressBar( + max_value=Torrent.query.count(), + widgets=[ + progressbar.SimpleProgress(), + ' [', progressbar.Timer(), '] ', + progressbar.Bar(), + ' (', progressbar.ETA(), ') ', + ]) + +es = Elasticsearch(timeout=30) +ic = IndicesClient(es) + +# turn into thing that elasticsearch indexes. We flatten in +# the stats (seeders/leechers) so we can order by them in es naturally. 
+# we _don't_ dereference uploader_id to the user's display name however, +# instead doing that at query time. I _think_ this is right because +# we don't want to reindex all the user's torrents just because they +# changed their name, and we don't really want to FTS search on the user anyway. +# Maybe it's more convenient to dereference though. +def mk_es(t): + return { + "_id": t.id, + "_type": "torrent", + "_index": app.config['ES_INDEX_NAME'], + "_source": { + # we're also indexing the id as a number so you can + # order by it. seems like this is just equivalent to + # order by created_time, but oh well + "id": t.id, + "display_name": t.display_name, + "created_time": t.created_time, + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t.info_hash.hex(), + "filesize": t.filesize, + "uploader_id": t.uploader_id, + "main_category_id": t.main_category_id, + "sub_category_id": t.sub_category_id, + # XXX all the bitflags are numbers + "anonymous": bool(t.anonymous), + "trusted": bool(t.trusted), + "remake": bool(t.remake), + "complete": bool(t.complete), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. + "hidden": bool(t.hidden), + "deleted": bool(t.deleted), + "has_torrent": t.has_torrent, + # Stats + "download_count": t.stats.download_count, + "leech_count": t.stats.leech_count, + "seed_count": t.stats.seed_count, + } + } + +# page through an sqlalchemy query, like the per_fetch but +# doesn't break the eager joins it's doing against the stats table. +# annoying that this isn't built in somehow. 
+def page_query(query, limit=sys.maxsize, batch_size=10000): + start = 0 + while True: + # XXX very inelegant way to do this, i'm confus + stop = min(limit, start + batch_size) + if stop == start: + break + things = query.slice(start, stop) + if not things: + break + had_things = False + for thing in things: + had_things = True + yield(thing) + if not had_things or stop == limit: + break + bar.update(start) + start = min(limit, start + batch_size) + +# turn off refreshes while bulk loading +ic.put_settings(body={'index': {'refresh_interval': '-1'}}, index=app.config['ES_INDEX_NAME']) + +helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000) + +# restore to near-enough real time +ic.put_settings(body={'index': {'refresh_interval': '30s'}}, index=app.config['ES_INDEX_NAME']) diff --git a/nyaa/__init__.py b/nyaa/__init__.py index aeda6be..7e934cb 100644 --- a/nyaa/__init__.py +++ b/nyaa/__init__.py @@ -60,4 +60,4 @@ assets = Environment(app) # output='style.css', depends='**/*.scss') # assets.register('style_all', css) -from nyaa import routes +from nyaa import routes # noqa diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 7ecd720..2ce1d70 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -10,7 +10,7 @@ from orderedset import OrderedSet from werkzeug import secure_filename DEBUG_API = False -#################################### API ROUTES #################################### +# #################################### API ROUTES #################################### CATEGORIES = [ ('Anime', ['Anime Music Video', 'English-translated', 'Non-English-translated', 'Raw']), ('Audio', ['Lossless', 'Lossy']), @@ -30,7 +30,7 @@ def validate_main_sub_cat(main_cat_name, sub_cat_name): cat_id = main_cat.id_as_string sub_cat_id = sub_cat.id_as_string cat_sub_cat = sub_cat_id.split('_') - #print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1])) + # print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1])) 
return True, cat_sub_cat[0], cat_sub_cat[1] @@ -112,17 +112,22 @@ def api_upload(upload_request): if DEBUG_API: print(json.dumps(j, indent=4)) - _json_keys = ['username', 'password', - 'display_name', 'main_cat', 'sub_cat', 'flags'] # 'information' and 'description' are not required + _json_keys = ['username', + 'password', + 'display_name', + 'main_cat', + 'sub_cat', + 'flags'] # 'information' and 'description' are not required # Check that required fields are present for _k in _json_keys: if _k not in j.keys(): - return flask.make_response(flask.jsonify({"Error": "Missing JSON field: {0}.".format(_k)}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Missing JSON field: {0}.".format(_k)}), 400) # Check that no extra fields are present for k in j.keys(): - if k not in ['username', 'password', - 'display_name', 'main_cat', 'sub_cat', 'information', 'description', 'flags']: - return flask.make_response(flask.jsonify({"Error": "Incorrect JSON field(s)."}), 400) + if k not in set(_json_keys + ['information', 'description']): + return flask.make_response(flask.jsonify( + {"Error": "Incorrect JSON field(s)."}), 400) else: return flask.make_response(flask.jsonify({"Error": "No metadata."}), 400) if 'torrent' in upload_request.files: @@ -143,14 +148,17 @@ def api_upload(upload_request): if not user: user = models.User.by_email(username) - if not user or password != user.password_hash or user.status == models.UserStatusType.INACTIVE: - return flask.make_response(flask.jsonify({"Error": "Incorrect username or password"}), 403) + if (not user or password != user.password_hash + or user.status == models.UserStatusType.INACTIVE): + return flask.make_response(flask.jsonify( + {"Error": "Incorrect username or password"}), 403) current_user = user display_name = j['display_name'] if (len(display_name) < 3) or (len(display_name) > 1024): - return flask.make_response(flask.jsonify({"Error": "Torrent name must be between 3 and 1024 characters."}), 400) + return 
flask.make_response(flask.jsonify( + {"Error": "Torrent name must be between 3 and 1024 characters."}), 400) main_cat_name = j['main_cat'] sub_cat_name = j['sub_cat'] @@ -158,14 +166,16 @@ def api_upload(upload_request): cat_subcat_status, cat_id, sub_cat_id = validate_main_sub_cat( main_cat_name, sub_cat_name) if not cat_subcat_status: - return flask.make_response(flask.jsonify({"Error": "Incorrect Category / Sub-Category."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect Category / Sub-Category."}), 400) # TODO Sanitize information information = None try: information = j['information'] if len(information) > 255: - return flask.make_response(flask.jsonify({"Error": "Information is limited to 255 characters."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Information is limited to 255 characters."}), 400) except Exception as e: information = '' @@ -173,8 +183,10 @@ def api_upload(upload_request): description = None try: description = j['description'] - if len(description) > (10 * 1024): - return flask.make_response(flask.jsonify({"Error": "Description is limited to {0} characters.".format(10 * 1024)}), 403) + limit = 10 * 1024 + if len(description) > limit: + return flask.make_response(flask.jsonify( + {"Error": "Description is limited to {0} characters.".format(limit)}), 403) except Exception as e: description = '' @@ -182,13 +194,15 @@ def api_upload(upload_request): if v_flags: torrent_flags = j['flags'] else: - return flask.make_response(flask.jsonify({"Error": "Incorrect torrent flags."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect torrent flags."}), 400) torrent_status, torrent_data = validate_torrent_file( torrent_file.filename, torrent_file.read()) # Needs validation if not torrent_status: - return flask.make_response(flask.jsonify({"Error": "Invalid or Duplicate torrent file."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Invalid or Duplicate torrent file."}), 400) 
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant # keys and values have been checked for (see UploadForm in forms.py for details) @@ -297,21 +311,24 @@ def api_upload(upload_request): # Store tracker refs in DB for order, tracker in enumerate(db_trackers): torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) + tracker_id=tracker.id, order=order) db.session.add(torrent_tracker) db.session.commit() if app.config.get('BACKUP_TORRENT_FOLDER'): torrent_file.seek(0, 0) - torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename))) + torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format( + torrent.id, secure_filename(torrent_file.filename))) torrent_file.save(torrent_path) torrent_file.close() - #print('Success? {0}'.format(torrent.id)) - return flask.make_response(flask.jsonify({"Success": "Request was processed {0}".format(torrent.id)}), 200) + # print('Success? {0}'.format(torrent.id)) + return flask.make_response(flask.jsonify( + {"Success": "Request was processed {0}".format(torrent.id)}), 200) except Exception as e: print('Exception: {0}'.format(e)) - return flask.make_response(flask.jsonify({"Error": "Incorrect JSON. Please see HELP page for examples."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect JSON. 
Please see HELP page for examples."}), 400) else: return flask.make_response(flask.jsonify({"Error": "Bad request"}), 400) diff --git a/nyaa/backend.py b/nyaa/backend.py index 240a895..5b10397 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -72,7 +72,8 @@ def handle_torrent_upload(upload_form, uploading_user=None): models.UserLevelType.TRUSTED) if uploading_user else False # Set category ids - torrent.main_category_id, torrent.sub_category_id = upload_form.category.parsed_data.get_category_ids() + torrent.main_category_id, torrent.sub_category_id = \ + upload_form.category.parsed_data.get_category_ids() # print('Main cat id: {0}, Sub cat id: {1}'.format( # torrent.main_category_id, torrent.sub_category_id)) @@ -142,7 +143,7 @@ def handle_torrent_upload(upload_form, uploading_user=None): # Store tracker refs in DB for order, tracker in enumerate(db_trackers): torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) + tracker_id=tracker.id, order=order) db.session.add(torrent_tracker) db.session.commit() @@ -156,8 +157,9 @@ def handle_torrent_upload(upload_form, uploading_user=None): if not os.path.exists(torrent_dir): os.makedirs(torrent_dir) - torrent_path = os.path.join(torrent_dir, '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename))) + torrent_path = os.path.join(torrent_dir, '{}.{}'.format( + torrent.id, secure_filename(torrent_file.filename))) torrent_file.save(torrent_path) torrent_file.close() - return torrent \ No newline at end of file + return torrent diff --git a/nyaa/fix_paginate.py b/nyaa/fix_paginate.py index 38b7808..59334b3 100644 --- a/nyaa/fix_paginate.py +++ b/nyaa/fix_paginate.py @@ -1,6 +1,7 @@ from flask_sqlalchemy import Pagination, BaseQuery from flask import abort + def paginate_faste(self, page=1, per_page=50, max_page=None, step=5): if page < 1: abort(404) @@ -25,4 +26,5 @@ def paginate_faste(self, page=1, per_page=50, max_page=None, step=5): return Pagination(self, page, 
per_page, total, items) + BaseQuery.paginate_faste = paginate_faste diff --git a/nyaa/forms.py b/nyaa/forms.py index bd681ae..97726ab 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -72,23 +72,23 @@ class RegisterForm(FlaskForm): class ProfileForm(FlaskForm): - email = TextField('New email address', [ + email = TextField('New Email Address', [ Email(), Optional(), Length(min=5, max=128), - Unique(User, User.email, 'Email is taken') + Unique(User, User.email, 'This email address has been taken') ]) - current_password = PasswordField('Current password', [Optional()]) + current_password = PasswordField('Current Password', [Required()]) - new_password = PasswordField('New password (confirm)', [ + new_password = PasswordField('New Password', [ Optional(), - EqualTo('password_confirm', message='Passwords must match'), + EqualTo('password_confirm', message='Two passwords must match'), Length(min=6, max=1024, message='Password must be at least %(min)d characters long.') ]) - password_confirm = PasswordField('Repeat Password') + password_confirm = PasswordField('Repeat New Password') # Classes for a SelectField that can be set to disable options (id, name, disabled) @@ -126,7 +126,8 @@ class DisabledSelectField(SelectField): class EditForm(FlaskForm): display_name = TextField('Torrent display name', [ Length(min=3, max=255, - message='Torrent display name must be at least %(min)d characters long and %(max)d at most.') + message='Torrent display name must be at least %(min)d characters long ' + 'and %(max)d at most.') ]) category = DisabledSelectField('Category') @@ -172,7 +173,8 @@ class UploadForm(FlaskForm): display_name = TextField('Torrent display name (optional)', [ Optional(), Length(min=3, max=255, - message='Torrent display name must be at least %(min)d characters long and %(max)d at most.') + message='Torrent display name must be at least %(min)d characters long and ' + '%(max)d at most.') ]) # category = SelectField('Category') @@ -209,7 +211,7 @@ class 
UploadForm(FlaskForm): # Decode and ensure data is bencoded data try: torrent_dict = bencode.decode(field.data) - #field.data.close() + # field.data.close() except (bencode.MalformedBencodeException, UnicodeError): raise ValidationError('Malformed torrent file') @@ -221,7 +223,6 @@ class UploadForm(FlaskForm): except AssertionError as e: raise ValidationError('Malformed torrent metadata ({})'.format(e.args[0])) - site_tracker = app.config.get('MAIN_ANNOUNCE_URL') ensure_tracker = app.config.get('ENFORCE_MAIN_ANNOUNCE_URL') @@ -233,11 +234,12 @@ class UploadForm(FlaskForm): # Ensure private torrents are using our tracker if torrent_dict['info'].get('private') == 1: if torrent_dict['announce'].decode('utf-8') != site_tracker: - raise ValidationError('Private torrent: please set {} as the main tracker'.format(site_tracker)) + raise ValidationError( + 'Private torrent: please set {} as the main tracker'.format(site_tracker)) elif ensure_tracker and not tracker_found: - raise ValidationError('Please include {} in the trackers of the torrent'.format(site_tracker)) - + raise ValidationError( + 'Please include {} in the trackers of the torrent'.format(site_tracker)) # Note! 
bencode will sort dict keys, as per the spec # This may result in a different hash if the uploaded torrent does not match the @@ -274,11 +276,13 @@ class TorrentFileData(object): # https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure + def _validate_trackers(torrent_dict, tracker_to_check_for=None): announce = torrent_dict.get('announce') announce_string = _validate_bytes(announce, 'announce', 'utf-8') - tracker_found = tracker_to_check_for and (announce_string.lower() == tracker_to_check_for.lower()) or False + tracker_found = tracker_to_check_for and ( + announce_string.lower() == tracker_to_check_for.lower()) or False announce_list = torrent_dict.get('announce-list') if announce_list is not None: diff --git a/nyaa/models.py b/nyaa/models.py index 2f512e4..9ae3597 100644 --- a/nyaa/models.py +++ b/nyaa/models.py @@ -41,8 +41,10 @@ class TorrentFlags(IntEnum): COMPLETE = 16 DELETED = 32 + DB_TABLE_PREFIX = app.config['TABLE_PREFIX'] + class Torrent(db.Model): __tablename__ = DB_TABLE_PREFIX + 'torrents' @@ -83,8 +85,9 @@ class Torrent(db.Model): main_category = db.relationship('MainCategory', uselist=False, back_populates='torrents', lazy="joined") sub_category = db.relationship('SubCategory', uselist=False, backref='torrents', lazy="joined", - primaryjoin="and_(SubCategory.id == foreign(Torrent.sub_category_id), " - "SubCategory.main_category_id == Torrent.main_category_id)") + primaryjoin=( + "and_(SubCategory.id == foreign(Torrent.sub_category_id), " + "SubCategory.main_category_id == Torrent.main_category_id)")) info = db.relationship('TorrentInfo', uselist=False, back_populates='torrent') filelist = db.relationship('TorrentFilelist', uselist=False, back_populates='torrent') stats = db.relationship('Statistic', uselist=False, back_populates='torrent', lazy='joined') @@ -118,7 +121,6 @@ class Torrent(db.Model): # Escaped return escape_markup(self.information) - @property def magnet_uri(self): return create_magnet(self) @@ -224,7 +226,8 @@ 
class Trackers(db.Model): __tablename__ = 'trackers' id = db.Column(db.Integer, primary_key=True) - uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI), nullable=False, unique=True) + uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI), + nullable=False, unique=True) disabled = db.Column(db.Boolean, nullable=False, default=False) @classmethod @@ -235,8 +238,10 @@ class Trackers(db.Model): class TorrentTrackers(db.Model): __tablename__ = DB_TABLE_PREFIX + 'torrent_trackers' - torrent_id = db.Column(db.Integer, db.ForeignKey(DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True) - tracker_id = db.Column(db.Integer, db.ForeignKey('trackers.id', ondelete="CASCADE"), primary_key=True) + torrent_id = db.Column(db.Integer, db.ForeignKey( + DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True) + tracker_id = db.Column(db.Integer, db.ForeignKey( + 'trackers.id', ondelete="CASCADE"), primary_key=True) order = db.Column(db.Integer, nullable=False, index=True) tracker = db.relationship('Trackers', uselist=False, lazy='joined') diff --git a/nyaa/routes.py b/nyaa/routes.py index 93c0f80..0502db0 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -6,18 +6,16 @@ from nyaa import bencode, utils from nyaa import torrents from nyaa import backend from nyaa import api_handler +from nyaa.search import search_elastic, search_db import config import json -import re from datetime import datetime, timedelta import ipaddress import os.path import base64 from urllib.parse import quote -import sqlalchemy_fulltext.modes as FullTextMode -from sqlalchemy_fulltext import FullTextSearch -import shlex +import math from werkzeug import url_encode from itsdangerous import URLSafeSerializer, BadSignature @@ -27,7 +25,15 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import formatdate +from flask_paginate import Pagination + + DEBUG_API = False +DEFAULT_MAX_SEARCH_RESULT = 1000 
+DEFAULT_PER_PAGE = 75 +SERACH_PAGINATE_DISPLAY_MSG = ('Displaying results {start}-{end} out of {total} results.<br>\n' + 'Please refine your search results if you can\'t find ' + 'what you were looking for.') def redirect_url(): @@ -48,144 +54,13 @@ def modify_query(**new_values): return '{}?{}'.format(flask.request.path, url_encode(args)) + @app.template_global() def filter_truthy(input_list): ''' Jinja2 can't into list comprehension so this is for the search_results.html template ''' return [item for item in input_list if item] -def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False): - sort_keys = { - 'id': models.Torrent.id, - 'size': models.Torrent.filesize, - 'name': models.Torrent.display_name, - 'seeders': models.Statistic.seed_count, - 'leechers': models.Statistic.leech_count, - 'downloads': models.Statistic.download_count - } - - sort_ = sort.lower() - if sort_ not in sort_keys: - flask.abort(400) - sort = sort_keys[sort] - - order_keys = { - 'desc': 'desc', - 'asc': 'asc' - } - - order_ = order.lower() - if order_ not in order_keys: - flask.abort(400) - - filter_keys = { - '0': None, - '1': (models.TorrentFlags.REMAKE, False), - '2': (models.TorrentFlags.TRUSTED, True), - '3': (models.TorrentFlags.COMPLETE, True) - } - - sentinel = object() - filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) - if filter_tuple is sentinel: - flask.abort(400) - - if user: - user = models.User.by_id(user) - if not user: - flask.abort(404) - user = user.id - - main_category = None - sub_category = None - main_cat_id = 0 - sub_cat_id = 0 - if category: - cat_match = re.match(r'^(\d+)_(\d+)$', category) - if not cat_match: - flask.abort(400) - - main_cat_id = int(cat_match.group(1)) - sub_cat_id = int(cat_match.group(2)) - - if main_cat_id > 0: - if sub_cat_id > 0: - sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) - else: - main_category = 
models.MainCategory.by_id(main_cat_id) - - if not category: - flask.abort(400) - - # Force sort by id desc if rss - if rss: - sort = sort_keys['id'] - order = 'desc' - - same_user = False - if flask.g.user: - same_user = flask.g.user.id == user - - if term: - query = db.session.query(models.TorrentNameSearch) - else: - query = models.Torrent.query - - # User view (/user/username) - if user: - query = query.filter(models.Torrent.uploader_id == user) - - if not admin: - # Hide all DELETED torrents if regular user - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) - # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous - # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones - # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what - if not same_user or rss: - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN | - models.TorrentFlags.ANONYMOUS)).is_(False)) - # General view (homepage, general search view) - else: - if not admin: - # Hide all DELETED torrents if regular user - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) - # If logged in, show all torrents that aren't hidden unless they belong to you - # On RSS pages, show all public torrents and nothing more. 
- if flask.g.user and not rss: - query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | - (models.Torrent.uploader_id == flask.g.user.id)) - # Otherwise, show all torrents that aren't hidden - else: - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) - - if main_category: - query = query.filter(models.Torrent.main_category_id == main_cat_id) - elif sub_category: - query = query.filter((models.Torrent.main_category_id == main_cat_id) & - (models.Torrent.sub_category_id == sub_cat_id)) - - if filter_tuple: - query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) - - if term: - for item in shlex.split(term, posix=False): - if len(item) >= 2: - query = query.filter(FullTextSearch( - item, models.TorrentNameSearch, FullTextMode.NATURAL)) - - # Sort and order - if sort.class_ != models.Torrent: - query = query.join(sort.class_) - - query = query.order_by(getattr(sort, order)()) - - if rss: - query = query.limit(app.config['RESULTS_PER_PAGE']) - else: - query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) - - return query - @app.errorhandler(404) def not_found(error): @@ -202,8 +77,7 @@ def before_request(): flask.g.user = user - if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now(): - print("hio") + if 'timeout' not in flask.session or flask.session['timeout'] < datetime.now(): flask.session['timeout'] = datetime.now() + timedelta(days=7) flask.session.permanent = True flask.session.modified = True @@ -225,21 +99,35 @@ def _generate_query_string(term, category, filter, user): return params +@app.template_filter('utc_time') +def get_utc_timestamp(datetime_str): + ''' Returns a UTC POSIX timestamp, as seconds ''' + UTC_EPOCH = datetime.utcfromtimestamp(0) + return int((datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') - UTC_EPOCH).total_seconds()) + + +@app.template_filter('display_time') 
+def get_display_time(datetime_str): + return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d %H:%M') + + @app.route('/rss', defaults={'rss': True}) @app.route('/', defaults={'rss': False}) def home(rss): if flask.request.args.get('page') == 'rss': rss = True - term = flask.request.args.get('q') + term = flask.request.args.get('q', flask.request.args.get('term')) sort = flask.request.args.get('s') order = flask.request.args.get('o') - category = flask.request.args.get('c') - quality_filter = flask.request.args.get('f') - user_name = flask.request.args.get('u') - page = flask.request.args.get('p') - if page: - page = int(page) + category = flask.request.args.get('c', flask.request.args.get('cats')) + quality_filter = flask.request.args.get('f', flask.request.args.get('filter')) + user_name = flask.request.args.get('u', flask.request.args.get('user')) + page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int) + + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE user_id = None if user_name: @@ -249,33 +137,76 @@ def home(rss): user_id = user.id query_args = { - 'term': term or '', 'user': user_id, 'sort': sort or 'id', 'order': order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', - 'page': page or 1, - 'rss': rss + 'page': page, + 'rss': rss, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True - query = search(**query_args) + # If searching, we get results from elastic search + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term - if rss: - return render_rss('/', query) + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = 
DEFAULT_MAX_SEARCH_RESULT + + # Only allow up to (max_search_results / per_page) pages + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + if rss: + return render_rss('/', query_results, use_elastic=True) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('home.html', + use_elastic=True, + pagination=pagination, + torrent_query=query_results, + search=query_args, + rss_filter=rss_query_string) else: - rss_query_string = _generate_query_string(term, category, quality_filter, user_name) - return flask.render_template('home.html', - torrent_query=query, - search=query_args, - rss_filter=rss_query_string) + # If ES is enabled, default to db search for browsing + if use_elastic: + query_args['term'] = '' + else: # Otherwise, use db search for everything + query_args['term'] = term or '' + + query = search_db(**query_args) + if rss: + return render_rss('/', query, use_elastic=False) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + # Use elastic is always false here because we only hit this section + # if we're browsing without a search term (which means we default to DB) + # or if ES is disabled + return flask.render_template('home.html', + use_elastic=False, + torrent_query=query, + search=query_args, + rss_filter=rss_query_string) -@app.route('/user/<user_name>', methods=['GET', 'POST']) +@app.route('/user/<user_name>') def view_user(user_name): user = 
models.User.by_username(user_name) @@ -320,6 +251,10 @@ def view_user(user_name): if page: page = int(page) + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE + query_args = { 'term': term or '', 'user': user.id, @@ -328,27 +263,68 @@ def view_user(user_name): 'category': category or '0_0', 'quality_filter': quality_filter or '0', 'page': page or 1, - 'rss': False + 'rss': False, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True - - query = search(**query_args) + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True + # Use elastic search for term searching rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term - return flask.render_template('user.html', - form=form, - torrent_query=query, - search=query_args, - user=user, - user_page=True, - rss_filter=rss_query_string, - level=level, - admin=admin, - superadmin=superadmin) + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = DEFAULT_MAX_SEARCH_RESULT + + # Only allow up to (max_search_results / per_page) pages + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('user.html', + use_elastic=True, + pagination=pagination, 
+ torrent_query=query_results, + search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string, + level=level, + admin=admin, + superadmin=superadmin, + form=form) + # Similar logic as home page + else: + if use_elastic: + query_args['term'] = '' + else: + query_args['term'] = term or '' + query = search_db(**query_args) + return flask.render_template('user.html', + use_elastic=False, + torrent_query=query, + search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string, + level=level, + admin=admin, + superadmin=superadmin, + form=form) @app.template_filter('rfc822') @@ -356,19 +332,27 @@ def _jinja2_filter_rfc822(date, fmt=None): return formatdate(float(date.strftime('%s'))) -def render_rss(label, query): +@app.template_filter('rfc822_es') +def _jinja2_filter_rfc822(datestr, fmt=None): + return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) + + +def render_rss(label, query, use_elastic): rss_xml = flask.render_template('rss.xml', + use_elastic=use_elastic, term=label, site_url=flask.request.url_root, - query=query) + torrent_query=query) response = flask.make_response(rss_xml) response.headers['Content-Type'] = 'application/xml' + # Cache for five minutes + response.headers['Cache-Control'] = 'max-age={}'.format(1*5*60) return response -#@app.route('/about', methods=['GET']) +# @app.route('/about', methods=['GET']) # def about(): -# return flask.render_template('about.html') + # return flask.render_template('about.html') @app.route('/login', methods=['GET', 'POST']) @@ -385,7 +369,8 @@ def login(): if not user: user = models.User.by_email(username) - if not user or password != user.password_hash or user.status == models.UserStatusType.INACTIVE: + if (not user or password != user.password_hash + or user.status == models.UserStatusType.INACTIVE): flask.flash(flask.Markup( '<strong>Login failed!</strong> Incorrect username or password.'), 'danger') return flask.redirect(flask.url_for('login')) @@ 
-463,25 +448,36 @@ def profile(): if flask.request.method == 'POST' and form.validate(): user = flask.g.user - new_email = form.email.data + new_email = form.email.data.strip() new_password = form.new_password.data if new_email: + # enforce password check on email change too + if form.current_password.data != user.password_hash: + flask.flash(flask.Markup( + '<strong>Email change failed!</strong> Incorrect password.'), 'danger') + return flask.redirect('/profile') user.email = form.email.data - + flask.flash(flask.Markup( + '<strong>Email successfully changed!</strong>'), 'info') if new_password: if form.current_password.data != user.password_hash: flask.flash(flask.Markup( '<strong>Password change failed!</strong> Incorrect password.'), 'danger') return flask.redirect('/profile') user.password_hash = form.new_password.data + flask.flash(flask.Markup( + '<strong>Password successfully changed!</strong>'), 'info') db.session.add(user) db.session.commit() flask.g.user = user + return flask.redirect('/profile') - return flask.render_template('profile.html', form=form, level=level) + current_email = models.User.by_id(flask.g.user.id).email + + return flask.render_template('profile.html', form=form, email=current_email, level=level) @app.route('/user/activate/<payload>') @@ -572,7 +568,8 @@ def edit_torrent(torrent_id): if flask.request.method == 'POST' and form.validate(): # Form has been sent, edit torrent with data. - torrent.main_category_id, torrent.sub_category_id = form.category.parsed_data.get_category_ids() + torrent.main_category_id, torrent.sub_category_id = \ + form.category.parsed_data.get_category_ids() torrent.display_name = (form.display_name.data or '').strip() torrent.information = (form.information.data or '').strip() torrent.description = (form.description.data or '').strip() @@ -585,6 +582,9 @@ def edit_torrent(torrent_id): db.session.commit() + flask.flash(flask.Markup( + 'Torrent has been successfully edited! 
Changes might take a few minutes to show up.'), 'info') + return flask.redirect('/view/' + str(torrent_id)) else: # Setup form with pre-formatted form. @@ -599,7 +599,10 @@ def edit_torrent(torrent_id): form.is_complete.data = torrent.complete form.is_anonymous.data = torrent.anonymous - return flask.render_template('edit.html', form=form, torrent=torrent, admin=flask.g.user.is_admin) + return flask.render_template('edit.html', + form=form, + torrent=torrent, + admin=flask.g.user.is_admin) @app.route('/view/<int:torrent_id>/magnet') @@ -651,8 +654,10 @@ def get_activation_link(user): def send_verification_email(to_address, activ_link): - ''' this is until we have our own mail server, obviously. This can be greatly cut down if on same machine. - probably can get rid of all but msg formatting/building, init line and sendmail line if local SMTP server ''' + ''' this is until we have our own mail server, obviously. + This can be greatly cut down if on same machine. + probably can get rid of all but msg formatting/building, + init line and sendmail line if local SMTP server ''' msg_body = 'Please click on: ' + activ_link + ' to activate your account.\n\n\nUnsubscribe:' @@ -679,7 +684,7 @@ def _create_user_class_choices(): return choices -#################################### STATIC PAGES #################################### +# #################################### STATIC PAGES #################################### @app.route('/rules', methods=['GET']) def site_rules(): return flask.render_template('rules.html') @@ -690,9 +695,9 @@ def site_help(): return flask.render_template('help.html') -#################################### API ROUTES #################################### +# #################################### API ROUTES #################################### # DISABLED FOR NOW -@app.route('/api/upload', methods = ['POST']) +@app.route('/api/upload', methods=['POST']) def api_upload(): api_response = api_handler.api_upload(flask.request) return api_response diff --git 
a/nyaa/search.py b/nyaa/search.py new file mode 100644 index 0000000..9e22f84 --- /dev/null +++ b/nyaa/search.py @@ -0,0 +1,328 @@ +import flask +import re +import math +import json +import shlex + +from nyaa import app, db +from nyaa import models + +import sqlalchemy_fulltext.modes as FullTextMode +from sqlalchemy_fulltext import FullTextSearch +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q + + +def search_elastic(term='', user=None, sort='id', order='desc', + category='0_0', quality_filter='0', page=1, + rss=False, admin=False, logged_in_user=None, + per_page=75, max_search_results=1000): + # This function can easily be memcached now + + es_client = Elasticsearch() + + es_sort_keys = { + 'id': 'id', + 'size': 'filesize', + # 'name': 'display_name', # This is slow and buggy + 'seeders': 'seed_count', + 'leechers': 'leech_count', + 'downloads': 'download_count' + } + + sort_ = sort.lower() + if sort_ not in es_sort_keys: + flask.abort(400) + + es_sort = es_sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' + } + + order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + # Only allow ID, desc if RSS + if rss: + sort = es_sort_keys['id'] + order = 'desc' + + # funky, es sort is default asc, prefixed by '-' if desc + if 'desc' == order: + es_sort = '-' + es_sort + + # Quality filter + quality_keys = [ + '0', # Show all + '1', # No remakes + '2', # Only trusted + '3' # Only completed + ] + + if quality_filter.lower() not in quality_keys: + flask.abort(400) + + quality_filter = int(quality_filter) + + # Category filter + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, 
sub_cat_id) + if not sub_category: + flask.abort(400) + else: + main_category = models.MainCategory.by_id(main_cat_id) + if not main_category: + flask.abort(400) + + # This might be useless since we validate users + # before coming into this method, but just to be safe... + if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + same_user = False + if logged_in_user: + same_user = user == logged_in_user.id + + s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME')) # todo, sukebei prefix + + # Apply search term + if term: + s = s.query('simple_query_string', + analyzer='my_search_analyzer', + default_operator="AND", + query=term) + + # User view (/user/username) + if user: + s = s.filter('term', uploader_id=user) + + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in user is not the same as the user being viewed, + # show only torrents that aren't hidden or anonymous. + # + # If logged in user is the same as the user being viewed, + # show all torrents including hidden and anonymous ones. + # + # On RSS pages in user view, show only torrents that + # aren't hidden or anonymous no matter what + if not same_user or rss: + s = s.filter('term', hidden=False) + s = s.filter('term', anonymous=False) + # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. 
+ if logged_in_user and not rss: + hiddenFilter = Q('term', hidden=False) + userFilter = Q('term', uploader_id=logged_in_user.id) + combinedFilter = hiddenFilter | userFilter + s = s.filter('bool', filter=[combinedFilter]) + else: + s = s.filter('term', hidden=False) + + if main_category: + s = s.filter('term', main_category_id=main_cat_id) + elif sub_category: + s = s.filter('term', main_category_id=main_cat_id) + s = s.filter('term', sub_category_id=sub_cat_id) + + if quality_filter == 0: + pass + elif quality_filter == 1: + s = s.filter('term', remake=False) + elif quality_filter == 2: + s = s.filter('term', trusted=True) + elif quality_filter == 3: + s = s.filter('term', complete=True) + + # Apply sort + s = s.sort(es_sort) + + # Only show first RESULTS_PER_PAGE items for RSS + if rss: + s = s[0:per_page] + else: + max_page = min(page, int(math.ceil(max_search_results / float(per_page)))) + from_idx = (max_page - 1) * per_page + to_idx = min(max_search_results, max_page * per_page) + s = s[from_idx:to_idx] + + highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT') + if highlight: + s = s.highlight_options(tags_schema='styled') + s = s.highlight("display_name") + + # Return query, uncomment print line to debug query + # from pprint import pprint + # print(json.dumps(s.to_dict())) + return s.execute() + + +def search_db(term='', user=None, sort='id', order='desc', category='0_0', + quality_filter='0', page=1, rss=False, admin=False, + logged_in_user=None, per_page=75): + sort_keys = { + 'id': models.Torrent.id, + 'size': models.Torrent.filesize, + # Disable this because we disabled this in search_elastic, for the sake of consistency: + # 'name': models.Torrent.display_name, + 'seeders': models.Statistic.seed_count, + 'leechers': models.Statistic.leech_count, + 'downloads': models.Statistic.download_count + } + + sort_ = sort.lower() + if sort_ not in sort_keys: + flask.abort(400) + sort = sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' 
+ } + + order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + filter_keys = { + '0': None, + '1': (models.TorrentFlags.REMAKE, False), + '2': (models.TorrentFlags.TRUSTED, True), + '3': (models.TorrentFlags.COMPLETE, True) + } + + sentinel = object() + filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) + if filter_tuple is sentinel: + flask.abort(400) + + if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) + else: + main_category = models.MainCategory.by_id(main_cat_id) + + if not category: + flask.abort(400) + + # Force sort by id desc if rss + if rss: + sort = sort_keys['id'] + order = 'desc' + + same_user = False + if logged_in_user: + same_user = logged_in_user.id == user + + if term: + query = db.session.query(models.TorrentNameSearch) + else: + query = models.Torrent.query + + # User view (/user/username) + if user: + query = query.filter(models.Torrent.uploader_id == user) + + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in user is not the same as the user being viewed, + # show only torrents that aren't hidden or anonymous + # + # If logged in user is the same as the user being viewed, + # show all torrents including hidden and anonymous ones + # + # On RSS pages in user view, + # show only torrents that aren't hidden or anonymous no matter what + if not same_user or rss: + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.HIDDEN | 
models.TorrentFlags.ANONYMOUS)).is_(False)) + # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. + if logged_in_user and not rss: + query = query.filter( + (models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | + (models.Torrent.uploader_id == logged_in_user.id)) + # Otherwise, show all torrents that aren't hidden + else: + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.HIDDEN)).is_(False)) + + if main_category: + query = query.filter(models.Torrent.main_category_id == main_cat_id) + elif sub_category: + query = query.filter((models.Torrent.main_category_id == main_cat_id) & + (models.Torrent.sub_category_id == sub_cat_id)) + + if filter_tuple: + query = query.filter(models.Torrent.flags.op('&')( + int(filter_tuple[0])).is_(filter_tuple[1])) + + if term: + for item in shlex.split(term, posix=False): + if len(item) >= 2: + query = query.filter(FullTextSearch( + item, models.TorrentNameSearch, FullTextMode.NATURAL)) + + # Sort and order + if sort.class_ != models.Torrent: + query = query.join(sort.class_) + + query = query.order_by(getattr(sort, order)()) + + if rss: + query = query.limit(per_page) + else: + query = query.paginate_faste(page, per_page=per_page, step=5) + + return query diff --git a/nyaa/static/css/bootstrap-dark.min.css b/nyaa/static/css/bootstrap-dark.min.css index be99bb5..4b9a19f 100644 Binary files a/nyaa/static/css/bootstrap-dark.min.css and b/nyaa/static/css/bootstrap-dark.min.css differ diff --git a/nyaa/static/css/main.css b/nyaa/static/css/main.css index 83ca0ea..7edbffa 100644 --- a/nyaa/static/css/main.css +++ b/nyaa/static/css/main.css @@ -97,4 +97,86 @@ table.torrent-list thead 
th.sorting_desc:after { margin-left: 20px; margin-bottom: 10px; } -} \ No newline at end of file +} + +.search-container { + display: -webkit-box; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: vertical; + -webkit-box-direction: normal; + -ms-flex-direction: column; + flex-direction: column; +} + +.form-control.search-bar { + -webkit-box-ordinal-group: 2; + -ms-flex-order: 1; + order: 1; + width: 99%; + padding-right: 4em; +} + +.search-btn { + -webkit-box-ordinal-group: 3; + -ms-flex-order: 2; + order: 2; + -ms-flex-item-align: end; + align-self: flex-end; + top: -34px; + height: 0; + width: auto; + z-index: 3; +} + +#navFilter-criteria { + -webkit-box-ordinal-group: 4; + -ms-flex-order: 3; + order: 3; +} + +#navFilter-category { + -webkit-box-ordinal-group: 5; + -ms-flex-order: 4; + order: 4; +} + +.nav-filter { + width: 100%; + padding: 1em 0; +} + +.bootstrap-select > button { + margin-top: 1em; +} + +/* Allows the bootstrap selects on nav show outside the + collapsible section of the navigation */ +.navbar-collapse.in { + overflow-y: visible; +} + +@media (min-width: 991px) { + .search-btn { + top: 0; + width: auto; + } + + .bootstrap-select > button { + margin-top: auto; + } +} + +/* elasticsearch term highlight */ +.hlt1 { + font-style: normal; + display: inline-block; + padding: 0 3px; + border-radius: 3px; + border: 1px solid rgba(100, 56, 0, 0.8); + background: rgba(200,127,0,0.3); +} + +ul.nav-tabs#profileTabs { + margin-bottom: 15px; +} diff --git a/nyaa/static/js/main.js b/nyaa/static/js/main.js index b9c140b..63f32c5 100644 --- a/nyaa/static/js/main.js +++ b/nyaa/static/js/main.js @@ -105,8 +105,13 @@ document.addEventListener("DOMContentLoaded", function() { var previewTabEl = markdownEditor.querySelector(previewTabSelector); var targetEl = markdownEditor.querySelector(targetSelector); + var reader = new commonmark.Parser({safe: true}); + var writer = new commonmark.HtmlRenderer({safe: true}); + writer.softbreak = '<br />'; + 
previewTabEl.addEventListener('click', function () { - targetEl.innerHTML = marked(sourceSelector.value.trim(), { sanitize: true, breaks:true }); + var parsed = reader.parse(sourceSelector.value.trim()); + targetEl.innerHTML = writer.render(parsed); }); }); }); diff --git a/nyaa/static/pinned-tab.svg b/nyaa/static/pinned-tab.svg new file mode 100644 index 0000000..28034c3 Binary files /dev/null and b/nyaa/static/pinned-tab.svg differ diff --git a/nyaa/templates/home.html b/nyaa/templates/home.html index c6e82f4..f0fb99d 100644 --- a/nyaa/templates/home.html +++ b/nyaa/templates/home.html @@ -1,9 +1,10 @@ {% extends "layout.html" %} -{% block title %}Browse :: {{ config.SITE_NAME }}{% endblock %} +{% block title %}{% if search.term %}{{ search.term | e}}{% else %}Browse{% endif %} :: {{ config.SITE_NAME }}{% endblock %} {% block body %} <div class="alert alert-info"> - <p><strong>Hello!</strong> This site is still a work in progress and new features (faster and actually more accurate search, comments etc.) will be added in the coming days.</p> + <p><strong>5/17 Update:</strong> We've added faster and more accurate search! In addition to your typical keyword search in both English and other languages, you can also now use powerful operators + like <kbd>clockwork planet -horrible</kbd> or <kbd>commie|horrible|cartel yowamushi</kbd> to search. For all supported operators, please visit <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html#_simple_query_string_syntax">here</a>. 
More features are coming soon!</p><br> <p>We welcome you to provide feedback at <a href="irc://irc.rizon.net/nyaa-dev">#nyaa-dev@irc.rizon.net</a></p> <p>Our GitHub: <a href="https://github.com/nyaadevs" target="_blank">https://github.com/nyaadevs</a> - creating <a href="https://github.com/nyaadevs/nyaa/issues">issues</a> for features and faults is recommendable!</p> </div> diff --git a/nyaa/templates/layout.html b/nyaa/templates/layout.html index 232d2d5..fc64265 100644 --- a/nyaa/templates/layout.html +++ b/nyaa/templates/layout.html @@ -8,6 +8,7 @@ <meta http-equiv="X-UA-Compatible" content="IE=edge"> <link rel="shortcut icon" type="image/png" href="/static/favicon.png"> <link rel="icon" type="image/png" href="/static/favicon.png"> + <link rel="mask-icon" href="/static/pinned-tab.svg" color="#3582F7"> <link rel="alternate" type="application/rss+xml" href="{% if rss_filter %}{{ url_for('home', page='rss', _external=True, **rss_filter) }}{% else %}{{ url_for('home', page='rss', _external=True) }}{% endif %}" /> <!-- Bootstrap core CSS --> @@ -34,7 +35,7 @@ <!-- Modified to not apply border-radius to selectpickers and stuff so our navbar looks cool --> <script src="/static/js/bootstrap-select.js"></script> <script src="/static/js/main.js"></script> - <script src="//cdnjs.cloudflare.com/ajax/libs/marked/0.3.6/marked.min.js"></script> + <script src="//cdnjs.cloudflare.com/ajax/libs/commonmark/0.27.0/commonmark.min.js"></script> <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> <!--[if lt IE 9]> @@ -144,16 +145,16 @@ {% else %} <form class="navbar-form navbar-right form" action="/" method="get"> {% endif %} - <div class="input-group"> - <input type="text" class="form-control" name="q" placeholder="Search..." 
value="{{ search["term"] if search is defined else '' }}"> - <div class="input-group-btn" id="navFilter"> + <div class="input-group search-container"> + <input type="text" class="form-control search-bar" name="q" placeholder="Search..." value="{{ search["term"] if search is defined else '' }}"> + <div class="input-group-btn nav-filter" id="navFilter-criteria"> <select class="selectpicker show-tick" title="Filter" data-width="120px" name="f"> - <option value="0" title="Show all" {% if search is defined and search["quality_filter"] == "0" %}selected{% else %}selected{% endif %}>Show all</option> + <option value="0" title="No filter" {% if search is defined and search["quality_filter"] == "0" %}selected{% else %}selected{% endif %}>No filter</option> <option value="1" title="No remakes" {% if search is defined and search["quality_filter"] == "1" %}selected{% endif %}>No remakes</option> <option value="2" title="Trusted only" {% if search is defined and search["quality_filter"] == "2" %}selected{% endif %}>Trusted only</option> </select> </div> - <div class="input-group-btn" id="navFilter"> + <div class="input-group-btn nav-filter" id="navFilter-category"> {% set nyaa_cats = [('1_0', 'Anime', 'Anime'), ('1_1', '- Anime Music Video', 'Anime - AMV'), ('1_2', '- English-translated', 'Anime - English'), @@ -177,7 +178,7 @@ ('6_0', 'Software', 'Software'), ('6_1', '- Applications', 'Software - Apps'), ('6_2', '- Games', 'Software - Games')] %} - {% set suke_cats = [('1_0', 'Art', 'Art'), + {% set suke_cats = [('1_0', 'Art', 'Art'), ('1_1', '- Anime', 'Art - Anime'), ('1_2', '- Doujinshi', 'Art - Doujinshi'), ('1_3', '- Games', 'Art - Games'), @@ -192,8 +193,8 @@ {% set used_cats = suke_cats %} {% endif %} <select class="selectpicker show-tick" title="Category" data-width="170px" name="c"> - <option value="0_0" title="Show all" {% if search is defined and search["category"] == "0_0" %}selected{% else %}selected{% endif %}> - Show all + <option value="0_0" title="All 
categories" {% if search is defined and search["category"] == "0_0" %}selected{% else %}selected{% endif %}> + All categories </option> {% for cat_id, cat_name, cat_title in used_cats %} <option value="{{ cat_id }}" title="{{ cat_title }}" {% if search is defined and search.category == cat_id %}selected{% endif %}> @@ -202,7 +203,7 @@ {% endfor %} </select> </div> - <div class="input-group-btn"> + <div class="input-group-btn search-btn"> <button class="btn btn-primary" type="submit"> <i class="fa fa-search fa-fw"></i> </button> diff --git a/nyaa/templates/profile.html b/nyaa/templates/profile.html index 671a9d3..f07d0a5 100644 --- a/nyaa/templates/profile.html +++ b/nyaa/templates/profile.html @@ -3,55 +3,83 @@ {% block body %} {% from "_formhelpers.html" import render_field %} -{% if g.user %} - <h1>My Account</h1> - <dl class="dl-horizontal"> +<h1>Edit Profile</h1> + +<ul class="nav nav-tabs" id="profileTabs" role="tablist"> + <li role="presentation" class="active"> + <a href="#password-change" id="password-change-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="true">Password</a> + </li> + <li role="presentation"> + <a href="#email-change" id="email-change-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="false">Email</a> + </li> + <li role="presentation"> + <a href="#general-info" id="general-info-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="false">My Info</a> + </li> +</ul> +<div class="tab-content"> + <div class="tab-pane fade active in" role="tabpanel" id="password-change" aria-labelledby="password-change-tab"> + <form method="POST"> + {{ form.csrf_token }} + <div class="row"> + <div class="form-group col-md-4"> + {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} + </div> + </div> + <div class="row"> + <div class="form-group col-md-4"> + {{ render_field(form.new_password, class_='form-control', placeholder='New password') }} + </div> + 
</div> + <div class="row"> + <div class="form-group col-md-4"> + {{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }} + </div> + </div> + <br> + <div class="row"> + <div class="col-md-4"> + <input type="submit" value="Update" class="btn btn-primary"> + </div> + </div> + </form> + </div> + <div class="tab-pane fade" role="tabpanel" id="email-change" aria-labelledby="email-change-tab"> + <form method="POST"> + {{ form.csrf_token }} + <div class="row"> + <div class="form-group col-md-4"> + <label class="control-label" for="current_email">Current Email</label> + <div>{{email}}</div> + </div> + </div> + <div class="row"> + <div class="form-group col-md-4"> + {{ render_field(form.email, class_='form-control', placeholder='New email address') }} + </div> + </div> + <div class="row"> + <div class="form-group col-md-4"> + {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} + </div> + </div> + <br> + <div class="row"> + <div class="col-md-4"> + <input type="submit" value="Update" class="btn btn-primary"> + </div> + </div> + </form> + </div> + <div class="tab-pane fade" role="tabpanel" id="general-info" aria-labelledby="general-info-tab"> + <dl class="dl-horizontal"> <dt>User ID:</dt> <dd>{{g.user.id}}</dd> <dt>Account created on:</dt> <dd>{{g.user.created_time}}</dd> - <dt>Email address:</dt> - <dd>{{g.user.email}}</dd> <dt>User class:</dt> <dd>{{level}}</dd><br> </dl> -{% endif %} + </div> +</div> -<h2>Edit Profile</h2> - <form method="POST"> - {{ form.csrf_token }} - - <div class="row"> - <div class="form-group col-md-4"> - {{ render_field(form.email, class_='form-control', placeholder='New email address') }} - </div> - </div> - - <div class="row"> - <div class="form-group col-md-4"> - {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} - </div> - </div> - - <div class="row"> - <div class="form-group col-md-4"> - {{ 
render_field(form.new_password, class_='form-control', placeholder='New password') }} - </div> - </div> - - <div class="row"> - <div class="form-group col-md-4"> - {{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }} - </div> - </div> - - <br> - - <div class="row"> - <div class="col-md-4"> - <input type="submit" value="Update" class="btn btn-primary"> - </div> - </div> - </form> {% endblock %} - diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index 266e524..e1787d2 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -4,20 +4,32 @@ <description>RSS Feed for {{ term }}</description> <link>{{ url_for('home', _external=True) }}</link> <atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" /> - {% for torrent in query %} + {% for torrent in torrent_query %} {% if torrent.has_torrent %} <item> <title>{{ torrent.display_name }}</title> + {% if use_elastic %} + <link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link> + <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid> + <pubDate>{{ torrent.created_time|rfc822_es }}</pubDate> + {% else %} <link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link> <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid> <pubDate>{{ torrent.created_time|rfc822 }}</pubDate> + {% endif %} </item> {% else %} <item> <title>{{ torrent.display_name }}</title> + {% if use_elastic %} + <link>{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}</link> + <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid> + <pubDate>{{ torrent.created_time|rfc822_es }}</pubDate> + {% else %} <link>{{ torrent.magnet_uri }}</link> <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, 
_external=True) }}</guid> <pubDate>{{ torrent.created_time|rfc822 }}</pubDate> + {% endif %} </item> {% endif %} {% endfor %} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index bdacd4d..f169b6f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -8,7 +8,7 @@ {{ caller() }} </th> {% endmacro %} -{% if torrent_query.items %} +{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %} <div class="table-responsive"> <table class="table table-bordered table-hover table-striped torrent-list"> <thead> @@ -16,7 +16,7 @@ {% call render_column_header("hdr-category", "width:80px;", center_text=True) %} <div>Category</div> {% endcall %} - {% call render_column_header("hdr-name", "width:auto;", sort_key="name") %} + {% call render_column_header("hdr-name", "width:auto;") %} <div>Name</div> {% endcall %} {% call render_column_header("hdr-link", "width:70px;", center_text=True) %} @@ -45,27 +45,51 @@ </tr> </thead> <tbody> - {% for torrent in torrent_query.items %} + {% set torrents = torrent_query if use_elastic else torrent_query.items %} + {% for torrent in torrents %} <tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}"> - {% set cat_id = (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} + {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} {% set icon_dir = config.SITE_FLAVOR %} <td style="padding:0 4px;"> + {% if use_elastic %} + <a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}"> + {% else %} <a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}"> + {% endif %} <img src="/static/img/icons/{{ icon_dir }}/{{ 
cat_id }}.png"> </a> </td> - <td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td> + {% if use_elastic %} + <td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}" title="{{ torrent.display_name | escape }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td> + {% else %} + <td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}" title="{{ torrent.display_name | escape }}">{{ torrent.display_name | escape }}</a></td> + {% endif %} <td style="white-space: nowrap;text-align: center;"> {% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %} + {% if use_elastic %} + <a href="{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}"><i class="fa fa-fw fa-magnet"></i></a> + {% else %} <a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a> + {% endif %} </td> <td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td> + {% if use_elastic %} + <td class="text-center" data-timestamp="{{ torrent.created_time | utc_time }}">{{ torrent.created_time | display_time }}</td> + {% else %} <td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td> + {% endif %} + {% if config.ENABLE_SHOW_STATS %} + {% if use_elastic %} + <td class="text-center" style="color: green;">{{ torrent.seed_count }}</td> + <td class="text-center" style="color: red;">{{ torrent.leech_count }}</td> + <td class="text-center">{{ torrent.download_count }}</td> + {% else %} <td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td> <td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td> <td class="text-center">{{ torrent.stats.download_count }}</td> {% endif %} + {% endif %} </tr> {% 
endfor %} </tbody> @@ -76,6 +100,11 @@ {% endif %} <center> + {% if use_elastic %} + {{ pagination.info }} + {{ pagination.links }} + {% else %} {% from "bootstrap/pagination.html" import render_pagination %} {{ render_pagination(torrent_query) }} + {% endif %} </center> diff --git a/nyaa/templates/upload.html b/nyaa/templates/upload.html index ec2f1d8..5645fb5 100644 --- a/nyaa/templates/upload.html +++ b/nyaa/templates/upload.html @@ -13,7 +13,7 @@ <form method="POST" enctype="multipart/form-data"> - {% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <i>{{config.MAIN_ANNOUNCE_URL}}</i> in your trackers</p>{% endif %} + {% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <kbd>{{config.MAIN_ANNOUNCE_URL}}</kbd> in your trackers</p>{% endif %} <div class="row"> <div class="form-group col-md-6"> {{ render_upload(form.torrent_file, accept=".torrent") }} diff --git a/nyaa/templates/view.html b/nyaa/templates/view.html index fd64979..6542c30 100644 --- a/nyaa/templates/view.html +++ b/nyaa/templates/view.html @@ -92,7 +92,7 @@ <i class="glyphicon glyphicon-folder-open"></i> <b>{{ key }}</b></td> {{ loop(value.items()) }} {%- else %} - <td style="padding-left: {{ loop.depth0 * 20 }}px"> + <td{% if loop.depth0 is greaterthan 0 %} style="padding-left: {{ loop.depth0 * 20 }}px"{% endif %}> <i class="glyphicon glyphicon-file"></i> {{ key }}</td> <td class="col-md-2">{{ value | filesizeformat(True) }}</td> {%- endif %} @@ -122,8 +122,11 @@ <script> var target = document.getElementById('torrent-description'); var text = target.innerHTML; - var html = marked(text.trim(), { sanitize: true, breaks:true }); - target.innerHTML = html; + var reader = new commonmark.Parser({safe: true}); + var writer = new commonmark.HtmlRenderer({safe: true}); + writer.softbreak = '<br />'; + var parsed = reader.parse(text.trim()); + target.innerHTML = writer.render(parsed); </script> {% endblock %} diff --git a/nyaa/torrents.py 
b/nyaa/torrents.py index 192ab0f..eff6f54 100644 --- a/nyaa/torrents.py +++ b/nyaa/torrents.py @@ -3,6 +3,7 @@ import base64 import time from urllib.parse import urlencode from orderedset import OrderedSet +from nyaa import app from nyaa import bencode from nyaa import app @@ -54,9 +55,23 @@ def get_trackers(torrent): return list(trackers) +def get_trackers_magnet(): + trackers = OrderedSet() + + # Our main one first + main_announce_url = app.config.get('MAIN_ANNOUNCE_URL') + if main_announce_url: + trackers.add(main_announce_url) + + # and finally our tracker list + trackers.update(default_trackers()) + + return list(trackers) + + def create_magnet(torrent, max_trackers=5, trackers=None): if trackers is None: - trackers = get_trackers(torrent) + trackers = get_trackers_magnet() magnet_parts = [ ('dn', torrent.display_name) @@ -68,6 +83,24 @@ def create_magnet(torrent, max_trackers=5, trackers=None): return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) +# For processing ES links +@app.context_processor +def create_magnet_from_info(): + def _create_magnet_from_info(display_name, info_hash, max_trackers=5, trackers=None): + if trackers is None: + trackers = get_trackers_magnet() + + magnet_parts = [ + ('dn', display_name) + ] + for tracker in trackers[:max_trackers]: + magnet_parts.append(('tr', tracker)) + + b32_info_hash = base64.b32encode(bytes.fromhex(info_hash)).decode('utf-8') + return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) + return dict(create_magnet_from_info=_create_magnet_from_info) + + def create_default_metadata_base(torrent, trackers=None): if trackers is None: trackers = get_trackers(torrent) diff --git a/requirements.txt b/requirements.txt index 224866b..843b935 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,11 +24,17 @@ pycodestyle==2.3.1 pycparser==2.17 pyparsing==2.2.0 six==1.10.0 -SQLAlchemy>=1.1.9 +SQLAlchemy==1.1.9 SQLAlchemy-FullText-Search==0.2.3 -SQLAlchemy-Utils>=0.32.14 
+SQLAlchemy-Utils==0.32.14 uWSGI==2.0.15 visitor==0.1.3 webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 +## elasticsearch dependencies +elasticsearch==5.3.0 +elasticsearch-dsl==5.2.0 +progressbar2==3.20.0 +mysql-replication==0.13 +flask-paginate==0.4.5 \ No newline at end of file diff --git a/sync_es.py b/sync_es.py new file mode 100644 index 0000000..4cbd9f2 --- /dev/null +++ b/sync_es.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +""" +stream changes in mysql (on the torrents and statistics table) into +elasticsearch as they happen on the binlog. This keeps elasticsearch in sync +with whatever you do to the database, including stuff like admin queries. Also, +because mysql keeps the binlog around for N days before deleting old stuff, you +can survive a hiccup of elasticsearch or this script dying and pick up where +you left off. + +For that "picking up" part, this script depends on one piece of external state: +its last known binlog filename and position. This is saved off as a JSON file +to a configurable location on the filesystem periodically. If the file is not +present then you can initialize it with the values from `SHOW MASTER STATUS` +from the mysql repl, which will start the sync from current state. + +In the case of catastrophic elasticsearch meltdown where you need to +reconstruct the index, you'll want to be a bit careful with coordinating +sync_es and import_to_es scripts. If you run import_to_es first then run +sync_es against SHOW MASTER STATUS, anything that changed the database between +when import_to_es and sync_es will be lost. Instead, you can run SHOW MASTER +STATUS _before_ you run import_to_es. That way you'll definitely pick up any +changes that happen while the import_to_es script is dumping stuff from the +database into es, at the expense of redoing a (small) amount of indexing. 
+""" +from elasticsearch import Elasticsearch +from elasticsearch.helpers import bulk +from pymysqlreplication import BinLogStreamReader +from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent +from datetime import datetime +from nyaa.models import TorrentFlags +import sys +import json +import time +import logging + +logging.basicConfig() + +log = logging.getLogger('sync_es') +log.setLevel(logging.INFO) + +#logging.getLogger('elasticsearch').setLevel(logging.DEBUG) + +# in prod want in /var/lib somewhere probably +SAVE_LOC = "/var/lib/sync_es_position.json" +MYSQL_HOST = '127.0.0.1' +MYSQL_PORT = 3306 +MYSQL_USER = 'test' +MYSQL_PW = 'test123' +NT_DB = 'nyaav2' + +with open(SAVE_LOC) as f: + pos = json.load(f) + +es = Elasticsearch(timeout=30) + +stream = BinLogStreamReader( + # TODO parse out from config.py or something + connection_settings = { + 'host': MYSQL_HOST, + 'port': MYSQL_PORT, + 'user': MYSQL_USER, + 'passwd': MYSQL_PW + }, + server_id=10, # arbitrary + # only care about this database currently + only_schemas=[NT_DB], + # these tables in the database + only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"], + # from our save file + resume_stream=True, + log_file=pos['log_file'], + log_pos=pos['log_pos'], + # skip the other stuff like table mapping + only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent], + # if we're at the head of the log, block until something happens + # note it'd be nice to block async-style instead, but the mainline + # binlogreader is synchronous. there is an (unmaintained?) fork + # using aiomysql if anybody wants to revive that. + blocking=True) + +def reindex_torrent(t, index_name): + # XXX annoyingly different from import_to_es, and + # you need to keep them in sync manually. 
+ f = t['flags'] + doc = { + "id": t['id'], + "display_name": t['display_name'], + "created_time": t['created_time'], + "updated_time": t['updated_time'], + "description": t['description'], + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t['info_hash'].hex(), + "filesize": t['filesize'], + "uploader_id": t['uploader_id'], + "main_category_id": t['main_category_id'], + "sub_category_id": t['sub_category_id'], + # XXX all the bitflags are numbers + "anonymous": bool(f & TorrentFlags.ANONYMOUS), + "trusted": bool(f & TorrentFlags.TRUSTED), + "remake": bool(f & TorrentFlags.REMAKE), + "complete": bool(f & TorrentFlags.COMPLETE), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. + "hidden": bool(f & TorrentFlags.HIDDEN), + "deleted": bool(f & TorrentFlags.DELETED), + "has_torrent": bool(t['has_torrent']), + } + # update, so we don't delete the stats if present + return { + '_op_type': 'update', + '_index': index_name, + '_type': 'torrent', + '_id': str(t['id']), + "doc": doc, + "doc_as_upsert": True + } + +def reindex_stats(s, index_name): + # update the torrent at torrent_id, assumed to exist; + # this will always be the case if you're reading the binlog + # in order; the foreign key constraint on torrent_id prevents + # the stats row from existing if the torrent isn't around. 
+ return { + '_op_type': 'update', + '_index': index_name, + '_type': 'torrent', + '_id': str(s['torrent_id']), + "doc": { + "stats_last_updated": s["last_updated"], + "download_count": s["download_count"], + "leech_count": s['leech_count'], + "seed_count": s['seed_count'], + }} + +def delet_this(row, index_name): + return { + "_op_type": 'delete', + '_index': index_name, + '_type': 'torrent', + '_id': str(row['values']['id'])} + +n = 0 +last_save = time.time() + +for event in stream: + if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" + if type(event) is WriteRowsEvent: + bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows)) + elif type(event) is UpdateRowsEvent: + # UpdateRowsEvent includes the old values too, but we don't care + bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows)) + elif type(event) is DeleteRowsEvent: + # ok, bye + bulk(es, (delet_this(row, index_name) for row in event.rows)) + else: + raise Exception(f"unknown event {type(event)}") + elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics": + if event.table == "nyaa_statistics": + index_name = "nyaa" + else: + index_name = "sukebei" + if type(event) is WriteRowsEvent: + bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows)) + elif type(event) is UpdateRowsEvent: + bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows)) + elif type(event) is DeleteRowsEvent: + # uh ok. 
assume that the torrent row will get deleted later, + # which will clean up the entire es "torrent" document + pass + else: + raise Exception(f"unknown event {type(event)}") + else: + raise Exception(f"unknown table {event.table}") + + n += 1 + if n % 100 == 0 or time.time() - last_save > 30: + log.info(f"saving position {stream.log_file}/{stream.log_pos}") + with open(SAVE_LOC, 'w') as f: + json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f) + # position just saved: restart the 30s timer (otherwise every event after the first 30s forces a save) + last_save = time.time() diff --git a/trackers.txt b/trackers.txt index 6fa0310..de0d240 100644 --- a/trackers.txt +++ b/trackers.txt @@ -1,9 +1,13 @@ -udp://tracker.internetwarriors.net:1337/announce -udp://tracker.leechers-paradise.org:6969/announce -udp://tracker.coppersurfer.tk:6969/announce -udp://exodus.desync.com:6969/announce -udp://tracker.sktorrent.net:6969/announce -udp://tracker.zer0day.to:1337/announce -udp://tracker.pirateparty.gr:6969/announce udp://oscar.reyesleon.xyz:6969/announce +udp://tracker.cyberia.is:6969/announce +udp://tracker.doko.moe:6969 +http://tracker.baka-sub.cf:80/announce +udp://tracker.coppersurfer.tk:6969/announce +udp://tracker.torrent.eu.org:451 udp://tracker.opentrackr.org:1337/announce +udp://tracker.zer0day.to:1337/announce +http://t.nyaatracker.com:80/announce +https://open.kickasstracker.com:443/announce +udp://tracker.safe.moe:6969/announce +udp://p4p.arenabg.ch:1337/announce +udp://tracker.justseed.it:1337/announce diff --git a/api_uploader.py b/utils/api_uploader.py similarity index 100% rename from api_uploader.py rename to utils/api_uploader.py diff --git a/batch_upload_torrent.sh b/utils/batch_upload_torrent.sh similarity index 100% rename from batch_upload_torrent.sh rename to utils/batch_upload_torrent.sh