From 642d7bd9ec65046479e9e285ada73c71951b7de8 Mon Sep 17 00:00:00 2001 From: wranai Date: Sat, 13 May 2017 13:54:42 +0200 Subject: [PATCH 01/68] 95% trackers from newtrackon.com I looked through the trackers on newtrackon.com. These are trackers with great uptime (mostly better than 99%), but also preferring UDP, IPv6, and pre-existence in the magnet links from different Nyaa mirrors. --- trackers.txt | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/trackers.txt b/trackers.txt index 6fa0310..3f194e4 100644 --- a/trackers.txt +++ b/trackers.txt @@ -1,9 +1,14 @@ -udp://tracker.internetwarriors.net:1337/announce -udp://tracker.leechers-paradise.org:6969/announce -udp://tracker.coppersurfer.tk:6969/announce -udp://exodus.desync.com:6969/announce -udp://tracker.sktorrent.net:6969/announce -udp://tracker.zer0day.to:1337/announce -udp://tracker.pirateparty.gr:6969/announce udp://oscar.reyesleon.xyz:6969/announce +udp://tracker.cyberia.is:6969/announce +udp://tracker.doko.moe:6969 +http://tracker.baka-sub.cf:80/announce +udp://tracker.coppersurfer.tk:6969/announce +udp://tracker.torrent.eu.org:451 udp://tracker.opentrackr.org:1337/announce +udp://explodie.org:6969 +udp://tracker.zer0day.to:1337/announce +http://t.nyaatracker.com:80/announce +https://open.kickasstracker.com:443/announce +udp://tracker.safe.moe:6969/announce +udp://p4p.arenabg.ch:1337/announce +udp://tracker.justseed.it:1337/announce From 17217d9427c33ad139dc40423119b186bcdf2a72 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 00:48:17 -0600 Subject: [PATCH 02/68] WIP es stuff --- import_to_es.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 import_to_es.py diff --git a/import_to_es.py b/import_to_es.py new file mode 100644 index 0000000..4be5e2b --- /dev/null +++ b/import_to_es.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +""" +Bulk load torents from mysql into elasticsearch `nyaav2` index, +which is 
assumed to already exist. +This is a one-shot deal, so you'd either need to complement it +with a cron job or some binlog-reading thing (TODO) +""" +from nyaa.models import Torrent +from elasticsearch import Elasticsearch +from elasticsearch import helpers +import progressbar +import sys + +bar = progressbar.ProgressBar( + max_value=Torrent.query.count(), + widgets=[ + progressbar.SimpleProgress(), + ' [', progressbar.Timer(), '] ', + progressbar.Bar(), + ' (', progressbar.ETA(), ') ', + ]) + +es = Elasticsearch() + +# turn into thing that elasticsearch indexes. We flatten in +# the stats (seeders/leechers) so we can order by them in es naturally. +# we _don't_ dereference uploader_id to the user's display name however, +# instead doing that at query time. I _think_ this is right because +# we don't want to reindex all the user's torrents just because they +# changed their name, and we don't really want to FTS search on the user anyway. +# Maybe it's more convenient to derefence though. +def mk_es(t): + return { + "_id": t.id, + "_type": "torrent", + "_index": "nyaav2", + "_source": { + "display_name": t.display_name, + "created_time": t.created_time, + "updated_time": t.updated_time, + "description": t.description, + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t.info_hash.hex(), + "filesize": t.filesize, + "uploader_id": t.uploader_id, + "main_category_id": t.main_category_id, + "sub_category_id": t.sub_category_id, + # XXX all the bitflags are numbers + "anonymous": bool(t.anonymous), + "trusted": bool(t.trusted), + "remake": bool(t.remake), + "complete": bool(t.complete), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. 
+ "hidden": bool(t.hidden), + "deleted": bool(t.deleted), + "has_torrent": t.has_torrent, + # XXX last_updated isn't initialized + "stats_last_updated": t.stats.last_updated or t.created_time, + "download_count": t.stats.download_count, + "leech_count": t.stats.leech_count, + "seed_count": t.stats.seed_count, + } + } + +# page through an sqlalchemy query, like the per_fetch but +# doesn't break the eager joins its doing against the stats table. +# annoying that this isn't built in somehow. +def page_query(query, limit=sys.maxsize, batch_size=10000): + start = 0 + while True: + # XXX very inelegant way to do this, i'm confus + stop = min(limit, start + batch_size) + if stop == start: + break + things = query.slice(start, stop) + if not things: + break + had_things = False + for thing in things: + had_things = True + yield(thing) + if not had_things or stop == limit: + break + bar.update(start) + start = min(limit, start + batch_size) + +helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000) From d7d24ef49efe7195e833e27bea86169a42692950 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 00:55:08 -0600 Subject: [PATCH 03/68] update requirements.txt has ipython stuck in there too, oh well --- requirements.txt | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 224866b..dbf234d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,11 +24,33 @@ pycodestyle==2.3.1 pycparser==2.17 pyparsing==2.2.0 six==1.10.0 -SQLAlchemy>=1.1.9 +SQLAlchemy==1.1.9 SQLAlchemy-FullText-Search==0.2.3 -SQLAlchemy-Utils>=0.32.14 +SQLAlchemy-Utils==0.32.14 uWSGI==2.0.15 visitor==0.1.3 webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 +## The following requirements were added by pip freeze: +decorator==4.0.11 +elasticsearch==5.3.0 +elasticsearch-dsl==5.2.0 +ipython==6.0.0 +ipython-genutils==0.2.0 +jedi==0.10.2 +mysql-replication==0.13 +pexpect==4.2.1 +pickleshare==0.7.4 
+pkg-resources==0.0.0 +progressbar2==3.20.0 +prompt-toolkit==1.0.14 +ptyprocess==0.5.1 +Pygments==2.2.0 +PyMySQL==0.7.11 +python-dateutil==2.6.0 +python-utils==2.1.0 +simplegeneric==0.8.1 +traitlets==4.3.2 +urllib3==1.21.1 +wcwidth==0.1.7 From 3cbe2e4221e0c66105ef0d72221a148f78007d8c Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 02:01:26 -0600 Subject: [PATCH 04/68] WIP hack in es as the provider for search results real sketch. lots of stuff is still broken. But! you can make elasticsearch q= style queries and it shows up properly. only first page works; need to adapt pager to elasticsearch's "total-hits" thing. --- import_to_es.py | 4 +++ nyaa/routes.py | 44 ++++++++++++++++++++++++++++-- nyaa/templates/search_results.html | 20 ++++++++------ 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/import_to_es.py b/import_to_es.py index 4be5e2b..e714da5 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -35,6 +35,10 @@ def mk_es(t): "_type": "torrent", "_index": "nyaav2", "_source": { + # we're also indexing the id as a number so you can + # order by it. 
seems like this is just equivalent to + # order by created_time, but oh well + "id": t.id, "display_name": t.display_name, "created_time": t.created_time, "updated_time": t.updated_time, diff --git a/nyaa/routes.py b/nyaa/routes.py index 4064b15..2941e5b 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -27,6 +27,11 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import formatdate +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q + +es_client = Elasticsearch() + DEBUG_API = False @@ -67,6 +72,16 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ sort_ = sort.lower() if sort_ not in sort_keys: flask.abort(400) + + # XXX gross why are all the names subtly different + es_sort = ({ + 'id': 'id', + 'size': 'filesize', + 'name': 'display_name', + 'seeders': 'seed_count', + 'leechers': 'leech_count', + 'downloads': 'download_count' + })[sort] sort = sort_keys[sort] order_keys = { @@ -78,6 +93,10 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if order_ not in order_keys: flask.abort(400) + # funky, es sort is default asc, prefixed by '-' if desc + if "desc" == order: + es_sort = "-" + es_sort + filter_keys = { '0': None, '1': (models.TorrentFlags.REMAKE, False), @@ -126,28 +145,37 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if flask.g.user: same_user = flask.g.user.id == user + s = Search(using=es_client, index='nyaav2') if term: query = db.session.query(models.TorrentNameSearch) + s = s.query("query_string", default_field="display_name", default_operator="AND", query=term) else: query = models.Torrent.query # Filter by user if user: + s = s.filter("term", uploader_id=user) + query = query.filter(models.Torrent.uploader_id == user) # If admin, show everything if not admin: # If user is not logged in or the accessed feed doesn't belong to user, # hide anonymous torrents 
belonging to the queried user if not same_user: + # TODO adapt to es syntax query = query.filter(models.Torrent.flags.op('&')( int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False)) if main_category: + s = s.filter("term", main_category_id=main_cat_id) query = query.filter(models.Torrent.main_category_id == main_cat_id) elif sub_category: + s = s.filter("term", main_category_id=main_cat_id) + s = s.filter("term", sub_category_id=sub_cat_id) query = query.filter((models.Torrent.main_category_id == main_cat_id) & (models.Torrent.sub_category_id == sub_cat_id)) + # TODO i dunno what this means in es if filter_tuple: query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) @@ -157,6 +185,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False)) if term: + # note already handled in es for item in shlex.split(term, posix=False): if len(item) >= 2: query = query.filter(FullTextSearch( @@ -166,14 +195,22 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if sort.class_ != models.Torrent: query = query.join(sort.class_) + s = s.sort(es_sort) query = query.order_by(getattr(sort, order)()) + per = app.config['RESULTS_PER_PAGE'] if rss: - query = query.limit(app.config['RESULTS_PER_PAGE']) + pass + #query = query.limit(app.config['RESULTS_PER_PAGE']) else: - query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) + # page is 1-based? 
+ s = s[(page-1)*per:page*per] + #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) - return query + #return query + from pprint import pprint + print(json.dumps(s.to_dict())) + return s.execute() @app.errorhandler(404) @@ -445,6 +482,7 @@ def activate_user(payload): user.status = models.UserStatusType.ACTIVE + db.session.add(user) db.session.commit() diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 95d18d5..4bce368 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -8,7 +8,7 @@ {{ caller() }} {% endmacro %} -{% if torrent_query.items %} +{% if torrent_query.hits.total > 0 %}
@@ -45,26 +45,26 @@ - {% for torrent in torrent_query.items %} + {% for torrent in torrent_query %} - {% set cat_id = (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} + {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %} {% set icon_dir = config.SITE_FLAVOR %} - + - + {% if config.ENABLE_SHOW_STATS %} - - - + + + {% endif %} {% endfor %} @@ -75,7 +75,9 @@

No results found

{% endif %} +{#
{% from "bootstrap/pagination.html" import render_pagination %} {{ render_pagination(torrent_query) }}
+#} From 8c951210d497e9ed237457e2474c31e392d1891a Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 02:19:42 -0600 Subject: [PATCH 05/68] es: implement highlighting no apologies for styling. somebody else with opinions will fix it later. --- nyaa/routes.py | 3 +++ nyaa/static/css/main.css | 12 +++++++++++- nyaa/templates/search_results.html | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 2941e5b..3e87a2a 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -207,6 +207,9 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ s = s[(page-1)*per:page*per] #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) + s = s.highlight_options(tags_schema='styled') + s = s.highlight("display_name") + #return query from pprint import pprint print(json.dumps(s.to_dict())) diff --git a/nyaa/static/css/main.css b/nyaa/static/css/main.css index ee01f9b..f7234ec 100644 --- a/nyaa/static/css/main.css +++ b/nyaa/static/css/main.css @@ -92,4 +92,14 @@ table.torrent-list thead th.sorting_desc:after { margin-left: 20px; margin-bottom: 10px; } -} \ No newline at end of file +} + +/* elasticsearch term highlight */ +.hlt1 { + font-style: normal; + display: inline-block; + padding: 0 3px; + border-radius: 3px; + border: 1px solid rgba(100, 56, 0, 0.8); + background: rgba(200,127,0,0.3); +} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 4bce368..7657bb1 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -54,7 +54,7 @@ - + - +
- + {{ torrent.display_name | escape }}{{ torrent.display_name | escape }} {% if torrent.has_torrent %}{% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}{{ torrent.created_time }}{{ torrent.stats.seed_count }}{{ torrent.stats.leech_count }}{{ torrent.stats.download_count }}{{ torrent.seed_count }}{{ torrent.leech_count }}{{ torrent.download_count }}
{{ torrent.display_name | escape }}{{ torrent.meta.highlight.display_name[0] | safe }} {% if torrent.has_torrent %}{% endif %} From 7fd7d30722b0a0cd4bcdd1694b9fa16d310fd694 Mon Sep 17 00:00:00 2001 From: JSFernandes Date: Sun, 14 May 2017 15:24:20 +0100 Subject: [PATCH 06/68] Remove sourceMappingURL from bootstrap-dark.min.css Some browsers, such as Firefox 53.0 try to download the specified file, to improve development experience. Since we don't host this file, some users were making a request resulting in 404 when loading the page in dark mode. --- nyaa/static/css/bootstrap-dark.min.css | Bin 122430 -> 122385 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/nyaa/static/css/bootstrap-dark.min.css b/nyaa/static/css/bootstrap-dark.min.css index be99bb5563a9d49efdcb522b72fe11eb2f15f2dd..4b9a19f9566b573751844300afec17ebf43ed975 100644 GIT binary patch delta 11 ScmdnDhkfE6_J%Etg7*L)^#u9= delta 57 zcmbQZhkf53_J%Etg7 Date: Mon, 15 May 2017 02:29:25 +0200 Subject: [PATCH 07/68] removed explodie as suggested supposedly, it's banned on tokyotosho --- trackers.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/trackers.txt b/trackers.txt index 3f194e4..de0d240 100644 --- a/trackers.txt +++ b/trackers.txt @@ -5,7 +5,6 @@ http://tracker.baka-sub.cf:80/announce udp://tracker.coppersurfer.tk:6969/announce udp://tracker.torrent.eu.org:451 udp://tracker.opentrackr.org:1337/announce -udp://explodie.org:6969 udp://tracker.zer0day.to:1337/announce http://t.nyaatracker.com:80/announce https://open.kickasstracker.com:443/announce From 85ba16545f486f879e84f01dc5003c2f6c3c1857 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 22:26:44 -0600 Subject: [PATCH 08/68] es: fix highlighting without query term I like highlighting. 
--- nyaa/templates/search_results.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 23f9a40..8d6f9da 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -54,7 +54,7 @@ {{ torrent.meta.highlight.display_name[0] | safe }}{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%} {% if torrent.has_torrent %}{% endif %} From 32b9170a81b14541318842481f0d62bdfddadc20 Mon Sep 17 00:00:00 2001 From: queue Date: Mon, 15 May 2017 01:32:56 -0600 Subject: [PATCH 09/68] es: add sync_es script for binlog maintenance lightly documented. --- sync_es.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 sync_es.py diff --git a/sync_es.py b/sync_es.py new file mode 100644 index 0000000..81ad17f --- /dev/null +++ b/sync_es.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +""" +stream changes in mysql (on the torrents and statistics table) into +elasticsearch as they happen on the binlog. This keeps elasticsearch in sync +with whatever you do to the database, including stuff like admin queries. Also, +because mysql keeps the binlog around for N days before deleting old stuff, you +can survive a hiccup of elasticsearch or this script dying and pick up where +you left off. + +For that "picking up" part, this script depends on one piece of external state: +its last known binlog filename and position. This is saved off as a JSON file +to a configurable location on the filesystem periodically. If the file is not +present then you can initialize it with the values from `SHOW MASTER STATUS` +from the mysql repl, which will start the sync from current state. 
+ +In the case of catastrophic elasticsearch meltdown where you need to +reconstruct the index, you'll want to be a bit careful with coordinating +sync_es and import_to_es scripts. If you run import_to_es first then run +sync_es against SHOW MASTER STATUS, anything that changed the database between +when import_to_es ran and sync_es started will be lost. Instead, you can run SHOW MASTER +STATUS _before_ you run import_to_es. That way you'll definitely pick up any +changes that happen while the import_to_es script is dumping stuff from the +database into es, at the expense of redoing a (small) amount of indexing. +""" +from elasticsearch import Elasticsearch +from pymysqlreplication import BinLogStreamReader +from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent +from datetime import datetime +from nyaa.models import TorrentFlags +import sys +import json +import time +import logging + +logging.basicConfig() + +log = logging.getLogger('sync_es') +log.setLevel(logging.INFO) + +#logging.getLogger('elasticsearch').setLevel(logging.DEBUG) + +# in prod want in /var/lib somewhere probably +SAVE_LOC = "/tmp/sync_es_position.json" + +with open(SAVE_LOC) as f: + pos = json.load(f) + +es = Elasticsearch() + +stream = BinLogStreamReader( + # TODO parse out from config.py or something + connection_settings = { + 'host': '127.0.0.1', + 'port': 13306, + 'user': 'root', + 'passwd': 'dunnolol' + }, + server_id=10, # arbitrary + # only care about this table currently + only_schemas=["nyaav2"], + # TODO sukebei + only_tables=["nyaa_torrents", "nyaa_statistics"], + # from our save file + resume_stream=True, + log_file=pos['log_file'], + log_pos=pos['log_pos'], + # skip the other stuff like table mapping + only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent], + # if we're at the head of the log, block until something happens + # note it'd be nice to block async-style instead, but the mainline + # binlogreader is synchronous. there is an (unmaintained?) 
fork + # using aiomysql if anybody wants to revive that. + blocking=True) + +def reindex_torrent(t): + # XXX annoyingly different from import_to_es, and + # you need to keep them in sync manually. + f = t['flags'] + doc = { + "id": t['id'], + "display_name": t['display_name'], + "created_time": t['created_time'], + "updated_time": t['updated_time'], + "description": t['description'], + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t['info_hash'].hex(), + "filesize": t['filesize'], + "uploader_id": t['uploader_id'], + "main_category_id": t['main_category_id'], + "sub_category_id": t['sub_category_id'], + # XXX all the bitflags are numbers + "anonymous": bool(f & TorrentFlags.ANONYMOUS), + "trusted": bool(f & TorrentFlags.TRUSTED), + "remake": bool(f & TorrentFlags.REMAKE), + "complete": bool(f & TorrentFlags.COMPLETE), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. 
+ "hidden": bool(f & TorrentFlags.HIDDEN), + "deleted": bool(f & TorrentFlags.DELETED), + "has_torrent": bool(t['has_torrent']), + } + # update, so we don't delete the stats if present + es.update( + index='nyaav2', + doc_type='torrent', + id=t['id'], + body={"doc": doc, "doc_as_upsert": True}) + +def reindex_stats(s): + es.update( + index='nyaav2', + doc_type='torrent', + id=s['torrent_id'], + body={ + "doc": { + "stats_last_updated": s["last_updated"], + "download_count": s["download_count"], + "leech_count": s['leech_count'], + "seed_count": s['seed_count'], + }}) + +n = 0 +last_save = time.time() + +for event in stream: + for row in event.rows: + if event.table == "nyaa_torrents": + if type(event) is WriteRowsEvent: + reindex_torrent(row['values']) + elif type(event) is UpdateRowsEvent: + reindex_torrent(row['after_values']) + elif type(event) is DeleteRowsEvent: + # just delete it + es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id']) + else: + raise Exception(f"unknown event {type(event)}") + elif event.table == "nyaa_statistics": + if type(event) is WriteRowsEvent: + reindex_stats(row['values']) + elif type(event) is UpdateRowsEvent: + reindex_stats(row['after_values']) + elif type(event) is DeleteRowsEvent: + # uh ok. assume that the torrent row will get deleted later. 
+ pass + else: + raise Exception(f"unknown event {type(event)}") + else: + raise Exception(f"unknown table {s.table}") + n += 1 + if n % 100 == 0 or time.time() - last_save > 30: + log.info(f"saving position {stream.log_file}/{stream.log_pos}") + with open(SAVE_LOC, 'w') as f: + json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f) From c2c547e7867742b06cd97c95210c864b0ed9789f Mon Sep 17 00:00:00 2001 From: aldacron Date: Mon, 15 May 2017 11:14:01 -0700 Subject: [PATCH 10/68] some more elasticsearch work, including index mapping and analyzer --- create_es.sh | 3 ++ es_mapping.yml | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ import_to_es.py | 5 +-- nyaa/routes.py | 2 +- requirements.txt | 20 +---------- 5 files changed, 97 insertions(+), 24 deletions(-) create mode 100755 create_es.sh create mode 100644 es_mapping.yml diff --git a/create_es.sh b/create_es.sh new file mode 100755 index 0000000..2b83620 --- /dev/null +++ b/create_es.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/es_mapping.yml b/es_mapping.yml new file mode 100644 index 0000000..9085ec2 --- /dev/null +++ b/es_mapping.yml @@ -0,0 +1,91 @@ +--- +# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml +# for inline comments. +settings: + analysis: + analyzer: + my_search_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - standard + - lowercase + my_index_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - lowercase + - my_ngram + filter: + my_ngram: + type: edgeNGram + min_gram: 1 + max_gram: 15 + char_filter: + my_char_filter: + type: mapping + mappings: ["-=>_", "!=>_"] + index: + # we're running a single es node, so no sharding necessary, + # plus replicas don't really help either. 
+ number_of_shards: 1 + number_of_replicas : 0 + mapper: + # disable elasticsearch's "helpful" autoschema + dynamic: false + # since we disabled the _all field, default query the + # name of the torrent. + query: + default_field: display_name +mappings: + torrent: + # don't want everything concatenated + _all: + enabled: false + properties: + id: + type: long + display_name: + # TODO could do a fancier tokenizer here to parse out the + # the scene convention of stuff in brackets, plus stuff like k-on + type: text + analyzer: my_index_analyzer + fielddata: true + created_time: + type: date + # Only in the ES index for generating magnet links + info_hash: + enabled: false + filesize: + type: long + anonymous: + type: boolean + trusted: + type: boolean + remake: + type: boolean + complete: + type: boolean + hidden: + type: boolean + deleted: + type: boolean + has_torrent: + type: boolean + download_count: + type: long + leech_count: + type: long + seed_count: + type: long + # these ids are really only for filtering, thus keyword + uploader_id: + type: keyword + main_category_id: + type: keyword + sub_category_id: + type: keyword \ No newline at end of file diff --git a/import_to_es.py b/import_to_es.py index e714da5..046bde6 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -41,8 +41,6 @@ def mk_es(t): "id": t.id, "display_name": t.display_name, "created_time": t.created_time, - "updated_time": t.updated_time, - "description": t.description, # not analyzed but included so we can render magnet links # without querying sql again. 
"info_hash": t.info_hash.hex(), @@ -61,8 +59,7 @@ def mk_es(t): "hidden": bool(t.hidden), "deleted": bool(t.deleted), "has_torrent": t.has_torrent, - # XXX last_updated isn't initialized - "stats_last_updated": t.stats.last_updated or t.created_time, + # Stats "download_count": t.stats.download_count, "leech_count": t.stats.leech_count, "seed_count": t.stats.seed_count, diff --git a/nyaa/routes.py b/nyaa/routes.py index 3e87a2a..758635a 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -148,7 +148,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ s = Search(using=es_client, index='nyaav2') if term: query = db.session.query(models.TorrentNameSearch) - s = s.query("query_string", default_field="display_name", default_operator="AND", query=term) + s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term) else: query = models.Torrent.query diff --git a/requirements.txt b/requirements.txt index dbf234d..af89eab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,24 +33,6 @@ webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 ## The following requirements were added by pip freeze: -decorator==4.0.11 elasticsearch==5.3.0 elasticsearch-dsl==5.2.0 -ipython==6.0.0 -ipython-genutils==0.2.0 -jedi==0.10.2 -mysql-replication==0.13 -pexpect==4.2.1 -pickleshare==0.7.4 -pkg-resources==0.0.0 -progressbar2==3.20.0 -prompt-toolkit==1.0.14 -ptyprocess==0.5.1 -Pygments==2.2.0 -PyMySQL==0.7.11 -python-dateutil==2.6.0 -python-utils==2.1.0 -simplegeneric==0.8.1 -traitlets==4.3.2 -urllib3==1.21.1 -wcwidth==0.1.7 +progressbar2==3.20.0 \ No newline at end of file From 899aa01473654e6ed85698f23df29c2e8081521b Mon Sep 17 00:00:00 2001 From: aldacron Date: Mon, 15 May 2017 23:51:58 -0700 Subject: [PATCH 11/68] hooked up ES... 
90% done, need to figure out how to generate magnet URIs --- README.md | 38 ++++ config.example.py | 13 +- create_es.sh | 4 +- import_to_es.py | 3 +- my.cnf | 6 + nyaa/routes.py | 322 ++++++++++++----------------- nyaa/search.py | 317 ++++++++++++++++++++++++++++ nyaa/templates/rss.xml | 14 +- nyaa/templates/search_results.html | 35 +++- requirements.txt | 6 +- sync_es.py | 53 +++-- 11 files changed, 585 insertions(+), 226 deletions(-) create mode 100644 nyaa/search.py diff --git a/README.md b/README.md index 5ec0077..632ce73 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,43 @@ - Start the dev server with `python run.py` - Deactivate `source deactivate` +# Enabling ElasticSearch + +## Basics +- Install jdk `sudo apt-get install openjdk-8-jdk` +- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html +- `sudo systemctl enable elasticsearch.service` +- `sudo systemctl start elasticsearch.service` +- Run `curl -XGET 'localhost:9200'` and make sure ES is running +- Optional: install Kibana as a search frontend for ES + +## Enable MySQL Binlogging +- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server +- Connect to mysql +- `SHOW VARIABLES LIKE 'binlog_format';` + - Make sure it shows ROW +- Connect to root user +- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with + +## Setting up ES +- Run `./create_es.sh` and this creates two indices: `nyaa` and `sukebei` +- The output should show `acknowledged: true` twice +- The safest bet is to disable the webapp here to ensure there's no database writes +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa` +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei` +- These will take some time to run as it's indexing + +## Setting up sync_es.py +- Sync_es.py keeps the ElasticSearch index updated by reading the BinLog +- Configure the MySQL options with the user where you granted the 
REPLICATION permissions +- Connect to MySQL, run `SHOW MASTER STATUS;`. +- Create `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILENAME", "log_pos": POSITION}`, replacing FILENAME with the File value from the SQL output (something like master1-bin.000002) and POSITION with the Position value (something like 892528513) +- Set up `sync_es.py` as a service and run it, preferably as the system/root +- Make sure `sync_es.py` runs within venv with the right dependencies + +## Good to go! +- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go + + ## Code Quality: - Remember to follow PEP8 style guidelines and run `./lint.sh` before committing. diff --git a/config.example.py b/config.example.py index f34c554..73702b9 100644 --- a/config.example.py +++ b/config.example.py @@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***' SMTP_USERNAME = '***' SMTP_PASSWORD = '***' -RESULTS_PER_PAGE = 75 - # What the site identifies itself as. SITE_NAME = 'Nyaa' @@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False MAIN_ANNOUNCE_URL = '' BACKUP_TORRENT_FOLDER = 'torrents' + +# +# Search Options +# +# Max ES search results, do not set over 10000 +RESULTS_PER_PAGE = 75 + +USE_ELASTIC_SEARCH = False +ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False +ES_MAX_SEARCH_RESULT = 1000 +ES_INDEX_NAME = SITE_FLAVOR # we create indices named nyaa or sukebei \ No newline at end of file diff --git a/create_es.sh b/create_es.sh index 2b83620..5b0c564 100755 --- a/create_es.sh +++ b/create_es.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash -curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml +# create indices named "nyaa" and "sukebei", these are hardcoded +curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml +curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/import_to_es.py b/import_to_es.py index 
046bde6..886211f 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -5,6 +5,7 @@ which is assumed to already exist. This is a one-shot deal, so you'd either need to complement it with a cron job or some binlog-reading thing (TODO) """ +from nyaa import app from nyaa.models import Torrent from elasticsearch import Elasticsearch from elasticsearch import helpers @@ -33,7 +34,7 @@ def mk_es(t): return { "_id": t.id, "_type": "torrent", - "_index": "nyaav2", + "_index": app.config['ES_INDEX_NAME'], "_source": { # we're also indexing the id as a number so you can # order by it. seems like this is just equivalent to diff --git a/my.cnf b/my.cnf index 657a8f6..d586484 100644 --- a/my.cnf +++ b/my.cnf @@ -4,3 +4,9 @@ ft_min_word_len=2 innodb_ft_cache_size = 80000000 innodb_ft_total_cache_size = 1600000000 max_allowed_packet = 100M + +[mariadb] +log-bin +server_id=1 +log-basename=master1 +binlog-format = row diff --git a/nyaa/routes.py b/nyaa/routes.py index 758635a..5fbcb5c 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -6,18 +6,16 @@ from nyaa import bencode, utils from nyaa import torrents from nyaa import backend from nyaa import api_handler +from nyaa.search import search_elastic, search_db import config import json -import re from datetime import datetime, timedelta import ipaddress import os.path import base64 from urllib.parse import quote -import sqlalchemy_fulltext.modes as FullTextMode -from sqlalchemy_fulltext import FullTextSearch -import shlex +import math from werkzeug import url_encode from itsdangerous import URLSafeSerializer, BadSignature @@ -27,12 +25,14 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import formatdate -from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search, Q +from flask_paginate import Pagination -es_client = Elasticsearch() DEBUG_API = False +DEFAULT_MAX_SEARCH_RESULT = 1000 +DEFAULT_PER_PAGE = 75 +SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results 
{start}-{end} out of {total} results.
+ Please refine your search results if you can't find what you were looking for.''' def redirect_url(): @@ -53,168 +53,13 @@ def modify_query(**new_values): return '{}?{}'.format(flask.request.path, url_encode(args)) + @app.template_global() def filter_truthy(input_list): ''' Jinja2 can't into list comprehension so this is for the search_results.html template ''' return [item for item in input_list if item] -def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False): - sort_keys = { - 'id': models.Torrent.id, - 'size': models.Torrent.filesize, - 'name': models.Torrent.display_name, - 'seeders': models.Statistic.seed_count, - 'leechers': models.Statistic.leech_count, - 'downloads': models.Statistic.download_count - } - - sort_ = sort.lower() - if sort_ not in sort_keys: - flask.abort(400) - - # XXX gross why are all the names subtly different - es_sort = ({ - 'id': 'id', - 'size': 'filesize', - 'name': 'display_name', - 'seeders': 'seed_count', - 'leechers': 'leech_count', - 'downloads': 'download_count' - })[sort] - sort = sort_keys[sort] - - order_keys = { - 'desc': 'desc', - 'asc': 'asc' - } - - order_ = order.lower() - if order_ not in order_keys: - flask.abort(400) - - # funky, es sort is default asc, prefixed by '-' if desc - if "desc" == order: - es_sort = "-" + es_sort - - filter_keys = { - '0': None, - '1': (models.TorrentFlags.REMAKE, False), - '2': (models.TorrentFlags.TRUSTED, True), - '3': (models.TorrentFlags.COMPLETE, True) - } - - sentinel = object() - filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) - if filter_tuple is sentinel: - flask.abort(400) - - if user: - user = models.User.by_id(user) - if not user: - flask.abort(404) - user = user.id - - main_category = None - sub_category = None - main_cat_id = 0 - sub_cat_id = 0 - if category: - cat_match = re.match(r'^(\d+)_(\d+)$', category) - if not cat_match: - flask.abort(400) - - main_cat_id = int(cat_match.group(1)) - 
sub_cat_id = int(cat_match.group(2)) - - if main_cat_id > 0: - if sub_cat_id > 0: - sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) - else: - main_category = models.MainCategory.by_id(main_cat_id) - - if not category: - flask.abort(400) - - # Force sort by id desc if rss - if rss: - sort = sort_keys['id'] - order = 'desc' - - same_user = False - if flask.g.user: - same_user = flask.g.user.id == user - - s = Search(using=es_client, index='nyaav2') - if term: - query = db.session.query(models.TorrentNameSearch) - s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term) - else: - query = models.Torrent.query - - # Filter by user - if user: - s = s.filter("term", uploader_id=user) - - query = query.filter(models.Torrent.uploader_id == user) - # If admin, show everything - if not admin: - # If user is not logged in or the accessed feed doesn't belong to user, - # hide anonymous torrents belonging to the queried user - if not same_user: - # TODO adapt to es syntax - query = query.filter(models.Torrent.flags.op('&')( - int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False)) - - if main_category: - s = s.filter("term", main_category_id=main_cat_id) - query = query.filter(models.Torrent.main_category_id == main_cat_id) - elif sub_category: - s = s.filter("term", main_category_id=main_cat_id) - s = s.filter("term", sub_category_id=sub_cat_id) - query = query.filter((models.Torrent.main_category_id == main_cat_id) & - (models.Torrent.sub_category_id == sub_cat_id)) - - # TODO i dunno what this means in es - if filter_tuple: - query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) - - # If admin, show everything - if not admin: - query = query.filter(models.Torrent.flags.op('&')( - int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False)) - - if term: - # note already handled in es - for item in shlex.split(term, posix=False): - 
if len(item) >= 2: - query = query.filter(FullTextSearch( - item, models.TorrentNameSearch, FullTextMode.NATURAL)) - - # Sort and order - if sort.class_ != models.Torrent: - query = query.join(sort.class_) - - s = s.sort(es_sort) - query = query.order_by(getattr(sort, order)()) - - per = app.config['RESULTS_PER_PAGE'] - if rss: - pass - #query = query.limit(app.config['RESULTS_PER_PAGE']) - else: - # page is 1-based? - s = s[(page-1)*per:page*per] - #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) - - s = s.highlight_options(tags_schema='styled') - s = s.highlight("display_name") - - #return query - from pprint import pprint - print(json.dumps(s.to_dict())) - return s.execute() - @app.errorhandler(404) def not_found(error): @@ -232,7 +77,6 @@ def before_request(): flask.g.user = user if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now(): - print("hio") flask.session['timeout'] = datetime.now() + timedelta(days=7) flask.session.permanent = True flask.session.modified = True @@ -270,6 +114,10 @@ def home(rss): if page: page = int(page) + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE + user_id = None if user_name: user = models.User.by_username(user_name) @@ -278,30 +126,72 @@ def home(rss): user_id = user.id query_args = { - 'term': term or '', 'user': user_id, 'sort': sort or 'id', 'order': order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', 'page': page or 1, - 'rss': rss + 'rss': rss, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True - query = search(**query_args) + # If searching, we get results from elastic search + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term - if 
rss: - return render_rss('/', query) + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = DEFAULT_MAX_SEARCH_RESULT + + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + if rss: + return render_rss('/', query_results, use_elastic=True) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('home.html', + use_elastic=True, + pagination=pagination, + torrent_query=query_results, + search=query_args, + rss_filter=rss_query_string) else: - rss_query_string = _generate_query_string(term, category, quality_filter, user_name) - return flask.render_template('home.html', - torrent_query=query, - search=query_args, - rss_filter=rss_query_string) + # If ES is enabled, default to db search for browsing + if use_elastic: + query_args['term'] = '' + else: # Otherwise, use db search for everything + query_args['term'] = term or '' + print(query_args) + query = search_db(**query_args) + if rss: + return render_rss('/', query, use_elastic=False) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + # Use elastic is always false here because we only hit this section + # if we're browsing without a search term (which means we default to DB) + # or if ES is disabled + return flask.render_template('home.html', + use_elastic=False, + torrent_query=query, + 
search=query_args, + rss_filter=rss_query_string) @app.route('/user/') @@ -320,6 +210,10 @@ def view_user(user_name): if page: page = int(page) + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE + query_args = { 'term': term or '', 'user': user.id, @@ -328,40 +222,83 @@ def view_user(user_name): 'category': category or '0_0', 'quality_filter': quality_filter or '0', 'page': page or 1, - 'rss': False + 'rss': False, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True - - query = search(**query_args) + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True + # Use elastic search for term searching rss_query_string = _generate_query_string(term, category, quality_filter, user_name) - return flask.render_template('user.html', - torrent_query=query, - search=query_args, - user=user, - user_page=True, - rss_filter=rss_query_string) + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term + + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = DEFAULT_MAX_SEARCH_RESULT + + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('user.html', + use_elastic=True, + pagination=pagination, + torrent_query=query_results, 
+ search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string) + # Similar logic as home page + else: + if use_elastic: + query_args['term'] = '' + else: + query_args['term'] = term or '' + query = search_db(**query_args) + return flask.render_template('user.html', + use_elastic=False, + torrent_query=query, + search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string) @app.template_filter('rfc822') def _jinja2_filter_rfc822(date, fmt=None): return formatdate(float(date.strftime('%s'))) +@app.template_filter('rfc822_es') +def _jinja2_filter_rfc822(datestr, fmt=None): + return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) -def render_rss(label, query): + +def render_rss(label, query, use_elastic): + print(query) rss_xml = flask.render_template('rss.xml', + use_elastic=use_elastic, term=label, site_url=flask.request.url_root, - query=query) + torrent_query=query) response = flask.make_response(rss_xml) response.headers['Content-Type'] = 'application/xml' return response -#@app.route('/about', methods=['GET']) +# @app.route('/about', methods=['GET']) # def about(): # return flask.render_template('about.html') @@ -485,7 +422,6 @@ def activate_user(payload): user.status = models.UserStatusType.ACTIVE - db.session.add(user) db.session.commit() diff --git a/nyaa/search.py b/nyaa/search.py new file mode 100644 index 0000000..7369fd3 --- /dev/null +++ b/nyaa/search.py @@ -0,0 +1,317 @@ +import flask +import re +import math +import json +import shlex + +from nyaa import app, db +from nyaa import models + +import sqlalchemy_fulltext.modes as FullTextMode +from sqlalchemy_fulltext import FullTextSearch +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q + + +def search_elastic(term='', user=None, sort='id', order='desc', + category='0_0', quality_filter='0', page=1, + rss=False, admin=False, logged_in_user=None, + per_page=75, max_search_results=1000): + # This function 
can easily be memcached now + + es_client = Elasticsearch() + + es_sort_keys = { + 'id': 'id', + 'size': 'filesize', + # 'name': 'display_name', # This is slow and buggy + 'seeders': 'seed_count', + 'leechers': 'leech_count', + 'downloads': 'download_count' + } + + sort_ = sort.lower() + if sort_ not in es_sort_keys: + flask.abort(400) + + es_sort = es_sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' + } + + order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + # Only allow ID, desc if RSS + if rss: + sort = es_sort_keys['id'] + order = 'desc' + + # funky, es sort is default asc, prefixed by '-' if desc + if 'desc' == order: + es_sort = '-' + es_sort + + # Quality filter + quality_keys = [ + '0', # Show all + '1', # No remakes + '2', # Only trusted + '3' # Only completed + ] + + if quality_filter.lower() not in quality_keys: + flask.abort(400) + + quality_filter = int(quality_filter) + + # Category filter + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) + if not sub_category: + flask.abort(400) + else: + main_category = models.MainCategory.by_id(main_cat_id) + if not main_category: + flask.abort(400) + + # This might be useless since we validate users + # before coming into this method, but just to be safe... 
+ if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + same_user = False + if logged_in_user: + same_user = user == logged_in_user.id + + s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME')) # todo, sukebei prefix + + # Apply search term + if term: + s = s.query('simple_query_string', + analyzer='my_search_analyzer', + default_operator="AND", + query=term) + + # User view (/user/username) + if user: + s = s.filter('term', uploader_id=user) + + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in user is not the same as the user being viewed, + # show only torrents that aren't hidden or anonymous. + # + # If logged in user is the same as the user being viewed, + # show all torrents including hidden and anonymous ones. + # + # On RSS pages in user view, show only torrents that + # aren't hidden or anonymous no matter what + if not same_user or rss: + s = s.filter('term', hidden=False) + s = s.filter('term', anonymous=False) + # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. 
+ if logged_in_user and not rss: + hiddenFilter = Q('term', hidden=False) + userFilter = Q('term', uploader_id=logged_in_user.id) + combinedFilter = hiddenFilter | userFilter + s = s.filter('bool', filter=[combinedFilter]) + else: + s = s.filter('term', hidden=False) + + if main_category: + s = s.filter('term', main_category_id=main_cat_id) + elif sub_category: + s = s.filter('term', main_category_id=main_cat_id) + s = s.filter('term', sub_category_id=sub_cat_id) + + if quality_filter == 0: + pass + elif quality_filter == 1: + s = s.filter('term', remake=False) + elif quality_filter == 2: + s = s.filter('term', trusted=True) + elif quality_filter == 3: + s = s.filter('term', complete=True) + + # Apply sort + s = s.sort(es_sort) + + # Only show first RESULTS_PER_PAGE items for RSS + if rss: + s = s[0:per_page] + else: + max_page = min(page, int(math.ceil(max_search_results / float(per_page)))) + from_idx = (max_page-1)*per_page + to_idx = min(max_search_results, max_page*per_page) + s = s[from_idx:to_idx] + + highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT') + if highlight: + s = s.highlight_options(tags_schema='styled') + s = s.highlight("display_name") + + # Return query, uncomment print line to debug query + from pprint import pprint + print(json.dumps(s.to_dict())) + return s.execute() + + +def search_db(term='', user=None, sort='id', order='desc', category='0_0', + quality_filter='0', page=1, rss=False, admin=False, + logged_in_user=None, per_page=75): + sort_keys = { + 'id': models.Torrent.id, + 'size': models.Torrent.filesize, + # 'name': models.Torrent.display_name, # Disable this because we disabled this in search_elastic, for the sake of consistency + 'seeders': models.Statistic.seed_count, + 'leechers': models.Statistic.leech_count, + 'downloads': models.Statistic.download_count + } + + sort_ = sort.lower() + if sort_ not in sort_keys: + flask.abort(400) + sort = sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' + } + + 
order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + filter_keys = { + '0': None, + '1': (models.TorrentFlags.REMAKE, False), + '2': (models.TorrentFlags.TRUSTED, True), + '3': (models.TorrentFlags.COMPLETE, True) + } + + sentinel = object() + filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) + if filter_tuple is sentinel: + flask.abort(400) + + if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) + else: + main_category = models.MainCategory.by_id(main_cat_id) + + if not category: + flask.abort(400) + + # Force sort by id desc if rss + if rss: + sort = sort_keys['id'] + order = 'desc' + + same_user = False + if logged_in_user: + same_user = logged_in_user.id == user + + if term: + query = db.session.query(models.TorrentNameSearch) + else: + query = models.Torrent.query + + # User view (/user/username) + if user: + query = query.filter(models.Torrent.uploader_id == user) + + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous + # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones + # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what + if not same_user or rss: + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN | + models.TorrentFlags.ANONYMOUS)).is_(False)) 
+ # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. + if logged_in_user and not rss: + query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | + (models.Torrent.uploader_id == logged_in_user.id)) + # Otherwise, show all torrents that aren't hidden + else: + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) + + if main_category: + query = query.filter(models.Torrent.main_category_id == main_cat_id) + elif sub_category: + query = query.filter((models.Torrent.main_category_id == main_cat_id) & + (models.Torrent.sub_category_id == sub_cat_id)) + + if filter_tuple: + query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) + + if term: + for item in shlex.split(term, posix=False): + if len(item) >= 2: + query = query.filter(FullTextSearch( + item, models.TorrentNameSearch, FullTextMode.NATURAL)) + + # Sort and order + if sort.class_ != models.Torrent: + query = query.join(sort.class_) + + query = query.order_by(getattr(sort, order)()) + + if rss: + query = query.limit(per_page) + else: + query = query.paginate_faste(page, per_page=per_page, step=5) + + return query diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index 266e524..c495cda 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -4,20 +4,32 @@ RSS Feed for {{ term }} {{ url_for('home', _external=True) }} - {% for torrent in query %} + {% for torrent in torrent_query %} {% if torrent.has_torrent %} {{ torrent.display_name }} + {% if use_elastic %} + {{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ url_for('view_torrent', 
torrent_id=torrent.meta.id, _external=True) }} + {{ torrent.created_time|rfc822_es }} + {% else %} {{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }} {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} {{ torrent.created_time|rfc822 }} + {% endif %} {% else %} {{ torrent.display_name }} + {% if use_elastic %} + {{ torrent.info_hash }} + {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ torrent.created_time|rfc822_es }} + {% else %} {{ torrent.magnet_uri }} {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} {{ torrent.created_time|rfc822 }} + {% endif %} {% endif %} {% endfor %} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 8d6f9da..e8e08be 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -8,7 +8,7 @@ {{ caller() }} {% endmacro %} -{% if torrent_query.hits.total > 0 %} +{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
@@ -16,7 +16,7 @@ {% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
Category
{% endcall %} - {% call render_column_header("hdr-name", "width:auto;", sort_key="name") %} + {% call render_column_header("hdr-name", "width:auto;") %}
Name
{% endcall %} {% call render_column_header("hdr-link", "width:0;", center_text=True) %} @@ -45,26 +45,46 @@ - {% for torrent in torrent_query %} + {% set torrents = torrent_query if use_elastic else torrent_query.items %} + {% for torrent in torrents %} - {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %} + {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} {% set icon_dir = config.SITE_FLAVOR %} + {% if use_elastic %} + {% else %} + + {% endif %} + {% if use_elastic %} + {% else %} + + {% endif %} + {% if config.ENABLE_SHOW_STATS %} + {% if use_elastic %} + {% else %} + + + + {% endif %} {% endif %} {% endfor %} @@ -75,9 +95,12 @@

No results found

{% endif %} -{#
+ {% if use_elastic %} + {{ pagination.info }} + {{ pagination.links }} + {% else %} {% from "bootstrap/pagination.html" import render_pagination %} {{ render_pagination(torrent_query) }} + {% endif %}
-#} diff --git a/requirements.txt b/requirements.txt index af89eab..843b935 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,7 +32,9 @@ visitor==0.1.3 webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 -## The following requirements were added by pip freeze: +## elasticsearch dependencies elasticsearch==5.3.0 elasticsearch-dsl==5.2.0 -progressbar2==3.20.0 \ No newline at end of file +progressbar2==3.20.0 +mysql-replication==0.13 +flask-paginate==0.4.5 \ No newline at end of file diff --git a/sync_es.py b/sync_es.py index 81ad17f..45c98a5 100644 --- a/sync_es.py +++ b/sync_es.py @@ -40,7 +40,12 @@ log.setLevel(logging.INFO) #logging.getLogger('elasticsearch').setLevel(logging.DEBUG) # in prod want in /var/lib somewhere probably -SAVE_LOC = "/tmp/sync_es_position.json" +SAVE_LOC = "/var/lib/sync_es_position.json" +MYSQL_HOST = '127.0.0.1' +MYSQL_PORT = 3306 +MYSQL_USER = 'test' +MYSQL_PW = 'test123' +NT_DB = 'nyaav2' with open(SAVE_LOC) as f: pos = json.load(f) @@ -50,16 +55,16 @@ es = Elasticsearch() stream = BinLogStreamReader( # TODO parse out from config.py or something connection_settings = { - 'host': '127.0.0.1', - 'port': 13306, - 'user': 'root', - 'passwd': 'dunnolol' + 'host': MYSQL_HOST, + 'port': MYSQL_PORT, + 'user': MYSQL_USER, + 'passwd': MYSQL_PW }, server_id=10, # arbitrary - # only care about this table currently - only_schemas=["nyaav2"], - # TODO sukebei - only_tables=["nyaa_torrents", "nyaa_statistics"], + # only care about this database currently + only_schemas=[NT_DB], + # these tables in the database + only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"], # from our save file resume_stream=True, log_file=pos['log_file'], @@ -72,7 +77,7 @@ stream = BinLogStreamReader( # using aiomysql if anybody wants to revive that. blocking=True) -def reindex_torrent(t): +def reindex_torrent(t, index_name): # XXX annoyingly different from import_to_es, and # you need to keep them in sync manually. 
f = t['flags'] @@ -103,14 +108,14 @@ def reindex_torrent(t): } # update, so we don't delete the stats if present es.update( - index='nyaav2', + index=index_name, doc_type='torrent', id=t['id'], body={"doc": doc, "doc_as_upsert": True}) -def reindex_stats(s): +def reindex_stats(s, index_name): es.update( - index='nyaav2', + index=index_name, doc_type='torrent', id=s['torrent_id'], body={ @@ -126,21 +131,29 @@ last_save = time.time() for event in stream: for row in event.rows: - if event.table == "nyaa_torrents": + if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" if type(event) is WriteRowsEvent: - reindex_torrent(row['values']) + reindex_torrent(row['values'], index_name) elif type(event) is UpdateRowsEvent: - reindex_torrent(row['after_values']) + reindex_torrent(row['after_values'], index_name) elif type(event) is DeleteRowsEvent: # just delete it - es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id']) + es.delete(index=index_name, doc_type='torrent', id=row['values']['id']) else: raise Exception(f"unknown event {type(event)}") - elif event.table == "nyaa_statistics": + elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" if type(event) is WriteRowsEvent: - reindex_stats(row['values']) + reindex_stats(row['values'], index_name) elif type(event) is UpdateRowsEvent: - reindex_stats(row['after_values']) + reindex_stats(row['after_values'], index_name) elif type(event) is DeleteRowsEvent: # uh ok. assume that the torrent row will get deleted later. 
pass From 200517435863d9411ada755621809bbe0e0db964 Mon Sep 17 00:00:00 2001 From: aldacron Date: Tue, 16 May 2017 00:46:25 -0700 Subject: [PATCH 12/68] finished up rss, changed rss behavior to include pre-defined trackers only, also cleaned up debug statements --- nyaa/routes.py | 3 +-- nyaa/search.py | 4 ++-- nyaa/templates/rss.xml | 2 +- nyaa/templates/search_results.html | 4 ++++ nyaa/torrents.py | 34 +++++++++++++++++++++++++++++- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 5fbcb5c..edc302b 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -178,7 +178,7 @@ def home(rss): query_args['term'] = '' else: # Otherwise, use db search for everything query_args['term'] = term or '' - print(query_args) + query = search_db(**query_args) if rss: return render_rss('/', query, use_elastic=False) @@ -287,7 +287,6 @@ def _jinja2_filter_rfc822(datestr, fmt=None): def render_rss(label, query, use_elastic): - print(query) rss_xml = flask.render_template('rss.xml', use_elastic=use_elastic, term=label, diff --git a/nyaa/search.py b/nyaa/search.py index 7369fd3..e6353c5 100644 --- a/nyaa/search.py +++ b/nyaa/search.py @@ -177,8 +177,8 @@ def search_elastic(term='', user=None, sort='id', order='desc', s = s.highlight("display_name") # Return query, uncomment print line to debug query - from pprint import pprint - print(json.dumps(s.to_dict())) + # from pprint import pprint + # print(json.dumps(s.to_dict())) return s.execute() diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index c495cda..e1787d2 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -22,7 +22,7 @@ {{ torrent.display_name }} {% if use_elastic %} - {{ torrent.info_hash }} + {{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }} {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} {{ torrent.created_time|rfc822_es }} {% else %} diff --git a/nyaa/templates/search_results.html 
b/nyaa/templates/search_results.html index e8e08be..cc0988f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -66,7 +66,11 @@ {% endif %} {% if use_elastic %} diff --git a/nyaa/torrents.py b/nyaa/torrents.py index a8ad6d0..3d35cc7 100644 --- a/nyaa/torrents.py +++ b/nyaa/torrents.py @@ -3,6 +3,7 @@ import base64 import time from urllib.parse import urlencode from orderedset import OrderedSet +from nyaa import app from nyaa import bencode from nyaa import app @@ -53,10 +54,23 @@ def get_trackers(torrent): return list(trackers) +def get_trackers_magnet(): + trackers = OrderedSet() + + # Our main one first + main_announce_url = app.config.get('MAIN_ANNOUNCE_URL') + if main_announce_url: + trackers.add(main_announce_url) + + # and finally our tracker list + trackers.update(default_trackers()) + + return list(trackers) + def create_magnet(torrent, max_trackers=5, trackers=None): if trackers is None: - trackers = get_trackers(torrent) + trackers = get_trackers_magnet() magnet_parts = [ ('dn', torrent.display_name) @@ -68,6 +82,24 @@ def create_magnet(torrent, max_trackers=5, trackers=None): return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) +# For processing ES links +@app.context_processor +def create_magnet_from_info(): + def _create_magnet_from_info(display_name, info_hash, max_trackers=5, trackers=None): + if trackers is None: + trackers = get_trackers_magnet() + + magnet_parts = [ + ('dn', display_name) + ] + for tracker in trackers[:max_trackers]: + magnet_parts.append(('tr', tracker)) + + b32_info_hash = base64.b32encode(bytes.fromhex(info_hash)).decode('utf-8') + return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) + return dict(create_magnet_from_info=_create_magnet_from_info) + + def create_default_metadata_base(torrent, trackers=None): if trackers is None: trackers = get_trackers(torrent) From 1d0177480eae96aadf4ade0aa28bd42b32f35e63 Mon Sep 17 00:00:00 2001 From: 
aldacron Date: Tue, 16 May 2017 01:04:08 -0700 Subject: [PATCH 13/68] updated time display --- nyaa/routes.py | 12 ++++++++++++ nyaa/templates/search_results.html | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index edc302b..6cee9c4 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -98,6 +98,18 @@ def _generate_query_string(term, category, filter, user): return params +@app.template_filter('utc_time') +def get_utc_timestamp(datetime_str): + ''' Returns a UTC POSIX timestamp, as seconds ''' + UTC_EPOCH = datetime.utcfromtimestamp(0) + return int((datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') - UTC_EPOCH).total_seconds()) + + +@app.template_filter('display_time') +def get_display_time(datetime_str): + return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d %H:%M') + + @app.route('/rss', defaults={'rss': True}) @app.route('/', defaults={'rss': False}) def home(rss): diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index cc0988f..4b5663f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -74,7 +74,7 @@ {% if use_elastic %} - + {% else %} {% endif %} From 261490daa75a49c1c976751f269234e3c7c5577d Mon Sep 17 00:00:00 2001 From: Kfir Hadas Date: Tue, 16 May 2017 11:43:10 +0300 Subject: [PATCH 14/68] Fix padding for single file in torrent file list --- nyaa/templates/view.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nyaa/templates/view.html b/nyaa/templates/view.html index fd64979..dfeb533 100644 --- a/nyaa/templates/view.html +++ b/nyaa/templates/view.html @@ -92,7 +92,7 @@   {{ key }} {{ loop(value.items()) }} {%- else %} - {%- endif %} From a87c192435d97122c29d6b59cbd544a540dcc05e Mon Sep 17 00:00:00 2001 From: nyaadev0 Date: Tue, 16 May 2017 04:10:06 -0500 Subject: [PATCH 15/68] Add Safari pinned-tab icon --- nyaa/static/pinned-tab.svg | Bin 0 -> 2856 bytes nyaa/templates/layout.html 
| 1 + 2 files changed, 1 insertion(+) create mode 100644 nyaa/static/pinned-tab.svg diff --git a/nyaa/static/pinned-tab.svg b/nyaa/static/pinned-tab.svg new file mode 100644 index 0000000000000000000000000000000000000000..28034c3db093b555f973cb48257ed3d704204b15 GIT binary patch literal 2856 zcmZ{m-BR2)48`wxik-c$k^CXsnUIU_H4o7lXxJ%*455&|eSgQhEz8o(gzW#=vaF9I z$)CT!eBJ!`^Zw!c)%NcB`P-+<%h%V}^jgy6)7Q&vW|v584&dgtiAC))X!A9Wx-a$om|w^I+2T1vZC9P4Xmzsfqg&}45Hn}90e7nhhqyF3EvEU zt6YqNMnZ&#N*^R4m326|`|ta(UXsr?PQMQ_1-F~R;WIaKQd0>oRP;>Yt7+uLgC}F%9pa+DW8N9w4KgqW_dC&nh?9KfD8kW75kY;q1+D{v zRyc;Uf`~y_n2LIE@C%N9;3D259%N@x6Jr|N9QFfM%Mk7bf-5zitF&YL!d5Vf%gtO` zv-f4o3B>eK9ZR9R^{&hl2qHllWFmTo5wAoJPgUC31g1=N3B1zrmLqgmiLS7`A@)c( zg~E-U8h#IvJ==A!aC=Zyuy@co-k^j{*L8>)GLj0$iAa59*<=-x0ZUiwm~&9yph+r# zO=ON#jaUM6hCzZO5N<|m5$vpX@{?;;wRFfUS{P1KOFgPsf`6*1LFCX>MpjX(Cb1z( zc94fv8C*LCtAJrYFpqMq=M{?6@@Dr0O8DpST8W~+WlV0~)Se7C{Qwt7pdya4oyBmY zJ;Vw}%R}Ao1ZuYtW)+C)ZDqNtYfAZ&u5WNj_n=BCr!toA(FVK~hKOh@jeHQrQ5I!_ zu6;}<-J14tf+fyr_)N1e1QJ)W2kJV(BWPe6G{%AB(23Q6H4EgJ2wNyBZgX4GZelih z8E{d736ZZKrQu*-{I(UPIjC#ZvC-pm%GPijb~_HJ>}!SPV&k#RQ0Kb4bVdj*+(|F6 z4pwPW7)-kmLEzyT-kssqb>Q*hq<-Ye`;?x*v)waz&Gdz-GNl(9x`r02Fr4nGhVH{x z?Rge5HoP(g-#*u>M&=rcMkVVLfgF=)(|c$(^yetFW8%eW(7Q|D!?WEpcum7Mdid%< zV>r=dV)Uk7&*beSwh+}hS9fgW{lkJz%)l!Sbej+jmAr)(C#oI|RGm~vfF8gVEX-l+ z1Z{auop(ko74NM&GxLMQhlOho%}6Ffj-59K)0(B3&Ik`Nq2t{mONQ=RD3meQGvW#- z2ty3HjsN()*5?|L4DEb4>1y-9GO4zbGCgbw#?bRe8|q@HrYCSG!;}WR0@U2$xkAlr zW?2)OSIY<;X(`hQw9uyBV=P3G8@2Gf6NK@+7v&-&yXa-Lz~LRRI|av`6^YaDqtJ)- QQC2vr@f4^M(1Be(tNdN!< literal 0 HcmV?d00001 diff --git a/nyaa/templates/layout.html b/nyaa/templates/layout.html index 232d2d5..806c0bc 100644 --- a/nyaa/templates/layout.html +++ b/nyaa/templates/layout.html @@ -8,6 +8,7 @@ + From 472b78bac1c0138f104d63fe18bbeb47edcc6e02 Mon Sep 17 00:00:00 2001 From: nyaadev0 Date: Tue, 16 May 2017 04:27:13 -0500 Subject: [PATCH 16/68] Support legacy arguments in search route --- nyaa/routes.py | 14 ++++++-------- 1 file changed, 6 
insertions(+), 8 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 48c8428..64809b6 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -116,15 +116,13 @@ def home(rss): if flask.request.args.get('page') == 'rss': rss = True - term = flask.request.args.get('q') + term = flask.request.args.get('q', flask.request.args.get('term')) sort = flask.request.args.get('s') order = flask.request.args.get('o') - category = flask.request.args.get('c') - quality_filter = flask.request.args.get('f') - user_name = flask.request.args.get('u') - page = flask.request.args.get('p') - if page: - page = int(page) + category = flask.request.args.get('c', flask.request.args.get('cats')) + quality_filter = flask.request.args.get('f', flask.request.args.get('filter')) + user_name = flask.request.args.get('u', flask.request.args.get('user')) + page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int) per_page = app.config.get('RESULTS_PER_PAGE') if not per_page: @@ -143,7 +141,7 @@ def home(rss): 'order': order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', - 'page': page or 1, + 'page': page, 'rss': rss, 'per_page': per_page } From a79c0f8a9368b84aee897300eaeca44660912f96 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 09:25:01 +0300 Subject: [PATCH 17/68] PEP8 (a run of lint.sh) --- nyaa/api_handler.py | 5 +++-- nyaa/backend.py | 7 ++++--- nyaa/fix_paginate.py | 2 ++ nyaa/forms.py | 7 ++++++- nyaa/models.py | 12 ++++++++---- nyaa/routes.py | 3 +-- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 7ecd720..f512192 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -297,14 +297,15 @@ def api_upload(upload_request): # Store tracker refs in DB for order, tracker in enumerate(db_trackers): torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) + tracker_id=tracker.id, order=order) 
db.session.add(torrent_tracker) db.session.commit() if app.config.get('BACKUP_TORRENT_FOLDER'): torrent_file.seek(0, 0) - torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename))) + torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format( + torrent.id, secure_filename(torrent_file.filename))) torrent_file.save(torrent_path) torrent_file.close() diff --git a/nyaa/backend.py b/nyaa/backend.py index 240a895..20e5dfe 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -142,7 +142,7 @@ def handle_torrent_upload(upload_form, uploading_user=None): # Store tracker refs in DB for order, tracker in enumerate(db_trackers): torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) + tracker_id=tracker.id, order=order) db.session.add(torrent_tracker) db.session.commit() @@ -156,8 +156,9 @@ def handle_torrent_upload(upload_form, uploading_user=None): if not os.path.exists(torrent_dir): os.makedirs(torrent_dir) - torrent_path = os.path.join(torrent_dir, '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename))) + torrent_path = os.path.join(torrent_dir, '{}.{}'.format( + torrent.id, secure_filename(torrent_file.filename))) torrent_file.save(torrent_path) torrent_file.close() - return torrent \ No newline at end of file + return torrent diff --git a/nyaa/fix_paginate.py b/nyaa/fix_paginate.py index 38b7808..59334b3 100644 --- a/nyaa/fix_paginate.py +++ b/nyaa/fix_paginate.py @@ -1,6 +1,7 @@ from flask_sqlalchemy import Pagination, BaseQuery from flask import abort + def paginate_faste(self, page=1, per_page=50, max_page=None, step=5): if page < 1: abort(404) @@ -25,4 +26,5 @@ def paginate_faste(self, page=1, per_page=50, max_page=None, step=5): return Pagination(self, page, per_page, total, items) + BaseQuery.paginate_faste = paginate_faste diff --git a/nyaa/forms.py b/nyaa/forms.py index 6a6508a..3bf38a1 100644 --- 
a/nyaa/forms.py +++ b/nyaa/forms.py @@ -209,7 +209,7 @@ class UploadForm(FlaskForm): # Decode and ensure data is bencoded data try: torrent_dict = bencode.decode(field.data) - #field.data.close() + # field.data.close() except (bencode.MalformedBencodeException, UnicodeError): raise ValidationError('Malformed torrent file') @@ -266,7 +266,12 @@ class TorrentFileData(object): # https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure +<<<<<<< master def _validate_trackers(torrent_dict, tracker_to_check_for=None): +======= + +def _validate_trackers(torrent_dict): +>>>>>>> PEP8 (a run of lint.sh) announce = torrent_dict.get('announce') announce_string = _validate_bytes(announce, 'announce', 'utf-8') diff --git a/nyaa/models.py b/nyaa/models.py index 34fac59..e6d714d 100644 --- a/nyaa/models.py +++ b/nyaa/models.py @@ -41,8 +41,10 @@ class TorrentFlags(IntEnum): COMPLETE = 16 DELETED = 32 + DB_TABLE_PREFIX = app.config['TABLE_PREFIX'] + class Torrent(db.Model): __tablename__ = DB_TABLE_PREFIX + 'torrents' @@ -118,7 +120,6 @@ class Torrent(db.Model): # Escaped return escape_markup(self.information) - @property def magnet_uri(self): return create_magnet(self) @@ -224,7 +225,8 @@ class Trackers(db.Model): __tablename__ = 'trackers' id = db.Column(db.Integer, primary_key=True) - uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI), nullable=False, unique=True) + uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI), + nullable=False, unique=True) disabled = db.Column(db.Boolean, nullable=False, default=False) @classmethod @@ -235,8 +237,10 @@ class Trackers(db.Model): class TorrentTrackers(db.Model): __tablename__ = DB_TABLE_PREFIX + 'torrent_trackers' - torrent_id = db.Column(db.Integer, db.ForeignKey(DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True) - tracker_id = db.Column(db.Integer, db.ForeignKey('trackers.id', ondelete="CASCADE"), primary_key=True) + torrent_id = db.Column(db.Integer, db.ForeignKey( + 
DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True) + tracker_id = db.Column(db.Integer, db.ForeignKey( + 'trackers.id', ondelete="CASCADE"), primary_key=True) order = db.Column(db.Integer, nullable=False, index=True) tracker = db.relationship('Trackers', uselist=False, lazy='joined') diff --git a/nyaa/routes.py b/nyaa/routes.py index 48c8428..17bc9cc 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -60,7 +60,6 @@ def filter_truthy(input_list): the search_results.html template ''' return [item for item in input_list if item] - @app.errorhandler(404) def not_found(error): return flask.render_template('404.html'), 404 @@ -619,7 +618,7 @@ def site_help(): #################################### API ROUTES #################################### # DISABLED FOR NOW -@app.route('/api/upload', methods = ['POST']) +@app.route('/api/upload', methods=['POST']) def api_upload(): api_response = api_handler.api_upload(flask.request) return api_response \ No newline at end of file From 1b99908283cd83c89210c9b7b03d28e92041e6b6 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 09:33:46 +0300 Subject: [PATCH 18/68] PEP8 routes.py (E501) line too long --- nyaa/routes.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 17bc9cc..703237d 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -60,6 +60,7 @@ def filter_truthy(input_list): the search_results.html template ''' return [item for item in input_list if item] + @app.errorhandler(404) def not_found(error): return flask.render_template('404.html'), 404 @@ -327,7 +328,8 @@ def login(): if not user: user = models.User.by_email(username) - if not user or password != user.password_hash or user.status == models.UserStatusType.INACTIVE: + if (not user or password != user.password_hash + or user.status == models.UserStatusType.INACTIVE): flask.flash(flask.Markup( 'Login failed! 
Incorrect username or password.'), 'danger') return flask.redirect(flask.url_for('login')) @@ -505,7 +507,8 @@ def edit_torrent(torrent_id): if flask.request.method == 'POST' and form.validate(): # Form has been sent, edit torrent with data. - torrent.main_category_id, torrent.sub_category_id = form.category.parsed_data.get_category_ids() + torrent.main_category_id, torrent.sub_category_id = \ + form.category.parsed_data.get_category_ids() torrent.display_name = (form.display_name.data or '').strip() torrent.information = (form.information.data or '').strip() torrent.description = (form.description.data or '').strip() @@ -532,7 +535,10 @@ def edit_torrent(torrent_id): form.is_complete.data = torrent.complete form.is_anonymous.data = torrent.anonymous - return flask.render_template('edit.html', form=form, torrent=torrent, admin=flask.g.user.is_admin) + return flask.render_template('edit.html', + form=form, + torrent=torrent, + admin=flask.g.user.is_admin) @app.route('/view//magnet') @@ -584,8 +590,10 @@ def get_activation_link(user): def send_verification_email(to_address, activ_link): - ''' this is until we have our own mail server, obviously. This can be greatly cut down if on same machine. - probably can get rid of all but msg formatting/building, init line and sendmail line if local SMTP server ''' + ''' this is until we have our own mail server, obviously. + This can be greatly cut down if on same machine. 
+ probably can get rid of all but msg formatting/building, + init line and sendmail line if local SMTP server ''' msg_body = 'Please click on: ' + activ_link + ' to activate your account.\n\n\nUnsubscribe:' From beb5be99896d15c82c9a5e3705392e9a80bf66be Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 10:24:34 +0300 Subject: [PATCH 19/68] 8: PEP8 routes.py (E265, E266, E713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E265 block comment should start with ‘# ‘ E266 too many leading ‘#’ for block comment E713 test for membership should be ‘not in’ --- nyaa/routes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 703237d..0ab24e6 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -76,7 +76,7 @@ def before_request(): flask.g.user = user - if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now(): + if not 'timeout' not in flask.session or flask.session['timeout'] < datetime.now(): flask.session['timeout'] = datetime.now() + timedelta(days=7) flask.session.permanent = True flask.session.modified = True @@ -311,7 +311,7 @@ def render_rss(label, query, use_elastic): # @app.route('/about', methods=['GET']) # def about(): -# return flask.render_template('about.html') + # return flask.render_template('about.html') @app.route('/login', methods=['GET', 'POST']) @@ -613,7 +613,7 @@ def send_verification_email(to_address, activ_link): server.quit() -#################################### STATIC PAGES #################################### +# #################################### STATIC PAGES #################################### @app.route('/rules', methods=['GET']) def site_rules(): return flask.render_template('rules.html') @@ -624,7 +624,7 @@ def site_help(): return flask.render_template('help.html') -#################################### API ROUTES #################################### +# #################################### API ROUTES 
#################################### # DISABLED FOR NOW @app.route('/api/upload', methods=['POST']) def api_upload(): From b0325bc681e051c5aa21bd3657ab39f03de376cd Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 10:37:12 +0300 Subject: [PATCH 20/68] PEP8 api_handler.py (E265, E266) E265 block comment should start with '# ' E266 too many leading '#' for block comment --- nyaa/api_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index f512192..6e8e875 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -10,7 +10,7 @@ from orderedset import OrderedSet from werkzeug import secure_filename DEBUG_API = False -#################################### API ROUTES #################################### +# #################################### API ROUTES #################################### CATEGORIES = [ ('Anime', ['Anime Music Video', 'English-translated', 'Non-English-translated', 'Raw']), ('Audio', ['Lossless', 'Lossy']), @@ -30,7 +30,7 @@ def validate_main_sub_cat(main_cat_name, sub_cat_name): cat_id = main_cat.id_as_string sub_cat_id = sub_cat.id_as_string cat_sub_cat = sub_cat_id.split('_') - #print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1])) + # print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1])) return True, cat_sub_cat[0], cat_sub_cat[1] @@ -309,7 +309,7 @@ def api_upload(upload_request): torrent_file.save(torrent_path) torrent_file.close() - #print('Success? {0}'.format(torrent.id)) + # print('Success? 
{0}'.format(torrent.id)) return flask.make_response(flask.jsonify({"Success": "Request was processed {0}".format(torrent.id)}), 200) except Exception as e: print('Exception: {0}'.format(e)) From a3be6ee89f639dd698d60f28cbffc567c8e06102 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 10:51:13 +0300 Subject: [PATCH 21/68] PEP8 api_handler.py (E501) E501 line too long --- nyaa/api_handler.py | 50 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 6e8e875..2ce1d70 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -112,17 +112,22 @@ def api_upload(upload_request): if DEBUG_API: print(json.dumps(j, indent=4)) - _json_keys = ['username', 'password', - 'display_name', 'main_cat', 'sub_cat', 'flags'] # 'information' and 'description' are not required + _json_keys = ['username', + 'password', + 'display_name', + 'main_cat', + 'sub_cat', + 'flags'] # 'information' and 'description' are not required # Check that required fields are present for _k in _json_keys: if _k not in j.keys(): - return flask.make_response(flask.jsonify({"Error": "Missing JSON field: {0}.".format(_k)}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Missing JSON field: {0}.".format(_k)}), 400) # Check that no extra fields are present for k in j.keys(): - if k not in ['username', 'password', - 'display_name', 'main_cat', 'sub_cat', 'information', 'description', 'flags']: - return flask.make_response(flask.jsonify({"Error": "Incorrect JSON field(s)."}), 400) + if k not in set(_json_keys + ['information', 'description']): + return flask.make_response(flask.jsonify( + {"Error": "Incorrect JSON field(s)."}), 400) else: return flask.make_response(flask.jsonify({"Error": "No metadata."}), 400) if 'torrent' in upload_request.files: @@ -143,14 +148,17 @@ def api_upload(upload_request): if not user: user = models.User.by_email(username) - if not user or password != 
user.password_hash or user.status == models.UserStatusType.INACTIVE: - return flask.make_response(flask.jsonify({"Error": "Incorrect username or password"}), 403) + if (not user or password != user.password_hash + or user.status == models.UserStatusType.INACTIVE): + return flask.make_response(flask.jsonify( + {"Error": "Incorrect username or password"}), 403) current_user = user display_name = j['display_name'] if (len(display_name) < 3) or (len(display_name) > 1024): - return flask.make_response(flask.jsonify({"Error": "Torrent name must be between 3 and 1024 characters."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Torrent name must be between 3 and 1024 characters."}), 400) main_cat_name = j['main_cat'] sub_cat_name = j['sub_cat'] @@ -158,14 +166,16 @@ def api_upload(upload_request): cat_subcat_status, cat_id, sub_cat_id = validate_main_sub_cat( main_cat_name, sub_cat_name) if not cat_subcat_status: - return flask.make_response(flask.jsonify({"Error": "Incorrect Category / Sub-Category."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect Category / Sub-Category."}), 400) # TODO Sanitize information information = None try: information = j['information'] if len(information) > 255: - return flask.make_response(flask.jsonify({"Error": "Information is limited to 255 characters."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Information is limited to 255 characters."}), 400) except Exception as e: information = '' @@ -173,8 +183,10 @@ def api_upload(upload_request): description = None try: description = j['description'] - if len(description) > (10 * 1024): - return flask.make_response(flask.jsonify({"Error": "Description is limited to {0} characters.".format(10 * 1024)}), 403) + limit = 10 * 1024 + if len(description) > limit: + return flask.make_response(flask.jsonify( + {"Error": "Description is limited to {0} characters.".format(limit)}), 403) except Exception as e: description = '' @@ -182,13 +194,15 
@@ def api_upload(upload_request): if v_flags: torrent_flags = j['flags'] else: - return flask.make_response(flask.jsonify({"Error": "Incorrect torrent flags."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect torrent flags."}), 400) torrent_status, torrent_data = validate_torrent_file( torrent_file.filename, torrent_file.read()) # Needs validation if not torrent_status: - return flask.make_response(flask.jsonify({"Error": "Invalid or Duplicate torrent file."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Invalid or Duplicate torrent file."}), 400) # The torrent has been validated and is safe to access with ['foo'] etc - all relevant # keys and values have been checked for (see UploadForm in forms.py for details) @@ -310,9 +324,11 @@ def api_upload(upload_request): torrent_file.close() # print('Success? {0}'.format(torrent.id)) - return flask.make_response(flask.jsonify({"Success": "Request was processed {0}".format(torrent.id)}), 200) + return flask.make_response(flask.jsonify( + {"Success": "Request was processed {0}".format(torrent.id)}), 200) except Exception as e: print('Exception: {0}'.format(e)) - return flask.make_response(flask.jsonify({"Error": "Incorrect JSON. Please see HELP page for examples."}), 400) + return flask.make_response(flask.jsonify( + {"Error": "Incorrect JSON. 
Please see HELP page for examples."}), 400) else: return flask.make_response(flask.jsonify({"Error": "Bad request"}), 400) From 5e60847cb6049888e9aa176a1d25d24942f01600 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 11:14:47 +0300 Subject: [PATCH 22/68] More E501 line too long - multiple files nyaa/backend.py:75 nyaa/forms.py:129,175 nyaa/models.py:88 --- nyaa/backend.py | 3 ++- nyaa/forms.py | 6 ++++-- nyaa/models.py | 5 +++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/nyaa/backend.py b/nyaa/backend.py index 20e5dfe..5b10397 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -72,7 +72,8 @@ def handle_torrent_upload(upload_form, uploading_user=None): models.UserLevelType.TRUSTED) if uploading_user else False # Set category ids - torrent.main_category_id, torrent.sub_category_id = upload_form.category.parsed_data.get_category_ids() + torrent.main_category_id, torrent.sub_category_id = \ + upload_form.category.parsed_data.get_category_ids() # print('Main cat id: {0}, Sub cat id: {1}'.format( # torrent.main_category_id, torrent.sub_category_id)) diff --git a/nyaa/forms.py b/nyaa/forms.py index 3bf38a1..cfe177b 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -126,7 +126,8 @@ class DisabledSelectField(SelectField): class EditForm(FlaskForm): display_name = TextField('Torrent display name', [ Length(min=3, max=255, - message='Torrent display name must be at least %(min)d characters long and %(max)d at most.') + message='Torrent display name must be at least %(min)d characters long ' + 'and %(max)d at most.') ]) category = DisabledSelectField('Category') @@ -172,7 +173,8 @@ class UploadForm(FlaskForm): display_name = TextField('Torrent display name (optional)', [ Optional(), Length(min=3, max=255, - message='Torrent display name must be at least %(min)d characters long and %(max)d at most.') + message='Torrent display name must be at least %(min)d characters long and ' + '%(max)d at most.') ]) # category = SelectField('Category') 
diff --git a/nyaa/models.py b/nyaa/models.py index e6d714d..038b13f 100644 --- a/nyaa/models.py +++ b/nyaa/models.py @@ -85,8 +85,9 @@ class Torrent(db.Model): main_category = db.relationship('MainCategory', uselist=False, back_populates='torrents', lazy="joined") sub_category = db.relationship('SubCategory', uselist=False, backref='torrents', lazy="joined", - primaryjoin="and_(SubCategory.id == foreign(Torrent.sub_category_id), " - "SubCategory.main_category_id == Torrent.main_category_id)") + primaryjoin=( + "and_(SubCategory.id == foreign(Torrent.sub_category_id), " + "SubCategory.main_category_id == Torrent.main_category_id)")) info = db.relationship('TorrentInfo', uselist=False, back_populates='torrent') filelist = db.relationship('TorrentFilelist', uselist=False, back_populates='torrent') stats = db.relationship('Statistic', uselist=False, back_populates='torrent', lazy='joined') From 571b7f29300ef06ab86eb23fd787799059a59419 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 16:10:20 +0300 Subject: [PATCH 23/68] Ignore routes import line --- nyaa/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nyaa/__init__.py b/nyaa/__init__.py index aeda6be..7e934cb 100644 --- a/nyaa/__init__.py +++ b/nyaa/__init__.py @@ -60,4 +60,4 @@ assets = Environment(app) # output='style.css', depends='**/*.scss') # assets.register('style_all', css) -from nyaa import routes +from nyaa import routes # noqa From 60b7029fccd2e1146b453444ffec07da2d0805ee Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 16:47:04 +0300 Subject: [PATCH 24/68] Remove merge conflict indicators --- nyaa/forms.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/nyaa/forms.py b/nyaa/forms.py index cfe177b..46810b2 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -268,12 +268,8 @@ class TorrentFileData(object): # https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure -<<<<<<< master -def _validate_trackers(torrent_dict, 
tracker_to_check_for=None): -======= -def _validate_trackers(torrent_dict): ->>>>>>> PEP8 (a run of lint.sh) +def _validate_trackers(torrent_dict, tracker_to_check_for=None): announce = torrent_dict.get('announce') announce_string = _validate_bytes(announce, 'announce', 'utf-8') From 9ac56ba3d7cd675aedb0fc868caa4f6099f88821 Mon Sep 17 00:00:00 2001 From: sharkykh Date: Sun, 14 May 2017 16:47:18 +0300 Subject: [PATCH 25/68] AutoPEP8 --- nyaa/forms.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nyaa/forms.py b/nyaa/forms.py index 46810b2..cc2454d 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -223,7 +223,6 @@ class UploadForm(FlaskForm): except AssertionError as e: raise ValidationError('Malformed torrent metadata ({})'.format(e.args[0])) - site_tracker = app.config.get('MAIN_ANNOUNCE_URL') ensure_tracker = app.config.get('ENFORCE_MAIN_ANNOUNCE_URL') @@ -235,11 +234,12 @@ class UploadForm(FlaskForm): # Ensure private torrents are using our tracker if torrent_dict['info'].get('private') == 1: if torrent_dict['announce'].decode('utf-8') != site_tracker: - raise ValidationError('Private torrent: please set {} as the main tracker'.format(site_tracker)) + raise ValidationError( + 'Private torrent: please set {} as the main tracker'.format(site_tracker)) elif ensure_tracker and not tracker_found: - raise ValidationError('Please include {} in the trackers of the torrent'.format(site_tracker)) - + raise ValidationError( + 'Please include {} in the trackers of the torrent'.format(site_tracker)) # Note! 
bencode will sort dict keys, as per the spec # This may result in a different hash if the uploaded torrent does not match the @@ -273,7 +273,8 @@ def _validate_trackers(torrent_dict, tracker_to_check_for=None): announce = torrent_dict.get('announce') announce_string = _validate_bytes(announce, 'announce', 'utf-8') - tracker_found = tracker_to_check_for and (announce_string.lower() == tracker_to_check_for.lower()) or False + tracker_found = tracker_to_check_for and ( + announce_string.lower() == tracker_to_check_for.lower()) or False announce_list = torrent_dict.get('announce-list') if announce_list is not None: From 4e9409fb30a334d28e1658e134577993831d4b48 Mon Sep 17 00:00:00 2001 From: Kfir Hadas Date: Tue, 16 May 2017 12:47:06 +0300 Subject: [PATCH 26/68] AutoPEP8 (after elasticsearch merge) --- nyaa/routes.py | 11 +++++++---- nyaa/search.py | 19 ++++++++++++------- nyaa/torrents.py | 1 + 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 0ab24e6..5554452 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -162,7 +162,8 @@ def home(rss): if not max_search_results: max_search_results = DEFAULT_MAX_SEARCH_RESULT - max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + # Only allow up to (max_search_results / page) pages + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) query_args['page'] = max_page query_args['max_search_results'] = max_search_results @@ -188,7 +189,7 @@ def home(rss): # If ES is enabled, default to db search for browsing if use_elastic: query_args['term'] = '' - else: # Otherwise, use db search for everything + else: # Otherwise, use db search for everything query_args['term'] = term or '' query = search_db(**query_args) @@ -253,7 +254,8 @@ def view_user(user_name): if not max_search_results: max_search_results = DEFAULT_MAX_SEARCH_RESULT - max_page = 
min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + # Only allow up to (max_search_results / page) pages + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) query_args['page'] = max_page query_args['max_search_results'] = max_search_results @@ -293,6 +295,7 @@ def view_user(user_name): def _jinja2_filter_rfc822(date, fmt=None): return formatdate(float(date.strftime('%s'))) + @app.template_filter('rfc822_es') def _jinja2_filter_rfc822(datestr, fmt=None): return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) @@ -629,4 +632,4 @@ def site_help(): @app.route('/api/upload', methods=['POST']) def api_upload(): api_response = api_handler.api_upload(flask.request) - return api_response \ No newline at end of file + return api_response diff --git a/nyaa/search.py b/nyaa/search.py index e6353c5..931a903 100644 --- a/nyaa/search.py +++ b/nyaa/search.py @@ -167,8 +167,8 @@ def search_elastic(term='', user=None, sort='id', order='desc', s = s[0:per_page] else: max_page = min(page, int(math.ceil(max_search_results / float(per_page)))) - from_idx = (max_page-1)*per_page - to_idx = min(max_search_results, max_page*per_page) + from_idx = (max_page - 1) * per_page + to_idx = min(max_search_results, max_page * per_page) s = s[from_idx:to_idx] highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT') @@ -267,10 +267,12 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', if not admin: # Hide all DELETED torrents if regular user - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.DELETED)).is_(False)) # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous # If logged in user is the same as the user being viewed, show all 
torrents including hidden and anonymous ones - # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what + # On RSS pages in user view, show only torrents that aren't hidden or + # anonymous no matter what if not same_user or rss: query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)).is_(False)) @@ -278,7 +280,8 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', else: if not admin: # Hide all DELETED torrents if regular user - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.DELETED)).is_(False)) # If logged in, show all torrents that aren't hidden unless they belong to you # On RSS pages, show all public torrents and nothing more. if logged_in_user and not rss: @@ -286,7 +289,8 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', (models.Torrent.uploader_id == logged_in_user.id)) # Otherwise, show all torrents that aren't hidden else: - query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.HIDDEN)).is_(False)) if main_category: query = query.filter(models.Torrent.main_category_id == main_cat_id) @@ -295,7 +299,8 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', (models.Torrent.sub_category_id == sub_cat_id)) if filter_tuple: - query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) + query = query.filter(models.Torrent.flags.op('&')( + int(filter_tuple[0])).is_(filter_tuple[1])) if term: for item in shlex.split(term, posix=False): diff --git a/nyaa/torrents.py b/nyaa/torrents.py index 1b5dfae..eff6f54 100644 --- a/nyaa/torrents.py +++ b/nyaa/torrents.py @@ -54,6 +54,7 @@ def get_trackers(torrent): return 
list(trackers) + def get_trackers_magnet(): trackers = OrderedSet() From 2c9ed4cb949e0d2694a0c442d22255c3964bfc7d Mon Sep 17 00:00:00 2001 From: Kfir Hadas Date: Tue, 16 May 2017 12:52:48 +0300 Subject: [PATCH 27/68] PEP8 search.py (E501) --- nyaa/search.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/nyaa/search.py b/nyaa/search.py index 931a903..9e22f84 100644 --- a/nyaa/search.py +++ b/nyaa/search.py @@ -188,7 +188,8 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', sort_keys = { 'id': models.Torrent.id, 'size': models.Torrent.filesize, - # 'name': models.Torrent.display_name, # Disable this because we disabled this in search_elastic, for the sake of consistency + # Disable this because we disabled this in search_elastic, for the sake of consistency: + # 'name': models.Torrent.display_name, 'seeders': models.Statistic.seed_count, 'leechers': models.Statistic.leech_count, 'downloads': models.Statistic.download_count @@ -269,13 +270,17 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', # Hide all DELETED torrents if regular user query = query.filter(models.Torrent.flags.op('&')( int(models.TorrentFlags.DELETED)).is_(False)) - # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous - # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones - # On RSS pages in user view, show only torrents that aren't hidden or - # anonymous no matter what + # If logged in user is not the same as the user being viewed, + # show only torrents that aren't hidden or anonymous + # + # If logged in user is the same as the user being viewed, + # show all torrents including hidden and anonymous ones + # + # On RSS pages in user view, + # show only torrents that aren't hidden or anonymous no matter what if not same_user or rss: - query = 
query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN | - models.TorrentFlags.ANONYMOUS)).is_(False)) + query = query.filter(models.Torrent.flags.op('&')( + int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)).is_(False)) # General view (homepage, general search view) else: if not admin: @@ -285,8 +290,9 @@ def search_db(term='', user=None, sort='id', order='desc', category='0_0', # If logged in, show all torrents that aren't hidden unless they belong to you # On RSS pages, show all public torrents and nothing more. if logged_in_user and not rss: - query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | - (models.Torrent.uploader_id == logged_in_user.id)) + query = query.filter( + (models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | + (models.Torrent.uploader_id == logged_in_user.id)) # Otherwise, show all torrents that aren't hidden else: query = query.filter(models.Torrent.flags.op('&')( From 36416a35974270868ded817afda312590455370e Mon Sep 17 00:00:00 2001 From: Nathan Yam Date: Tue, 16 May 2017 19:53:38 +1000 Subject: [PATCH 28/68] Fix search bar presentation Need to fix filters and categories though. Looks terrible. 
--- nyaa/static/css/main.css | 33 +++++++++++++++++++++++++++++++++ nyaa/templates/layout.html | 10 +++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/nyaa/static/css/main.css b/nyaa/static/css/main.css index 1743595..321bb3b 100644 --- a/nyaa/static/css/main.css +++ b/nyaa/static/css/main.css @@ -99,6 +99,39 @@ table.torrent-list thead th.sorting_desc:after { } } +.search-container { + display: flex; + flex-direction: column; +} + +.form-control.search-bar { + order: 1; + width: 88%; +} + +.search-btn { + order: 2; + align-self: flex-end; + top: -34px; + height: 0; + width: auto; +} + +#navFilter-category { + order: 3; +} + +#navFilter-criteria { + order: 4; +} + +@media (min-width: 768px) { + .search-btn { + top: 0; + width: auto; + } +} + /* elasticsearch term highlight */ .hlt1 { font-style: normal; diff --git a/nyaa/templates/layout.html b/nyaa/templates/layout.html index 232d2d5..3c3e121 100644 --- a/nyaa/templates/layout.html +++ b/nyaa/templates/layout.html @@ -144,16 +144,16 @@ {% else %} {% endif %} -
- -
+ {% else %} - + {% endif %} {% if use_elastic %} - + {% else %} {% endif %} @@ -74,7 +74,7 @@ {% if use_elastic %} - + {% else %} {% endif %} From eedc6c170d6d00e62843da21655296880eb56e5d Mon Sep 17 00:00:00 2001 From: UnKnoWn Date: Wed, 17 May 2017 02:00:01 +0800 Subject: [PATCH 40/68] Added tabs to make profile more organized --- nyaa/templates/profile.html | 93 +++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/nyaa/templates/profile.html b/nyaa/templates/profile.html index 2e6e659..a60e676 100644 --- a/nyaa/templates/profile.html +++ b/nyaa/templates/profile.html @@ -4,40 +4,65 @@ {% from "_formhelpers.html" import render_field %}

Edit Profile

- - {{ form.csrf_token }} -
-
- {{ render_field(form.email, class_='form-control', placeholder='New email address') }} -
-
+
+ +
+
+ + {{ form.csrf_token }} +
+
+ {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} +
+
+
+
+ {{ render_field(form.new_password, class_='form-control', placeholder='New password') }} +
+
+
+
+ {{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }} +
+
+
+
+
+ +
+
+ +
+
+
+ {{ form.csrf_token }} +
+
+ {{ render_field(form.email, class_='form-control', placeholder='New email address') }} +
+
+
+
+ {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} +
+
+
+
+
+ +
+
+ +
+
+
-
-
- {{ render_field(form.current_password, class_='form-control', placeholder='Current password') }} -
-
- -
-
- {{ render_field(form.new_password, class_='form-control', placeholder='New password') }} -
-
- -
-
- {{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }} -
-
- -
- -
-
- -
-
- {% endblock %} - From 6f61970559aa8823617253dc4d912fe5000cb787 Mon Sep 17 00:00:00 2001 From: Nazo Date: Tue, 16 May 2017 21:29:22 +0300 Subject: [PATCH 41/68] Switch marked to commonmark --- nyaa/static/js/main.js | 7 ++++++- nyaa/templates/layout.html | 2 +- nyaa/templates/view.html | 7 +++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nyaa/static/js/main.js b/nyaa/static/js/main.js index b9c140b..78a2807 100644 --- a/nyaa/static/js/main.js +++ b/nyaa/static/js/main.js @@ -105,8 +105,13 @@ document.addEventListener("DOMContentLoaded", function() { var previewTabEl = markdownEditor.querySelector(previewTabSelector); var targetEl = markdownEditor.querySelector(targetSelector); + var reader = new commonmark.Parser({safe: true}); + var writer = new commonmark.HtmlRenderer({safe: true}); + writer.softbreak = '
'; + previewTabEl.addEventListener('click', function () { - targetEl.innerHTML = marked(sourceSelector.value.trim(), { sanitize: true, breaks:true }); + var parsed = reader.parse(sourceSelector.value.trim()); + targetEl.innerHTML = writer.render(parsed); }); }); }); diff --git a/nyaa/templates/layout.html b/nyaa/templates/layout.html index 232d2d5..1466d31 100644 --- a/nyaa/templates/layout.html +++ b/nyaa/templates/layout.html @@ -34,7 +34,7 @@ - +
+ {% if use_elastic %} + {% else %} + + {% endif %} {%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{{ torrent.display_name | escape }} {% if torrent.has_torrent %}{% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}{{ torrent.seed_count }} {{ torrent.leech_count }} {{ torrent.download_count }}{{ torrent.stats.seed_count }}{{ torrent.stats.leech_count }}{{ torrent.stats.download_count }}
{% if torrent.has_torrent %}{% endif %} + {% if use_elastic %} + + {% else %} + {% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time }}{{ torrent.created_time | display_time }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }} +  {{ key }} {{ value | filesizeformat(True) }}{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{{ torrent.display_name | escape }}{{ torrent.display_name | escape }} {% if torrent.has_torrent %}{% endif %} From d8ef29755ebcc3123c7a0acaf4ed56b8ab270ace Mon Sep 17 00:00:00 2001 From: Ricardo Ribeiro Date: Tue, 16 May 2017 15:08:47 +0100 Subject: [PATCH 39/68] Remove trailing spacebar on search_results --- nyaa/templates/search_results.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 92608c7..f169b6f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -60,7 +60,7 @@ {%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{{ torrent.display_name | escape }} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time | display_time }}{{ torrent.created_time | display_time }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}