mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-22 18:00:00 +00:00
Merge remote-tracking branch 'refs/remotes/nyaadevs/master'
This commit is contained in:
commit cfb3fafd64
@@ -8,6 +8,7 @@ with a cron job or some binlog-reading thing (TODO)
 from nyaa import app
 from nyaa.models import Torrent
 from elasticsearch import Elasticsearch
+from elasticsearch.client import IndicesClient
 from elasticsearch import helpers
 import progressbar
 import sys
@@ -21,7 +22,8 @@ bar = progressbar.ProgressBar(
     ' (', progressbar.ETA(), ') ',
 ])
 
-es = Elasticsearch()
+es = Elasticsearch(timeout=30)
+ic = IndicesClient(es)
 
 # turn into thing that elasticsearch indexes. We flatten in
 # the stats (seeders/leechers) so we can order by them in es naturally.
@@ -89,4 +91,10 @@ def page_query(query, limit=sys.maxsize, batch_size=10000):
         bar.update(start)
         start = min(limit, start + batch_size)
 
+# turn off refreshes while bulk loading
+ic.put_settings(body={'index': {'refresh_interval': '-1'}}, index=app.config['ES_INDEX_NAME'])
+
 helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000)
+
+# restore to near-enough real time
+ic.put_settings(body={'index': {'refresh_interval': '30s'}}, index=app.config['ES_INDEX_NAME'])
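
For reference, the bulk-load pattern this hunk applies — disable the index refresh, bulk-index everything, then restore a near-real-time interval — looks like this as a standalone sketch. The index name and document generator here are illustrative placeholders, not from this commit:

    # Standalone sketch of the refresh-toggle bulk load above.
    # 'some_index' and gen_actions() are hypothetical placeholders.
    from elasticsearch import Elasticsearch, helpers
    from elasticsearch.client import IndicesClient

    es = Elasticsearch(timeout=30)
    ic = IndicesClient(es)

    def gen_actions():
        # yield one bulk action per document
        yield {'_index': 'some_index', '_type': 'doc', '_id': '1',
               '_source': {'display_name': 'hello'}}

    # -1 disables the periodic refresh that makes new docs searchable,
    # trading freshness for much faster bulk indexing
    ic.put_settings(body={'index': {'refresh_interval': '-1'}}, index='some_index')
    helpers.bulk(es, gen_actions(), chunk_size=10000)
    # back to refreshing every 30s, i.e. near-enough real time
    ic.put_settings(body={'index': {'refresh_interval': '30s'}}, index='some_index')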
@@ -116,15 +116,13 @@ def home(rss):
     if flask.request.args.get('page') == 'rss':
         rss = True
 
-    term = flask.request.args.get('q')
+    term = flask.request.args.get('q', flask.request.args.get('term'))
     sort = flask.request.args.get('s')
     order = flask.request.args.get('o')
-    category = flask.request.args.get('c')
-    quality_filter = flask.request.args.get('f')
-    user_name = flask.request.args.get('u')
-    page = flask.request.args.get('p')
-    if page:
-        page = int(page)
+    category = flask.request.args.get('c', flask.request.args.get('cats'))
+    quality_filter = flask.request.args.get('f', flask.request.args.get('filter'))
+    user_name = flask.request.args.get('u', flask.request.args.get('user'))
+    page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int)
 
     per_page = app.config.get('RESULTS_PER_PAGE')
     if not per_page:
@@ -143,7 +141,7 @@ def home(rss):
         'order': order or 'desc',
         'category': category or '0_0',
         'quality_filter': quality_filter or '0',
-        'page': page or 1,
+        'page': page,
         'rss': rss,
         'per_page': per_page
     }
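
The first hunk keeps the legacy query-string parameters (term, cats, filter, user, offset) working by passing the old parameter's value as the default for the new short one, and page now comes back as an int defaulting to 1 instead of None — which is why the second hunk can simplify 'page': page or 1 to 'page': page. A minimal sketch of that idiom; the app and route are illustrative, only the args.get() calls mirror the diff:

    # Minimal sketch of the legacy-parameter fallback used above;
    # the app and route are hypothetical, not from this commit.
    import flask

    app = flask.Flask(__name__)

    @app.route('/search')
    def search():
        # prefer the new short name, fall back to the legacy one
        term = flask.request.args.get('q', flask.request.args.get('term'))
        # get(key, default, type): take 'p' if present, else 'offset', else 1;
        # werkzeug coerces via int() and returns the default if coercion fails
        page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int)
        return flask.jsonify(term=term, page=page)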
@@ -105,8 +105,13 @@ document.addEventListener("DOMContentLoaded", function() {
         var previewTabEl = markdownEditor.querySelector(previewTabSelector);
         var targetEl = markdownEditor.querySelector(targetSelector);
 
+        var reader = new commonmark.Parser({safe: true});
+        var writer = new commonmark.HtmlRenderer({safe: true});
+        writer.softbreak = '<br />';
+
         previewTabEl.addEventListener('click', function () {
-            targetEl.innerHTML = marked(sourceSelector.value.trim(), { sanitize: true, breaks:true });
+            var parsed = reader.parse(sourceSelector.value.trim());
+            targetEl.innerHTML = writer.render(parsed);
         });
     });
 });
BIN  nyaa/static/pinned-tab.svg  (new file, 2.8 KiB)
Binary file not shown.
@@ -8,6 +8,7 @@
     <meta http-equiv="X-UA-Compatible" content="IE=edge">
     <link rel="shortcut icon" type="image/png" href="/static/favicon.png">
     <link rel="icon" type="image/png" href="/static/favicon.png">
+    <link rel="mask-icon" href="/static/pinned-tab.svg" color="#3582F7">
     <link rel="alternate" type="application/rss+xml" href="{% if rss_filter %}{{ url_for('home', page='rss', _external=True, **rss_filter) }}{% else %}{{ url_for('home', page='rss', _external=True) }}{% endif %}" />
 
     <!-- Bootstrap core CSS -->
@@ -34,7 +35,7 @@
     <!-- Modified to not apply border-radius to selectpickers and stuff so our navbar looks cool -->
     <script src="/static/js/bootstrap-select.js"></script>
     <script src="/static/js/main.js"></script>
-    <script src="//cdnjs.cloudflare.com/ajax/libs/marked/0.3.6/marked.min.js"></script>
+    <script src="//cdnjs.cloudflare.com/ajax/libs/commonmark/0.27.0/commonmark.min.js"></script>
 
     <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
     <!--[if lt IE 9]>
@@ -122,8 +122,11 @@
 <script>
     var target = document.getElementById('torrent-description');
     var text = target.innerHTML;
-    var html = marked(text.trim(), { sanitize: true, breaks:true });
-    target.innerHTML = html;
+    var reader = new commonmark.Parser({safe: true});
+    var writer = new commonmark.HtmlRenderer({safe: true});
+    writer.softbreak = '<br />';
+    var parsed = reader.parse(text.trim());
+    target.innerHTML = writer.render(parsed);
 </script>
 
 {% endblock %}
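
Both templates replace marked's sanitize: true rendering with commonmark.js in safe mode, plus a softbreak override so single newlines still render as <br />. The commonmark package for Python mirrors the same Parser/HtmlRenderer API; a rough sketch of the safe-mode step, assuming that port and its options dict — the commit itself only touches the JavaScript side:

    # Rough illustration of safe-mode CommonMark rendering via the Python
    # port of commonmark.js; this package is NOT part of the commit and the
    # 'safe' option is assumed to behave like {safe: true} in commonmark.js.
    import commonmark

    reader = commonmark.Parser()
    # safe mode suppresses raw HTML and potentially dangerous URLs
    writer = commonmark.HtmlRenderer(options={'safe': True})

    ast = reader.parse('*hello*\n<script>alert(1)</script>')
    print(writer.render(ast))  # raw HTML comes out as a placeholder comment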
sync_es.py  (62 changed lines)
@@ -23,6 +23,7 @@ changes that happen while the import_to_es script is dumping stuff from the
 database into es, at the expense of redoing a (small) amount of indexing.
 """
 from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
 from pymysqlreplication import BinLogStreamReader
 from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
 from datetime import datetime
@@ -50,7 +51,7 @@ NT_DB = 'nyaav2'
 with open(SAVE_LOC) as f:
     pos = json.load(f)
 
-es = Elasticsearch()
+es = Elasticsearch(timeout=30)
 
 stream = BinLogStreamReader(
     # TODO parse out from config.py or something
@@ -58,8 +59,7 @@ stream = BinLogStreamReader(
         'host': MYSQL_HOST,
         'port': MYSQL_PORT,
         'user': MYSQL_USER,
-        'passwd': MYSQL_PW,
-
+        'passwd': MYSQL_PW
     },
     server_id=10, # arbitrary
     # only care about this database currently
@@ -108,60 +108,76 @@ def reindex_torrent(t, index_name):
         "has_torrent": bool(t['has_torrent']),
     }
     # update, so we don't delete the stats if present
-    es.update(
-        index=index_name,
-        doc_type='torrent',
-        id=t['id'],
-        body={"doc": doc, "doc_as_upsert": True})
+    return {
+        '_op_type': 'update',
+        '_index': index_name,
+        '_type': 'torrent',
+        '_id': str(t['id']),
+        "doc": doc,
+        "doc_as_upsert": True
+    }
 
 def reindex_stats(s, index_name):
-    es.update(
-        index=index_name,
-        doc_type='torrent',
-        id=s['torrent_id'],
-        body={
+    # update the torrent at torrent_id, assumed to exist;
+    # this will always be the case if you're reading the binlog
+    # in order; the foreign key constraint on torrent_id prevents
+    # the stats row from existing if the torrent isn't around.
+    return {
+        '_op_type': 'update',
+        '_index': index_name,
+        '_type': 'torrent',
+        '_id': str(s['torrent_id']),
         "doc": {
             "stats_last_updated": s["last_updated"],
             "download_count": s["download_count"],
             "leech_count": s['leech_count'],
             "seed_count": s['seed_count'],
-        }, "doc_as_upsert": True})
+        }}
+
+def delet_this(row, index_name):
+    return {
+        "_op_type": 'delete',
+        '_index': index_name,
+        '_type': 'torrent',
+        '_id': str(row['values']['id'])}
 
 n = 0
 last_save = time.time()
 
 for event in stream:
-    for row in event.rows:
         if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
             if event.table == "nyaa_torrents":
                 index_name = "nyaa"
             else:
                 index_name = "sukebei"
             if type(event) is WriteRowsEvent:
-                reindex_torrent(row['values'], index_name)
+                bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows))
             elif type(event) is UpdateRowsEvent:
-                reindex_torrent(row['after_values'], index_name)
+                # UpdateRowsEvent includes the old values too, but we don't care
+                bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows))
             elif type(event) is DeleteRowsEvent:
-                # just delete it
-                es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
+                # ok, bye
+                bulk(es, (delet_this(row, index_name) for row in event.rows))
             else:
                 raise Exception(f"unknown event {type(event)}")
         elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
-            if event.table == "nyaa_torrents":
+            if event.table == "nyaa_statistics":
                 index_name = "nyaa"
             else:
                 index_name = "sukebei"
             if type(event) is WriteRowsEvent:
-                reindex_stats(row['values'], index_name)
+                bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows))
            elif type(event) is UpdateRowsEvent:
-                reindex_stats(row['after_values'], index_name)
+                bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows))
             elif type(event) is DeleteRowsEvent:
-                # uh ok. assume that the torrent row will get deleted later.
+                # uh ok. assume that the torrent row will get deleted later,
+                # which will clean up the entire es "torrent" document
                 pass
             else:
                 raise Exception(f"unknown event {type(event)}")
         else:
             raise Exception(f"unknown table {s.table}")
 
         n += 1
         if n % 100 == 0 or time.time() - last_save > 30:
             log.info(f"saving position {stream.log_file}/{stream.log_pos}")
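
The net effect of this refactor: instead of one es.update()/es.delete() HTTP round-trip per row, each handler now returns a plain action dict, and the per-event loop feeds a generator of them to elasticsearch.helpers.bulk(). A self-contained sketch of the action format — the 'nyaa' index name is from the diff, the ids and field values are made up:

    # Standalone sketch of the bulk actions the refactored handlers emit;
    # ids and field values here are hypothetical.
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import bulk

    es = Elasticsearch(timeout=30)

    actions = [
        # partial update that upserts if the doc is missing, as in
        # reindex_torrent (doc_as_upsert keeps existing stats fields intact)
        {'_op_type': 'update', '_index': 'nyaa', '_type': 'torrent', '_id': '1',
         'doc': {'seed_count': 5, 'leech_count': 2}, 'doc_as_upsert': True},
        # plain delete, as in delet_this
        {'_op_type': 'delete', '_index': 'nyaa', '_type': 'torrent', '_id': '2'},
    ]

    # one request per chunk instead of one per row; returns (successes, errors)
    ok, errors = bulk(es, actions, raise_on_error=False)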