Merge remote-tracking branch 'refs/remotes/nyaadevs/master'

This commit is contained in:
UnKnoWn 2017-05-17 14:51:47 +08:00
commit cfb3fafd64
7 changed files with 100 additions and 62 deletions

View File

@ -8,6 +8,7 @@ with a cron job or some binlog-reading thing (TODO)
from nyaa import app
from nyaa.models import Torrent
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch import helpers
import progressbar
import sys
@ -21,7 +22,8 @@ bar = progressbar.ProgressBar(
' (', progressbar.ETA(), ') ',
])
es = Elasticsearch()
es = Elasticsearch(timeout=30)
ic = IndicesClient(es)
# turn into thing that elasticsearch indexes. We flatten in
# the stats (seeders/leechers) so we can order by them in es naturally.
@ -89,4 +91,10 @@ def page_query(query, limit=sys.maxsize, batch_size=10000):
bar.update(start)
start = min(limit, start + batch_size)
# turn off refreshes while bulk loading
ic.put_settings(body={'index': {'refresh_interval': '-1'}}, index=app.config['ES_INDEX_NAME'])
helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000)
# restore to near-enough real time
ic.put_settings(body={'index': {'refresh_interval': '30s'}}, index=app.config['ES_INDEX_NAME'])

View File

@ -116,15 +116,13 @@ def home(rss):
if flask.request.args.get('page') == 'rss':
rss = True
term = flask.request.args.get('q')
term = flask.request.args.get('q', flask.request.args.get('term'))
sort = flask.request.args.get('s')
order = flask.request.args.get('o')
category = flask.request.args.get('c')
quality_filter = flask.request.args.get('f')
user_name = flask.request.args.get('u')
page = flask.request.args.get('p')
if page:
page = int(page)
category = flask.request.args.get('c', flask.request.args.get('cats'))
quality_filter = flask.request.args.get('f', flask.request.args.get('filter'))
user_name = flask.request.args.get('u', flask.request.args.get('user'))
page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
@ -143,7 +141,7 @@ def home(rss):
'order': order or 'desc',
'category': category or '0_0',
'quality_filter': quality_filter or '0',
'page': page or 1,
'page': page,
'rss': rss,
'per_page': per_page
}

View File

@ -105,8 +105,13 @@ document.addEventListener("DOMContentLoaded", function() {
var previewTabEl = markdownEditor.querySelector(previewTabSelector);
var targetEl = markdownEditor.querySelector(targetSelector);
var reader = new commonmark.Parser({safe: true});
var writer = new commonmark.HtmlRenderer({safe: true});
writer.softbreak = '<br />';
previewTabEl.addEventListener('click', function () {
targetEl.innerHTML = marked(sourceSelector.value.trim(), { sanitize: true, breaks:true });
var parsed = reader.parse(sourceSelector.value.trim());
targetEl.innerHTML = writer.render(parsed);
});
});
});

View File

@ -0,0 +1,7 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="16" height="16" viewBox="0 0 16 16" version="1.1">
<path
fill-rule="evenodd"
id="r"
d="M2.80456 0.0C1.53169 0.0 0.46231 0.8395 0.11722 1.9974C0.65595 2.6291 1.917 3.6348 4.10868 2.8607C7.12099 1.7967 7.21946 1.0731 9.37026 1.3071C9.58017 1.0151 9.8039 0.7698 10.04032 0.6029C10.70196 0.1359 11.16916 0.3905 11.45037 0.7121C11.67687 0.5235 12.04906 0.231 12.43243 0.0C12.43243 0.0 2.80456 0.0 2.80456 0.0C2.80456 0.0 2.80456 0.0 2.80456 0.0M8.00586 4.1565C7.84838 4.1551 7.51975 4.218 6.90756 4.5707C6.32291 5.0887 4.71324 5.8388 3.58983 7.3761C2.65561 8.6547 0.6318 10.4905 0.0 10.8481C0.0 10.8481 0.0 13.1958 0.0 13.1958C0.0 13.8053 0.19257 14.3683 0.52035 14.8274C0.74197 14.288 1.07908 13.6922 1.57055 13.322C2.65031 12.5087 3.13704 12.5341 3.75811 12.7751C4.52415 13.0724 6.64158 13.0993 7.80801 12.9699C7.81331 12.7432 7.82472 12.5788 7.82472 12.5788C8.51884 11.8426 9.00263 11.6533 9.31288 11.648C9.41697 11.6463 9.50152 11.6652 9.56759 11.692C9.63769 11.4325 9.69719 11.1385 9.7339 10.8124C9.43319 10.0384 9.30012 9.2173 9.30012 9.2173C8.8413 10.3077 8.68011 11.1204 8.68011 11.1204C8.44154 10.4669 8.48684 9.7137 8.52662 9.367C8.52662 9.367 8.03506 8.8767 8.03506 8.8767C8.03506 8.8767 8.11451 8.2391 8.11451 8.2391C8.00137 7.4964 8.01913 6.7829 8.04368 6.377C7.99896 6.3933 7.95905 6.4025 7.92287 6.4086C7.50219 6.4787 7.64242 5.8757 7.81069 5.0764C7.85267 4.877 7.93828 4.551 8.06321 4.1604C8.04737 4.1583 8.02836 4.1567 8.00586 4.1565C8.00586 4.1565 8.00586 4.1565 8.00586 4.1565M11.18032 6.515C11.09331 6.5159 11.00225 6.5201 10.90696 6.528C10.90696 6.528 10.54514 6.5705 10.44698 6.518C10.44698 6.518 10.57608 6.6381 10.85859 6.6324C10.3529 7.6834 10.61941 8.1146 10.61941 8.1146C10.58585 7.1328 11.21659 7.0483 11.35914 7.0621C11.235 7.2101 11.12397 7.6103 11.10413 7.9867C11.066 8.7101 11.76774 8.609 11.9888 8.5101C12.20598 8.4129 12.54484 8.2304 12.58425 7.379C12.81473 7.4825 13.04872 7.8568 13.04872 7.8568C13.04872 7.8568 12.94965 6.4965 11.18032 6.515C11.18032 6.515 11.18032 6.515 11.18032 6.515M1.57573 7.7361C0.71803 8.0557 0.18167 8.127 0.0 8.1429C0.0 8.1429 
0.0 9.8096 0.0 9.8096C0.26935 9.6733 1.6355 8.5429 1.57573 7.7361C1.57573 7.7361 1.57573 7.7361 1.57573 7.7361M14.05083 10.8819C13.87555 11.471 13.48992 11.5621 13.48992 11.5621C13.9667 11.7094 14.09991 11.499 14.14899 11.3939C14.19807 11.2887 14.05083 10.8819 14.05083 10.8819C14.05083 10.8819 14.05083 10.8819 14.05083 10.8819M16.0 11.826C15.53318 12.3666 14.74715 13.2132 14.30324 13.336C13.86892 13.4562 13.39197 13.5215 13.119 13.5394C13.24082 13.6817 13.53787 14.0936 13.65064 14.5111C13.87676 14.7663 14.30387 15.2545 14.59348 15.63C15.43531 15.147 16.0 14.2398 16.0 13.1958C16.0 13.1958 16.0 11.826 16.0 11.826C16.0 11.826 16.0 11.826 16.0 11.826"
/>
</svg>

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

@ -8,6 +8,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<link rel="shortcut icon" type="image/png" href="/static/favicon.png">
<link rel="icon" type="image/png" href="/static/favicon.png">
<link rel="mask-icon" href="/static/pinned-tab.svg" color="#3582F7">
<link rel="alternate" type="application/rss+xml" href="{% if rss_filter %}{{ url_for('home', page='rss', _external=True, **rss_filter) }}{% else %}{{ url_for('home', page='rss', _external=True) }}{% endif %}" />
<!-- Bootstrap core CSS -->
@ -34,7 +35,7 @@
<!-- Modified to not apply border-radius to selectpickers and stuff so our navbar looks cool -->
<script src="/static/js/bootstrap-select.js"></script>
<script src="/static/js/main.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/marked/0.3.6/marked.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/commonmark/0.27.0/commonmark.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>

View File

@ -122,8 +122,11 @@
<script>
var target = document.getElementById('torrent-description');
var text = target.innerHTML;
var html = marked(text.trim(), { sanitize: true, breaks:true });
target.innerHTML = html;
var reader = new commonmark.Parser({safe: true});
var writer = new commonmark.HtmlRenderer({safe: true});
writer.softbreak = '<br />';
var parsed = reader.parse(text.trim());
target.innerHTML = writer.render(parsed);
</script>
{% endblock %}

View File

@ -23,6 +23,7 @@ changes that happen while the import_to_es script is dumping stuff from the
database into es, at the expense of redoing a (small) amount of indexing.
"""
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
from datetime import datetime
@ -50,7 +51,7 @@ NT_DB = 'nyaav2'
with open(SAVE_LOC) as f:
pos = json.load(f)
es = Elasticsearch()
es = Elasticsearch(timeout=30)
stream = BinLogStreamReader(
# TODO parse out from config.py or something
@ -58,8 +59,7 @@ stream = BinLogStreamReader(
'host': MYSQL_HOST,
'port': MYSQL_PORT,
'user': MYSQL_USER,
'passwd': MYSQL_PW,
'passwd': MYSQL_PW
},
server_id=10, # arbitrary
# only care about this database currently
@ -108,60 +108,76 @@ def reindex_torrent(t, index_name):
"has_torrent": bool(t['has_torrent']),
}
# update, so we don't delete the stats if present
es.update(
index=index_name,
doc_type='torrent',
id=t['id'],
body={"doc": doc, "doc_as_upsert": True})
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(t['id']),
"doc": doc,
"doc_as_upsert": True
}
def reindex_stats(s, index_name):
es.update(
index=index_name,
doc_type='torrent',
id=s['torrent_id'],
body={
"doc": {
"stats_last_updated": s["last_updated"],
"download_count": s["download_count"],
"leech_count": s['leech_count'],
"seed_count": s['seed_count'],
}, "doc_as_upsert": True})
# update the torrent at torrent_id, assumed to exist;
# this will always be the case if you're reading the binlog
# in order; the foreign key constraint on torrent_id prevents
# the stats row from existing if the torrent isn't around.
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(s['torrent_id']),
"doc": {
"stats_last_updated": s["last_updated"],
"download_count": s["download_count"],
"leech_count": s['leech_count'],
"seed_count": s['seed_count'],
}}
def delet_this(row, index_name):
# Build an elasticsearch bulk-API 'delete' action for the torrent document
# whose id matches the deleted binlog row, so helpers.bulk() can remove it
# alongside the update actions produced by reindex_torrent/reindex_stats.
# NOTE(review): name looks like an intentional meme spelling of "delete this".
return {
"_op_type": 'delete',
'_index': index_name,
'_type': 'torrent',
'_id': str(row['values']['id'])}
n = 0
last_save = time.time()
for event in stream:
for row in event.rows:
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
reindex_torrent(row['values'], index_name)
elif type(event) is UpdateRowsEvent:
reindex_torrent(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent:
# just delete it
es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
reindex_stats(row['values'], index_name)
elif type(event) is UpdateRowsEvent:
reindex_stats(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later.
pass
else:
raise Exception(f"unknown event {type(event)}")
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
raise Exception(f"unknown table {s.table}")
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
# UpdateRowsEvent includes the old values too, but we don't care
bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# ok, bye
bulk(es, (delet_this(row, index_name) for row in event.rows))
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
if event.table == "nyaa_statistics":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later,
# which will clean up the entire es "torrent" document
pass
else:
raise Exception(f"unknown event {type(event)}")
else:
raise Exception(f"unknown table {s.table}")
n += 1
if n % 100 == 0 or time.time() - last_save > 30:
log.info(f"saving position {stream.log_file}/{stream.log_pos}")