From 17217d9427c33ad139dc40423119b186bcdf2a72 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 00:48:17 -0600 Subject: [PATCH 01/10] WIP es stuff --- import_to_es.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 import_to_es.py diff --git a/import_to_es.py b/import_to_es.py new file mode 100644 index 0000000..4be5e2b --- /dev/null +++ b/import_to_es.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +""" +Bulk load torents from mysql into elasticsearch `nyaav2` index, +which is assumed to already exist. +This is a one-shot deal, so you'd either need to complement it +with a cron job or some binlog-reading thing (TODO) +""" +from nyaa.models import Torrent +from elasticsearch import Elasticsearch +from elasticsearch import helpers +import progressbar +import sys + +bar = progressbar.ProgressBar( + max_value=Torrent.query.count(), + widgets=[ + progressbar.SimpleProgress(), + ' [', progressbar.Timer(), '] ', + progressbar.Bar(), + ' (', progressbar.ETA(), ') ', + ]) + +es = Elasticsearch() + +# turn into thing that elasticsearch indexes. We flatten in +# the stats (seeders/leechers) so we can order by them in es naturally. +# we _don't_ dereference uploader_id to the user's display name however, +# instead doing that at query time. I _think_ this is right because +# we don't want to reindex all the user's torrents just because they +# changed their name, and we don't really want to FTS search on the user anyway. +# Maybe it's more convenient to derefence though. +def mk_es(t): + return { + "_id": t.id, + "_type": "torrent", + "_index": "nyaav2", + "_source": { + "display_name": t.display_name, + "created_time": t.created_time, + "updated_time": t.updated_time, + "description": t.description, + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t.info_hash.hex(), + "filesize": t.filesize, + "uploader_id": t.uploader_id, + "main_category_id": t.main_category_id, + "sub_category_id": t.sub_category_id, + # XXX all the bitflags are numbers + "anonymous": bool(t.anonymous), + "trusted": bool(t.trusted), + "remake": bool(t.remake), + "complete": bool(t.complete), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. + "hidden": bool(t.hidden), + "deleted": bool(t.deleted), + "has_torrent": t.has_torrent, + # XXX last_updated isn't initialized + "stats_last_updated": t.stats.last_updated or t.created_time, + "download_count": t.stats.download_count, + "leech_count": t.stats.leech_count, + "seed_count": t.stats.seed_count, + } + } + +# page through an sqlalchemy query, like the per_fetch but +# doesn't break the eager joins its doing against the stats table. +# annoying that this isn't built in somehow. 
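+# Intended use (a sketch only, not exercised by this script beyond the
+# helpers.bulk call at the bottom): iterate the whole table in fixed-size
+# slices without loading it all at once, e.g.
+#   for t in page_query(Torrent.query, batch_size=1000):
+#       index_one(t)
+# where `index_one` is just a placeholder name for whatever you do per row.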
+def page_query(query, limit=sys.maxsize, batch_size=10000): + start = 0 + while True: + # XXX very inelegant way to do this, i'm confus + stop = min(limit, start + batch_size) + if stop == start: + break + things = query.slice(start, stop) + if not things: + break + had_things = False + for thing in things: + had_things = True + yield(thing) + if not had_things or stop == limit: + break + bar.update(start) + start = min(limit, start + batch_size) + +helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000) From d7d24ef49efe7195e833e27bea86169a42692950 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 00:55:08 -0600 Subject: [PATCH 02/10] update requirements.txt has ipython stuck in there too, oh well --- requirements.txt | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 224866b..dbf234d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,11 +24,33 @@ pycodestyle==2.3.1 pycparser==2.17 pyparsing==2.2.0 six==1.10.0 -SQLAlchemy>=1.1.9 +SQLAlchemy==1.1.9 SQLAlchemy-FullText-Search==0.2.3 -SQLAlchemy-Utils>=0.32.14 +SQLAlchemy-Utils==0.32.14 uWSGI==2.0.15 visitor==0.1.3 webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 +## The following requirements were added by pip freeze: +decorator==4.0.11 +elasticsearch==5.3.0 +elasticsearch-dsl==5.2.0 +ipython==6.0.0 +ipython-genutils==0.2.0 +jedi==0.10.2 +mysql-replication==0.13 +pexpect==4.2.1 +pickleshare==0.7.4 +pkg-resources==0.0.0 +progressbar2==3.20.0 +prompt-toolkit==1.0.14 +ptyprocess==0.5.1 +Pygments==2.2.0 +PyMySQL==0.7.11 +python-dateutil==2.6.0 +python-utils==2.1.0 +simplegeneric==0.8.1 +traitlets==4.3.2 +urllib3==1.21.1 +wcwidth==0.1.7 From 3cbe2e4221e0c66105ef0d72221a148f78007d8c Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 02:01:26 -0600 Subject: [PATCH 03/10] WIP hack in es as the provider for search results real sketch. lots of stuff is still broken. But! you can make elasticsearch q= style queries and it shows up properly. only first page works; need to adapt pager to elasticsearch's "total-hits" thing. --- import_to_es.py | 4 +++ nyaa/routes.py | 44 ++++++++++++++++++++++++++++-- nyaa/templates/search_results.html | 20 ++++++++------ 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/import_to_es.py b/import_to_es.py index 4be5e2b..e714da5 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -35,6 +35,10 @@ def mk_es(t): "_type": "torrent", "_index": "nyaav2", "_source": { + # we're also indexing the id as a number so you can + # order by it. 
seems like this is just equivalent to + # order by created_time, but oh well + "id": t.id, "display_name": t.display_name, "created_time": t.created_time, "updated_time": t.updated_time, diff --git a/nyaa/routes.py b/nyaa/routes.py index 4064b15..2941e5b 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -27,6 +27,11 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import formatdate +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q + +es_client = Elasticsearch() + DEBUG_API = False @@ -67,6 +72,16 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ sort_ = sort.lower() if sort_ not in sort_keys: flask.abort(400) + + # XXX gross why are all the names subtly different + es_sort = ({ + 'id': 'id', + 'size': 'filesize', + 'name': 'display_name', + 'seeders': 'seed_count', + 'leechers': 'leech_count', + 'downloads': 'download_count' + })[sort] sort = sort_keys[sort] order_keys = { @@ -78,6 +93,10 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if order_ not in order_keys: flask.abort(400) + # funky, es sort is default asc, prefixed by '-' if desc + if "desc" == order: + es_sort = "-" + es_sort + filter_keys = { '0': None, '1': (models.TorrentFlags.REMAKE, False), @@ -126,28 +145,37 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if flask.g.user: same_user = flask.g.user.id == user + s = Search(using=es_client, index='nyaav2') if term: query = db.session.query(models.TorrentNameSearch) + s = s.query("query_string", default_field="display_name", default_operator="AND", query=term) else: query = models.Torrent.query # Filter by user if user: + s = s.filter("term", uploader_id=user) + query = query.filter(models.Torrent.uploader_id == user) # If admin, show everything if not admin: # If user is not logged in or the accessed feed doesn't belong to user, # hide anonymous torrents belonging to the queried user if not same_user: + # TODO adapt to es syntax query = query.filter(models.Torrent.flags.op('&')( int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False)) if main_category: + s = s.filter("term", main_category_id=main_cat_id) query = query.filter(models.Torrent.main_category_id == main_cat_id) elif sub_category: + s = s.filter("term", main_category_id=main_cat_id) + s = s.filter("term", sub_category_id=sub_cat_id) query = query.filter((models.Torrent.main_category_id == main_cat_id) & (models.Torrent.sub_category_id == sub_cat_id)) + # TODO i dunno what this means in es if filter_tuple: query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) @@ -157,6 +185,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False)) if term: + # note already handled in es for item in shlex.split(term, posix=False): if len(item) >= 2: query = query.filter(FullTextSearch( @@ -166,14 +195,22 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ if sort.class_ != models.Torrent: query = query.join(sort.class_) + s = s.sort(es_sort) query = query.order_by(getattr(sort, order)()) + per = app.config['RESULTS_PER_PAGE'] if rss: - query = query.limit(app.config['RESULTS_PER_PAGE']) + pass + #query = query.limit(app.config['RESULTS_PER_PAGE']) else: - query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) + # page is 
1-based? + s = s[(page-1)*per:page*per] + #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) - return query + #return query + from pprint import pprint + print(json.dumps(s.to_dict())) + return s.execute() @app.errorhandler(404) @@ -445,6 +482,7 @@ def activate_user(payload): user.status = models.UserStatusType.ACTIVE + db.session.add(user) db.session.commit() diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 95d18d5..4bce368 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -8,7 +8,7 @@ {{ caller() }} {% endmacro %} -{% if torrent_query.items %} +{% if torrent_query.hits.total > 0 %}
@@ -45,26 +45,26 @@ - {% for torrent in torrent_query.items %} + {% for torrent in torrent_query %} - {% set cat_id = (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} + {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %} {% set icon_dir = config.SITE_FLAVOR %} - + - + {% if config.ENABLE_SHOW_STATS %} - - - + + + {% endif %} {% endfor %} @@ -75,7 +75,9 @@

No results found

{% endif %} +{#
{% from "bootstrap/pagination.html" import render_pagination %} {{ render_pagination(torrent_query) }}
+#} From 8c951210d497e9ed237457e2474c31e392d1891a Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 02:19:42 -0600 Subject: [PATCH 04/10] es: implement highlighting no apologies for styling. somebody else with opinions will fix it later. --- nyaa/routes.py | 3 +++ nyaa/static/css/main.css | 12 +++++++++++- nyaa/templates/search_results.html | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 2941e5b..3e87a2a 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -207,6 +207,9 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ s = s[(page-1)*per:page*per] #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) + s = s.highlight_options(tags_schema='styled') + s = s.highlight("display_name") + #return query from pprint import pprint print(json.dumps(s.to_dict())) diff --git a/nyaa/static/css/main.css b/nyaa/static/css/main.css index ee01f9b..f7234ec 100644 --- a/nyaa/static/css/main.css +++ b/nyaa/static/css/main.css @@ -92,4 +92,14 @@ table.torrent-list thead th.sorting_desc:after { margin-left: 20px; margin-bottom: 10px; } -} \ No newline at end of file +} + +/* elasticsearch term highlight */ +.hlt1 { + font-style: normal; + display: inline-block; + padding: 0 3px; + border-radius: 3px; + border: 1px solid rgba(100, 56, 0, 0.8); + background: rgba(200,127,0,0.3); +} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 4bce368..7657bb1 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -54,7 +54,7 @@ - + - +
- + {{ torrent.display_name | escape }}{{ torrent.display_name | escape }} {% if torrent.has_torrent %}{% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}{{ torrent.created_time }}{{ torrent.stats.seed_count }}{{ torrent.stats.leech_count }}{{ torrent.stats.download_count }}{{ torrent.seed_count }}{{ torrent.leech_count }}{{ torrent.download_count }}
{{ torrent.display_name | escape }}{{ torrent.meta.highlight.display_name[0] | safe }} {% if torrent.has_torrent %}{% endif %} From 85ba16545f486f879e84f01dc5003c2f6c3c1857 Mon Sep 17 00:00:00 2001 From: queue Date: Sun, 14 May 2017 22:26:44 -0600 Subject: [PATCH 05/10] es: fix highlighting without query term I like highlighting. --- nyaa/templates/search_results.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 23f9a40..8d6f9da 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -54,7 +54,7 @@ {{ torrent.meta.highlight.display_name[0] | safe }}{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%} {% if torrent.has_torrent %}{% endif %} From 32b9170a81b14541318842481f0d62bdfddadc20 Mon Sep 17 00:00:00 2001 From: queue Date: Mon, 15 May 2017 01:32:56 -0600 Subject: [PATCH 06/10] es: add sync_es script for binlog maintenance lightly documented. --- sync_es.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 sync_es.py diff --git a/sync_es.py b/sync_es.py new file mode 100644 index 0000000..81ad17f --- /dev/null +++ b/sync_es.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +""" +stream changes in mysql (on the torrents and statistics table) into +elasticsearch as they happen on the binlog. This keeps elasticsearch in sync +with whatever you do to the database, including stuff like admin queries. Also, +because mysql keeps the binlog around for N days before deleting old stuff, you +can survive a hiccup of elasticsearch or this script dying and pick up where +you left off. + +For that "picking up" part, this script depends on one piece of external state: +its last known binlog filename and position. This is saved off as a JSON file +to a configurable location on the filesystem periodically. If the file is not +present then you can initialize it with the values from `SHOW MASTER STATUS` +from the mysql repl, which will start the sync from current state. + +In the case of catastrophic elasticsearch meltdown where you need to +reconstruct the index, you'll want to be a bit careful with coordinating +sync_es and import_to_es scripts. If you run import_to_es first than run +sync_es against SHOW MASTER STATUS, anything that changed the database between +when import_to_es and sync_es will be lost. Instead, you can run SHOW MASTER +STATUS _before_ you run import_to_es. That way you'll definitely pick up any +changes that happen while the import_to_es script is dumping stuff from the +database into es, at the expense of redoing a (small) amount of indexing. 
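+For reference, the save file is a tiny JSON object. A hypothetical example
+(substitute the File/Position values from your own `SHOW MASTER STATUS`):
+
+    {"log_file": "master1-bin.000002", "log_pos": 892528513}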
+""" +from elasticsearch import Elasticsearch +from pymysqlreplication import BinLogStreamReader +from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent +from datetime import datetime +from nyaa.models import TorrentFlags +import sys +import json +import time +import logging + +logging.basicConfig() + +log = logging.getLogger('sync_es') +log.setLevel(logging.INFO) + +#logging.getLogger('elasticsearch').setLevel(logging.DEBUG) + +# in prod want in /var/lib somewhere probably +SAVE_LOC = "/tmp/sync_es_position.json" + +with open(SAVE_LOC) as f: + pos = json.load(f) + +es = Elasticsearch() + +stream = BinLogStreamReader( + # TODO parse out from config.py or something + connection_settings = { + 'host': '127.0.0.1', + 'port': 13306, + 'user': 'root', + 'passwd': 'dunnolol' + }, + server_id=10, # arbitrary + # only care about this table currently + only_schemas=["nyaav2"], + # TODO sukebei + only_tables=["nyaa_torrents", "nyaa_statistics"], + # from our save file + resume_stream=True, + log_file=pos['log_file'], + log_pos=pos['log_pos'], + # skip the other stuff like table mapping + only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent], + # if we're at the head of the log, block until something happens + # note it'd be nice to block async-style instead, but the mainline + # binlogreader is synchronous. there is an (unmaintained?) fork + # using aiomysql if anybody wants to revive that. + blocking=True) + +def reindex_torrent(t): + # XXX annoyingly different from import_to_es, and + # you need to keep them in sync manually. + f = t['flags'] + doc = { + "id": t['id'], + "display_name": t['display_name'], + "created_time": t['created_time'], + "updated_time": t['updated_time'], + "description": t['description'], + # not analyzed but included so we can render magnet links + # without querying sql again. + "info_hash": t['info_hash'].hex(), + "filesize": t['filesize'], + "uploader_id": t['uploader_id'], + "main_category_id": t['main_category_id'], + "sub_category_id": t['sub_category_id'], + # XXX all the bitflags are numbers + "anonymous": bool(f & TorrentFlags.ANONYMOUS), + "trusted": bool(f & TorrentFlags.TRUSTED), + "remake": bool(f & TorrentFlags.REMAKE), + "complete": bool(f & TorrentFlags.COMPLETE), + # TODO instead of indexing and filtering later + # could delete from es entirely. Probably won't matter + # for at least a few months. 
+ "hidden": bool(f & TorrentFlags.HIDDEN), + "deleted": bool(f & TorrentFlags.DELETED), + "has_torrent": bool(t['has_torrent']), + } + # update, so we don't delete the stats if present + es.update( + index='nyaav2', + doc_type='torrent', + id=t['id'], + body={"doc": doc, "doc_as_upsert": True}) + +def reindex_stats(s): + es.update( + index='nyaav2', + doc_type='torrent', + id=s['torrent_id'], + body={ + "doc": { + "stats_last_updated": s["last_updated"], + "download_count": s["download_count"], + "leech_count": s['leech_count'], + "seed_count": s['seed_count'], + }}) + +n = 0 +last_save = time.time() + +for event in stream: + for row in event.rows: + if event.table == "nyaa_torrents": + if type(event) is WriteRowsEvent: + reindex_torrent(row['values']) + elif type(event) is UpdateRowsEvent: + reindex_torrent(row['after_values']) + elif type(event) is DeleteRowsEvent: + # just delete it + es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id']) + else: + raise Exception(f"unknown event {type(event)}") + elif event.table == "nyaa_statistics": + if type(event) is WriteRowsEvent: + reindex_stats(row['values']) + elif type(event) is UpdateRowsEvent: + reindex_stats(row['after_values']) + elif type(event) is DeleteRowsEvent: + # uh ok. assume that the torrent row will get deleted later. + pass + else: + raise Exception(f"unknown event {type(event)}") + else: + raise Exception(f"unknown table {s.table}") + n += 1 + if n % 100 == 0 or time.time() - last_save > 30: + log.info(f"saving position {stream.log_file}/{stream.log_pos}") + with open(SAVE_LOC, 'w') as f: + json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f) From c2c547e7867742b06cd97c95210c864b0ed9789f Mon Sep 17 00:00:00 2001 From: aldacron Date: Mon, 15 May 2017 11:14:01 -0700 Subject: [PATCH 07/10] some more elasticsearch work, including index mapping and analyzer --- create_es.sh | 3 ++ es_mapping.yml | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ import_to_es.py | 5 +-- nyaa/routes.py | 2 +- requirements.txt | 20 +---------- 5 files changed, 97 insertions(+), 24 deletions(-) create mode 100755 create_es.sh create mode 100644 es_mapping.yml diff --git a/create_es.sh b/create_es.sh new file mode 100755 index 0000000..2b83620 --- /dev/null +++ b/create_es.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/es_mapping.yml b/es_mapping.yml new file mode 100644 index 0000000..9085ec2 --- /dev/null +++ b/es_mapping.yml @@ -0,0 +1,91 @@ +--- +# CREATE DTABASE/TABLE equivalent for elasticsearch, in yaml +# fo inline comments. +settings: + analysis: + analyzer: + my_search_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - standard + - lowercase + my_index_analyzer: + type: custom + tokenizer: standard + char_filter: + - my_char_filter + filter: + - lowercase + - my_ngram + filter: + my_ngram: + type: edgeNGram + min_gram: 1 + max_gram: 15 + char_filter: + my_char_filter: + type: mapping + mappings: ["-=>_", "!=>_"] + index: + # we're running a single es node, so no sharding necessary, + # plus replicas don't really help either. + number_of_shards: 1 + number_of_replicas : 0 + mapper: + # disable elasticsearch's "helpful" autoschema + dynamic: false + # since we disabled the _all field, default query the + # name of the torrent. 
+ query: + default_field: display_name +mappings: + torrent: + # don't want everything concatenated + _all: + enabled: false + properties: + id: + type: long + display_name: + # TODO could do a fancier tokenizer here to parse out the + # the scene convention of stuff in brackets, plus stuff like k-on + type: text + analyzer: my_index_analyzer + fielddata: true + created_time: + type: date + # Only in the ES index for generating magnet links + info_hash: + enabled: false + filesize: + type: long + anonymous: + type: boolean + trusted: + type: boolean + remake: + type: boolean + complete: + type: boolean + hidden: + type: boolean + deleted: + type: boolean + has_torrent: + type: boolean + download_count: + type: long + leech_count: + type: long + seed_count: + type: long + # these ids are really only for filtering, thus keyword + uploader_id: + type: keyword + main_category_id: + type: keyword + sub_category_id: + type: keyword \ No newline at end of file diff --git a/import_to_es.py b/import_to_es.py index e714da5..046bde6 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -41,8 +41,6 @@ def mk_es(t): "id": t.id, "display_name": t.display_name, "created_time": t.created_time, - "updated_time": t.updated_time, - "description": t.description, # not analyzed but included so we can render magnet links # without querying sql again. "info_hash": t.info_hash.hex(), @@ -61,8 +59,7 @@ def mk_es(t): "hidden": bool(t.hidden), "deleted": bool(t.deleted), "has_torrent": t.has_torrent, - # XXX last_updated isn't initialized - "stats_last_updated": t.stats.last_updated or t.created_time, + # Stats "download_count": t.stats.download_count, "leech_count": t.stats.leech_count, "seed_count": t.stats.seed_count, diff --git a/nyaa/routes.py b/nyaa/routes.py index 3e87a2a..758635a 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -148,7 +148,7 @@ def search(term='', user=None, sort='id', order='desc', category='0_0', quality_ s = Search(using=es_client, index='nyaav2') if term: query = db.session.query(models.TorrentNameSearch) - s = s.query("query_string", default_field="display_name", default_operator="AND", query=term) + s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term) else: query = models.Torrent.query diff --git a/requirements.txt b/requirements.txt index dbf234d..af89eab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,24 +33,6 @@ webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 ## The following requirements were added by pip freeze: -decorator==4.0.11 elasticsearch==5.3.0 elasticsearch-dsl==5.2.0 -ipython==6.0.0 -ipython-genutils==0.2.0 -jedi==0.10.2 -mysql-replication==0.13 -pexpect==4.2.1 -pickleshare==0.7.4 -pkg-resources==0.0.0 -progressbar2==3.20.0 -prompt-toolkit==1.0.14 -ptyprocess==0.5.1 -Pygments==2.2.0 -PyMySQL==0.7.11 -python-dateutil==2.6.0 -python-utils==2.1.0 -simplegeneric==0.8.1 -traitlets==4.3.2 -urllib3==1.21.1 -wcwidth==0.1.7 +progressbar2==3.20.0 \ No newline at end of file From 899aa01473654e6ed85698f23df29c2e8081521b Mon Sep 17 00:00:00 2001 From: aldacron Date: Mon, 15 May 2017 23:51:58 -0700 Subject: [PATCH 08/10] hooked up ES... 
90% done, need to figure out how to generate magnet URIs --- README.md | 38 ++++ config.example.py | 13 +- create_es.sh | 4 +- import_to_es.py | 3 +- my.cnf | 6 + nyaa/routes.py | 322 ++++++++++++----------------- nyaa/search.py | 317 ++++++++++++++++++++++++++++ nyaa/templates/rss.xml | 14 +- nyaa/templates/search_results.html | 35 +++- requirements.txt | 6 +- sync_es.py | 53 +++-- 11 files changed, 585 insertions(+), 226 deletions(-) create mode 100644 nyaa/search.py diff --git a/README.md b/README.md index 5ec0077..632ce73 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,43 @@ - Start the dev server with `python run.py` - Deactivate `source deactivate` +# Enabling ElasticSearch + +## Basics +- Install jdk `sudo apt-get install openjdk-8-jdk` +- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html +- `sudo systemctl enable elasticsearch.service` +- `sudo systemctl start elasticsearch.service` +- Run `curl -XGET 'localhost:9200'` and make sure ES is running +- Optional: install Kabana as a search frontend for ES + +## Enable MySQL Binlogging +- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server +- Connect to mysql +- `SHOW VARIABLES LIKE 'binlog_format';` + - Make sure it shows ROW +- Connect to root user +- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with + +## Setting up ES +- Run `./create_es.sh` and this creates two indicies: `nyaa` and `sukebei` +- The output should show `akncolwedged: true` twice +- The safest bet is to disable the webapp here to ensure there's no database writes +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa` +- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei` +- These will take some time to run as it's indexing + +## Setting up sync_es.py +- Sync_es.py keeps the ElasticSearch index updated by reading the BinLog +- Configure the MySQL options with the user where you granted the REPLICATION permissions +- Connect to MySQL, run `SHOW MASTER STATUS;`. +- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}` and replace FILENAME with File (something like master1-bin.000002) in the SQL output and POSITION (something like 892528513) with Position +- Set up `sync_es.py` as a service and run it, preferably as the system/root +- Make sure `sync_es.py` runs within venv with the right dependencies + +## Good to go! +- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go + + ## Code Quality: - Remember to follow PEP8 style guidelines and run `./lint.sh` before committing. diff --git a/config.example.py b/config.example.py index f34c554..73702b9 100644 --- a/config.example.py +++ b/config.example.py @@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***' SMTP_USERNAME = '***' SMTP_PASSWORD = '***' -RESULTS_PER_PAGE = 75 - # What the site identifies itself as. 
SITE_NAME = 'Nyaa' @@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False MAIN_ANNOUNCE_URL = '' BACKUP_TORRENT_FOLDER = 'torrents' + +# +# Search Options +# +# Max ES search results, do not set over 10000 +RESULTS_PER_PAGE = 75 + +USE_ELASTIC_SEARCH = False +ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False +ES_MAX_SEARCH_RESULT = 1000 +ES_INDEX_NAME = SITE_FLAVOR # we create indicies named nyaa or sukebei \ No newline at end of file diff --git a/create_es.sh b/create_es.sh index 2b83620..5b0c564 100755 --- a/create_es.sh +++ b/create_es.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash -curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml +# create indicies named "nyaa" and "sukebei", these are hardcoded +curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml +curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/import_to_es.py b/import_to_es.py index 046bde6..886211f 100644 --- a/import_to_es.py +++ b/import_to_es.py @@ -5,6 +5,7 @@ which is assumed to already exist. This is a one-shot deal, so you'd either need to complement it with a cron job or some binlog-reading thing (TODO) """ +from nyaa import app from nyaa.models import Torrent from elasticsearch import Elasticsearch from elasticsearch import helpers @@ -33,7 +34,7 @@ def mk_es(t): return { "_id": t.id, "_type": "torrent", - "_index": "nyaav2", + "_index": app.config['ES_INDEX_NAME'], "_source": { # we're also indexing the id as a number so you can # order by it. seems like this is just equivalent to diff --git a/my.cnf b/my.cnf index 657a8f6..d586484 100644 --- a/my.cnf +++ b/my.cnf @@ -4,3 +4,9 @@ ft_min_word_len=2 innodb_ft_cache_size = 80000000 innodb_ft_total_cache_size = 1600000000 max_allowed_packet = 100M + +[mariadb] +log-bin +server_id=1 +log-basename=master1 +binlog-format = row diff --git a/nyaa/routes.py b/nyaa/routes.py index 758635a..5fbcb5c 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -6,18 +6,16 @@ from nyaa import bencode, utils from nyaa import torrents from nyaa import backend from nyaa import api_handler +from nyaa.search import search_elastic, search_db import config import json -import re from datetime import datetime, timedelta import ipaddress import os.path import base64 from urllib.parse import quote -import sqlalchemy_fulltext.modes as FullTextMode -from sqlalchemy_fulltext import FullTextSearch -import shlex +import math from werkzeug import url_encode from itsdangerous import URLSafeSerializer, BadSignature @@ -27,12 +25,14 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.utils import formatdate -from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search, Q +from flask_paginate import Pagination -es_client = Elasticsearch() DEBUG_API = False +DEFAULT_MAX_SEARCH_RESULT = 1000 +DEFAULT_PER_PAGE = 75 +SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results {start}-{end} out of {total} results.
+ Please refine your search results if you can't find what you were looking for.''' def redirect_url(): @@ -53,168 +53,13 @@ def modify_query(**new_values): return '{}?{}'.format(flask.request.path, url_encode(args)) + @app.template_global() def filter_truthy(input_list): ''' Jinja2 can't into list comprehension so this is for the search_results.html template ''' return [item for item in input_list if item] -def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False): - sort_keys = { - 'id': models.Torrent.id, - 'size': models.Torrent.filesize, - 'name': models.Torrent.display_name, - 'seeders': models.Statistic.seed_count, - 'leechers': models.Statistic.leech_count, - 'downloads': models.Statistic.download_count - } - - sort_ = sort.lower() - if sort_ not in sort_keys: - flask.abort(400) - - # XXX gross why are all the names subtly different - es_sort = ({ - 'id': 'id', - 'size': 'filesize', - 'name': 'display_name', - 'seeders': 'seed_count', - 'leechers': 'leech_count', - 'downloads': 'download_count' - })[sort] - sort = sort_keys[sort] - - order_keys = { - 'desc': 'desc', - 'asc': 'asc' - } - - order_ = order.lower() - if order_ not in order_keys: - flask.abort(400) - - # funky, es sort is default asc, prefixed by '-' if desc - if "desc" == order: - es_sort = "-" + es_sort - - filter_keys = { - '0': None, - '1': (models.TorrentFlags.REMAKE, False), - '2': (models.TorrentFlags.TRUSTED, True), - '3': (models.TorrentFlags.COMPLETE, True) - } - - sentinel = object() - filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) - if filter_tuple is sentinel: - flask.abort(400) - - if user: - user = models.User.by_id(user) - if not user: - flask.abort(404) - user = user.id - - main_category = None - sub_category = None - main_cat_id = 0 - sub_cat_id = 0 - if category: - cat_match = re.match(r'^(\d+)_(\d+)$', category) - if not cat_match: - flask.abort(400) - - main_cat_id = int(cat_match.group(1)) - sub_cat_id = int(cat_match.group(2)) - - if main_cat_id > 0: - if sub_cat_id > 0: - sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) - else: - main_category = models.MainCategory.by_id(main_cat_id) - - if not category: - flask.abort(400) - - # Force sort by id desc if rss - if rss: - sort = sort_keys['id'] - order = 'desc' - - same_user = False - if flask.g.user: - same_user = flask.g.user.id == user - - s = Search(using=es_client, index='nyaav2') - if term: - query = db.session.query(models.TorrentNameSearch) - s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term) - else: - query = models.Torrent.query - - # Filter by user - if user: - s = s.filter("term", uploader_id=user) - - query = query.filter(models.Torrent.uploader_id == user) - # If admin, show everything - if not admin: - # If user is not logged in or the accessed feed doesn't belong to user, - # hide anonymous torrents belonging to the queried user - if not same_user: - # TODO adapt to es syntax - query = query.filter(models.Torrent.flags.op('&')( - int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False)) - - if main_category: - s = s.filter("term", main_category_id=main_cat_id) - query = query.filter(models.Torrent.main_category_id == main_cat_id) - elif sub_category: - s = s.filter("term", main_category_id=main_cat_id) - s = s.filter("term", sub_category_id=sub_cat_id) - query = query.filter((models.Torrent.main_category_id == main_cat_id) & - 
(models.Torrent.sub_category_id == sub_cat_id)) - - # TODO i dunno what this means in es - if filter_tuple: - query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) - - # If admin, show everything - if not admin: - query = query.filter(models.Torrent.flags.op('&')( - int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False)) - - if term: - # note already handled in es - for item in shlex.split(term, posix=False): - if len(item) >= 2: - query = query.filter(FullTextSearch( - item, models.TorrentNameSearch, FullTextMode.NATURAL)) - - # Sort and order - if sort.class_ != models.Torrent: - query = query.join(sort.class_) - - s = s.sort(es_sort) - query = query.order_by(getattr(sort, order)()) - - per = app.config['RESULTS_PER_PAGE'] - if rss: - pass - #query = query.limit(app.config['RESULTS_PER_PAGE']) - else: - # page is 1-based? - s = s[(page-1)*per:page*per] - #query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5) - - s = s.highlight_options(tags_schema='styled') - s = s.highlight("display_name") - - #return query - from pprint import pprint - print(json.dumps(s.to_dict())) - return s.execute() - @app.errorhandler(404) def not_found(error): @@ -232,7 +77,6 @@ def before_request(): flask.g.user = user if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now(): - print("hio") flask.session['timeout'] = datetime.now() + timedelta(days=7) flask.session.permanent = True flask.session.modified = True @@ -270,6 +114,10 @@ def home(rss): if page: page = int(page) + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE + user_id = None if user_name: user = models.User.by_username(user_name) @@ -278,30 +126,72 @@ def home(rss): user_id = user.id query_args = { - 'term': term or '', 'user': user_id, 'sort': sort or 'id', 'order': order or 'desc', 'category': category or '0_0', 'quality_filter': quality_filter or '0', 'page': page or 1, - 'rss': rss + 'rss': rss, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True - query = search(**query_args) + # If searching, we get results from elastic search + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term - if rss: - return render_rss('/', query) + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = DEFAULT_MAX_SEARCH_RESULT + + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + if rss: + return render_rss('/', query_results, use_elastic=True) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('home.html', + use_elastic=True, + pagination=pagination, + torrent_query=query_results, + 
search=query_args, + rss_filter=rss_query_string) else: - rss_query_string = _generate_query_string(term, category, quality_filter, user_name) - return flask.render_template('home.html', - torrent_query=query, - search=query_args, - rss_filter=rss_query_string) + # If ES is enabled, default to db search for browsing + if use_elastic: + query_args['term'] = '' + else: # Otherwise, use db search for everything + query_args['term'] = term or '' + print(query_args) + query = search_db(**query_args) + if rss: + return render_rss('/', query, use_elastic=False) + else: + rss_query_string = _generate_query_string(term, category, quality_filter, user_name) + # Use elastic is always false here because we only hit this section + # if we're browsing without a search term (which means we default to DB) + # or if ES is disabled + return flask.render_template('home.html', + use_elastic=False, + torrent_query=query, + search=query_args, + rss_filter=rss_query_string) @app.route('/user/') @@ -320,6 +210,10 @@ def view_user(user_name): if page: page = int(page) + per_page = app.config.get('RESULTS_PER_PAGE') + if not per_page: + per_page = DEFAULT_PER_PAGE + query_args = { 'term': term or '', 'user': user.id, @@ -328,40 +222,83 @@ def view_user(user_name): 'category': category or '0_0', 'quality_filter': quality_filter or '0', 'page': page or 1, - 'rss': False + 'rss': False, + 'per_page': per_page } - # God mode - if flask.g.user and flask.g.user.is_admin: - query_args['admin'] = True - - query = search(**query_args) + if flask.g.user: + query_args['logged_in_user'] = flask.g.user + if flask.g.user.is_admin: # God mode + query_args['admin'] = True + # Use elastic search for term searching rss_query_string = _generate_query_string(term, category, quality_filter, user_name) - return flask.render_template('user.html', - torrent_query=query, - search=query_args, - user=user, - user_page=True, - rss_filter=rss_query_string) + use_elastic = app.config.get('USE_ELASTIC_SEARCH') + if use_elastic and term: + query_args['term'] = term + + max_search_results = app.config.get('ES_MAX_SEARCH_RESULT') + if not max_search_results: + max_search_results = DEFAULT_MAX_SEARCH_RESULT + + max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages + + query_args['page'] = max_page + query_args['max_search_results'] = max_search_results + + query_results = search_elastic(**query_args) + + max_results = min(max_search_results, query_results['hits']['total']) + # change p= argument to whatever you change page_parameter to or pagination breaks + pagination = Pagination(p=query_args['page'], per_page=per_page, + total=max_results, bs_version=3, page_parameter='p', + display_msg=SERACH_PAGINATE_DISPLAY_MSG) + return flask.render_template('user.html', + use_elastic=True, + pagination=pagination, + torrent_query=query_results, + search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string) + # Similar logic as home page + else: + if use_elastic: + query_args['term'] = '' + else: + query_args['term'] = term or '' + query = search_db(**query_args) + return flask.render_template('user.html', + use_elastic=False, + torrent_query=query, + search=query_args, + user=user, + user_page=True, + rss_filter=rss_query_string) @app.template_filter('rfc822') def _jinja2_filter_rfc822(date, fmt=None): return formatdate(float(date.strftime('%s'))) +@app.template_filter('rfc822_es') +def _jinja2_filter_rfc822(datestr, fmt=None): + return 
formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s'))) -def render_rss(label, query): + +def render_rss(label, query, use_elastic): + print(query) rss_xml = flask.render_template('rss.xml', + use_elastic=use_elastic, term=label, site_url=flask.request.url_root, - query=query) + torrent_query=query) response = flask.make_response(rss_xml) response.headers['Content-Type'] = 'application/xml' return response -#@app.route('/about', methods=['GET']) +# @app.route('/about', methods=['GET']) # def about(): # return flask.render_template('about.html') @@ -485,7 +422,6 @@ def activate_user(payload): user.status = models.UserStatusType.ACTIVE - db.session.add(user) db.session.commit() diff --git a/nyaa/search.py b/nyaa/search.py new file mode 100644 index 0000000..7369fd3 --- /dev/null +++ b/nyaa/search.py @@ -0,0 +1,317 @@ +import flask +import re +import math +import json +import shlex + +from nyaa import app, db +from nyaa import models + +import sqlalchemy_fulltext.modes as FullTextMode +from sqlalchemy_fulltext import FullTextSearch +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q + + +def search_elastic(term='', user=None, sort='id', order='desc', + category='0_0', quality_filter='0', page=1, + rss=False, admin=False, logged_in_user=None, + per_page=75, max_search_results=1000): + # This function can easily be memcached now + + es_client = Elasticsearch() + + es_sort_keys = { + 'id': 'id', + 'size': 'filesize', + # 'name': 'display_name', # This is slow and buggy + 'seeders': 'seed_count', + 'leechers': 'leech_count', + 'downloads': 'download_count' + } + + sort_ = sort.lower() + if sort_ not in es_sort_keys: + flask.abort(400) + + es_sort = es_sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' + } + + order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + # Only allow ID, desc if RSS + if rss: + sort = es_sort_keys['id'] + order = 'desc' + + # funky, es sort is default asc, prefixed by '-' if desc + if 'desc' == order: + es_sort = '-' + es_sort + + # Quality filter + quality_keys = [ + '0', # Show all + '1', # No remakes + '2', # Only trusted + '3' # Only completed + ] + + if quality_filter.lower() not in quality_keys: + flask.abort(400) + + quality_filter = int(quality_filter) + + # Category filter + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) + if not sub_category: + flask.abort(400) + else: + main_category = models.MainCategory.by_id(main_cat_id) + if not main_category: + flask.abort(400) + + # This might be useless since we validate users + # before coming into this method, but just to be safe... 
+ if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + same_user = False + if logged_in_user: + same_user = user == logged_in_user.id + + s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME')) # todo, sukebei prefix + + # Apply search term + if term: + s = s.query('simple_query_string', + analyzer='my_search_analyzer', + default_operator="AND", + query=term) + + # User view (/user/username) + if user: + s = s.filter('term', uploader_id=user) + + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in user is not the same as the user being viewed, + # show only torrents that aren't hidden or anonymous. + # + # If logged in user is the same as the user being viewed, + # show all torrents including hidden and anonymous ones. + # + # On RSS pages in user view, show only torrents that + # aren't hidden or anonymous no matter what + if not same_user or rss: + s = s.filter('term', hidden=False) + s = s.filter('term', anonymous=False) + # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + s = s.filter('term', deleted=False) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. + if logged_in_user and not rss: + hiddenFilter = Q('term', hidden=False) + userFilter = Q('term', uploader_id=logged_in_user.id) + combinedFilter = hiddenFilter | userFilter + s = s.filter('bool', filter=[combinedFilter]) + else: + s = s.filter('term', hidden=False) + + if main_category: + s = s.filter('term', main_category_id=main_cat_id) + elif sub_category: + s = s.filter('term', main_category_id=main_cat_id) + s = s.filter('term', sub_category_id=sub_cat_id) + + if quality_filter == 0: + pass + elif quality_filter == 1: + s = s.filter('term', remake=False) + elif quality_filter == 2: + s = s.filter('term', trusted=True) + elif quality_filter == 3: + s = s.filter('term', complete=True) + + # Apply sort + s = s.sort(es_sort) + + # Only show first RESULTS_PER_PAGE items for RSS + if rss: + s = s[0:per_page] + else: + max_page = min(page, int(math.ceil(max_search_results / float(per_page)))) + from_idx = (max_page-1)*per_page + to_idx = min(max_search_results, max_page*per_page) + s = s[from_idx:to_idx] + + highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT') + if highlight: + s = s.highlight_options(tags_schema='styled') + s = s.highlight("display_name") + + # Return query, uncomment print line to debug query + from pprint import pprint + print(json.dumps(s.to_dict())) + return s.execute() + + +def search_db(term='', user=None, sort='id', order='desc', category='0_0', + quality_filter='0', page=1, rss=False, admin=False, + logged_in_user=None, per_page=75): + sort_keys = { + 'id': models.Torrent.id, + 'size': models.Torrent.filesize, + # 'name': models.Torrent.display_name, # Disable this because we disabled this in search_elastic, for the sake of consistency + 'seeders': models.Statistic.seed_count, + 'leechers': models.Statistic.leech_count, + 'downloads': models.Statistic.download_count + } + + sort_ = sort.lower() + if sort_ not in sort_keys: + flask.abort(400) + sort = sort_keys[sort] + + order_keys = { + 'desc': 'desc', + 'asc': 'asc' + } + + order_ = order.lower() + if order_ not in order_keys: + flask.abort(400) + + filter_keys = { + '0': None, + '1': (models.TorrentFlags.REMAKE, False), + '2': (models.TorrentFlags.TRUSTED, True), + '3': 
(models.TorrentFlags.COMPLETE, True) + } + + sentinel = object() + filter_tuple = filter_keys.get(quality_filter.lower(), sentinel) + if filter_tuple is sentinel: + flask.abort(400) + + if user: + user = models.User.by_id(user) + if not user: + flask.abort(404) + user = user.id + + main_category = None + sub_category = None + main_cat_id = 0 + sub_cat_id = 0 + if category: + cat_match = re.match(r'^(\d+)_(\d+)$', category) + if not cat_match: + flask.abort(400) + + main_cat_id = int(cat_match.group(1)) + sub_cat_id = int(cat_match.group(2)) + + if main_cat_id > 0: + if sub_cat_id > 0: + sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id) + else: + main_category = models.MainCategory.by_id(main_cat_id) + + if not category: + flask.abort(400) + + # Force sort by id desc if rss + if rss: + sort = sort_keys['id'] + order = 'desc' + + same_user = False + if logged_in_user: + same_user = logged_in_user.id == user + + if term: + query = db.session.query(models.TorrentNameSearch) + else: + query = models.Torrent.query + + # User view (/user/username) + if user: + query = query.filter(models.Torrent.uploader_id == user) + + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous + # If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones + # On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what + if not same_user or rss: + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN | + models.TorrentFlags.ANONYMOUS)).is_(False)) + # General view (homepage, general search view) + else: + if not admin: + # Hide all DELETED torrents if regular user + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False)) + # If logged in, show all torrents that aren't hidden unless they belong to you + # On RSS pages, show all public torrents and nothing more. 
+ if logged_in_user and not rss: + query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) | + (models.Torrent.uploader_id == logged_in_user.id)) + # Otherwise, show all torrents that aren't hidden + else: + query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) + + if main_category: + query = query.filter(models.Torrent.main_category_id == main_cat_id) + elif sub_category: + query = query.filter((models.Torrent.main_category_id == main_cat_id) & + (models.Torrent.sub_category_id == sub_cat_id)) + + if filter_tuple: + query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1])) + + if term: + for item in shlex.split(term, posix=False): + if len(item) >= 2: + query = query.filter(FullTextSearch( + item, models.TorrentNameSearch, FullTextMode.NATURAL)) + + # Sort and order + if sort.class_ != models.Torrent: + query = query.join(sort.class_) + + query = query.order_by(getattr(sort, order)()) + + if rss: + query = query.limit(per_page) + else: + query = query.paginate_faste(page, per_page=per_page, step=5) + + return query diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index 266e524..c495cda 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -4,20 +4,32 @@ RSS Feed for {{ term }} {{ url_for('home', _external=True) }} - {% for torrent in query %} + {% for torrent in torrent_query %} {% if torrent.has_torrent %} {{ torrent.display_name }} + {% if use_elastic %} + {{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ torrent.created_time|rfc822_es }} + {% else %} {{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }} {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} {{ torrent.created_time|rfc822 }} + {% endif %} {% else %} {{ torrent.display_name }} + {% if use_elastic %} + {{ torrent.info_hash }} + {{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }} + {{ torrent.created_time|rfc822_es }} + {% else %} {{ torrent.magnet_uri }} {{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }} {{ torrent.created_time|rfc822 }} + {% endif %} {% endif %} {% endfor %} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index 8d6f9da..e8e08be 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -8,7 +8,7 @@ {{ caller() }} {% endmacro %} -{% if torrent_query.hits.total > 0 %} +{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
@@ -16,7 +16,7 @@ {% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
Category
{% endcall %} - {% call render_column_header("hdr-name", "width:auto;", sort_key="name") %} + {% call render_column_header("hdr-name", "width:auto;") %}
Name
{% endcall %} {% call render_column_header("hdr-link", "width:0;", center_text=True) %} @@ -45,26 +45,46 @@ - {% for torrent in torrent_query %} + {% set torrents = torrent_query if use_elastic else torrent_query.items %} + {% for torrent in torrents %} - {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %} + {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %} {% set icon_dir = config.SITE_FLAVOR %} + {% if use_elastic %} + {% else %} + + {% endif %} + {% if use_elastic %} + {% else %} + + {% endif %} + {% if config.ENABLE_SHOW_STATS %} + {% if use_elastic %} + {% else %} + + + + {% endif %} {% endif %} {% endfor %} @@ -75,9 +95,12 @@

No results found

{% endif %} -{#
+ {% if use_elastic %} + {{ pagination.info }} + {{ pagination.links }} + {% else %} {% from "bootstrap/pagination.html" import render_pagination %} {{ render_pagination(torrent_query) }} + {% endif %}
-#} diff --git a/requirements.txt b/requirements.txt index af89eab..843b935 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,7 +32,9 @@ visitor==0.1.3 webassets==0.12.1 Werkzeug==0.12.1 WTForms==2.1 -## The following requirements were added by pip freeze: +## elasticsearch dependencies elasticsearch==5.3.0 elasticsearch-dsl==5.2.0 -progressbar2==3.20.0 \ No newline at end of file +progressbar2==3.20.0 +mysql-replication==0.13 +flask-paginate==0.4.5 \ No newline at end of file diff --git a/sync_es.py b/sync_es.py index 81ad17f..45c98a5 100644 --- a/sync_es.py +++ b/sync_es.py @@ -40,7 +40,12 @@ log.setLevel(logging.INFO) #logging.getLogger('elasticsearch').setLevel(logging.DEBUG) # in prod want in /var/lib somewhere probably -SAVE_LOC = "/tmp/sync_es_position.json" +SAVE_LOC = "/var/lib/sync_es_position.json" +MYSQL_HOST = '127.0.0.1' +MYSQL_PORT = 3306 +MYSQL_USER = 'test' +MYSQL_PW = 'test123' +NT_DB = 'nyaav2' with open(SAVE_LOC) as f: pos = json.load(f) @@ -50,16 +55,16 @@ es = Elasticsearch() stream = BinLogStreamReader( # TODO parse out from config.py or something connection_settings = { - 'host': '127.0.0.1', - 'port': 13306, - 'user': 'root', - 'passwd': 'dunnolol' + 'host': MYSQL_HOST, + 'port': MYSQL_PORT, + 'user': MYSQL_USER, + 'passwd': MYSQL_PW }, server_id=10, # arbitrary - # only care about this table currently - only_schemas=["nyaav2"], - # TODO sukebei - only_tables=["nyaa_torrents", "nyaa_statistics"], + # only care about this database currently + only_schemas=[NT_DB], + # these tables in the database + only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"], # from our save file resume_stream=True, log_file=pos['log_file'], @@ -72,7 +77,7 @@ stream = BinLogStreamReader( # using aiomysql if anybody wants to revive that. blocking=True) -def reindex_torrent(t): +def reindex_torrent(t, index_name): # XXX annoyingly different from import_to_es, and # you need to keep them in sync manually. 
f = t['flags'] @@ -103,14 +108,14 @@ def reindex_torrent(t): } # update, so we don't delete the stats if present es.update( - index='nyaav2', + index=index_name, doc_type='torrent', id=t['id'], body={"doc": doc, "doc_as_upsert": True}) -def reindex_stats(s): +def reindex_stats(s, index_name): es.update( - index='nyaav2', + index=index_name, doc_type='torrent', id=s['torrent_id'], body={ @@ -126,21 +131,29 @@ last_save = time.time() for event in stream: for row in event.rows: - if event.table == "nyaa_torrents": + if event.table == "nyaa_torrents" or event.table == "sukebei_torrents": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" if type(event) is WriteRowsEvent: - reindex_torrent(row['values']) + reindex_torrent(row['values'], index_name) elif type(event) is UpdateRowsEvent: - reindex_torrent(row['after_values']) + reindex_torrent(row['after_values'], index_name) elif type(event) is DeleteRowsEvent: # just delete it - es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id']) + es.delete(index=index_name, doc_type='torrent', id=row['values']['id']) else: raise Exception(f"unknown event {type(event)}") - elif event.table == "nyaa_statistics": + elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics": + if event.table == "nyaa_torrents": + index_name = "nyaa" + else: + index_name = "sukebei" if type(event) is WriteRowsEvent: - reindex_stats(row['values']) + reindex_stats(row['values'], index_name) elif type(event) is UpdateRowsEvent: - reindex_stats(row['after_values']) + reindex_stats(row['after_values'], index_name) elif type(event) is DeleteRowsEvent: # uh ok. assume that the torrent row will get deleted later. pass From 200517435863d9411ada755621809bbe0e0db964 Mon Sep 17 00:00:00 2001 From: aldacron Date: Tue, 16 May 2017 00:46:25 -0700 Subject: [PATCH 09/10] finished up rss, changed rss behavior to include pre-defined trackers only, also cleaned up debug statements --- nyaa/routes.py | 3 +-- nyaa/search.py | 4 ++-- nyaa/templates/rss.xml | 2 +- nyaa/templates/search_results.html | 4 ++++ nyaa/torrents.py | 34 +++++++++++++++++++++++++++++- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index 5fbcb5c..edc302b 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -178,7 +178,7 @@ def home(rss): query_args['term'] = '' else: # Otherwise, use db search for everything query_args['term'] = term or '' - print(query_args) + query = search_db(**query_args) if rss: return render_rss('/', query, use_elastic=False) @@ -287,7 +287,6 @@ def _jinja2_filter_rfc822(datestr, fmt=None): def render_rss(label, query, use_elastic): - print(query) rss_xml = flask.render_template('rss.xml', use_elastic=use_elastic, term=label, diff --git a/nyaa/search.py b/nyaa/search.py index 7369fd3..e6353c5 100644 --- a/nyaa/search.py +++ b/nyaa/search.py @@ -177,8 +177,8 @@ def search_elastic(term='', user=None, sort='id', order='desc', s = s.highlight("display_name") # Return query, uncomment print line to debug query - from pprint import pprint - print(json.dumps(s.to_dict())) + # from pprint import pprint + # print(json.dumps(s.to_dict())) return s.execute() diff --git a/nyaa/templates/rss.xml b/nyaa/templates/rss.xml index c495cda..e1787d2 100644 --- a/nyaa/templates/rss.xml +++ b/nyaa/templates/rss.xml @@ -22,7 +22,7 @@ {{ torrent.display_name }} {% if use_elastic %} - {{ torrent.info_hash }} + {{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }} {{ url_for('view_torrent', 
torrent_id=torrent.meta.id, _external=True) }} {{ torrent.created_time|rfc822_es }} {% else %} diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index e8e08be..cc0988f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -66,7 +66,11 @@ {% endif %} {% if use_elastic %} diff --git a/nyaa/torrents.py b/nyaa/torrents.py index a8ad6d0..3d35cc7 100644 --- a/nyaa/torrents.py +++ b/nyaa/torrents.py @@ -3,6 +3,7 @@ import base64 import time from urllib.parse import urlencode from orderedset import OrderedSet +from nyaa import app from nyaa import bencode from nyaa import app @@ -53,10 +54,23 @@ def get_trackers(torrent): return list(trackers) +def get_trackers_magnet(): + trackers = OrderedSet() + + # Our main one first + main_announce_url = app.config.get('MAIN_ANNOUNCE_URL') + if main_announce_url: + trackers.add(main_announce_url) + + # and finally our tracker list + trackers.update(default_trackers()) + + return list(trackers) + def create_magnet(torrent, max_trackers=5, trackers=None): if trackers is None: - trackers = get_trackers(torrent) + trackers = get_trackers_magnet() magnet_parts = [ ('dn', torrent.display_name) @@ -68,6 +82,24 @@ def create_magnet(torrent, max_trackers=5, trackers=None): return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) +# For processing ES links +@app.context_processor +def create_magnet_from_info(): + def _create_magnet_from_info(display_name, info_hash, max_trackers=5, trackers=None): + if trackers is None: + trackers = get_trackers_magnet() + + magnet_parts = [ + ('dn', display_name) + ] + for tracker in trackers[:max_trackers]: + magnet_parts.append(('tr', tracker)) + + b32_info_hash = base64.b32encode(bytes.fromhex(info_hash)).decode('utf-8') + return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts) + return dict(create_magnet_from_info=_create_magnet_from_info) + + def create_default_metadata_base(torrent, trackers=None): if trackers is None: trackers = get_trackers(torrent) From 1d0177480eae96aadf4ade0aa28bd42b32f35e63 Mon Sep 17 00:00:00 2001 From: aldacron Date: Tue, 16 May 2017 01:04:08 -0700 Subject: [PATCH 10/10] updated time display --- nyaa/routes.py | 12 ++++++++++++ nyaa/templates/search_results.html | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nyaa/routes.py b/nyaa/routes.py index edc302b..6cee9c4 100644 --- a/nyaa/routes.py +++ b/nyaa/routes.py @@ -98,6 +98,18 @@ def _generate_query_string(term, category, filter, user): return params +@app.template_filter('utc_time') +def get_utc_timestamp(datetime_str): + ''' Returns a UTC POSIX timestamp, as seconds ''' + UTC_EPOCH = datetime.utcfromtimestamp(0) + return int((datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') - UTC_EPOCH).total_seconds()) + + +@app.template_filter('display_time') +def get_display_time(datetime_str): + return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d %H:%M') + + @app.route('/rss', defaults={'rss': True}) @app.route('/', defaults={'rss': False}) def home(rss): diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html index cc0988f..4b5663f 100644 --- a/nyaa/templates/search_results.html +++ b/nyaa/templates/search_results.html @@ -74,7 +74,7 @@ {% if use_elastic %} - + {% else %} {% endif %}
+ {% if use_elastic %} + {% else %} + + {% endif %} {%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}{{ torrent.display_name | escape }} {% if torrent.has_torrent %}{% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}{{ torrent.seed_count }} {{ torrent.leech_count }} {{ torrent.download_count }}{{ torrent.stats.seed_count }}{{ torrent.stats.leech_count }}{{ torrent.stats.download_count }}
{% if torrent.has_torrent %}{% endif %} + {% if use_elastic %} + + {% else %} + {% endif %} {{ torrent.filesize | filesizeformat(True) }}{{ torrent.filesize | filesizeformat(True) }}{{ torrent.created_time }}{{ torrent.created_time | display_time }}{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}