1
0
Fork 0
mirror of https://gitlab.com/SIGBUS/nyaa.git synced 2024-12-22 05:29:59 +00:00

hooked up ES... 90% done, need to figure out how to generate magnet URIs

This commit is contained in:
aldacron 2017-05-15 23:51:58 -07:00
parent c2c547e786
commit 899aa01473
11 changed files with 585 additions and 226 deletions

View file

@ -44,5 +44,43 @@
- Start the dev server with `python run.py`
- Deactivate `source deactivate`
# Enabling ElasticSearch
## Basics
- Install jdk `sudo apt-get install openjdk-8-jdk`
- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html
- `sudo systemctl enable elasticsearch.service`
- `sudo systemctl start elasticsearch.service`
- Run `curl -XGET 'localhost:9200'` and make sure ES is running
- Optional: install Kibana as a search frontend for ES
## Enable MySQL Binlogging
- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server
- Connect to mysql
- `SHOW VARIABLES LIKE 'binlog_format';`
- Make sure it shows ROW
- Connect to root user
- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with
## Setting up ES
- Run `./create_es.sh`; this creates two indices: `nyaa` and `sukebei`
- The output should show `acknowledged: true` twice
- The safest bet is to disable the webapp here to ensure there's no database writes
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa`
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei`
- These will take some time to run as it's indexing
## Setting up sync_es.py
- `sync_es.py` keeps the ElasticSearch index updated by reading the MySQL binlog
- Configure the MySQL options with the user where you granted the REPLICATION permissions
- Connect to MySQL, run `SHOW MASTER STATUS;`.
- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}`, replacing FILE with the File value from the SQL output (something like master1-bin.000002) and POSITION with the Position value (something like 892528513)
- Set up `sync_es.py` as a service and run it, preferably as the system/root
- Make sure `sync_es.py` runs within venv with the right dependencies
## Good to go!
- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go
## Code Quality:
- Remember to follow PEP8 style guidelines and run `./lint.sh` before committing.

View file

@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***'
SMTP_USERNAME = '***'
SMTP_PASSWORD = '***'
RESULTS_PER_PAGE = 75
# What the site identifies itself as.
SITE_NAME = 'Nyaa'
@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False
MAIN_ANNOUNCE_URL = ''
BACKUP_TORRENT_FOLDER = 'torrents'
#
# Search Options
#
# Max ES search results, do not set over 10000
RESULTS_PER_PAGE = 75
USE_ELASTIC_SEARCH = False
ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False
ES_MAX_SEARCH_RESULT = 1000
ES_INDEX_NAME = SITE_FLAVOR # we create indicies named nyaa or sukebei

View file

@ -1,3 +1,5 @@
#!/usr/bin/env bash
curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
# create indices named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

View file

@ -5,6 +5,7 @@ which is assumed to already exist.
This is a one-shot deal, so you'd either need to complement it
with a cron job or some binlog-reading thing (TODO)
"""
from nyaa import app
from nyaa.models import Torrent
from elasticsearch import Elasticsearch
from elasticsearch import helpers
@ -33,7 +34,7 @@ def mk_es(t):
return {
"_id": t.id,
"_type": "torrent",
"_index": "nyaav2",
"_index": app.config['ES_INDEX_NAME'],
"_source": {
# we're also indexing the id as a number so you can
# order by it. seems like this is just equivalent to

6
my.cnf
View file

@ -4,3 +4,9 @@ ft_min_word_len=2
innodb_ft_cache_size = 80000000
innodb_ft_total_cache_size = 1600000000
max_allowed_packet = 100M
[mariadb]
log-bin
server_id=1
log-basename=master1
binlog-format = row

View file

@ -6,18 +6,16 @@ from nyaa import bencode, utils
from nyaa import torrents
from nyaa import backend
from nyaa import api_handler
from nyaa.search import search_elastic, search_db
import config
import json
import re
from datetime import datetime, timedelta
import ipaddress
import os.path
import base64
from urllib.parse import quote
import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
import shlex
import math
from werkzeug import url_encode
from itsdangerous import URLSafeSerializer, BadSignature
@ -27,12 +25,14 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
from flask_paginate import Pagination
es_client = Elasticsearch()
DEBUG_API = False
DEFAULT_MAX_SEARCH_RESULT = 1000
DEFAULT_PER_PAGE = 75
SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results {start}-{end} out of {total} results.<br>
Please refine your search results if you can't find what you were looking for.'''
def redirect_url():
@ -53,168 +53,13 @@ def modify_query(**new_values):
return '{}?{}'.format(flask.request.path, url_encode(args))
@app.template_global()
def filter_truthy(input_list):
    """Return a list holding only the truthy items of ``input_list``.

    Exposed as a template global because Jinja2 has no list
    comprehensions; used by the search_results.html template.
    """
    # filter(None, ...) keeps exactly the items that evaluate as true.
    return list(filter(None, input_list))
def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False):
sort_keys = {
'id': models.Torrent.id,
'size': models.Torrent.filesize,
'name': models.Torrent.display_name,
'seeders': models.Statistic.seed_count,
'leechers': models.Statistic.leech_count,
'downloads': models.Statistic.download_count
}
sort_ = sort.lower()
if sort_ not in sort_keys:
flask.abort(400)
# XXX gross why are all the names subtly different
es_sort = ({
'id': 'id',
'size': 'filesize',
'name': 'display_name',
'seeders': 'seed_count',
'leechers': 'leech_count',
'downloads': 'download_count'
})[sort]
sort = sort_keys[sort]
order_keys = {
'desc': 'desc',
'asc': 'asc'
}
order_ = order.lower()
if order_ not in order_keys:
flask.abort(400)
# funky, es sort is default asc, prefixed by '-' if desc
if "desc" == order:
es_sort = "-" + es_sort
filter_keys = {
'0': None,
'1': (models.TorrentFlags.REMAKE, False),
'2': (models.TorrentFlags.TRUSTED, True),
'3': (models.TorrentFlags.COMPLETE, True)
}
sentinel = object()
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
if filter_tuple is sentinel:
flask.abort(400)
if user:
user = models.User.by_id(user)
if not user:
flask.abort(404)
user = user.id
main_category = None
sub_category = None
main_cat_id = 0
sub_cat_id = 0
if category:
cat_match = re.match(r'^(\d+)_(\d+)$', category)
if not cat_match:
flask.abort(400)
main_cat_id = int(cat_match.group(1))
sub_cat_id = int(cat_match.group(2))
if main_cat_id > 0:
if sub_cat_id > 0:
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
else:
main_category = models.MainCategory.by_id(main_cat_id)
if not category:
flask.abort(400)
# Force sort by id desc if rss
if rss:
sort = sort_keys['id']
order = 'desc'
same_user = False
if flask.g.user:
same_user = flask.g.user.id == user
s = Search(using=es_client, index='nyaav2')
if term:
query = db.session.query(models.TorrentNameSearch)
s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term)
else:
query = models.Torrent.query
# Filter by user
if user:
s = s.filter("term", uploader_id=user)
query = query.filter(models.Torrent.uploader_id == user)
# If admin, show everything
if not admin:
# If user is not logged in or the accessed feed doesn't belong to user,
# hide anonymous torrents belonging to the queried user
if not same_user:
# TODO adapt to es syntax
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False))
if main_category:
s = s.filter("term", main_category_id=main_cat_id)
query = query.filter(models.Torrent.main_category_id == main_cat_id)
elif sub_category:
s = s.filter("term", main_category_id=main_cat_id)
s = s.filter("term", sub_category_id=sub_cat_id)
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
(models.Torrent.sub_category_id == sub_cat_id))
# TODO i dunno what this means in es
if filter_tuple:
query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
# If admin, show everything
if not admin:
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False))
if term:
# note already handled in es
for item in shlex.split(term, posix=False):
if len(item) >= 2:
query = query.filter(FullTextSearch(
item, models.TorrentNameSearch, FullTextMode.NATURAL))
# Sort and order
if sort.class_ != models.Torrent:
query = query.join(sort.class_)
s = s.sort(es_sort)
query = query.order_by(getattr(sort, order)())
per = app.config['RESULTS_PER_PAGE']
if rss:
pass
#query = query.limit(app.config['RESULTS_PER_PAGE'])
else:
# page is 1-based?
s = s[(page-1)*per:page*per]
#query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5)
s = s.highlight_options(tags_schema='styled')
s = s.highlight("display_name")
#return query
from pprint import pprint
print(json.dumps(s.to_dict()))
return s.execute()
@app.errorhandler(404)
def not_found(error):
@ -232,7 +77,6 @@ def before_request():
flask.g.user = user
if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now():
print("hio")
flask.session['timeout'] = datetime.now() + timedelta(days=7)
flask.session.permanent = True
flask.session.modified = True
@ -270,6 +114,10 @@ def home(rss):
if page:
page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
user_id = None
if user_name:
user = models.User.by_username(user_name)
@ -278,27 +126,69 @@ def home(rss):
user_id = user.id
query_args = {
'term': term or '',
'user': user_id,
'sort': sort or 'id',
'order': order or 'desc',
'category': category or '0_0',
'quality_filter': quality_filter or '0',
'page': page or 1,
'rss': rss
'rss': rss,
'per_page': per_page
}
# God mode
if flask.g.user and flask.g.user.is_admin:
if flask.g.user:
query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True
query = search(**query_args)
# If searching, we get results from elastic search
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
if rss:
return render_rss('/', query)
return render_rss('/', query_results, use_elastic=True)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('home.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
rss_filter=rss_query_string)
else:
# If ES is enabled, default to db search for browsing
if use_elastic:
query_args['term'] = ''
else: # Otherwise, use db search for everything
query_args['term'] = term or ''
print(query_args)
query = search_db(**query_args)
if rss:
return render_rss('/', query, use_elastic=False)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
# Use elastic is always false here because we only hit this section
# if we're browsing without a search term (which means we default to DB)
# or if ES is disabled
return flask.render_template('home.html',
use_elastic=False,
torrent_query=query,
search=query_args,
rss_filter=rss_query_string)
@ -320,6 +210,10 @@ def view_user(user_name):
if page:
page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
query_args = {
'term': term or '',
'user': user.id,
@ -328,17 +222,54 @@ def view_user(user_name):
'category': category or '0_0',
'quality_filter': quality_filter or '0',
'page': page or 1,
'rss': False
'rss': False,
'per_page': per_page
}
# God mode
if flask.g.user and flask.g.user.is_admin:
if flask.g.user:
query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True
query = search(**query_args)
# Use elastic search for term searching
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('user.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
user=user,
user_page=True,
rss_filter=rss_query_string)
# Similar logic as home page
else:
if use_elastic:
query_args['term'] = ''
else:
query_args['term'] = term or ''
query = search_db(**query_args)
return flask.render_template('user.html',
use_elastic=False,
torrent_query=query,
search=query_args,
user=user,
@ -350,12 +281,18 @@ def view_user(user_name):
def _jinja2_filter_rfc822(date, fmt=None):
return formatdate(float(date.strftime('%s')))
@app.template_filter('rfc822_es')
def _jinja2_filter_rfc822(datestr, fmt=None):
    """Template filter ``rfc822_es``: format a timestamp string for RSS.

    ``datestr`` must match ``%Y-%m-%dT%H:%M:%S``; it is parsed, converted
    to epoch seconds and rendered with ``email.utils.formatdate``.
    ``fmt`` is accepted but not used.
    """
    # NOTE(review): this function reuses the Python name of the 'rfc822'
    # filter defined just above; both are registered under the filter name
    # given to the decorator - confirm nothing refers to them by function name.
    parsed = datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S')
    epoch_seconds = float(parsed.strftime('%s'))
    return formatdate(epoch_seconds)
def render_rss(label, query):
def render_rss(label, query, use_elastic):
print(query)
rss_xml = flask.render_template('rss.xml',
use_elastic=use_elastic,
term=label,
site_url=flask.request.url_root,
query=query)
torrent_query=query)
response = flask.make_response(rss_xml)
response.headers['Content-Type'] = 'application/xml'
return response
@ -485,7 +422,6 @@ def activate_user(payload):
user.status = models.UserStatusType.ACTIVE
db.session.add(user)
db.session.commit()

317
nyaa/search.py Normal file
View file

@ -0,0 +1,317 @@
import flask
import re
import math
import json
import shlex
from nyaa import app, db
from nyaa import models
import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
def search_elastic(term='', user=None, sort='id', order='desc',
                   category='0_0', quality_filter='0', page=1,
                   rss=False, admin=False, logged_in_user=None,
                   per_page=75, max_search_results=1000):
    """Search torrents through ElasticSearch and return the executed response.

    Args:
        term: Search string, sent as a ``simple_query_string`` query; an
            empty value applies no text query.
        user: Uploader id to restrict results to (user view); it is looked
            up with ``models.User.by_id``.
        sort: One of ``id``, ``size``, ``seeders``, ``leechers`` or
            ``downloads`` (case-insensitive).
        order: ``asc`` or ``desc`` (case-insensitive).
        category: ``"<main>_<sub>"`` category ids; an id of 0 means "any".
        quality_filter: ``'0'`` all, ``'1'`` no remakes, ``'2'`` trusted
            only, ``'3'`` complete only.
        page: 1-based page number (ignored for RSS).
        rss: When true, sorting is forced to ``id``/``desc``, only the first
            ``per_page`` hits are requested, and non-admin visibility is
            that of a visitor who does not own the torrents.
        admin: When true, the deleted/hidden/anonymous filters are skipped.
        logged_in_user: Current user object (only ``.id`` is read) or None.
        per_page: Page size.
        max_search_results: Upper bound on how deep pagination may reach.

    Returns:
        Whatever ``Search.execute()`` returns for the built query.

    Aborts with HTTP 400 on an invalid sort, order, quality filter or
    category, and with HTTP 404 when ``user`` does not exist.
    """
    # This function can easily be memcached now

    es_client = Elasticsearch()

    es_sort_keys = {
        'id': 'id',
        'size': 'filesize',
        # 'name': 'display_name',  # This is slow and buggy
        'seeders': 'seed_count',
        'leechers': 'leech_count',
        'downloads': 'download_count'
    }

    sort_ = sort.lower()
    if sort_ not in es_sort_keys:
        flask.abort(400)

    order_ = order.lower()
    if order_ not in ('desc', 'asc'):
        flask.abort(400)

    # Only allow ID, desc if RSS
    if rss:
        sort_ = 'id'
        order_ = 'desc'

    # Resolve the ES field from the *normalised* values (and only after the
    # RSS override) so that 'ID'/'DESC' behave like 'id'/'desc' and the RSS
    # feed really is sorted by id.
    es_sort = es_sort_keys[sort_]

    # funky, es sort is default asc, prefixed by '-' if desc
    if order_ == 'desc':
        es_sort = '-' + es_sort

    # Quality filter
    quality_keys = [
        '0',  # Show all
        '1',  # No remakes
        '2',  # Only trusted
        '3'   # Only completed
    ]

    if quality_filter.lower() not in quality_keys:
        flask.abort(400)

    quality_filter = int(quality_filter)

    # Category filter
    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
                if not sub_category:
                    flask.abort(400)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)
                if not main_category:
                    flask.abort(400)

    # This might be useless since we validate users
    # before coming into this method, but just to be safe...
    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    same_user = False
    if logged_in_user:
        same_user = user == logged_in_user.id

    s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME'))  # todo, sukebei prefix

    # Apply search term
    if term:
        s = s.query('simple_query_string',
                    analyzer='my_search_analyzer',
                    default_operator="AND",
                    query=term)

    # User view (/user/username)
    if user:
        s = s.filter('term', uploader_id=user)

        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            #
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            #
            # On RSS pages in user view, show only torrents that
            # aren't hidden or anonymous no matter what
            if not same_user or rss:
                s = s.filter('term', hidden=False)
                s = s.filter('term', anonymous=False)
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                not_hidden = Q('term', hidden=False)
                own_upload = Q('term', uploader_id=logged_in_user.id)
                s = s.filter('bool', filter=[not_hidden | own_upload])
            else:
                s = s.filter('term', hidden=False)

    if main_category:
        s = s.filter('term', main_category_id=main_cat_id)
    elif sub_category:
        s = s.filter('term', main_category_id=main_cat_id)
        s = s.filter('term', sub_category_id=sub_cat_id)

    if quality_filter == 1:
        s = s.filter('term', remake=False)
    elif quality_filter == 2:
        s = s.filter('term', trusted=True)
    elif quality_filter == 3:
        s = s.filter('term', complete=True)

    # Apply sort
    s = s.sort(es_sort)

    # Only show first RESULTS_PER_PAGE items for RSS
    if rss:
        s = s[0:per_page]
    else:
        # Never page past max_search_results, whatever page was requested.
        max_page = min(page, int(math.ceil(max_search_results / float(per_page))))
        from_idx = (max_page - 1) * per_page
        to_idx = min(max_search_results, max_page * per_page)
        s = s[from_idx:to_idx]

    highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
    if highlight:
        s = s.highlight_options(tags_schema='styled')
        s = s.highlight("display_name")

    # Emit the generated query at debug level instead of printing it to
    # stdout on every request.
    app.logger.debug('ElasticSearch query: %s', s.to_dict())
    return s.execute()
def search_db(term='', user=None, sort='id', order='desc', category='0_0',
              quality_filter='0', page=1, rss=False, admin=False,
              logged_in_user=None, per_page=75):
    """Search torrents in the SQL database.

    Args:
        term: Search string; each whitespace/quote-delimited item of at
            least two characters becomes a full-text filter.
        user: Uploader id to restrict results to (user view); it is looked
            up with ``models.User.by_id``.
        sort: One of ``id``, ``size``, ``seeders``, ``leechers`` or
            ``downloads`` (case-insensitive).
        order: ``asc`` or ``desc`` (case-insensitive).
        category: ``"<main>_<sub>"`` category ids; an id of 0 means "any".
        quality_filter: ``'0'`` all, ``'1'`` no remakes, ``'2'`` trusted
            only, ``'3'`` complete only.
        page: Page number handed to ``paginate_faste`` (ignored for RSS).
        rss: When true, sorting is forced to ``id``/``desc`` and the query
            is limited to ``per_page`` rows instead of being paginated.
        admin: When true, the deleted/hidden/anonymous filters are skipped.
        logged_in_user: Current user object (only ``.id`` is read) or None.
        per_page: Page size.

    Returns:
        The limited query when ``rss`` is true, otherwise the result of
        ``query.paginate_faste(...)``.

    Aborts with HTTP 400 on an invalid sort, order, quality filter or
    category, and with HTTP 404 when ``user`` does not exist.
    """
    sort_keys = {
        'id': models.Torrent.id,
        'size': models.Torrent.filesize,
        # 'name': models.Torrent.display_name,  # Disabled because it is
        # disabled in search_elastic, for the sake of consistency
        'seeders': models.Statistic.seed_count,
        'leechers': models.Statistic.leech_count,
        'downloads': models.Statistic.download_count
    }

    sort_ = sort.lower()
    if sort_ not in sort_keys:
        flask.abort(400)

    order_ = order.lower()
    if order_ not in ('desc', 'asc'):
        flask.abort(400)

    filter_keys = {
        '0': None,
        '1': (models.TorrentFlags.REMAKE, False),
        '2': (models.TorrentFlags.TRUSTED, True),
        '3': (models.TorrentFlags.COMPLETE, True)
    }

    # A sentinel distinguishes "unknown key" from the valid value None ('0').
    sentinel = object()
    filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
    if filter_tuple is sentinel:
        flask.abort(400)

    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)

            # Reject ids that do not resolve to a real category, matching
            # search_elastic; checking the raw ``category`` string here
            # could never fail.
            if not (main_category or sub_category):
                flask.abort(400)

    # Force sort by id desc if rss
    if rss:
        sort_ = 'id'
        order_ = 'desc'

    # Use the normalised values so 'ID'/'DESC' behave like 'id'/'desc'.
    sort_column = sort_keys[sort_]

    same_user = False
    if logged_in_user:
        same_user = logged_in_user.id == user

    if term:
        query = db.session.query(models.TorrentNameSearch)
    else:
        query = models.Torrent.query

    # User view (/user/username)
    if user:
        query = query.filter(models.Torrent.uploader_id == user)

        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(
                int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            # On RSS pages in user view, show only torrents that aren't
            # hidden or anonymous no matter what.
            if not same_user or rss:
                query = query.filter(models.Torrent.flags.op('&')(
                    int(models.TorrentFlags.HIDDEN |
                        models.TorrentFlags.ANONYMOUS)).is_(False))
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(
                int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                query = query.filter(
                    (models.Torrent.flags.op('&')(
                        int(models.TorrentFlags.HIDDEN)).is_(False)) |
                    (models.Torrent.uploader_id == logged_in_user.id))
            # Otherwise, show all torrents that aren't hidden
            else:
                query = query.filter(models.Torrent.flags.op('&')(
                    int(models.TorrentFlags.HIDDEN)).is_(False))

    if main_category:
        query = query.filter(models.Torrent.main_category_id == main_cat_id)
    elif sub_category:
        query = query.filter((models.Torrent.main_category_id == main_cat_id) &
                             (models.Torrent.sub_category_id == sub_cat_id))

    if filter_tuple:
        query = query.filter(models.Torrent.flags.op('&')(
            int(filter_tuple[0])).is_(filter_tuple[1]))

    if term:
        for item in shlex.split(term, posix=False):
            if len(item) >= 2:
                query = query.filter(FullTextSearch(
                    item, models.TorrentNameSearch, FullTextMode.NATURAL))

    # Sort and order; statistics columns live on another model, so join it.
    if sort_column.class_ != models.Torrent:
        query = query.join(sort_column.class_)

    query = query.order_by(getattr(sort_column, order_)())

    if rss:
        query = query.limit(per_page)
    else:
        query = query.paginate_faste(page, per_page=per_page, step=5)

    return query

View file

@ -4,20 +4,32 @@
<description>RSS Feed for {{ term }}</description>
<link>{{ url_for('home', _external=True) }}</link>
<atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
{% for torrent in query %}
{% for torrent in torrent_query %}
{% if torrent.has_torrent %}
<item>
<title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item>
{% else %}
<item>
<title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ torrent.info_hash }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ torrent.magnet_uri }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item>
{% endif %}
{% endfor %}

View file

@ -8,7 +8,7 @@
{{ caller() }}
</th>
{% endmacro %}
{% if torrent_query.hits.total > 0 %}
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
<div class="table-responsive">
<table class="table table-bordered table-hover table-striped torrent-list">
<thead>
@ -16,7 +16,7 @@
{% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
<div>Category</div>
{% endcall %}
{% call render_column_header("hdr-name", "width:auto;", sort_key="name") %}
{% call render_column_header("hdr-name", "width:auto;") %}
<div>Name</div>
{% endcall %}
{% call render_column_header("hdr-link", "width:0;", center_text=True) %}
@ -45,26 +45,46 @@
</tr>
</thead>
<tbody>
{% for torrent in torrent_query %}
{% set torrents = torrent_query if use_elastic else torrent_query.items %}
{% for torrent in torrents %}
<tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}">
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %}
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
{% set icon_dir = config.SITE_FLAVOR %}
<td style="padding:0 4px;">
{% if use_elastic %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}">
{% else %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}">
{% endif %}
<img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png">
</a>
</td>
{% if use_elastic %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td>
{% else %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td>
{% endif %}
<td style="white-space: nowrap;text-align: center;">
{% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %}
<a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a>
</td>
<td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td>
{% if use_elastic %}
<td class="text-center" {#data-timestamp="{{ torrent.created_time|int }}"#}>{{ torrent.created_time }}</td>
{% else %}
<td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td>
{% endif %}
{% if config.ENABLE_SHOW_STATS %}
{% if use_elastic %}
<td class="text-center" style="color: green;">{{ torrent.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.leech_count }}</td>
<td class="text-center">{{ torrent.download_count }}</td>
{% else %}
<td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td>
<td class="text-center">{{ torrent.stats.download_count }}</td>
{% endif %}
{% endif %}
</tr>
{% endfor %}
@ -75,9 +95,12 @@
<h3>No results found</h3>
{% endif %}
{#
<center>
{% if use_elastic %}
{{ pagination.info }}
{{ pagination.links }}
{% else %}
{% from "bootstrap/pagination.html" import render_pagination %}
{{ render_pagination(torrent_query) }}
{% endif %}
</center>
#}

View file

@ -32,7 +32,9 @@ visitor==0.1.3
webassets==0.12.1
Werkzeug==0.12.1
WTForms==2.1
## The following requirements were added by pip freeze:
## elasticsearch dependencies
elasticsearch==5.3.0
elasticsearch-dsl==5.2.0
progressbar2==3.20.0
mysql-replication==0.13
flask-paginate==0.4.5

View file

@ -40,7 +40,12 @@ log.setLevel(logging.INFO)
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
# in prod want in /var/lib somewhere probably
SAVE_LOC = "/tmp/sync_es_position.json"
SAVE_LOC = "/var/lib/sync_es_position.json"
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306
MYSQL_USER = 'test'
MYSQL_PW = 'test123'
NT_DB = 'nyaav2'
with open(SAVE_LOC) as f:
pos = json.load(f)
@ -50,16 +55,16 @@ es = Elasticsearch()
stream = BinLogStreamReader(
# TODO parse out from config.py or something
connection_settings = {
'host': '127.0.0.1',
'port': 13306,
'user': 'root',
'passwd': 'dunnolol'
'host': MYSQL_HOST,
'port': MYSQL_PORT,
'user': MYSQL_USER,
'passwd': MYSQL_PW
},
server_id=10, # arbitrary
# only care about this table currently
only_schemas=["nyaav2"],
# TODO sukebei
only_tables=["nyaa_torrents", "nyaa_statistics"],
# only care about this database currently
only_schemas=[NT_DB],
# these tables in the database
only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
# from our save file
resume_stream=True,
log_file=pos['log_file'],
@ -72,7 +77,7 @@ stream = BinLogStreamReader(
# using aiomysql if anybody wants to revive that.
blocking=True)
def reindex_torrent(t):
def reindex_torrent(t, index_name):
# XXX annoyingly different from import_to_es, and
# you need to keep them in sync manually.
f = t['flags']
@ -103,14 +108,14 @@ def reindex_torrent(t):
}
# update, so we don't delete the stats if present
es.update(
index='nyaav2',
index=index_name,
doc_type='torrent',
id=t['id'],
body={"doc": doc, "doc_as_upsert": True})
def reindex_stats(s):
def reindex_stats(s, index_name):
es.update(
index='nyaav2',
index=index_name,
doc_type='torrent',
id=s['torrent_id'],
body={
@ -126,21 +131,29 @@ last_save = time.time()
for event in stream:
for row in event.rows:
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
reindex_torrent(row['values'])
reindex_torrent(row['values'], index_name)
elif type(event) is UpdateRowsEvent:
reindex_torrent(row['after_values'])
reindex_torrent(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent:
# just delete it
es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id'])
es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics":
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
# This branch only handles the *_statistics tables, so the index must be
# chosen from the statistics table name. Comparing against "nyaa_torrents"
# here can never match and would route nyaa statistics to "sukebei".
if event.table == "nyaa_statistics":
    index_name = "nyaa"
else:
    index_name = "sukebei"
if type(event) is WriteRowsEvent:
reindex_stats(row['values'])
reindex_stats(row['values'], index_name)
elif type(event) is UpdateRowsEvent:
reindex_stats(row['after_values'])
reindex_stats(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later.
pass