1
0
Fork 0
mirror of https://gitlab.com/SIGBUS/nyaa.git synced 2024-12-22 10:19:59 +00:00

hooked up ES... 90% done, need to figure out how to generate magnet URIs

This commit is contained in:
aldacron 2017-05-15 23:51:58 -07:00
parent c2c547e786
commit 899aa01473
11 changed files with 585 additions and 226 deletions

View file

@ -44,5 +44,43 @@
- Start the dev server with `python run.py` - Start the dev server with `python run.py`
- Deactivate `source deactivate` - Deactivate `source deactivate`
# Enabling ElasticSearch
## Basics
- Install jdk `sudo apt-get install openjdk-8-jdk`
- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html
- `sudo systemctl enable elasticsearch.service`
- `sudo systemctl start elasticsearch.service`
- Run `curl -XGET 'localhost:9200'` and make sure ES is running
- Optional: install Kibana as a search frontend for ES
## Enable MySQL Binlogging
- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server
- Connect to mysql
- `SHOW VARIABLES LIKE 'binlog_format';`
- Make sure it shows ROW
- Connect to root user
- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with
## Setting up ES
- Run `./create_es.sh` and this creates two indices: `nyaa` and `sukebei`
- The output should show `acknowledged: true` twice
- The safest bet is to disable the webapp here to ensure there's no database writes
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa`
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei`
- These will take some time to run as it's indexing
## Setting up sync_es.py
- `sync_es.py` keeps the ElasticSearch index updated by reading the BinLog
- Configure the MySQL options with the user where you granted the REPLICATION permissions
- Connect to MySQL, run `SHOW MASTER STATUS;`.
- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILENAME", "log_pos": POSITION}`, replacing FILENAME with the File value (something like master1-bin.000002) and POSITION with the Position value (something like 892528513) from the SQL output
- Set up `sync_es.py` as a service and run it, preferably as the system/root user
- Make sure `sync_es.py` runs within venv with the right dependencies
## Good to go!
- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go
## Code Quality: ## Code Quality:
- Remember to follow PEP8 style guidelines and run `./lint.sh` before committing. - Remember to follow PEP8 style guidelines and run `./lint.sh` before committing.

View file

@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***'
SMTP_USERNAME = '***' SMTP_USERNAME = '***'
SMTP_PASSWORD = '***' SMTP_PASSWORD = '***'
RESULTS_PER_PAGE = 75
# What the site identifies itself as. # What the site identifies itself as.
SITE_NAME = 'Nyaa' SITE_NAME = 'Nyaa'
@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False
MAIN_ANNOUNCE_URL = '' MAIN_ANNOUNCE_URL = ''
BACKUP_TORRENT_FOLDER = 'torrents' BACKUP_TORRENT_FOLDER = 'torrents'
#
# Search Options
#
# Number of torrents listed on each results page
RESULTS_PER_PAGE = 75
# Use ElasticSearch instead of the database when a search term is given
USE_ELASTIC_SEARCH = False
# Ask ElasticSearch to return highlighted display_name matches
ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False
# Max ES search results, do not set over 10000
ES_MAX_SEARCH_RESULT = 1000
ES_INDEX_NAME = SITE_FLAVOR # we create indices named nyaa or sukebei

View file

@ -1,3 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml # create indicies named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

View file

@ -5,6 +5,7 @@ which is assumed to already exist.
This is a one-shot deal, so you'd either need to complement it This is a one-shot deal, so you'd either need to complement it
with a cron job or some binlog-reading thing (TODO) with a cron job or some binlog-reading thing (TODO)
""" """
from nyaa import app
from nyaa.models import Torrent from nyaa.models import Torrent
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from elasticsearch import helpers from elasticsearch import helpers
@ -33,7 +34,7 @@ def mk_es(t):
return { return {
"_id": t.id, "_id": t.id,
"_type": "torrent", "_type": "torrent",
"_index": "nyaav2", "_index": app.config['ES_INDEX_NAME'],
"_source": { "_source": {
# we're also indexing the id as a number so you can # we're also indexing the id as a number so you can
# order by it. seems like this is just equivalent to # order by it. seems like this is just equivalent to

6
my.cnf
View file

@ -4,3 +4,9 @@ ft_min_word_len=2
innodb_ft_cache_size = 80000000 innodb_ft_cache_size = 80000000
innodb_ft_total_cache_size = 1600000000 innodb_ft_total_cache_size = 1600000000
max_allowed_packet = 100M max_allowed_packet = 100M
[mariadb]
log-bin
server_id=1
log-basename=master1
binlog-format = row

View file

@ -6,18 +6,16 @@ from nyaa import bencode, utils
from nyaa import torrents from nyaa import torrents
from nyaa import backend from nyaa import backend
from nyaa import api_handler from nyaa import api_handler
from nyaa.search import search_elastic, search_db
import config import config
import json import json
import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
import ipaddress import ipaddress
import os.path import os.path
import base64 import base64
from urllib.parse import quote from urllib.parse import quote
import sqlalchemy_fulltext.modes as FullTextMode import math
from sqlalchemy_fulltext import FullTextSearch
import shlex
from werkzeug import url_encode from werkzeug import url_encode
from itsdangerous import URLSafeSerializer, BadSignature from itsdangerous import URLSafeSerializer, BadSignature
@ -27,12 +25,14 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.utils import formatdate from email.utils import formatdate
from elasticsearch import Elasticsearch from flask_paginate import Pagination
from elasticsearch_dsl import Search, Q
es_client = Elasticsearch()
DEBUG_API = False DEBUG_API = False
DEFAULT_MAX_SEARCH_RESULT = 1000
DEFAULT_PER_PAGE = 75
SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results {start}-{end} out of {total} results.<br>
Please refine your search results if you can't find what you were looking for.'''
def redirect_url(): def redirect_url():
@ -53,168 +53,13 @@ def modify_query(**new_values):
return '{}?{}'.format(flask.request.path, url_encode(args)) return '{}?{}'.format(flask.request.path, url_encode(args))
@app.template_global() @app.template_global()
def filter_truthy(input_list): def filter_truthy(input_list):
''' Jinja2 can't into list comprehension so this is for ''' Jinja2 can't into list comprehension so this is for
the search_results.html template ''' the search_results.html template '''
return [item for item in input_list if item] return [item for item in input_list if item]
def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False):
sort_keys = {
'id': models.Torrent.id,
'size': models.Torrent.filesize,
'name': models.Torrent.display_name,
'seeders': models.Statistic.seed_count,
'leechers': models.Statistic.leech_count,
'downloads': models.Statistic.download_count
}
sort_ = sort.lower()
if sort_ not in sort_keys:
flask.abort(400)
# XXX gross why are all the names subtly different
es_sort = ({
'id': 'id',
'size': 'filesize',
'name': 'display_name',
'seeders': 'seed_count',
'leechers': 'leech_count',
'downloads': 'download_count'
})[sort]
sort = sort_keys[sort]
order_keys = {
'desc': 'desc',
'asc': 'asc'
}
order_ = order.lower()
if order_ not in order_keys:
flask.abort(400)
# funky, es sort is default asc, prefixed by '-' if desc
if "desc" == order:
es_sort = "-" + es_sort
filter_keys = {
'0': None,
'1': (models.TorrentFlags.REMAKE, False),
'2': (models.TorrentFlags.TRUSTED, True),
'3': (models.TorrentFlags.COMPLETE, True)
}
sentinel = object()
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
if filter_tuple is sentinel:
flask.abort(400)
if user:
user = models.User.by_id(user)
if not user:
flask.abort(404)
user = user.id
main_category = None
sub_category = None
main_cat_id = 0
sub_cat_id = 0
if category:
cat_match = re.match(r'^(\d+)_(\d+)$', category)
if not cat_match:
flask.abort(400)
main_cat_id = int(cat_match.group(1))
sub_cat_id = int(cat_match.group(2))
if main_cat_id > 0:
if sub_cat_id > 0:
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
else:
main_category = models.MainCategory.by_id(main_cat_id)
if not category:
flask.abort(400)
# Force sort by id desc if rss
if rss:
sort = sort_keys['id']
order = 'desc'
same_user = False
if flask.g.user:
same_user = flask.g.user.id == user
s = Search(using=es_client, index='nyaav2')
if term:
query = db.session.query(models.TorrentNameSearch)
s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term)
else:
query = models.Torrent.query
# Filter by user
if user:
s = s.filter("term", uploader_id=user)
query = query.filter(models.Torrent.uploader_id == user)
# If admin, show everything
if not admin:
# If user is not logged in or the accessed feed doesn't belong to user,
# hide anonymous torrents belonging to the queried user
if not same_user:
# TODO adapt to es syntax
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False))
if main_category:
s = s.filter("term", main_category_id=main_cat_id)
query = query.filter(models.Torrent.main_category_id == main_cat_id)
elif sub_category:
s = s.filter("term", main_category_id=main_cat_id)
s = s.filter("term", sub_category_id=sub_cat_id)
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
(models.Torrent.sub_category_id == sub_cat_id))
# TODO i dunno what this means in es
if filter_tuple:
query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
# If admin, show everything
if not admin:
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False))
if term:
# note already handled in es
for item in shlex.split(term, posix=False):
if len(item) >= 2:
query = query.filter(FullTextSearch(
item, models.TorrentNameSearch, FullTextMode.NATURAL))
# Sort and order
if sort.class_ != models.Torrent:
query = query.join(sort.class_)
s = s.sort(es_sort)
query = query.order_by(getattr(sort, order)())
per = app.config['RESULTS_PER_PAGE']
if rss:
pass
#query = query.limit(app.config['RESULTS_PER_PAGE'])
else:
# page is 1-based?
s = s[(page-1)*per:page*per]
#query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5)
s = s.highlight_options(tags_schema='styled')
s = s.highlight("display_name")
#return query
from pprint import pprint
print(json.dumps(s.to_dict()))
return s.execute()
@app.errorhandler(404) @app.errorhandler(404)
def not_found(error): def not_found(error):
@ -232,7 +77,6 @@ def before_request():
flask.g.user = user flask.g.user = user
if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now(): if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now():
print("hio")
flask.session['timeout'] = datetime.now() + timedelta(days=7) flask.session['timeout'] = datetime.now() + timedelta(days=7)
flask.session.permanent = True flask.session.permanent = True
flask.session.modified = True flask.session.modified = True
@ -270,6 +114,10 @@ def home(rss):
if page: if page:
page = int(page) page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
user_id = None user_id = None
if user_name: if user_name:
user = models.User.by_username(user_name) user = models.User.by_username(user_name)
@ -278,27 +126,69 @@ def home(rss):
user_id = user.id user_id = user.id
query_args = { query_args = {
'term': term or '',
'user': user_id, 'user': user_id,
'sort': sort or 'id', 'sort': sort or 'id',
'order': order or 'desc', 'order': order or 'desc',
'category': category or '0_0', 'category': category or '0_0',
'quality_filter': quality_filter or '0', 'quality_filter': quality_filter or '0',
'page': page or 1, 'page': page or 1,
'rss': rss 'rss': rss,
'per_page': per_page
} }
# God mode if flask.g.user:
if flask.g.user and flask.g.user.is_admin: query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True query_args['admin'] = True
query = search(**query_args) # If searching, we get results from elastic search
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
if rss: if rss:
return render_rss('/', query) return render_rss('/', query_results, use_elastic=True)
else: else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name) rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('home.html', return flask.render_template('home.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
rss_filter=rss_query_string)
else:
# If ES is enabled, default to db search for browsing
if use_elastic:
query_args['term'] = ''
else: # Otherwise, use db search for everything
query_args['term'] = term or ''
print(query_args)
query = search_db(**query_args)
if rss:
return render_rss('/', query, use_elastic=False)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
# Use elastic is always false here because we only hit this section
# if we're browsing without a search term (which means we default to DB)
# or if ES is disabled
return flask.render_template('home.html',
use_elastic=False,
torrent_query=query, torrent_query=query,
search=query_args, search=query_args,
rss_filter=rss_query_string) rss_filter=rss_query_string)
@ -320,6 +210,10 @@ def view_user(user_name):
if page: if page:
page = int(page) page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
query_args = { query_args = {
'term': term or '', 'term': term or '',
'user': user.id, 'user': user.id,
@ -328,17 +222,54 @@ def view_user(user_name):
'category': category or '0_0', 'category': category or '0_0',
'quality_filter': quality_filter or '0', 'quality_filter': quality_filter or '0',
'page': page or 1, 'page': page or 1,
'rss': False 'rss': False,
'per_page': per_page
} }
# God mode if flask.g.user:
if flask.g.user and flask.g.user.is_admin: query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True query_args['admin'] = True
query = search(**query_args) # Use elastic search for term searching
rss_query_string = _generate_query_string(term, category, quality_filter, user_name) rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('user.html', return flask.render_template('user.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
user=user,
user_page=True,
rss_filter=rss_query_string)
# Similar logic as home page
else:
if use_elastic:
query_args['term'] = ''
else:
query_args['term'] = term or ''
query = search_db(**query_args)
return flask.render_template('user.html',
use_elastic=False,
torrent_query=query, torrent_query=query,
search=query_args, search=query_args,
user=user, user=user,
@ -350,18 +281,24 @@ def view_user(user_name):
def _jinja2_filter_rfc822(date, fmt=None): def _jinja2_filter_rfc822(date, fmt=None):
return formatdate(float(date.strftime('%s'))) return formatdate(float(date.strftime('%s')))
@app.template_filter('rfc822_es')
def _jinja2_filter_rfc822(datestr, fmt=None):
    """Format an ElasticSearch timestamp string as an RFC 822 date.

    `datestr` must look like ``YYYY-MM-DDTHH:MM:SS``; `fmt` is accepted
    but unused.

    NOTE(review): the parsed datetime is naive, so it is interpreted in the
    server's local timezone -- confirm that matches how ES stores the value.
    """
    # datetime.timestamp() is the portable equivalent of strftime('%s'),
    # which is a platform-specific extension and not available everywhere.
    parsed = datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S')
    return formatdate(parsed.timestamp())
def render_rss(label, query):
def render_rss(label, query, use_elastic):
print(query)
rss_xml = flask.render_template('rss.xml', rss_xml = flask.render_template('rss.xml',
use_elastic=use_elastic,
term=label, term=label,
site_url=flask.request.url_root, site_url=flask.request.url_root,
query=query) torrent_query=query)
response = flask.make_response(rss_xml) response = flask.make_response(rss_xml)
response.headers['Content-Type'] = 'application/xml' response.headers['Content-Type'] = 'application/xml'
return response return response
#@app.route('/about', methods=['GET']) # @app.route('/about', methods=['GET'])
# def about(): # def about():
# return flask.render_template('about.html') # return flask.render_template('about.html')
@ -485,7 +422,6 @@ def activate_user(payload):
user.status = models.UserStatusType.ACTIVE user.status = models.UserStatusType.ACTIVE
db.session.add(user) db.session.add(user)
db.session.commit() db.session.commit()

317
nyaa/search.py Normal file
View file

@ -0,0 +1,317 @@
import flask
import re
import math
import json
import shlex
from nyaa import app, db
from nyaa import models
import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
def search_elastic(term='', user=None, sort='id', order='desc',
                   category='0_0', quality_filter='0', page=1,
                   rss=False, admin=False, logged_in_user=None,
                   per_page=75, max_search_results=1000):
    """Search torrents through ElasticSearch and return the executed response.

    Parameters:
        term: search string, sent as a `simple_query_string` query when set.
        user: id of the uploader whose torrents are being viewed, or None.
        sort: one of 'id', 'size', 'seeders', 'leechers', 'downloads'
            (case-insensitive).
        order: 'asc' or 'desc' (case-insensitive).
        category: '<main>_<sub>' category ids; '0_0' means no filter.
        quality_filter: '0' all, '1' no remakes, '2' trusted, '3' complete.
        page: 1-based page number; clamped to the pages reachable within
            `max_search_results`.
        rss: when true, force "id desc" and return the first `per_page` hits.
        admin: when true, skip the deleted/hidden/anonymous filters.
        logged_in_user: the current user object (its `id` is read), or None.
        per_page: number of hits per page.
        max_search_results: upper bound on the result window.

    Aborts with 400 on an invalid sort, order, quality filter or category,
    and with 404 when `user` does not exist.
    """
    # This function can easily be memcached now

    es_client = Elasticsearch()

    es_sort_keys = {
        'id': 'id',
        'size': 'filesize',
        # 'name': 'display_name',  # This is slow and buggy
        'seeders': 'seed_count',
        'leechers': 'leech_count',
        'downloads': 'download_count'
    }

    # Validate AND use the normalised values, so 'ID'/'DESC' behave like
    # 'id'/'desc' instead of raising KeyError or silently sorting ascending.
    sort_ = sort.lower()
    if sort_ not in es_sort_keys:
        flask.abort(400)

    order_ = order.lower()
    if order_ not in ('desc', 'asc'):
        flask.abort(400)

    # Only allow ID, desc if RSS
    if rss:
        sort_ = 'id'
        order_ = 'desc'

    es_sort = es_sort_keys[sort_]

    # funky, es sort is default asc, prefixed by '-' if desc
    if order_ == 'desc':
        es_sort = '-' + es_sort

    # Quality filter
    quality_keys = [
        '0',  # Show all
        '1',  # No remakes
        '2',  # Only trusted
        '3'   # Only completed
    ]

    if quality_filter.lower() not in quality_keys:
        flask.abort(400)

    quality_filter = int(quality_filter)

    # Category filter
    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
                if not sub_category:
                    flask.abort(400)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)
                if not main_category:
                    flask.abort(400)

    # This might be useless since we validate users
    # before coming into this method, but just to be safe...
    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    same_user = False
    if logged_in_user:
        same_user = user == logged_in_user.id

    s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME'))  # todo, sukebei prefix

    # Apply search term
    if term:
        s = s.query('simple_query_string',
                    analyzer='my_search_analyzer',
                    default_operator="AND",
                    query=term)

    # User view (/user/username)
    if user:
        s = s.filter('term', uploader_id=user)

        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            #
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            #
            # On RSS pages in user view, show only torrents that
            # aren't hidden or anonymous no matter what
            if not same_user or rss:
                s = s.filter('term', hidden=False)
                s = s.filter('term', anonymous=False)
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                hidden_filter = Q('term', hidden=False)
                user_filter = Q('term', uploader_id=logged_in_user.id)
                s = s.filter('bool', filter=[hidden_filter | user_filter])
            else:
                s = s.filter('term', hidden=False)

    if main_category:
        s = s.filter('term', main_category_id=main_cat_id)
    elif sub_category:
        s = s.filter('term', main_category_id=main_cat_id)
        s = s.filter('term', sub_category_id=sub_cat_id)

    if quality_filter == 1:
        s = s.filter('term', remake=False)
    elif quality_filter == 2:
        s = s.filter('term', trusted=True)
    elif quality_filter == 3:
        s = s.filter('term', complete=True)

    # Apply sort
    s = s.sort(es_sort)

    # Only show first RESULTS_PER_PAGE items for RSS
    if rss:
        s = s[0:per_page]
    else:
        last_page = int(math.ceil(max_search_results / float(per_page)))
        # Clamp to [1, last_page]; a page below 1 would give a negative offset.
        max_page = max(1, min(page, last_page))
        from_idx = (max_page - 1) * per_page
        to_idx = min(max_search_results, max_page * per_page)
        s = s[from_idx:to_idx]

    highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
    if highlight:
        s = s.highlight_options(tags_schema='styled')
        s = s.highlight("display_name")

    # To debug the generated query, inspect s.to_dict() here.
    return s.execute()
def search_db(term='', user=None, sort='id', order='desc', category='0_0',
              quality_filter='0', page=1, rss=False, admin=False,
              logged_in_user=None, per_page=75):
    """Search torrents through the SQL database.

    Parameters:
        term: search string; each shlex token of length >= 2 becomes a
            full-text filter.
        user: id of the uploader whose torrents are being viewed, or None.
        sort: one of 'id', 'size', 'seeders', 'leechers', 'downloads'
            (case-insensitive).
        order: 'asc' or 'desc' (case-insensitive).
        category: '<main>_<sub>' category ids; '0_0' means no filter.
        quality_filter: '0' all, '1' no remakes, '2' trusted, '3' complete.
        page: page number passed to the paginator.
        rss: when true, force "id desc" and limit to `per_page` rows.
        admin: when true, skip the deleted/hidden/anonymous filters.
        logged_in_user: the current user object (its `id` is read), or None.
        per_page: number of rows per page.

    Returns the limited query when `rss` is true, otherwise the result of
    `paginate_faste`. Aborts with 400 on an invalid sort, order, quality
    filter or category, and with 404 when `user` does not exist.
    """
    sort_keys = {
        'id': models.Torrent.id,
        'size': models.Torrent.filesize,
        # 'name': models.Torrent.display_name,  # Disabled for consistency with search_elastic
        'seeders': models.Statistic.seed_count,
        'leechers': models.Statistic.leech_count,
        'downloads': models.Statistic.download_count
    }

    # Validate AND use the normalised values, so 'ID'/'DESC' behave like
    # 'id'/'desc' instead of raising KeyError/AttributeError further down.
    sort_ = sort.lower()
    if sort_ not in sort_keys:
        flask.abort(400)

    order_ = order.lower()
    if order_ not in ('desc', 'asc'):
        flask.abort(400)

    filter_keys = {
        '0': None,
        '1': (models.TorrentFlags.REMAKE, False),
        '2': (models.TorrentFlags.TRUSTED, True),
        '3': (models.TorrentFlags.COMPLETE, True)
    }

    sentinel = object()
    filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
    if filter_tuple is sentinel:
        flask.abort(400)

    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)

            # Reject unknown category ids, matching search_elastic.
            if not (main_category or sub_category):
                flask.abort(400)

    # Force sort by id desc if rss
    if rss:
        sort_ = 'id'
        order_ = 'desc'

    sort_column = sort_keys[sort_]

    same_user = False
    if logged_in_user:
        same_user = logged_in_user.id == user

    if term:
        query = db.session.query(models.TorrentNameSearch)
    else:
        query = models.Torrent.query

    # User view (/user/username)
    if user:
        query = query.filter(models.Torrent.uploader_id == user)

        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(
                int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            # On RSS pages in user view, show only torrents that aren't
            # hidden or anonymous no matter what.
            if not same_user or rss:
                query = query.filter(models.Torrent.flags.op('&')(
                    int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)).is_(False))
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            query = query.filter(models.Torrent.flags.op('&')(
                int(models.TorrentFlags.DELETED)).is_(False))
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                query = query.filter(
                    (models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
                    (models.Torrent.uploader_id == logged_in_user.id))
            # Otherwise, show all torrents that aren't hidden
            else:
                query = query.filter(models.Torrent.flags.op('&')(
                    int(models.TorrentFlags.HIDDEN)).is_(False))

    if main_category:
        query = query.filter(models.Torrent.main_category_id == main_cat_id)
    elif sub_category:
        query = query.filter((models.Torrent.main_category_id == main_cat_id) &
                             (models.Torrent.sub_category_id == sub_cat_id))

    if filter_tuple:
        query = query.filter(models.Torrent.flags.op('&')(
            int(filter_tuple[0])).is_(filter_tuple[1]))

    if term:
        for item in shlex.split(term, posix=False):
            if len(item) >= 2:
                query = query.filter(FullTextSearch(
                    item, models.TorrentNameSearch, FullTextMode.NATURAL))

    # Sort and order; join the owning table when the column is not on Torrent
    if sort_column.class_ != models.Torrent:
        query = query.join(sort_column.class_)

    query = query.order_by(getattr(sort_column, order_)())

    if rss:
        query = query.limit(per_page)
    else:
        query = query.paginate_faste(page, per_page=per_page, step=5)

    return query

View file

@ -4,20 +4,32 @@
<description>RSS Feed for {{ term }}</description> <description>RSS Feed for {{ term }}</description>
<link>{{ url_for('home', _external=True) }}</link> <link>{{ url_for('home', _external=True) }}</link>
<atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" /> <atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
{% for torrent in query %} {% for torrent in torrent_query %}
{% if torrent.has_torrent %} {% if torrent.has_torrent %}
<item> <item>
<title>{{ torrent.display_name }}</title> <title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link> <link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid> <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate> <pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item> </item>
{% else %} {% else %}
<item> <item>
<title>{{ torrent.display_name }}</title> <title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ torrent.info_hash }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ torrent.magnet_uri }}</link> <link>{{ torrent.magnet_uri }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid> <guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate> <pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item> </item>
{% endif %} {% endif %}
{% endfor %} {% endfor %}

View file

@ -8,7 +8,7 @@
{{ caller() }} {{ caller() }}
</th> </th>
{% endmacro %} {% endmacro %}
{% if torrent_query.hits.total > 0 %} {% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
<div class="table-responsive"> <div class="table-responsive">
<table class="table table-bordered table-hover table-striped torrent-list"> <table class="table table-bordered table-hover table-striped torrent-list">
<thead> <thead>
@ -16,7 +16,7 @@
{% call render_column_header("hdr-category", "width:80px;", center_text=True) %} {% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
<div>Category</div> <div>Category</div>
{% endcall %} {% endcall %}
{% call render_column_header("hdr-name", "width:auto;", sort_key="name") %} {% call render_column_header("hdr-name", "width:auto;") %}
<div>Name</div> <div>Name</div>
{% endcall %} {% endcall %}
{% call render_column_header("hdr-link", "width:0;", center_text=True) %} {% call render_column_header("hdr-link", "width:0;", center_text=True) %}
@ -45,26 +45,46 @@
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for torrent in torrent_query %} {% set torrents = torrent_query if use_elastic else torrent_query.items %}
{% for torrent in torrents %}
<tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}"> <tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}">
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %} {% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
{% set icon_dir = config.SITE_FLAVOR %} {% set icon_dir = config.SITE_FLAVOR %}
<td style="padding:0 4px;"> <td style="padding:0 4px;">
{% if use_elastic %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}"> <a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}">
{% else %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}">
{% endif %}
<img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png"> <img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png">
</a> </a>
</td> </td>
{% if use_elastic %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td> <td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td>
{% else %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td>
{% endif %}
<td style="white-space: nowrap;text-align: center;"> <td style="white-space: nowrap;text-align: center;">
{% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %} {% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %}
<a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a> <a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a>
</td> </td>
<td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td> <td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td>
{% if use_elastic %}
<td class="text-center" {#data-timestamp="{{ torrent.created_time|int }}"#}>{{ torrent.created_time }}</td> <td class="text-center" {#data-timestamp="{{ torrent.created_time|int }}"#}>{{ torrent.created_time }}</td>
{% else %}
<td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td>
{% endif %}
{% if config.ENABLE_SHOW_STATS %} {% if config.ENABLE_SHOW_STATS %}
{% if use_elastic %}
<td class="text-center" style="color: green;">{{ torrent.seed_count }}</td> <td class="text-center" style="color: green;">{{ torrent.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.leech_count }}</td> <td class="text-center" style="color: red;">{{ torrent.leech_count }}</td>
<td class="text-center">{{ torrent.download_count }}</td> <td class="text-center">{{ torrent.download_count }}</td>
{% else %}
<td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td>
<td class="text-center">{{ torrent.stats.download_count }}</td>
{% endif %}
{% endif %} {% endif %}
</tr> </tr>
{% endfor %} {% endfor %}
@ -75,9 +95,12 @@
<h3>No results found</h3> <h3>No results found</h3>
{% endif %} {% endif %}
{#
<center> <center>
{% if use_elastic %}
{{ pagination.info }}
{{ pagination.links }}
{% else %}
{% from "bootstrap/pagination.html" import render_pagination %} {% from "bootstrap/pagination.html" import render_pagination %}
{{ render_pagination(torrent_query) }} {{ render_pagination(torrent_query) }}
{% endif %}
</center> </center>
#}

View file

@ -32,7 +32,9 @@ visitor==0.1.3
webassets==0.12.1 webassets==0.12.1
Werkzeug==0.12.1 Werkzeug==0.12.1
WTForms==2.1 WTForms==2.1
## The following requirements were added by pip freeze: ## elasticsearch dependencies
elasticsearch==5.3.0 elasticsearch==5.3.0
elasticsearch-dsl==5.2.0 elasticsearch-dsl==5.2.0
progressbar2==3.20.0 progressbar2==3.20.0
mysql-replication==0.13
flask-paginate==0.4.5

View file

@ -40,7 +40,12 @@ log.setLevel(logging.INFO)
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG) #logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
# in prod want in /var/lib somewhere probably # in prod want in /var/lib somewhere probably
SAVE_LOC = "/tmp/sync_es_position.json" SAVE_LOC = "/var/lib/sync_es_position.json"
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306
MYSQL_USER = 'test'
MYSQL_PW = 'test123'
NT_DB = 'nyaav2'
with open(SAVE_LOC) as f: with open(SAVE_LOC) as f:
pos = json.load(f) pos = json.load(f)
@ -50,16 +55,16 @@ es = Elasticsearch()
stream = BinLogStreamReader( stream = BinLogStreamReader(
# TODO parse out from config.py or something # TODO parse out from config.py or something
connection_settings = { connection_settings = {
'host': '127.0.0.1', 'host': MYSQL_HOST,
'port': 13306, 'port': MYSQL_PORT,
'user': 'root', 'user': MYSQL_USER,
'passwd': 'dunnolol' 'passwd': MYSQL_PW
}, },
server_id=10, # arbitrary server_id=10, # arbitrary
# only care about this table currently # only care about this database currently
only_schemas=["nyaav2"], only_schemas=[NT_DB],
# TODO sukebei # these tables in the database
only_tables=["nyaa_torrents", "nyaa_statistics"], only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
# from our save file # from our save file
resume_stream=True, resume_stream=True,
log_file=pos['log_file'], log_file=pos['log_file'],
@ -72,7 +77,7 @@ stream = BinLogStreamReader(
# using aiomysql if anybody wants to revive that. # using aiomysql if anybody wants to revive that.
blocking=True) blocking=True)
def reindex_torrent(t): def reindex_torrent(t, index_name):
# XXX annoyingly different from import_to_es, and # XXX annoyingly different from import_to_es, and
# you need to keep them in sync manually. # you need to keep them in sync manually.
f = t['flags'] f = t['flags']
@ -103,14 +108,14 @@ def reindex_torrent(t):
} }
# update, so we don't delete the stats if present # update, so we don't delete the stats if present
es.update( es.update(
index='nyaav2', index=index_name,
doc_type='torrent', doc_type='torrent',
id=t['id'], id=t['id'],
body={"doc": doc, "doc_as_upsert": True}) body={"doc": doc, "doc_as_upsert": True})
def reindex_stats(s): def reindex_stats(s, index_name):
es.update( es.update(
index='nyaav2', index=index_name,
doc_type='torrent', doc_type='torrent',
id=s['torrent_id'], id=s['torrent_id'],
body={ body={
@ -126,21 +131,29 @@ last_save = time.time()
for event in stream: for event in stream:
for row in event.rows: for row in event.rows:
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents": if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent: if type(event) is WriteRowsEvent:
reindex_torrent(row['values']) reindex_torrent(row['values'], index_name)
elif type(event) is UpdateRowsEvent: elif type(event) is UpdateRowsEvent:
reindex_torrent(row['after_values']) reindex_torrent(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent: elif type(event) is DeleteRowsEvent:
# just delete it # just delete it
es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id']) es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
else: else:
raise Exception(f"unknown event {type(event)}") raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics": elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
# Pick the ES index for this statistics row. This branch only sees the
# "*_statistics" tables, so the comparison must use the statistics table
# name; testing for "nyaa_torrents" here could never match and sent every
# nyaa stats update to the sukebei index.
if event.table == "nyaa_statistics":
    index_name = "nyaa"
else:
    index_name = "sukebei"
if type(event) is WriteRowsEvent: if type(event) is WriteRowsEvent:
reindex_stats(row['values']) reindex_stats(row['values'], index_name)
elif type(event) is UpdateRowsEvent: elif type(event) is UpdateRowsEvent:
reindex_stats(row['after_values']) reindex_stats(row['after_values'], index_name)
elif type(event) is DeleteRowsEvent: elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later. # uh ok. assume that the torrent row will get deleted later.
pass pass