mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2024-12-22 05:10:00 +00:00
hooked up ES... 90% done, need to figure out how to generate magnet URIs
This commit is contained in:
parent
c2c547e786
commit
899aa01473
38
README.md
38
README.md
|
@ -44,5 +44,43 @@
|
|||
- Start the dev server with `python run.py`
|
||||
- Deactivate `source deactivate`
|
||||
|
||||
# Enabling ElasticSearch
|
||||
|
||||
## Basics
|
||||
- Install jdk `sudo apt-get install openjdk-8-jdk`
|
||||
- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html
|
||||
- `sudo systemctl enable elasticsearch.service`
|
||||
- `sudo systemctl start elasticsearch.service`
|
||||
- Run `curl -XGET 'localhost:9200'` and make sure ES is running
|
||||
- Optional: install Kabana as a search frontend for ES
|
||||
|
||||
## Enable MySQL Binlogging
|
||||
- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server
|
||||
- Connect to mysql
|
||||
- `SHOW VARIABLES LIKE 'binlog_format';`
|
||||
- Make sure it shows ROW
|
||||
- Connect to root user
|
||||
- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with
|
||||
|
||||
## Setting up ES
|
||||
- Run `./create_es.sh` and this creates two indicies: `nyaa` and `sukebei`
|
||||
- The output should show `akncolwedged: true` twice
|
||||
- The safest bet is to disable the webapp here to ensure there's no database writes
|
||||
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa`
|
||||
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei`
|
||||
- These will take some time to run as it's indexing
|
||||
|
||||
## Setting up sync_es.py
|
||||
- Sync_es.py keeps the ElasticSearch index updated by reading the BinLog
|
||||
- Configure the MySQL options with the user where you granted the REPLICATION permissions
|
||||
- Connect to MySQL, run `SHOW MASTER STATUS;`.
|
||||
- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}` and replace FILENAME with File (something like master1-bin.000002) in the SQL output and POSITION (something like 892528513) with Position
|
||||
- Set up `sync_es.py` as a service and run it, preferably as the system/root
|
||||
- Make sure `sync_es.py` runs within venv with the right dependencies
|
||||
|
||||
## Good to go!
|
||||
- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go
|
||||
|
||||
|
||||
## Code Quality:
|
||||
- Remember to follow PEP8 style guidelines and run `./lint.sh` before committing.
|
||||
|
|
|
@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***'
|
|||
SMTP_USERNAME = '***'
|
||||
SMTP_PASSWORD = '***'
|
||||
|
||||
RESULTS_PER_PAGE = 75
|
||||
|
||||
# What the site identifies itself as.
|
||||
SITE_NAME = 'Nyaa'
|
||||
|
||||
|
@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False
|
|||
MAIN_ANNOUNCE_URL = ''
|
||||
|
||||
BACKUP_TORRENT_FOLDER = 'torrents'
|
||||
|
||||
#
|
||||
# Search Options
|
||||
#
|
||||
# Max ES search results, do not set over 10000
|
||||
RESULTS_PER_PAGE = 75
|
||||
|
||||
USE_ELASTIC_SEARCH = False
|
||||
ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False
|
||||
ES_MAX_SEARCH_RESULT = 1000
|
||||
ES_INDEX_NAME = SITE_FLAVOR # we create indicies named nyaa or sukebei
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
curl -v -XPUT 'localhost:9200/nyaav2?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
|
||||
# create indicies named "nyaa" and "sukebei", these are hardcoded
|
||||
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
|
||||
curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
|
||||
|
|
|
@ -5,6 +5,7 @@ which is assumed to already exist.
|
|||
This is a one-shot deal, so you'd either need to complement it
|
||||
with a cron job or some binlog-reading thing (TODO)
|
||||
"""
|
||||
from nyaa import app
|
||||
from nyaa.models import Torrent
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch import helpers
|
||||
|
@ -33,7 +34,7 @@ def mk_es(t):
|
|||
return {
|
||||
"_id": t.id,
|
||||
"_type": "torrent",
|
||||
"_index": "nyaav2",
|
||||
"_index": app.config['ES_INDEX_NAME'],
|
||||
"_source": {
|
||||
# we're also indexing the id as a number so you can
|
||||
# order by it. seems like this is just equivalent to
|
||||
|
|
6
my.cnf
6
my.cnf
|
@ -4,3 +4,9 @@ ft_min_word_len=2
|
|||
innodb_ft_cache_size = 80000000
|
||||
innodb_ft_total_cache_size = 1600000000
|
||||
max_allowed_packet = 100M
|
||||
|
||||
[mariadb]
|
||||
log-bin
|
||||
server_id=1
|
||||
log-basename=master1
|
||||
binlog-format = row
|
||||
|
|
322
nyaa/routes.py
322
nyaa/routes.py
|
@ -6,18 +6,16 @@ from nyaa import bencode, utils
|
|||
from nyaa import torrents
|
||||
from nyaa import backend
|
||||
from nyaa import api_handler
|
||||
from nyaa.search import search_elastic, search_db
|
||||
import config
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
import ipaddress
|
||||
import os.path
|
||||
import base64
|
||||
from urllib.parse import quote
|
||||
import sqlalchemy_fulltext.modes as FullTextMode
|
||||
from sqlalchemy_fulltext import FullTextSearch
|
||||
import shlex
|
||||
import math
|
||||
from werkzeug import url_encode
|
||||
|
||||
from itsdangerous import URLSafeSerializer, BadSignature
|
||||
|
@ -27,12 +25,14 @@ from email.mime.multipart import MIMEMultipart
|
|||
from email.mime.text import MIMEText
|
||||
from email.utils import formatdate
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch_dsl import Search, Q
|
||||
from flask_paginate import Pagination
|
||||
|
||||
es_client = Elasticsearch()
|
||||
|
||||
DEBUG_API = False
|
||||
DEFAULT_MAX_SEARCH_RESULT = 1000
|
||||
DEFAULT_PER_PAGE = 75
|
||||
SERACH_PAGINATE_DISPLAY_MSG = '''Displaying results {start}-{end} out of {total} results.<br>
|
||||
Please refine your search results if you can't find what you were looking for.'''
|
||||
|
||||
|
||||
def redirect_url():
|
||||
|
@ -53,168 +53,13 @@ def modify_query(**new_values):
|
|||
|
||||
return '{}?{}'.format(flask.request.path, url_encode(args))
|
||||
|
||||
|
||||
@app.template_global()
|
||||
def filter_truthy(input_list):
|
||||
''' Jinja2 can't into list comprehension so this is for
|
||||
the search_results.html template '''
|
||||
return [item for item in input_list if item]
|
||||
|
||||
def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False):
|
||||
sort_keys = {
|
||||
'id': models.Torrent.id,
|
||||
'size': models.Torrent.filesize,
|
||||
'name': models.Torrent.display_name,
|
||||
'seeders': models.Statistic.seed_count,
|
||||
'leechers': models.Statistic.leech_count,
|
||||
'downloads': models.Statistic.download_count
|
||||
}
|
||||
|
||||
sort_ = sort.lower()
|
||||
if sort_ not in sort_keys:
|
||||
flask.abort(400)
|
||||
|
||||
# XXX gross why are all the names subtly different
|
||||
es_sort = ({
|
||||
'id': 'id',
|
||||
'size': 'filesize',
|
||||
'name': 'display_name',
|
||||
'seeders': 'seed_count',
|
||||
'leechers': 'leech_count',
|
||||
'downloads': 'download_count'
|
||||
})[sort]
|
||||
sort = sort_keys[sort]
|
||||
|
||||
order_keys = {
|
||||
'desc': 'desc',
|
||||
'asc': 'asc'
|
||||
}
|
||||
|
||||
order_ = order.lower()
|
||||
if order_ not in order_keys:
|
||||
flask.abort(400)
|
||||
|
||||
# funky, es sort is default asc, prefixed by '-' if desc
|
||||
if "desc" == order:
|
||||
es_sort = "-" + es_sort
|
||||
|
||||
filter_keys = {
|
||||
'0': None,
|
||||
'1': (models.TorrentFlags.REMAKE, False),
|
||||
'2': (models.TorrentFlags.TRUSTED, True),
|
||||
'3': (models.TorrentFlags.COMPLETE, True)
|
||||
}
|
||||
|
||||
sentinel = object()
|
||||
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
|
||||
if filter_tuple is sentinel:
|
||||
flask.abort(400)
|
||||
|
||||
if user:
|
||||
user = models.User.by_id(user)
|
||||
if not user:
|
||||
flask.abort(404)
|
||||
user = user.id
|
||||
|
||||
main_category = None
|
||||
sub_category = None
|
||||
main_cat_id = 0
|
||||
sub_cat_id = 0
|
||||
if category:
|
||||
cat_match = re.match(r'^(\d+)_(\d+)$', category)
|
||||
if not cat_match:
|
||||
flask.abort(400)
|
||||
|
||||
main_cat_id = int(cat_match.group(1))
|
||||
sub_cat_id = int(cat_match.group(2))
|
||||
|
||||
if main_cat_id > 0:
|
||||
if sub_cat_id > 0:
|
||||
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
|
||||
else:
|
||||
main_category = models.MainCategory.by_id(main_cat_id)
|
||||
|
||||
if not category:
|
||||
flask.abort(400)
|
||||
|
||||
# Force sort by id desc if rss
|
||||
if rss:
|
||||
sort = sort_keys['id']
|
||||
order = 'desc'
|
||||
|
||||
same_user = False
|
||||
if flask.g.user:
|
||||
same_user = flask.g.user.id == user
|
||||
|
||||
s = Search(using=es_client, index='nyaav2')
|
||||
if term:
|
||||
query = db.session.query(models.TorrentNameSearch)
|
||||
s = s.query("simple_query_string", analyzer="my_search_analyzer", default_operator="AND", query=term)
|
||||
else:
|
||||
query = models.Torrent.query
|
||||
|
||||
# Filter by user
|
||||
if user:
|
||||
s = s.filter("term", uploader_id=user)
|
||||
|
||||
query = query.filter(models.Torrent.uploader_id == user)
|
||||
# If admin, show everything
|
||||
if not admin:
|
||||
# If user is not logged in or the accessed feed doesn't belong to user,
|
||||
# hide anonymous torrents belonging to the queried user
|
||||
if not same_user:
|
||||
# TODO adapt to es syntax
|
||||
query = query.filter(models.Torrent.flags.op('&')(
|
||||
int(models.TorrentFlags.ANONYMOUS | models.TorrentFlags.DELETED)).is_(False))
|
||||
|
||||
if main_category:
|
||||
s = s.filter("term", main_category_id=main_cat_id)
|
||||
query = query.filter(models.Torrent.main_category_id == main_cat_id)
|
||||
elif sub_category:
|
||||
s = s.filter("term", main_category_id=main_cat_id)
|
||||
s = s.filter("term", sub_category_id=sub_cat_id)
|
||||
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
|
||||
(models.Torrent.sub_category_id == sub_cat_id))
|
||||
|
||||
# TODO i dunno what this means in es
|
||||
if filter_tuple:
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
|
||||
|
||||
# If admin, show everything
|
||||
if not admin:
|
||||
query = query.filter(models.Torrent.flags.op('&')(
|
||||
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.DELETED)).is_(False))
|
||||
|
||||
if term:
|
||||
# note already handled in es
|
||||
for item in shlex.split(term, posix=False):
|
||||
if len(item) >= 2:
|
||||
query = query.filter(FullTextSearch(
|
||||
item, models.TorrentNameSearch, FullTextMode.NATURAL))
|
||||
|
||||
# Sort and order
|
||||
if sort.class_ != models.Torrent:
|
||||
query = query.join(sort.class_)
|
||||
|
||||
s = s.sort(es_sort)
|
||||
query = query.order_by(getattr(sort, order)())
|
||||
|
||||
per = app.config['RESULTS_PER_PAGE']
|
||||
if rss:
|
||||
pass
|
||||
#query = query.limit(app.config['RESULTS_PER_PAGE'])
|
||||
else:
|
||||
# page is 1-based?
|
||||
s = s[(page-1)*per:page*per]
|
||||
#query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5)
|
||||
|
||||
s = s.highlight_options(tags_schema='styled')
|
||||
s = s.highlight("display_name")
|
||||
|
||||
#return query
|
||||
from pprint import pprint
|
||||
print(json.dumps(s.to_dict()))
|
||||
return s.execute()
|
||||
|
||||
|
||||
@app.errorhandler(404)
|
||||
def not_found(error):
|
||||
|
@ -232,7 +77,6 @@ def before_request():
|
|||
flask.g.user = user
|
||||
|
||||
if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now():
|
||||
print("hio")
|
||||
flask.session['timeout'] = datetime.now() + timedelta(days=7)
|
||||
flask.session.permanent = True
|
||||
flask.session.modified = True
|
||||
|
@ -270,6 +114,10 @@ def home(rss):
|
|||
if page:
|
||||
page = int(page)
|
||||
|
||||
per_page = app.config.get('RESULTS_PER_PAGE')
|
||||
if not per_page:
|
||||
per_page = DEFAULT_PER_PAGE
|
||||
|
||||
user_id = None
|
||||
if user_name:
|
||||
user = models.User.by_username(user_name)
|
||||
|
@ -278,30 +126,72 @@ def home(rss):
|
|||
user_id = user.id
|
||||
|
||||
query_args = {
|
||||
'term': term or '',
|
||||
'user': user_id,
|
||||
'sort': sort or 'id',
|
||||
'order': order or 'desc',
|
||||
'category': category or '0_0',
|
||||
'quality_filter': quality_filter or '0',
|
||||
'page': page or 1,
|
||||
'rss': rss
|
||||
'rss': rss,
|
||||
'per_page': per_page
|
||||
}
|
||||
|
||||
# God mode
|
||||
if flask.g.user and flask.g.user.is_admin:
|
||||
query_args['admin'] = True
|
||||
if flask.g.user:
|
||||
query_args['logged_in_user'] = flask.g.user
|
||||
if flask.g.user.is_admin: # God mode
|
||||
query_args['admin'] = True
|
||||
|
||||
query = search(**query_args)
|
||||
# If searching, we get results from elastic search
|
||||
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
|
||||
if use_elastic and term:
|
||||
query_args['term'] = term
|
||||
|
||||
if rss:
|
||||
return render_rss('/', query)
|
||||
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
|
||||
if not max_search_results:
|
||||
max_search_results = DEFAULT_MAX_SEARCH_RESULT
|
||||
|
||||
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
|
||||
|
||||
query_args['page'] = max_page
|
||||
query_args['max_search_results'] = max_search_results
|
||||
|
||||
query_results = search_elastic(**query_args)
|
||||
|
||||
if rss:
|
||||
return render_rss('/', query_results, use_elastic=True)
|
||||
else:
|
||||
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
|
||||
max_results = min(max_search_results, query_results['hits']['total'])
|
||||
# change p= argument to whatever you change page_parameter to or pagination breaks
|
||||
pagination = Pagination(p=query_args['page'], per_page=per_page,
|
||||
total=max_results, bs_version=3, page_parameter='p',
|
||||
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
|
||||
return flask.render_template('home.html',
|
||||
use_elastic=True,
|
||||
pagination=pagination,
|
||||
torrent_query=query_results,
|
||||
search=query_args,
|
||||
rss_filter=rss_query_string)
|
||||
else:
|
||||
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
|
||||
return flask.render_template('home.html',
|
||||
torrent_query=query,
|
||||
search=query_args,
|
||||
rss_filter=rss_query_string)
|
||||
# If ES is enabled, default to db search for browsing
|
||||
if use_elastic:
|
||||
query_args['term'] = ''
|
||||
else: # Otherwise, use db search for everything
|
||||
query_args['term'] = term or ''
|
||||
print(query_args)
|
||||
query = search_db(**query_args)
|
||||
if rss:
|
||||
return render_rss('/', query, use_elastic=False)
|
||||
else:
|
||||
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
|
||||
# Use elastic is always false here because we only hit this section
|
||||
# if we're browsing without a search term (which means we default to DB)
|
||||
# or if ES is disabled
|
||||
return flask.render_template('home.html',
|
||||
use_elastic=False,
|
||||
torrent_query=query,
|
||||
search=query_args,
|
||||
rss_filter=rss_query_string)
|
||||
|
||||
|
||||
@app.route('/user/<user_name>')
|
||||
|
@ -320,6 +210,10 @@ def view_user(user_name):
|
|||
if page:
|
||||
page = int(page)
|
||||
|
||||
per_page = app.config.get('RESULTS_PER_PAGE')
|
||||
if not per_page:
|
||||
per_page = DEFAULT_PER_PAGE
|
||||
|
||||
query_args = {
|
||||
'term': term or '',
|
||||
'user': user.id,
|
||||
|
@ -328,40 +222,83 @@ def view_user(user_name):
|
|||
'category': category or '0_0',
|
||||
'quality_filter': quality_filter or '0',
|
||||
'page': page or 1,
|
||||
'rss': False
|
||||
'rss': False,
|
||||
'per_page': per_page
|
||||
}
|
||||
|
||||
# God mode
|
||||
if flask.g.user and flask.g.user.is_admin:
|
||||
query_args['admin'] = True
|
||||
|
||||
query = search(**query_args)
|
||||
if flask.g.user:
|
||||
query_args['logged_in_user'] = flask.g.user
|
||||
if flask.g.user.is_admin: # God mode
|
||||
query_args['admin'] = True
|
||||
|
||||
# Use elastic search for term searching
|
||||
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
|
||||
return flask.render_template('user.html',
|
||||
torrent_query=query,
|
||||
search=query_args,
|
||||
user=user,
|
||||
user_page=True,
|
||||
rss_filter=rss_query_string)
|
||||
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
|
||||
if use_elastic and term:
|
||||
query_args['term'] = term
|
||||
|
||||
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
|
||||
if not max_search_results:
|
||||
max_search_results = DEFAULT_MAX_SEARCH_RESULT
|
||||
|
||||
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page)))) # Only allow up to (max_search_results / page) pages
|
||||
|
||||
query_args['page'] = max_page
|
||||
query_args['max_search_results'] = max_search_results
|
||||
|
||||
query_results = search_elastic(**query_args)
|
||||
|
||||
max_results = min(max_search_results, query_results['hits']['total'])
|
||||
# change p= argument to whatever you change page_parameter to or pagination breaks
|
||||
pagination = Pagination(p=query_args['page'], per_page=per_page,
|
||||
total=max_results, bs_version=3, page_parameter='p',
|
||||
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
|
||||
return flask.render_template('user.html',
|
||||
use_elastic=True,
|
||||
pagination=pagination,
|
||||
torrent_query=query_results,
|
||||
search=query_args,
|
||||
user=user,
|
||||
user_page=True,
|
||||
rss_filter=rss_query_string)
|
||||
# Similar logic as home page
|
||||
else:
|
||||
if use_elastic:
|
||||
query_args['term'] = ''
|
||||
else:
|
||||
query_args['term'] = term or ''
|
||||
query = search_db(**query_args)
|
||||
return flask.render_template('user.html',
|
||||
use_elastic=False,
|
||||
torrent_query=query,
|
||||
search=query_args,
|
||||
user=user,
|
||||
user_page=True,
|
||||
rss_filter=rss_query_string)
|
||||
|
||||
|
||||
@app.template_filter('rfc822')
|
||||
def _jinja2_filter_rfc822(date, fmt=None):
|
||||
return formatdate(float(date.strftime('%s')))
|
||||
|
||||
@app.template_filter('rfc822_es')
|
||||
def _jinja2_filter_rfc822(datestr, fmt=None):
|
||||
return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s')))
|
||||
|
||||
def render_rss(label, query):
|
||||
|
||||
def render_rss(label, query, use_elastic):
|
||||
print(query)
|
||||
rss_xml = flask.render_template('rss.xml',
|
||||
use_elastic=use_elastic,
|
||||
term=label,
|
||||
site_url=flask.request.url_root,
|
||||
query=query)
|
||||
torrent_query=query)
|
||||
response = flask.make_response(rss_xml)
|
||||
response.headers['Content-Type'] = 'application/xml'
|
||||
return response
|
||||
|
||||
|
||||
#@app.route('/about', methods=['GET'])
|
||||
# @app.route('/about', methods=['GET'])
|
||||
# def about():
|
||||
# return flask.render_template('about.html')
|
||||
|
||||
|
@ -485,7 +422,6 @@ def activate_user(payload):
|
|||
|
||||
user.status = models.UserStatusType.ACTIVE
|
||||
|
||||
|
||||
db.session.add(user)
|
||||
db.session.commit()
|
||||
|
||||
|
|
317
nyaa/search.py
Normal file
317
nyaa/search.py
Normal file
|
@ -0,0 +1,317 @@
|
|||
import flask
|
||||
import re
|
||||
import math
|
||||
import json
|
||||
import shlex
|
||||
|
||||
from nyaa import app, db
|
||||
from nyaa import models
|
||||
|
||||
import sqlalchemy_fulltext.modes as FullTextMode
|
||||
from sqlalchemy_fulltext import FullTextSearch
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch_dsl import Search, Q
|
||||
|
||||
|
||||
def search_elastic(term='', user=None, sort='id', order='desc',
|
||||
category='0_0', quality_filter='0', page=1,
|
||||
rss=False, admin=False, logged_in_user=None,
|
||||
per_page=75, max_search_results=1000):
|
||||
# This function can easily be memcached now
|
||||
|
||||
es_client = Elasticsearch()
|
||||
|
||||
es_sort_keys = {
|
||||
'id': 'id',
|
||||
'size': 'filesize',
|
||||
# 'name': 'display_name', # This is slow and buggy
|
||||
'seeders': 'seed_count',
|
||||
'leechers': 'leech_count',
|
||||
'downloads': 'download_count'
|
||||
}
|
||||
|
||||
sort_ = sort.lower()
|
||||
if sort_ not in es_sort_keys:
|
||||
flask.abort(400)
|
||||
|
||||
es_sort = es_sort_keys[sort]
|
||||
|
||||
order_keys = {
|
||||
'desc': 'desc',
|
||||
'asc': 'asc'
|
||||
}
|
||||
|
||||
order_ = order.lower()
|
||||
if order_ not in order_keys:
|
||||
flask.abort(400)
|
||||
|
||||
# Only allow ID, desc if RSS
|
||||
if rss:
|
||||
sort = es_sort_keys['id']
|
||||
order = 'desc'
|
||||
|
||||
# funky, es sort is default asc, prefixed by '-' if desc
|
||||
if 'desc' == order:
|
||||
es_sort = '-' + es_sort
|
||||
|
||||
# Quality filter
|
||||
quality_keys = [
|
||||
'0', # Show all
|
||||
'1', # No remakes
|
||||
'2', # Only trusted
|
||||
'3' # Only completed
|
||||
]
|
||||
|
||||
if quality_filter.lower() not in quality_keys:
|
||||
flask.abort(400)
|
||||
|
||||
quality_filter = int(quality_filter)
|
||||
|
||||
# Category filter
|
||||
main_category = None
|
||||
sub_category = None
|
||||
main_cat_id = 0
|
||||
sub_cat_id = 0
|
||||
if category:
|
||||
cat_match = re.match(r'^(\d+)_(\d+)$', category)
|
||||
if not cat_match:
|
||||
flask.abort(400)
|
||||
|
||||
main_cat_id = int(cat_match.group(1))
|
||||
sub_cat_id = int(cat_match.group(2))
|
||||
|
||||
if main_cat_id > 0:
|
||||
if sub_cat_id > 0:
|
||||
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
|
||||
if not sub_category:
|
||||
flask.abort(400)
|
||||
else:
|
||||
main_category = models.MainCategory.by_id(main_cat_id)
|
||||
if not main_category:
|
||||
flask.abort(400)
|
||||
|
||||
# This might be useless since we validate users
|
||||
# before coming into this method, but just to be safe...
|
||||
if user:
|
||||
user = models.User.by_id(user)
|
||||
if not user:
|
||||
flask.abort(404)
|
||||
user = user.id
|
||||
|
||||
same_user = False
|
||||
if logged_in_user:
|
||||
same_user = user == logged_in_user.id
|
||||
|
||||
s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME')) # todo, sukebei prefix
|
||||
|
||||
# Apply search term
|
||||
if term:
|
||||
s = s.query('simple_query_string',
|
||||
analyzer='my_search_analyzer',
|
||||
default_operator="AND",
|
||||
query=term)
|
||||
|
||||
# User view (/user/username)
|
||||
if user:
|
||||
s = s.filter('term', uploader_id=user)
|
||||
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
s = s.filter('term', deleted=False)
|
||||
# If logged in user is not the same as the user being viewed,
|
||||
# show only torrents that aren't hidden or anonymous.
|
||||
#
|
||||
# If logged in user is the same as the user being viewed,
|
||||
# show all torrents including hidden and anonymous ones.
|
||||
#
|
||||
# On RSS pages in user view, show only torrents that
|
||||
# aren't hidden or anonymous no matter what
|
||||
if not same_user or rss:
|
||||
s = s.filter('term', hidden=False)
|
||||
s = s.filter('term', anonymous=False)
|
||||
# General view (homepage, general search view)
|
||||
else:
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
s = s.filter('term', deleted=False)
|
||||
# If logged in, show all torrents that aren't hidden unless they belong to you
|
||||
# On RSS pages, show all public torrents and nothing more.
|
||||
if logged_in_user and not rss:
|
||||
hiddenFilter = Q('term', hidden=False)
|
||||
userFilter = Q('term', uploader_id=logged_in_user.id)
|
||||
combinedFilter = hiddenFilter | userFilter
|
||||
s = s.filter('bool', filter=[combinedFilter])
|
||||
else:
|
||||
s = s.filter('term', hidden=False)
|
||||
|
||||
if main_category:
|
||||
s = s.filter('term', main_category_id=main_cat_id)
|
||||
elif sub_category:
|
||||
s = s.filter('term', main_category_id=main_cat_id)
|
||||
s = s.filter('term', sub_category_id=sub_cat_id)
|
||||
|
||||
if quality_filter == 0:
|
||||
pass
|
||||
elif quality_filter == 1:
|
||||
s = s.filter('term', remake=False)
|
||||
elif quality_filter == 2:
|
||||
s = s.filter('term', trusted=True)
|
||||
elif quality_filter == 3:
|
||||
s = s.filter('term', complete=True)
|
||||
|
||||
# Apply sort
|
||||
s = s.sort(es_sort)
|
||||
|
||||
# Only show first RESULTS_PER_PAGE items for RSS
|
||||
if rss:
|
||||
s = s[0:per_page]
|
||||
else:
|
||||
max_page = min(page, int(math.ceil(max_search_results / float(per_page))))
|
||||
from_idx = (max_page-1)*per_page
|
||||
to_idx = min(max_search_results, max_page*per_page)
|
||||
s = s[from_idx:to_idx]
|
||||
|
||||
highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
|
||||
if highlight:
|
||||
s = s.highlight_options(tags_schema='styled')
|
||||
s = s.highlight("display_name")
|
||||
|
||||
# Return query, uncomment print line to debug query
|
||||
from pprint import pprint
|
||||
print(json.dumps(s.to_dict()))
|
||||
return s.execute()
|
||||
|
||||
|
||||
def search_db(term='', user=None, sort='id', order='desc', category='0_0',
|
||||
quality_filter='0', page=1, rss=False, admin=False,
|
||||
logged_in_user=None, per_page=75):
|
||||
sort_keys = {
|
||||
'id': models.Torrent.id,
|
||||
'size': models.Torrent.filesize,
|
||||
# 'name': models.Torrent.display_name, # Disable this because we disabled this in search_elastic, for the sake of consistency
|
||||
'seeders': models.Statistic.seed_count,
|
||||
'leechers': models.Statistic.leech_count,
|
||||
'downloads': models.Statistic.download_count
|
||||
}
|
||||
|
||||
sort_ = sort.lower()
|
||||
if sort_ not in sort_keys:
|
||||
flask.abort(400)
|
||||
sort = sort_keys[sort]
|
||||
|
||||
order_keys = {
|
||||
'desc': 'desc',
|
||||
'asc': 'asc'
|
||||
}
|
||||
|
||||
order_ = order.lower()
|
||||
if order_ not in order_keys:
|
||||
flask.abort(400)
|
||||
|
||||
filter_keys = {
|
||||
'0': None,
|
||||
'1': (models.TorrentFlags.REMAKE, False),
|
||||
'2': (models.TorrentFlags.TRUSTED, True),
|
||||
'3': (models.TorrentFlags.COMPLETE, True)
|
||||
}
|
||||
|
||||
sentinel = object()
|
||||
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
|
||||
if filter_tuple is sentinel:
|
||||
flask.abort(400)
|
||||
|
||||
if user:
|
||||
user = models.User.by_id(user)
|
||||
if not user:
|
||||
flask.abort(404)
|
||||
user = user.id
|
||||
|
||||
main_category = None
|
||||
sub_category = None
|
||||
main_cat_id = 0
|
||||
sub_cat_id = 0
|
||||
if category:
|
||||
cat_match = re.match(r'^(\d+)_(\d+)$', category)
|
||||
if not cat_match:
|
||||
flask.abort(400)
|
||||
|
||||
main_cat_id = int(cat_match.group(1))
|
||||
sub_cat_id = int(cat_match.group(2))
|
||||
|
||||
if main_cat_id > 0:
|
||||
if sub_cat_id > 0:
|
||||
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
|
||||
else:
|
||||
main_category = models.MainCategory.by_id(main_cat_id)
|
||||
|
||||
if not category:
|
||||
flask.abort(400)
|
||||
|
||||
# Force sort by id desc if rss
|
||||
if rss:
|
||||
sort = sort_keys['id']
|
||||
order = 'desc'
|
||||
|
||||
same_user = False
|
||||
if logged_in_user:
|
||||
same_user = logged_in_user.id == user
|
||||
|
||||
if term:
|
||||
query = db.session.query(models.TorrentNameSearch)
|
||||
else:
|
||||
query = models.Torrent.query
|
||||
|
||||
# User view (/user/username)
|
||||
if user:
|
||||
query = query.filter(models.Torrent.uploader_id == user)
|
||||
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
|
||||
# If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous
|
||||
# If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones
|
||||
# On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what
|
||||
if not same_user or rss:
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN |
|
||||
models.TorrentFlags.ANONYMOUS)).is_(False))
|
||||
# General view (homepage, general search view)
|
||||
else:
|
||||
if not admin:
|
||||
# Hide all DELETED torrents if regular user
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
|
||||
# If logged in, show all torrents that aren't hidden unless they belong to you
|
||||
# On RSS pages, show all public torrents and nothing more.
|
||||
if logged_in_user and not rss:
|
||||
query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
|
||||
(models.Torrent.uploader_id == logged_in_user.id))
|
||||
# Otherwise, show all torrents that aren't hidden
|
||||
else:
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False))
|
||||
|
||||
if main_category:
|
||||
query = query.filter(models.Torrent.main_category_id == main_cat_id)
|
||||
elif sub_category:
|
||||
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
|
||||
(models.Torrent.sub_category_id == sub_cat_id))
|
||||
|
||||
if filter_tuple:
|
||||
query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
|
||||
|
||||
if term:
|
||||
for item in shlex.split(term, posix=False):
|
||||
if len(item) >= 2:
|
||||
query = query.filter(FullTextSearch(
|
||||
item, models.TorrentNameSearch, FullTextMode.NATURAL))
|
||||
|
||||
# Sort and order
|
||||
if sort.class_ != models.Torrent:
|
||||
query = query.join(sort.class_)
|
||||
|
||||
query = query.order_by(getattr(sort, order)())
|
||||
|
||||
if rss:
|
||||
query = query.limit(per_page)
|
||||
else:
|
||||
query = query.paginate_faste(page, per_page=per_page, step=5)
|
||||
|
||||
return query
|
|
@ -4,20 +4,32 @@
|
|||
<description>RSS Feed for {{ term }}</description>
|
||||
<link>{{ url_for('home', _external=True) }}</link>
|
||||
<atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
|
||||
{% for torrent in query %}
|
||||
{% for torrent in torrent_query %}
|
||||
{% if torrent.has_torrent %}
|
||||
<item>
|
||||
<title>{{ torrent.display_name }}</title>
|
||||
{% if use_elastic %}
|
||||
<link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
|
||||
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
|
||||
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
|
||||
{% else %}
|
||||
<link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
|
||||
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
|
||||
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
|
||||
{% endif %}
|
||||
</item>
|
||||
{% else %}
|
||||
<item>
|
||||
<title>{{ torrent.display_name }}</title>
|
||||
{% if use_elastic %}
|
||||
<link>{{ torrent.info_hash }}</link>
|
||||
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
|
||||
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
|
||||
{% else %}
|
||||
<link>{{ torrent.magnet_uri }}</link>
|
||||
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
|
||||
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
|
||||
{% endif %}
|
||||
</item>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
{{ caller() }}
|
||||
</th>
|
||||
{% endmacro %}
|
||||
{% if torrent_query.hits.total > 0 %}
|
||||
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
|
||||
<div class="table-responsive">
|
||||
<table class="table table-bordered table-hover table-striped torrent-list">
|
||||
<thead>
|
||||
|
@ -16,7 +16,7 @@
|
|||
{% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
|
||||
<div>Category</div>
|
||||
{% endcall %}
|
||||
{% call render_column_header("hdr-name", "width:auto;", sort_key="name") %}
|
||||
{% call render_column_header("hdr-name", "width:auto;") %}
|
||||
<div>Name</div>
|
||||
{% endcall %}
|
||||
{% call render_column_header("hdr-link", "width:0;", center_text=True) %}
|
||||
|
@ -45,26 +45,46 @@
|
|||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for torrent in torrent_query %}
|
||||
{% set torrents = torrent_query if use_elastic else torrent_query.items %}
|
||||
{% for torrent in torrents %}
|
||||
<tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}">
|
||||
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) %}
|
||||
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
|
||||
{% set icon_dir = config.SITE_FLAVOR %}
|
||||
<td style="padding:0 4px;">
|
||||
{% if use_elastic %}
|
||||
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}">
|
||||
{% else %}
|
||||
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}">
|
||||
{% endif %}
|
||||
<img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png">
|
||||
</a>
|
||||
</td>
|
||||
{% if use_elastic %}
|
||||
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td>
|
||||
{% else %}
|
||||
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td>
|
||||
{% endif %}
|
||||
<td style="white-space: nowrap;text-align: center;">
|
||||
{% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %}
|
||||
<a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a>
|
||||
</td>
|
||||
<td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td>
|
||||
{% if use_elastic %}
|
||||
<td class="text-center" {#data-timestamp="{{ torrent.created_time|int }}"#}>{{ torrent.created_time }}</td>
|
||||
{% else %}
|
||||
<td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td>
|
||||
{% endif %}
|
||||
|
||||
{% if config.ENABLE_SHOW_STATS %}
|
||||
{% if use_elastic %}
|
||||
<td class="text-center" style="color: green;">{{ torrent.seed_count }}</td>
|
||||
<td class="text-center" style="color: red;">{{ torrent.leech_count }}</td>
|
||||
<td class="text-center">{{ torrent.download_count }}</td>
|
||||
{% else %}
|
||||
<td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td>
|
||||
<td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td>
|
||||
<td class="text-center">{{ torrent.stats.download_count }}</td>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
@ -75,9 +95,12 @@
|
|||
<h3>No results found</h3>
|
||||
{% endif %}
|
||||
|
||||
{#
|
||||
<center>
|
||||
{% if use_elastic %}
|
||||
{{ pagination.info }}
|
||||
{{ pagination.links }}
|
||||
{% else %}
|
||||
{% from "bootstrap/pagination.html" import render_pagination %}
|
||||
{{ render_pagination(torrent_query) }}
|
||||
{% endif %}
|
||||
</center>
|
||||
#}
|
||||
|
|
|
@ -32,7 +32,9 @@ visitor==0.1.3
|
|||
webassets==0.12.1
|
||||
Werkzeug==0.12.1
|
||||
WTForms==2.1
|
||||
## The following requirements were added by pip freeze:
|
||||
## elasticsearch dependencies
|
||||
elasticsearch==5.3.0
|
||||
elasticsearch-dsl==5.2.0
|
||||
progressbar2==3.20.0
|
||||
progressbar2==3.20.0
|
||||
mysql-replication==0.13
|
||||
flask-paginate==0.4.5
|
53
sync_es.py
53
sync_es.py
|
@ -40,7 +40,12 @@ log.setLevel(logging.INFO)
|
|||
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
|
||||
|
||||
# in prod want in /var/lib somewhere probably
|
||||
SAVE_LOC = "/tmp/sync_es_position.json"
|
||||
SAVE_LOC = "/var/lib/sync_es_position.json"
|
||||
MYSQL_HOST = '127.0.0.1'
|
||||
MYSQL_PORT = 3306
|
||||
MYSQL_USER = 'test'
|
||||
MYSQL_PW = 'test123'
|
||||
NT_DB = 'nyaav2'
|
||||
|
||||
with open(SAVE_LOC) as f:
|
||||
pos = json.load(f)
|
||||
|
@ -50,16 +55,16 @@ es = Elasticsearch()
|
|||
stream = BinLogStreamReader(
|
||||
# TODO parse out from config.py or something
|
||||
connection_settings = {
|
||||
'host': '127.0.0.1',
|
||||
'port': 13306,
|
||||
'user': 'root',
|
||||
'passwd': 'dunnolol'
|
||||
'host': MYSQL_HOST,
|
||||
'port': MYSQL_PORT,
|
||||
'user': MYSQL_USER,
|
||||
'passwd': MYSQL_PW
|
||||
},
|
||||
server_id=10, # arbitrary
|
||||
# only care about this table currently
|
||||
only_schemas=["nyaav2"],
|
||||
# TODO sukebei
|
||||
only_tables=["nyaa_torrents", "nyaa_statistics"],
|
||||
# only care about this database currently
|
||||
only_schemas=[NT_DB],
|
||||
# these tables in the database
|
||||
only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
|
||||
# from our save file
|
||||
resume_stream=True,
|
||||
log_file=pos['log_file'],
|
||||
|
@ -72,7 +77,7 @@ stream = BinLogStreamReader(
|
|||
# using aiomysql if anybody wants to revive that.
|
||||
blocking=True)
|
||||
|
||||
def reindex_torrent(t):
|
||||
def reindex_torrent(t, index_name):
|
||||
# XXX annoyingly different from import_to_es, and
|
||||
# you need to keep them in sync manually.
|
||||
f = t['flags']
|
||||
|
@ -103,14 +108,14 @@ def reindex_torrent(t):
|
|||
}
|
||||
# update, so we don't delete the stats if present
|
||||
es.update(
|
||||
index='nyaav2',
|
||||
index=index_name,
|
||||
doc_type='torrent',
|
||||
id=t['id'],
|
||||
body={"doc": doc, "doc_as_upsert": True})
|
||||
|
||||
def reindex_stats(s):
|
||||
def reindex_stats(s, index_name):
|
||||
es.update(
|
||||
index='nyaav2',
|
||||
index=index_name,
|
||||
doc_type='torrent',
|
||||
id=s['torrent_id'],
|
||||
body={
|
||||
|
@ -126,21 +131,29 @@ last_save = time.time()
|
|||
|
||||
for event in stream:
|
||||
for row in event.rows:
|
||||
if event.table == "nyaa_torrents":
|
||||
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
|
||||
if event.table == "nyaa_torrents":
|
||||
index_name = "nyaa"
|
||||
else:
|
||||
index_name = "sukebei"
|
||||
if type(event) is WriteRowsEvent:
|
||||
reindex_torrent(row['values'])
|
||||
reindex_torrent(row['values'], index_name)
|
||||
elif type(event) is UpdateRowsEvent:
|
||||
reindex_torrent(row['after_values'])
|
||||
reindex_torrent(row['after_values'], index_name)
|
||||
elif type(event) is DeleteRowsEvent:
|
||||
# just delete it
|
||||
es.delete(index='nyaav2', doc_type='torrent', id=row['values']['id'])
|
||||
es.delete(index=index_name, doc_type='torrent', id=row['values']['id'])
|
||||
else:
|
||||
raise Exception(f"unknown event {type(event)}")
|
||||
elif event.table == "nyaa_statistics":
|
||||
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
|
||||
if event.table == "nyaa_torrents":
|
||||
index_name = "nyaa"
|
||||
else:
|
||||
index_name = "sukebei"
|
||||
if type(event) is WriteRowsEvent:
|
||||
reindex_stats(row['values'])
|
||||
reindex_stats(row['values'], index_name)
|
||||
elif type(event) is UpdateRowsEvent:
|
||||
reindex_stats(row['after_values'])
|
||||
reindex_stats(row['after_values'], index_name)
|
||||
elif type(event) is DeleteRowsEvent:
|
||||
# uh ok. assume that the torrent row will get deleted later.
|
||||
pass
|
||||
|
|
Loading…
Reference in a new issue