1
0
Fork 0
mirror of https://gitlab.com/SIGBUS/nyaa.git synced 2024-12-22 08:49:59 +00:00

added user class display and editing

This commit is contained in:
martstern 2017-05-17 06:02:15 -04:00
commit 931b2b0b83
31 changed files with 1318 additions and 317 deletions

View file

@ -44,5 +44,43 @@
- Start the dev server with `python run.py`
- Deactivate `source deactivate`
# Enabling ElasticSearch
## Basics
- Install jdk `sudo apt-get install openjdk-8-jdk`
- Install elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html
- `sudo systemctl enable elasticsearch.service`
- `sudo systemctl start elasticsearch.service`
- Run `curl -XGET 'localhost:9200'` and make sure ES is running
- Optional: install Kabana as a search frontend for ES
## Enable MySQL Binlogging
- Add the `[mariadb]` bin-log section to my.cnf and reload mysql server
- Connect to mysql
- `SHOW VARIABLES LIKE 'binlog_format';`
- Make sure it shows ROW
- Connect to root user
- `GRANT REPLICATION SLAVE ON *.* TO 'test'@'localhost';` where test is the user you will be running `sync_es.py` with
## Setting up ES
- Run `./create_es.sh` and this creates two indicies: `nyaa` and `sukebei`
- The output should show `akncolwedged: true` twice
- The safest bet is to disable the webapp here to ensure there's no database writes
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `nyaa`
- Run `python import_to_es.py` with `SITE_FLAVOR` set to `sukebei`
- These will take some time to run as it's indexing
## Setting up sync_es.py
- Sync_es.py keeps the ElasticSearch index updated by reading the BinLog
- Configure the MySQL options with the user where you granted the REPLICATION permissions
- Connect to MySQL, run `SHOW MASTER STATUS;`.
- Copy the output to `/var/lib/sync_es_position.json` with the contents `{"log_file": "FILE", "log_pos": POSITION}` and replace FILENAME with File (something like master1-bin.000002) in the SQL output and POSITION (something like 892528513) with Position
- Set up `sync_es.py` as a service and run it, preferably as the system/root
- Make sure `sync_es.py` runs within venv with the right dependencies
## Good to go!
- After that, enable the `USE_ELASTIC_SEARCH` flag and restart the webapp and you're good to go
## Code Quality:
- Remember to follow PEP8 style guidelines and run `./lint.sh` before committing.

View file

@ -33,8 +33,6 @@ MAIL_FROM_ADDRESS = '***'
SMTP_USERNAME = '***'
SMTP_PASSWORD = '***'
RESULTS_PER_PAGE = 75
# What the site identifies itself as.
SITE_NAME = 'Nyaa'
@ -49,3 +47,14 @@ ENFORCE_MAIN_ANNOUNCE_URL = False
MAIN_ANNOUNCE_URL = ''
BACKUP_TORRENT_FOLDER = 'torrents'
#
# Search Options
#
# Max ES search results, do not set over 10000
RESULTS_PER_PAGE = 75
USE_ELASTIC_SEARCH = False
ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False
ES_MAX_SEARCH_RESULT = 1000
ES_INDEX_NAME = SITE_FLAVOR # we create indicies named nyaa or sukebei

View file

@ -4,3 +4,9 @@ ft_min_word_len=2
innodb_ft_cache_size = 80000000
innodb_ft_total_cache_size = 1600000000
max_allowed_packet = 100M
[mariadb]
log-bin
server_id=1
log-basename=master1
binlog-format = row

5
create_es.sh Executable file
View file

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# create indicies named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml
curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

91
es_mapping.yml Normal file
View file

@ -0,0 +1,91 @@
---
# CREATE DTABASE/TABLE equivalent for elasticsearch, in yaml
# fo inline comments.
settings:
analysis:
analyzer:
my_search_analyzer:
type: custom
tokenizer: standard
char_filter:
- my_char_filter
filter:
- standard
- lowercase
my_index_analyzer:
type: custom
tokenizer: standard
char_filter:
- my_char_filter
filter:
- lowercase
- my_ngram
filter:
my_ngram:
type: edgeNGram
min_gram: 1
max_gram: 15
char_filter:
my_char_filter:
type: mapping
mappings: ["-=>_", "!=>_"]
index:
# we're running a single es node, so no sharding necessary,
# plus replicas don't really help either.
number_of_shards: 1
number_of_replicas : 0
mapper:
# disable elasticsearch's "helpful" autoschema
dynamic: false
# since we disabled the _all field, default query the
# name of the torrent.
query:
default_field: display_name
mappings:
torrent:
# don't want everything concatenated
_all:
enabled: false
properties:
id:
type: long
display_name:
# TODO could do a fancier tokenizer here to parse out the
# the scene convention of stuff in brackets, plus stuff like k-on
type: text
analyzer: my_index_analyzer
fielddata: true
created_time:
type: date
# Only in the ES index for generating magnet links
info_hash:
enabled: false
filesize:
type: long
anonymous:
type: boolean
trusted:
type: boolean
remake:
type: boolean
complete:
type: boolean
hidden:
type: boolean
deleted:
type: boolean
has_torrent:
type: boolean
download_count:
type: long
leech_count:
type: long
seed_count:
type: long
# these ids are really only for filtering, thus keyword
uploader_id:
type: keyword
main_category_id:
type: keyword
sub_category_id:
type: keyword

100
import_to_es.py Normal file
View file

@ -0,0 +1,100 @@
#!/usr/bin/env python
"""
Bulk load torents from mysql into elasticsearch `nyaav2` index,
which is assumed to already exist.
This is a one-shot deal, so you'd either need to complement it
with a cron job or some binlog-reading thing (TODO)
"""
from nyaa import app
from nyaa.models import Torrent
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch import helpers
import progressbar
import sys
bar = progressbar.ProgressBar(
max_value=Torrent.query.count(),
widgets=[
progressbar.SimpleProgress(),
' [', progressbar.Timer(), '] ',
progressbar.Bar(),
' (', progressbar.ETA(), ') ',
])
es = Elasticsearch(timeout=30)
ic = IndicesClient(es)
# turn into thing that elasticsearch indexes. We flatten in
# the stats (seeders/leechers) so we can order by them in es naturally.
# we _don't_ dereference uploader_id to the user's display name however,
# instead doing that at query time. I _think_ this is right because
# we don't want to reindex all the user's torrents just because they
# changed their name, and we don't really want to FTS search on the user anyway.
# Maybe it's more convenient to derefence though.
def mk_es(t):
return {
"_id": t.id,
"_type": "torrent",
"_index": app.config['ES_INDEX_NAME'],
"_source": {
# we're also indexing the id as a number so you can
# order by it. seems like this is just equivalent to
# order by created_time, but oh well
"id": t.id,
"display_name": t.display_name,
"created_time": t.created_time,
# not analyzed but included so we can render magnet links
# without querying sql again.
"info_hash": t.info_hash.hex(),
"filesize": t.filesize,
"uploader_id": t.uploader_id,
"main_category_id": t.main_category_id,
"sub_category_id": t.sub_category_id,
# XXX all the bitflags are numbers
"anonymous": bool(t.anonymous),
"trusted": bool(t.trusted),
"remake": bool(t.remake),
"complete": bool(t.complete),
# TODO instead of indexing and filtering later
# could delete from es entirely. Probably won't matter
# for at least a few months.
"hidden": bool(t.hidden),
"deleted": bool(t.deleted),
"has_torrent": t.has_torrent,
# Stats
"download_count": t.stats.download_count,
"leech_count": t.stats.leech_count,
"seed_count": t.stats.seed_count,
}
}
# page through an sqlalchemy query, like the per_fetch but
# doesn't break the eager joins its doing against the stats table.
# annoying that this isn't built in somehow.
def page_query(query, limit=sys.maxsize, batch_size=10000):
start = 0
while True:
# XXX very inelegant way to do this, i'm confus
stop = min(limit, start + batch_size)
if stop == start:
break
things = query.slice(start, stop)
if not things:
break
had_things = False
for thing in things:
had_things = True
yield(thing)
if not had_things or stop == limit:
break
bar.update(start)
start = min(limit, start + batch_size)
# turn off refreshes while bulk loading
ic.put_settings(body={'index': {'refresh_interval': '-1'}}, index=app.config['ES_INDEX_NAME'])
helpers.bulk(es, (mk_es(t) for t in page_query(Torrent.query)), chunk_size=10000)
# restore to near-enough real time
ic.put_settings(body={'index': {'refresh_interval': '30s'}}, index=app.config['ES_INDEX_NAME'])

View file

@ -60,4 +60,4 @@ assets = Environment(app)
# output='style.css', depends='**/*.scss')
# assets.register('style_all', css)
from nyaa import routes
from nyaa import routes # noqa

View file

@ -10,7 +10,7 @@ from orderedset import OrderedSet
from werkzeug import secure_filename
DEBUG_API = False
#################################### API ROUTES ####################################
# #################################### API ROUTES ####################################
CATEGORIES = [
('Anime', ['Anime Music Video', 'English-translated', 'Non-English-translated', 'Raw']),
('Audio', ['Lossless', 'Lossy']),
@ -30,7 +30,7 @@ def validate_main_sub_cat(main_cat_name, sub_cat_name):
cat_id = main_cat.id_as_string
sub_cat_id = sub_cat.id_as_string
cat_sub_cat = sub_cat_id.split('_')
#print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1]))
# print('cat: {0} sub_cat: {1}'.format(cat_sub_cat[0], cat_sub_cat[1]))
return True, cat_sub_cat[0], cat_sub_cat[1]
@ -112,17 +112,22 @@ def api_upload(upload_request):
if DEBUG_API:
print(json.dumps(j, indent=4))
_json_keys = ['username', 'password',
'display_name', 'main_cat', 'sub_cat', 'flags'] # 'information' and 'description' are not required
_json_keys = ['username',
'password',
'display_name',
'main_cat',
'sub_cat',
'flags'] # 'information' and 'description' are not required
# Check that required fields are present
for _k in _json_keys:
if _k not in j.keys():
return flask.make_response(flask.jsonify({"Error": "Missing JSON field: {0}.".format(_k)}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Missing JSON field: {0}.".format(_k)}), 400)
# Check that no extra fields are present
for k in j.keys():
if k not in ['username', 'password',
'display_name', 'main_cat', 'sub_cat', 'information', 'description', 'flags']:
return flask.make_response(flask.jsonify({"Error": "Incorrect JSON field(s)."}), 400)
if k not in set(_json_keys + ['information', 'description']):
return flask.make_response(flask.jsonify(
{"Error": "Incorrect JSON field(s)."}), 400)
else:
return flask.make_response(flask.jsonify({"Error": "No metadata."}), 400)
if 'torrent' in upload_request.files:
@ -143,14 +148,17 @@ def api_upload(upload_request):
if not user:
user = models.User.by_email(username)
if not user or password != user.password_hash or user.status == models.UserStatusType.INACTIVE:
return flask.make_response(flask.jsonify({"Error": "Incorrect username or password"}), 403)
if (not user or password != user.password_hash
or user.status == models.UserStatusType.INACTIVE):
return flask.make_response(flask.jsonify(
{"Error": "Incorrect username or password"}), 403)
current_user = user
display_name = j['display_name']
if (len(display_name) < 3) or (len(display_name) > 1024):
return flask.make_response(flask.jsonify({"Error": "Torrent name must be between 3 and 1024 characters."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Torrent name must be between 3 and 1024 characters."}), 400)
main_cat_name = j['main_cat']
sub_cat_name = j['sub_cat']
@ -158,14 +166,16 @@ def api_upload(upload_request):
cat_subcat_status, cat_id, sub_cat_id = validate_main_sub_cat(
main_cat_name, sub_cat_name)
if not cat_subcat_status:
return flask.make_response(flask.jsonify({"Error": "Incorrect Category / Sub-Category."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Incorrect Category / Sub-Category."}), 400)
# TODO Sanitize information
information = None
try:
information = j['information']
if len(information) > 255:
return flask.make_response(flask.jsonify({"Error": "Information is limited to 255 characters."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Information is limited to 255 characters."}), 400)
except Exception as e:
information = ''
@ -173,8 +183,10 @@ def api_upload(upload_request):
description = None
try:
description = j['description']
if len(description) > (10 * 1024):
return flask.make_response(flask.jsonify({"Error": "Description is limited to {0} characters.".format(10 * 1024)}), 403)
limit = 10 * 1024
if len(description) > limit:
return flask.make_response(flask.jsonify(
{"Error": "Description is limited to {0} characters.".format(limit)}), 403)
except Exception as e:
description = ''
@ -182,13 +194,15 @@ def api_upload(upload_request):
if v_flags:
torrent_flags = j['flags']
else:
return flask.make_response(flask.jsonify({"Error": "Incorrect torrent flags."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Incorrect torrent flags."}), 400)
torrent_status, torrent_data = validate_torrent_file(
torrent_file.filename, torrent_file.read()) # Needs validation
if not torrent_status:
return flask.make_response(flask.jsonify({"Error": "Invalid or Duplicate torrent file."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Invalid or Duplicate torrent file."}), 400)
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant
# keys and values have been checked for (see UploadForm in forms.py for details)
@ -297,21 +311,24 @@ def api_upload(upload_request):
# Store tracker refs in DB
for order, tracker in enumerate(db_trackers):
torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
tracker_id=tracker.id, order=order)
tracker_id=tracker.id, order=order)
db.session.add(torrent_tracker)
db.session.commit()
if app.config.get('BACKUP_TORRENT_FOLDER'):
torrent_file.seek(0, 0)
torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename)))
torrent_path = os.path.join(app.config['BACKUP_TORRENT_FOLDER'], '{}.{}'.format(
torrent.id, secure_filename(torrent_file.filename)))
torrent_file.save(torrent_path)
torrent_file.close()
#print('Success? {0}'.format(torrent.id))
return flask.make_response(flask.jsonify({"Success": "Request was processed {0}".format(torrent.id)}), 200)
# print('Success? {0}'.format(torrent.id))
return flask.make_response(flask.jsonify(
{"Success": "Request was processed {0}".format(torrent.id)}), 200)
except Exception as e:
print('Exception: {0}'.format(e))
return flask.make_response(flask.jsonify({"Error": "Incorrect JSON. Please see HELP page for examples."}), 400)
return flask.make_response(flask.jsonify(
{"Error": "Incorrect JSON. Please see HELP page for examples."}), 400)
else:
return flask.make_response(flask.jsonify({"Error": "Bad request"}), 400)

View file

@ -72,7 +72,8 @@ def handle_torrent_upload(upload_form, uploading_user=None):
models.UserLevelType.TRUSTED) if uploading_user else False
# Set category ids
torrent.main_category_id, torrent.sub_category_id = upload_form.category.parsed_data.get_category_ids()
torrent.main_category_id, torrent.sub_category_id = \
upload_form.category.parsed_data.get_category_ids()
# print('Main cat id: {0}, Sub cat id: {1}'.format(
# torrent.main_category_id, torrent.sub_category_id))
@ -142,7 +143,7 @@ def handle_torrent_upload(upload_form, uploading_user=None):
# Store tracker refs in DB
for order, tracker in enumerate(db_trackers):
torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
tracker_id=tracker.id, order=order)
tracker_id=tracker.id, order=order)
db.session.add(torrent_tracker)
db.session.commit()
@ -156,8 +157,9 @@ def handle_torrent_upload(upload_form, uploading_user=None):
if not os.path.exists(torrent_dir):
os.makedirs(torrent_dir)
torrent_path = os.path.join(torrent_dir, '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename)))
torrent_path = os.path.join(torrent_dir, '{}.{}'.format(
torrent.id, secure_filename(torrent_file.filename)))
torrent_file.save(torrent_path)
torrent_file.close()
return torrent
return torrent

View file

@ -1,6 +1,7 @@
from flask_sqlalchemy import Pagination, BaseQuery
from flask import abort
def paginate_faste(self, page=1, per_page=50, max_page=None, step=5):
if page < 1:
abort(404)
@ -25,4 +26,5 @@ def paginate_faste(self, page=1, per_page=50, max_page=None, step=5):
return Pagination(self, page, per_page, total, items)
BaseQuery.paginate_faste = paginate_faste

View file

@ -72,23 +72,23 @@ class RegisterForm(FlaskForm):
class ProfileForm(FlaskForm):
email = TextField('New email address', [
email = TextField('New Email Address', [
Email(),
Optional(),
Length(min=5, max=128),
Unique(User, User.email, 'Email is taken')
Unique(User, User.email, 'This email address has been taken')
])
current_password = PasswordField('Current password', [Optional()])
current_password = PasswordField('Current Password', [Required()])
new_password = PasswordField('New password (confirm)', [
new_password = PasswordField('New Password', [
Optional(),
EqualTo('password_confirm', message='Passwords must match'),
EqualTo('password_confirm', message='Two passwords must match'),
Length(min=6, max=1024,
message='Password must be at least %(min)d characters long.')
])
password_confirm = PasswordField('Repeat Password')
password_confirm = PasswordField('Repeat New Password')
# Classes for a SelectField that can be set to disable options (id, name, disabled)
@ -126,7 +126,8 @@ class DisabledSelectField(SelectField):
class EditForm(FlaskForm):
display_name = TextField('Torrent display name', [
Length(min=3, max=255,
message='Torrent display name must be at least %(min)d characters long and %(max)d at most.')
message='Torrent display name must be at least %(min)d characters long '
'and %(max)d at most.')
])
category = DisabledSelectField('Category')
@ -172,7 +173,8 @@ class UploadForm(FlaskForm):
display_name = TextField('Torrent display name (optional)', [
Optional(),
Length(min=3, max=255,
message='Torrent display name must be at least %(min)d characters long and %(max)d at most.')
message='Torrent display name must be at least %(min)d characters long and '
'%(max)d at most.')
])
# category = SelectField('Category')
@ -209,7 +211,7 @@ class UploadForm(FlaskForm):
# Decode and ensure data is bencoded data
try:
torrent_dict = bencode.decode(field.data)
#field.data.close()
# field.data.close()
except (bencode.MalformedBencodeException, UnicodeError):
raise ValidationError('Malformed torrent file')
@ -221,7 +223,6 @@ class UploadForm(FlaskForm):
except AssertionError as e:
raise ValidationError('Malformed torrent metadata ({})'.format(e.args[0]))
site_tracker = app.config.get('MAIN_ANNOUNCE_URL')
ensure_tracker = app.config.get('ENFORCE_MAIN_ANNOUNCE_URL')
@ -233,11 +234,12 @@ class UploadForm(FlaskForm):
# Ensure private torrents are using our tracker
if torrent_dict['info'].get('private') == 1:
if torrent_dict['announce'].decode('utf-8') != site_tracker:
raise ValidationError('Private torrent: please set {} as the main tracker'.format(site_tracker))
raise ValidationError(
'Private torrent: please set {} as the main tracker'.format(site_tracker))
elif ensure_tracker and not tracker_found:
raise ValidationError('Please include {} in the trackers of the torrent'.format(site_tracker))
raise ValidationError(
'Please include {} in the trackers of the torrent'.format(site_tracker))
# Note! bencode will sort dict keys, as per the spec
# This may result in a different hash if the uploaded torrent does not match the
@ -274,11 +276,13 @@ class TorrentFileData(object):
# https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure
def _validate_trackers(torrent_dict, tracker_to_check_for=None):
announce = torrent_dict.get('announce')
announce_string = _validate_bytes(announce, 'announce', 'utf-8')
tracker_found = tracker_to_check_for and (announce_string.lower() == tracker_to_check_for.lower()) or False
tracker_found = tracker_to_check_for and (
announce_string.lower() == tracker_to_check_for.lower()) or False
announce_list = torrent_dict.get('announce-list')
if announce_list is not None:

View file

@ -41,8 +41,10 @@ class TorrentFlags(IntEnum):
COMPLETE = 16
DELETED = 32
DB_TABLE_PREFIX = app.config['TABLE_PREFIX']
class Torrent(db.Model):
__tablename__ = DB_TABLE_PREFIX + 'torrents'
@ -83,8 +85,9 @@ class Torrent(db.Model):
main_category = db.relationship('MainCategory', uselist=False,
back_populates='torrents', lazy="joined")
sub_category = db.relationship('SubCategory', uselist=False, backref='torrents', lazy="joined",
primaryjoin="and_(SubCategory.id == foreign(Torrent.sub_category_id), "
"SubCategory.main_category_id == Torrent.main_category_id)")
primaryjoin=(
"and_(SubCategory.id == foreign(Torrent.sub_category_id), "
"SubCategory.main_category_id == Torrent.main_category_id)"))
info = db.relationship('TorrentInfo', uselist=False, back_populates='torrent')
filelist = db.relationship('TorrentFilelist', uselist=False, back_populates='torrent')
stats = db.relationship('Statistic', uselist=False, back_populates='torrent', lazy='joined')
@ -118,7 +121,6 @@ class Torrent(db.Model):
# Escaped
return escape_markup(self.information)
@property
def magnet_uri(self):
return create_magnet(self)
@ -224,7 +226,8 @@ class Trackers(db.Model):
__tablename__ = 'trackers'
id = db.Column(db.Integer, primary_key=True)
uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI), nullable=False, unique=True)
uri = db.Column(db.String(length=255, collation=COL_UTF8_GENERAL_CI),
nullable=False, unique=True)
disabled = db.Column(db.Boolean, nullable=False, default=False)
@classmethod
@ -235,8 +238,10 @@ class Trackers(db.Model):
class TorrentTrackers(db.Model):
__tablename__ = DB_TABLE_PREFIX + 'torrent_trackers'
torrent_id = db.Column(db.Integer, db.ForeignKey(DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True)
tracker_id = db.Column(db.Integer, db.ForeignKey('trackers.id', ondelete="CASCADE"), primary_key=True)
torrent_id = db.Column(db.Integer, db.ForeignKey(
DB_TABLE_PREFIX + 'torrents.id', ondelete="CASCADE"), primary_key=True)
tracker_id = db.Column(db.Integer, db.ForeignKey(
'trackers.id', ondelete="CASCADE"), primary_key=True)
order = db.Column(db.Integer, nullable=False, index=True)
tracker = db.relationship('Trackers', uselist=False, lazy='joined')

View file

@ -6,18 +6,16 @@ from nyaa import bencode, utils
from nyaa import torrents
from nyaa import backend
from nyaa import api_handler
from nyaa.search import search_elastic, search_db
import config
import json
import re
from datetime import datetime, timedelta
import ipaddress
import os.path
import base64
from urllib.parse import quote
import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
import shlex
import math
from werkzeug import url_encode
from itsdangerous import URLSafeSerializer, BadSignature
@ -27,7 +25,15 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
from flask_paginate import Pagination
DEBUG_API = False
DEFAULT_MAX_SEARCH_RESULT = 1000
DEFAULT_PER_PAGE = 75
SERACH_PAGINATE_DISPLAY_MSG = ('Displaying results {start}-{end} out of {total} results.<br>\n'
'Please refine your search results if you can\'t find '
'what you were looking for.')
def redirect_url():
@ -48,144 +54,13 @@ def modify_query(**new_values):
return '{}?{}'.format(flask.request.path, url_encode(args))
@app.template_global()
def filter_truthy(input_list):
''' Jinja2 can't into list comprehension so this is for
the search_results.html template '''
return [item for item in input_list if item]
def search(term='', user=None, sort='id', order='desc', category='0_0', quality_filter='0', page=1, rss=False, admin=False):
sort_keys = {
'id': models.Torrent.id,
'size': models.Torrent.filesize,
'name': models.Torrent.display_name,
'seeders': models.Statistic.seed_count,
'leechers': models.Statistic.leech_count,
'downloads': models.Statistic.download_count
}
sort_ = sort.lower()
if sort_ not in sort_keys:
flask.abort(400)
sort = sort_keys[sort]
order_keys = {
'desc': 'desc',
'asc': 'asc'
}
order_ = order.lower()
if order_ not in order_keys:
flask.abort(400)
filter_keys = {
'0': None,
'1': (models.TorrentFlags.REMAKE, False),
'2': (models.TorrentFlags.TRUSTED, True),
'3': (models.TorrentFlags.COMPLETE, True)
}
sentinel = object()
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
if filter_tuple is sentinel:
flask.abort(400)
if user:
user = models.User.by_id(user)
if not user:
flask.abort(404)
user = user.id
main_category = None
sub_category = None
main_cat_id = 0
sub_cat_id = 0
if category:
cat_match = re.match(r'^(\d+)_(\d+)$', category)
if not cat_match:
flask.abort(400)
main_cat_id = int(cat_match.group(1))
sub_cat_id = int(cat_match.group(2))
if main_cat_id > 0:
if sub_cat_id > 0:
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
else:
main_category = models.MainCategory.by_id(main_cat_id)
if not category:
flask.abort(400)
# Force sort by id desc if rss
if rss:
sort = sort_keys['id']
order = 'desc'
same_user = False
if flask.g.user:
same_user = flask.g.user.id == user
if term:
query = db.session.query(models.TorrentNameSearch)
else:
query = models.Torrent.query
# User view (/user/username)
if user:
query = query.filter(models.Torrent.uploader_id == user)
if not admin:
# Hide all DELETED torrents if regular user
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
# If logged in user is not the same as the user being viewed, show only torrents that aren't hidden or anonymous
# If logged in user is the same as the user being viewed, show all torrents including hidden and anonymous ones
# On RSS pages in user view, show only torrents that aren't hidden or anonymous no matter what
if not same_user or rss:
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN |
models.TorrentFlags.ANONYMOUS)).is_(False))
# General view (homepage, general search view)
else:
if not admin:
# Hide all DELETED torrents if regular user
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.DELETED)).is_(False))
# If logged in, show all torrents that aren't hidden unless they belong to you
# On RSS pages, show all public torrents and nothing more.
if flask.g.user and not rss:
query = query.filter((models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
(models.Torrent.uploader_id == flask.g.user.id))
# Otherwise, show all torrents that aren't hidden
else:
query = query.filter(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False))
if main_category:
query = query.filter(models.Torrent.main_category_id == main_cat_id)
elif sub_category:
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
(models.Torrent.sub_category_id == sub_cat_id))
if filter_tuple:
query = query.filter(models.Torrent.flags.op('&')(int(filter_tuple[0])).is_(filter_tuple[1]))
if term:
for item in shlex.split(term, posix=False):
if len(item) >= 2:
query = query.filter(FullTextSearch(
item, models.TorrentNameSearch, FullTextMode.NATURAL))
# Sort and order
if sort.class_ != models.Torrent:
query = query.join(sort.class_)
query = query.order_by(getattr(sort, order)())
if rss:
query = query.limit(app.config['RESULTS_PER_PAGE'])
else:
query = query.paginate_faste(page, per_page=app.config['RESULTS_PER_PAGE'], step=5)
return query
@app.errorhandler(404)
def not_found(error):
@ -202,8 +77,7 @@ def before_request():
flask.g.user = user
if not 'timeout' in flask.session or flask.session['timeout'] < datetime.now():
print("hio")
if 'timeout' not in flask.session or flask.session['timeout'] < datetime.now():
flask.session['timeout'] = datetime.now() + timedelta(days=7)
flask.session.permanent = True
flask.session.modified = True
@ -225,21 +99,35 @@ def _generate_query_string(term, category, filter, user):
return params
@app.template_filter('utc_time')
def get_utc_timestamp(datetime_str):
''' Returns a UTC POSIX timestamp, as seconds '''
UTC_EPOCH = datetime.utcfromtimestamp(0)
return int((datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') - UTC_EPOCH).total_seconds())
@app.template_filter('display_time')
def get_display_time(datetime_str):
return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d %H:%M')
@app.route('/rss', defaults={'rss': True})
@app.route('/', defaults={'rss': False})
def home(rss):
if flask.request.args.get('page') == 'rss':
rss = True
term = flask.request.args.get('q')
term = flask.request.args.get('q', flask.request.args.get('term'))
sort = flask.request.args.get('s')
order = flask.request.args.get('o')
category = flask.request.args.get('c')
quality_filter = flask.request.args.get('f')
user_name = flask.request.args.get('u')
page = flask.request.args.get('p')
if page:
page = int(page)
category = flask.request.args.get('c', flask.request.args.get('cats'))
quality_filter = flask.request.args.get('f', flask.request.args.get('filter'))
user_name = flask.request.args.get('u', flask.request.args.get('user'))
page = flask.request.args.get('p', flask.request.args.get('offset', 1, int), int)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
user_id = None
if user_name:
@ -249,33 +137,76 @@ def home(rss):
user_id = user.id
query_args = {
'term': term or '',
'user': user_id,
'sort': sort or 'id',
'order': order or 'desc',
'category': category or '0_0',
'quality_filter': quality_filter or '0',
'page': page or 1,
'rss': rss
'page': page,
'rss': rss,
'per_page': per_page
}
# God mode
if flask.g.user and flask.g.user.is_admin:
query_args['admin'] = True
if flask.g.user:
query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True
query = search(**query_args)
# If searching, we get results from elastic search
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
if rss:
return render_rss('/', query)
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
# Only allow up to (max_search_results / page) pages
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page))))
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
if rss:
return render_rss('/', query_results, use_elastic=True)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('home.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
rss_filter=rss_query_string)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
return flask.render_template('home.html',
torrent_query=query,
search=query_args,
rss_filter=rss_query_string)
# If ES is enabled, default to db search for browsing
if use_elastic:
query_args['term'] = ''
else: # Otherwise, use db search for everything
query_args['term'] = term or ''
query = search_db(**query_args)
if rss:
return render_rss('/', query, use_elastic=False)
else:
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
# Use elastic is always false here because we only hit this section
# if we're browsing without a search term (which means we default to DB)
# or if ES is disabled
return flask.render_template('home.html',
use_elastic=False,
torrent_query=query,
search=query_args,
rss_filter=rss_query_string)
@app.route('/user/<user_name>', methods=['GET', 'POST'])
@app.route('/user/<user_name>')
def view_user(user_name):
user = models.User.by_username(user_name)
@ -320,6 +251,10 @@ def view_user(user_name):
if page:
page = int(page)
per_page = app.config.get('RESULTS_PER_PAGE')
if not per_page:
per_page = DEFAULT_PER_PAGE
query_args = {
'term': term or '',
'user': user.id,
@ -328,27 +263,68 @@ def view_user(user_name):
'category': category or '0_0',
'quality_filter': quality_filter or '0',
'page': page or 1,
'rss': False
'rss': False,
'per_page': per_page
}
# God mode
if flask.g.user and flask.g.user.is_admin:
query_args['admin'] = True
query = search(**query_args)
if flask.g.user:
query_args['logged_in_user'] = flask.g.user
if flask.g.user.is_admin: # God mode
query_args['admin'] = True
# Use elastic search for term searching
rss_query_string = _generate_query_string(term, category, quality_filter, user_name)
use_elastic = app.config.get('USE_ELASTIC_SEARCH')
if use_elastic and term:
query_args['term'] = term
return flask.render_template('user.html',
form=form,
torrent_query=query,
search=query_args,
user=user,
user_page=True,
rss_filter=rss_query_string,
level=level,
admin=admin,
superadmin=superadmin)
max_search_results = app.config.get('ES_MAX_SEARCH_RESULT')
if not max_search_results:
max_search_results = DEFAULT_MAX_SEARCH_RESULT
# Only allow up to (max_search_results / page) pages
max_page = min(query_args['page'], int(math.ceil(max_search_results / float(per_page))))
query_args['page'] = max_page
query_args['max_search_results'] = max_search_results
query_results = search_elastic(**query_args)
max_results = min(max_search_results, query_results['hits']['total'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=per_page,
total=max_results, bs_version=3, page_parameter='p',
display_msg=SERACH_PAGINATE_DISPLAY_MSG)
return flask.render_template('user.html',
use_elastic=True,
pagination=pagination,
torrent_query=query_results,
search=query_args,
user=user,
user_page=True,
rss_filter=rss_query_string,
level=level,
admin=admin,
superadmin=superadmin,
form=form)
# Similar logic as home page
else:
if use_elastic:
query_args['term'] = ''
else:
query_args['term'] = term or ''
query = search_db(**query_args)
return flask.render_template('user.html',
use_elastic=False,
torrent_query=query,
search=query_args,
user=user,
user_page=True,
rss_filter=rss_query_string,
level=level,
admin=admin,
superadmin=superadmin,
form=form)
@app.template_filter('rfc822')
@ -356,19 +332,27 @@ def _jinja2_filter_rfc822(date, fmt=None):
return formatdate(float(date.strftime('%s')))
def render_rss(label, query):
@app.template_filter('rfc822_es')
def _jinja2_filter_rfc822(datestr, fmt=None):
return formatdate(float(datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%s')))
def render_rss(label, query, use_elastic):
rss_xml = flask.render_template('rss.xml',
use_elastic=use_elastic,
term=label,
site_url=flask.request.url_root,
query=query)
torrent_query=query)
response = flask.make_response(rss_xml)
response.headers['Content-Type'] = 'application/xml'
# Cache for an hour
response.headers['Cache-Control'] = 'max-age={}'.format(1*5*60)
return response
#@app.route('/about', methods=['GET'])
# @app.route('/about', methods=['GET'])
# def about():
# return flask.render_template('about.html')
# return flask.render_template('about.html')
@app.route('/login', methods=['GET', 'POST'])
@ -385,7 +369,8 @@ def login():
if not user:
user = models.User.by_email(username)
if not user or password != user.password_hash or user.status == models.UserStatusType.INACTIVE:
if (not user or password != user.password_hash
or user.status == models.UserStatusType.INACTIVE):
flask.flash(flask.Markup(
'<strong>Login failed!</strong> Incorrect username or password.'), 'danger')
return flask.redirect(flask.url_for('login'))
@ -463,25 +448,36 @@ def profile():
if flask.request.method == 'POST' and form.validate():
user = flask.g.user
new_email = form.email.data
new_email = form.email.data.strip()
new_password = form.new_password.data
if new_email:
# enforce password check on email change too
if form.current_password.data != user.password_hash:
flask.flash(flask.Markup(
'<strong>Email change failed!</strong> Incorrect password.'), 'danger')
return flask.redirect('/profile')
user.email = form.email.data
flask.flash(flask.Markup(
'<strong>Email successfully changed!</strong>'), 'info')
if new_password:
if form.current_password.data != user.password_hash:
flask.flash(flask.Markup(
'<strong>Password change failed!</strong> Incorrect password.'), 'danger')
return flask.redirect('/profile')
user.password_hash = form.new_password.data
flask.flash(flask.Markup(
'<strong>Password successfully changed!</strong>'), 'info')
db.session.add(user)
db.session.commit()
flask.g.user = user
return flask.redirect('/profile')
return flask.render_template('profile.html', form=form, level=level)
current_email = models.User.by_id(flask.g.user.id).email
return flask.render_template('profile.html', form=form, email=current_email, level=level)
@app.route('/user/activate/<payload>')
@ -572,7 +568,8 @@ def edit_torrent(torrent_id):
if flask.request.method == 'POST' and form.validate():
# Form has been sent, edit torrent with data.
torrent.main_category_id, torrent.sub_category_id = form.category.parsed_data.get_category_ids()
torrent.main_category_id, torrent.sub_category_id = \
form.category.parsed_data.get_category_ids()
torrent.display_name = (form.display_name.data or '').strip()
torrent.information = (form.information.data or '').strip()
torrent.description = (form.description.data or '').strip()
@ -585,6 +582,9 @@ def edit_torrent(torrent_id):
db.session.commit()
flask.flash(flask.Markup(
'Torrent has been successfully edited! Changes might take a few minutes to show up.'), 'info')
return flask.redirect('/view/' + str(torrent_id))
else:
# Setup form with pre-formatted form.
@ -599,7 +599,10 @@ def edit_torrent(torrent_id):
form.is_complete.data = torrent.complete
form.is_anonymous.data = torrent.anonymous
return flask.render_template('edit.html', form=form, torrent=torrent, admin=flask.g.user.is_admin)
return flask.render_template('edit.html',
form=form,
torrent=torrent,
admin=flask.g.user.is_admin)
@app.route('/view/<int:torrent_id>/magnet')
@ -651,8 +654,10 @@ def get_activation_link(user):
def send_verification_email(to_address, activ_link):
''' this is until we have our own mail server, obviously. This can be greatly cut down if on same machine.
probably can get rid of all but msg formatting/building, init line and sendmail line if local SMTP server '''
''' this is until we have our own mail server, obviously.
This can be greatly cut down if on same machine.
probably can get rid of all but msg formatting/building,
init line and sendmail line if local SMTP server '''
msg_body = 'Please click on: ' + activ_link + ' to activate your account.\n\n\nUnsubscribe:'
@ -679,7 +684,7 @@ def _create_user_class_choices():
return choices
#################################### STATIC PAGES ####################################
# #################################### STATIC PAGES ####################################
@app.route('/rules', methods=['GET'])
def site_rules():
return flask.render_template('rules.html')
@ -690,9 +695,9 @@ def site_help():
return flask.render_template('help.html')
#################################### API ROUTES ####################################
# #################################### API ROUTES ####################################
# DISABLED FOR NOW
@app.route('/api/upload', methods = ['POST'])
@app.route('/api/upload', methods=['POST'])
def api_upload():
api_response = api_handler.api_upload(flask.request)
return api_response

328
nyaa/search.py Normal file
View file

@ -0,0 +1,328 @@
import flask
import re
import math
import json
import shlex
from nyaa import app, db
from nyaa import models
import sqlalchemy_fulltext.modes as FullTextMode
from sqlalchemy_fulltext import FullTextSearch
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
def search_elastic(term='', user=None, sort='id', order='desc',
category='0_0', quality_filter='0', page=1,
rss=False, admin=False, logged_in_user=None,
per_page=75, max_search_results=1000):
# This function can easily be memcached now
es_client = Elasticsearch()
es_sort_keys = {
'id': 'id',
'size': 'filesize',
# 'name': 'display_name', # This is slow and buggy
'seeders': 'seed_count',
'leechers': 'leech_count',
'downloads': 'download_count'
}
sort_ = sort.lower()
if sort_ not in es_sort_keys:
flask.abort(400)
es_sort = es_sort_keys[sort]
order_keys = {
'desc': 'desc',
'asc': 'asc'
}
order_ = order.lower()
if order_ not in order_keys:
flask.abort(400)
# Only allow ID, desc if RSS
if rss:
sort = es_sort_keys['id']
order = 'desc'
# funky, es sort is default asc, prefixed by '-' if desc
if 'desc' == order:
es_sort = '-' + es_sort
# Quality filter
quality_keys = [
'0', # Show all
'1', # No remakes
'2', # Only trusted
'3' # Only completed
]
if quality_filter.lower() not in quality_keys:
flask.abort(400)
quality_filter = int(quality_filter)
# Category filter
main_category = None
sub_category = None
main_cat_id = 0
sub_cat_id = 0
if category:
cat_match = re.match(r'^(\d+)_(\d+)$', category)
if not cat_match:
flask.abort(400)
main_cat_id = int(cat_match.group(1))
sub_cat_id = int(cat_match.group(2))
if main_cat_id > 0:
if sub_cat_id > 0:
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
if not sub_category:
flask.abort(400)
else:
main_category = models.MainCategory.by_id(main_cat_id)
if not main_category:
flask.abort(400)
# This might be useless since we validate users
# before coming into this method, but just to be safe...
if user:
user = models.User.by_id(user)
if not user:
flask.abort(404)
user = user.id
same_user = False
if logged_in_user:
same_user = user == logged_in_user.id
s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME')) # todo, sukebei prefix
# Apply search term
if term:
s = s.query('simple_query_string',
analyzer='my_search_analyzer',
default_operator="AND",
query=term)
# User view (/user/username)
if user:
s = s.filter('term', uploader_id=user)
if not admin:
# Hide all DELETED torrents if regular user
s = s.filter('term', deleted=False)
# If logged in user is not the same as the user being viewed,
# show only torrents that aren't hidden or anonymous.
#
# If logged in user is the same as the user being viewed,
# show all torrents including hidden and anonymous ones.
#
# On RSS pages in user view, show only torrents that
# aren't hidden or anonymous no matter what
if not same_user or rss:
s = s.filter('term', hidden=False)
s = s.filter('term', anonymous=False)
# General view (homepage, general search view)
else:
if not admin:
# Hide all DELETED torrents if regular user
s = s.filter('term', deleted=False)
# If logged in, show all torrents that aren't hidden unless they belong to you
# On RSS pages, show all public torrents and nothing more.
if logged_in_user and not rss:
hiddenFilter = Q('term', hidden=False)
userFilter = Q('term', uploader_id=logged_in_user.id)
combinedFilter = hiddenFilter | userFilter
s = s.filter('bool', filter=[combinedFilter])
else:
s = s.filter('term', hidden=False)
if main_category:
s = s.filter('term', main_category_id=main_cat_id)
elif sub_category:
s = s.filter('term', main_category_id=main_cat_id)
s = s.filter('term', sub_category_id=sub_cat_id)
if quality_filter == 0:
pass
elif quality_filter == 1:
s = s.filter('term', remake=False)
elif quality_filter == 2:
s = s.filter('term', trusted=True)
elif quality_filter == 3:
s = s.filter('term', complete=True)
# Apply sort
s = s.sort(es_sort)
# Only show first RESULTS_PER_PAGE items for RSS
if rss:
s = s[0:per_page]
else:
max_page = min(page, int(math.ceil(max_search_results / float(per_page))))
from_idx = (max_page - 1) * per_page
to_idx = min(max_search_results, max_page * per_page)
s = s[from_idx:to_idx]
highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
if highlight:
s = s.highlight_options(tags_schema='styled')
s = s.highlight("display_name")
# Return query, uncomment print line to debug query
# from pprint import pprint
# print(json.dumps(s.to_dict()))
return s.execute()
def search_db(term='', user=None, sort='id', order='desc', category='0_0',
quality_filter='0', page=1, rss=False, admin=False,
logged_in_user=None, per_page=75):
sort_keys = {
'id': models.Torrent.id,
'size': models.Torrent.filesize,
# Disable this because we disabled this in search_elastic, for the sake of consistency:
# 'name': models.Torrent.display_name,
'seeders': models.Statistic.seed_count,
'leechers': models.Statistic.leech_count,
'downloads': models.Statistic.download_count
}
sort_ = sort.lower()
if sort_ not in sort_keys:
flask.abort(400)
sort = sort_keys[sort]
order_keys = {
'desc': 'desc',
'asc': 'asc'
}
order_ = order.lower()
if order_ not in order_keys:
flask.abort(400)
filter_keys = {
'0': None,
'1': (models.TorrentFlags.REMAKE, False),
'2': (models.TorrentFlags.TRUSTED, True),
'3': (models.TorrentFlags.COMPLETE, True)
}
sentinel = object()
filter_tuple = filter_keys.get(quality_filter.lower(), sentinel)
if filter_tuple is sentinel:
flask.abort(400)
if user:
user = models.User.by_id(user)
if not user:
flask.abort(404)
user = user.id
main_category = None
sub_category = None
main_cat_id = 0
sub_cat_id = 0
if category:
cat_match = re.match(r'^(\d+)_(\d+)$', category)
if not cat_match:
flask.abort(400)
main_cat_id = int(cat_match.group(1))
sub_cat_id = int(cat_match.group(2))
if main_cat_id > 0:
if sub_cat_id > 0:
sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
else:
main_category = models.MainCategory.by_id(main_cat_id)
if not category:
flask.abort(400)
# Force sort by id desc if rss
if rss:
sort = sort_keys['id']
order = 'desc'
same_user = False
if logged_in_user:
same_user = logged_in_user.id == user
if term:
query = db.session.query(models.TorrentNameSearch)
else:
query = models.Torrent.query
# User view (/user/username)
if user:
query = query.filter(models.Torrent.uploader_id == user)
if not admin:
# Hide all DELETED torrents if regular user
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.DELETED)).is_(False))
# If logged in user is not the same as the user being viewed,
# show only torrents that aren't hidden or anonymous
#
# If logged in user is the same as the user being viewed,
# show all torrents including hidden and anonymous ones
#
# On RSS pages in user view,
# show only torrents that aren't hidden or anonymous no matter what
if not same_user or rss:
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN | models.TorrentFlags.ANONYMOUS)).is_(False))
# General view (homepage, general search view)
else:
if not admin:
# Hide all DELETED torrents if regular user
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.DELETED)).is_(False))
# If logged in, show all torrents that aren't hidden unless they belong to you
# On RSS pages, show all public torrents and nothing more.
if logged_in_user and not rss:
query = query.filter(
(models.Torrent.flags.op('&')(int(models.TorrentFlags.HIDDEN)).is_(False)) |
(models.Torrent.uploader_id == logged_in_user.id))
# Otherwise, show all torrents that aren't hidden
else:
query = query.filter(models.Torrent.flags.op('&')(
int(models.TorrentFlags.HIDDEN)).is_(False))
if main_category:
query = query.filter(models.Torrent.main_category_id == main_cat_id)
elif sub_category:
query = query.filter((models.Torrent.main_category_id == main_cat_id) &
(models.Torrent.sub_category_id == sub_cat_id))
if filter_tuple:
query = query.filter(models.Torrent.flags.op('&')(
int(filter_tuple[0])).is_(filter_tuple[1]))
if term:
for item in shlex.split(term, posix=False):
if len(item) >= 2:
query = query.filter(FullTextSearch(
item, models.TorrentNameSearch, FullTextMode.NATURAL))
# Sort and order
if sort.class_ != models.Torrent:
query = query.join(sort.class_)
query = query.order_by(getattr(sort, order)())
if rss:
query = query.limit(per_page)
else:
query = query.paginate_faste(page, per_page=per_page, step=5)
return query

Binary file not shown.

View file

@ -97,4 +97,86 @@ table.torrent-list thead th.sorting_desc:after {
margin-left: 20px;
margin-bottom: 10px;
}
}
}
.search-container {
display: -webkit-box;
display: -ms-flexbox;
display: flex;
-webkit-box-orient: vertical;
-webkit-box-direction: normal;
-ms-flex-direction: column;
flex-direction: column;
}
.form-control.search-bar {
-webkit-box-ordinal-group: 2;
-ms-flex-order: 1;
order: 1;
width: 99%;
padding-right: 4em;
}
.search-btn {
-webkit-box-ordinal-group: 3;
-ms-flex-order: 2;
order: 2;
-ms-flex-item-align: end;
align-self: flex-end;
top: -34px;
height: 0;
width: auto;
z-index: 3;
}
#navFilter-criteria {
-webkit-box-ordinal-group: 4;
-ms-flex-order: 3;
order: 3;
}
#navFilter-category {
-webkit-box-ordinal-group: 5;
-ms-flex-order: 4;
order: 4;
}
.nav-filter {
width: 100%;
padding: 1em 0;
}
.bootstrap-select > button {
margin-top: 1em;
}
/* Allows the bootstrap selects on nav show outside the
collapsible section of the navigation */
.navbar-collapse.in {
overflow-y: visible;
}
@media (min-width: 991px) {
.search-btn {
top: 0;
width: auto;
}
.bootstrap-select > button {
margin-top: auto;
}
}
/* elasticsearch term highlight */
.hlt1 {
font-style: normal;
display: inline-block;
padding: 0 3px;
border-radius: 3px;
border: 1px solid rgba(100, 56, 0, 0.8);
background: rgba(200,127,0,0.3);
}
ul.nav-tabs#profileTabs {
margin-bottom: 15px;
}

View file

@ -105,8 +105,13 @@ document.addEventListener("DOMContentLoaded", function() {
var previewTabEl = markdownEditor.querySelector(previewTabSelector);
var targetEl = markdownEditor.querySelector(targetSelector);
var reader = new commonmark.Parser({safe: true});
var writer = new commonmark.HtmlRenderer({safe: true});
writer.softbreak = '<br />';
previewTabEl.addEventListener('click', function () {
targetEl.innerHTML = marked(sourceSelector.value.trim(), { sanitize: true, breaks:true });
var parsed = reader.parse(sourceSelector.value.trim());
targetEl.innerHTML = writer.render(parsed);
});
});
});

BIN
nyaa/static/pinned-tab.svg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

View file

@ -1,9 +1,10 @@
{% extends "layout.html" %}
{% block title %}Browse :: {{ config.SITE_NAME }}{% endblock %}
{% block title %}{% if search.term %}{{ search.term | e}}{% else %}Browse{% endif %} :: {{ config.SITE_NAME }}{% endblock %}
{% block body %}
<div class="alert alert-info">
<p><strong>Hello!</strong> This site is still a work in progress and new features (faster and actually more accurate search, comments etc.) will be added in the coming days.</p>
<p><strong>5/17 Update:</strong> We've added faster and more accurate search! In addition to your typical keyword search in both English and other languages, you can also now use powerful operators
like <kbd>clockwork planet -horrible</kbd> or <kbd>commie|horrible|cartel yowamushi</kbd> to search. For all supported operators, please visit <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html#_simple_query_string_syntax">here</a>. More features are coming soon!</p><br>
<p>We welcome you to provide feedback at <a href="irc://irc.rizon.net/nyaa-dev">#nyaa-dev@irc.rizon.net</a></p>
<p>Our GitHub: <a href="https://github.com/nyaadevs" target="_blank">https://github.com/nyaadevs</a> - creating <a href="https://github.com/nyaadevs/nyaa/issues">issues</a> for features and faults is recommendable!</p>
</div>

View file

@ -8,6 +8,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<link rel="shortcut icon" type="image/png" href="/static/favicon.png">
<link rel="icon" type="image/png" href="/static/favicon.png">
<link rel="mask-icon" href="/static/pinned-tab.svg" color="#3582F7">
<link rel="alternate" type="application/rss+xml" href="{% if rss_filter %}{{ url_for('home', page='rss', _external=True, **rss_filter) }}{% else %}{{ url_for('home', page='rss', _external=True) }}{% endif %}" />
<!-- Bootstrap core CSS -->
@ -34,7 +35,7 @@
<!-- Modified to not apply border-radius to selectpickers and stuff so our navbar looks cool -->
<script src="/static/js/bootstrap-select.js"></script>
<script src="/static/js/main.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/marked/0.3.6/marked.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/commonmark/0.27.0/commonmark.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
@ -144,16 +145,16 @@
{% else %}
<form class="navbar-form navbar-right form" action="/" method="get">
{% endif %}
<div class="input-group">
<input type="text" class="form-control" name="q" placeholder="Search..." value="{{ search["term"] if search is defined else '' }}">
<div class="input-group-btn" id="navFilter">
<div class="input-group search-container">
<input type="text" class="form-control search-bar" name="q" placeholder="Search..." value="{{ search["term"] if search is defined else '' }}">
<div class="input-group-btn nav-filter" id="navFilter-criteria">
<select class="selectpicker show-tick" title="Filter" data-width="120px" name="f">
<option value="0" title="Show all" {% if search is defined and search["quality_filter"] == "0" %}selected{% else %}selected{% endif %}>Show all</option>
<option value="0" title="No filter" {% if search is defined and search["quality_filter"] == "0" %}selected{% else %}selected{% endif %}>No filter</option>
<option value="1" title="No remakes" {% if search is defined and search["quality_filter"] == "1" %}selected{% endif %}>No remakes</option>
<option value="2" title="Trusted only" {% if search is defined and search["quality_filter"] == "2" %}selected{% endif %}>Trusted only</option>
</select>
</div>
<div class="input-group-btn" id="navFilter">
<div class="input-group-btn nav-filter" id="navFilter-category">
{% set nyaa_cats = [('1_0', 'Anime', 'Anime'),
('1_1', '- Anime Music Video', 'Anime - AMV'),
('1_2', '- English-translated', 'Anime - English'),
@ -177,7 +178,7 @@
('6_0', 'Software', 'Software'),
('6_1', '- Applications', 'Software - Apps'),
('6_2', '- Games', 'Software - Games')] %}
{% set suke_cats = [('1_0', 'Art', 'Art'),
{% set suke_cats = [('1_0', 'Art', 'Art'),
('1_1', '- Anime', 'Art - Anime'),
('1_2', '- Doujinshi', 'Art - Doujinshi'),
('1_3', '- Games', 'Art - Games'),
@ -192,8 +193,8 @@
{% set used_cats = suke_cats %}
{% endif %}
<select class="selectpicker show-tick" title="Category" data-width="170px" name="c">
<option value="0_0" title="Show all" {% if search is defined and search["category"] == "0_0" %}selected{% else %}selected{% endif %}>
Show all
<option value="0_0" title="All categories" {% if search is defined and search["category"] == "0_0" %}selected{% else %}selected{% endif %}>
All categories
</option>
{% for cat_id, cat_name, cat_title in used_cats %}
<option value="{{ cat_id }}" title="{{ cat_title }}" {% if search is defined and search.category == cat_id %}selected{% endif %}>
@ -202,7 +203,7 @@
{% endfor %}
</select>
</div>
<div class="input-group-btn">
<div class="input-group-btn search-btn">
<button class="btn btn-primary" type="submit">
<i class="fa fa-search fa-fw"></i>
</button>

View file

@ -3,55 +3,83 @@
{% block body %}
{% from "_formhelpers.html" import render_field %}
{% if g.user %}
<h1>My Account</h1>
<dl class="dl-horizontal">
<h1>Edit Profile</h1>
<ul class="nav nav-tabs" id="profileTabs" role="tablist">
<li role="presentation" class="active">
<a href="#password-change" id="password-change-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="true">Password</a>
</li>
<li role="presentation">
<a href="#email-change" id="email-change-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="false">Email</a>
</li>
<li role="presentation">
<a href="#general-info" id="general-info-tab" role="tab" data-toggle="tab" aria-controls="profile" aria-expanded="false">My Info</a>
</li>
</ul>
<div class="tab-content">
<div class="tab-pane fade active in" role="tabpanel" id="password-change" aria-labelledby="password-change-tab">
<form method="POST">
{{ form.csrf_token }}
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.current_password, class_='form-control', placeholder='Current password') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.new_password, class_='form-control', placeholder='New password') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }}
</div>
</div>
<br>
<div class="row">
<div class="col-md-4">
<input type="submit" value="Update" class="btn btn-primary">
</div>
</div>
</form>
</div>
<div class="tab-pane fade" role="tabpanel" id="email-change" aria-labelledby="email-change-tab">
<form method="POST">
{{ form.csrf_token }}
<div class="row">
<div class="form-group col-md-4">
<label class="control-label" for="current_email">Current Email</label>
<div>{{email}}</div>
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.email, class_='form-control', placeholder='New email address') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.current_password, class_='form-control', placeholder='Current password') }}
</div>
</div>
<br>
<div class="row">
<div class="col-md-4">
<input type="submit" value="Update" class="btn btn-primary">
</div>
</div>
</form>
</div>
<div class="tab-pane fade" role="tabpanel" id="general-info" aria-labelledby="general-info-tab">
<dl class="dl-horizontal">
<dt>User ID:</dt>
<dd>{{g.user.id}}</dd>
<dt>Account created on:</dt>
<dd>{{g.user.created_time}}</dd>
<dt>Email address:</dt>
<dd>{{g.user.email}}</dd>
<dt>User class:</dt>
<dd>{{level}}</dd><br>
</dl>
{% endif %}
</div>
</div>
<h2>Edit Profile</h2>
<form method="POST">
{{ form.csrf_token }}
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.email, class_='form-control', placeholder='New email address') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.current_password, class_='form-control', placeholder='Current password') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.new_password, class_='form-control', placeholder='New password') }}
</div>
</div>
<div class="row">
<div class="form-group col-md-4">
{{ render_field(form.password_confirm, class_='form-control', placeholder='New password (confirm)') }}
</div>
</div>
<br>
<div class="row">
<div class="col-md-4">
<input type="submit" value="Update" class="btn btn-primary">
</div>
</div>
</form>
{% endblock %}

View file

@ -4,20 +4,32 @@
<description>RSS Feed for {{ term }}</description>
<link>{{ url_for('home', _external=True) }}</link>
<atom:link href="{{ url_for('home', page='rss', _external=True) }}" rel="self" type="application/rss+xml" />
{% for torrent in query %}
{% for torrent in torrent_query %}
{% if torrent.has_torrent %}
<item>
<title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ url_for('download_torrent', torrent_id=torrent.meta.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ url_for('download_torrent', torrent_id=torrent.id, _external=True) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item>
{% else %}
<item>
<title>{{ torrent.display_name }}</title>
{% if use_elastic %}
<link>{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.meta.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822_es }}</pubDate>
{% else %}
<link>{{ torrent.magnet_uri }}</link>
<guid isPermaLink="true">{{ url_for('view_torrent', torrent_id=torrent.id, _external=True) }}</guid>
<pubDate>{{ torrent.created_time|rfc822 }}</pubDate>
{% endif %}
</item>
{% endif %}
{% endfor %}

View file

@ -8,7 +8,7 @@
{{ caller() }}
</th>
{% endmacro %}
{% if torrent_query.items %}
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
<div class="table-responsive">
<table class="table table-bordered table-hover table-striped torrent-list">
<thead>
@ -16,7 +16,7 @@
{% call render_column_header("hdr-category", "width:80px;", center_text=True) %}
<div>Category</div>
{% endcall %}
{% call render_column_header("hdr-name", "width:auto;", sort_key="name") %}
{% call render_column_header("hdr-name", "width:auto;") %}
<div>Name</div>
{% endcall %}
{% call render_column_header("hdr-link", "width:70px;", center_text=True) %}
@ -45,27 +45,51 @@
</tr>
</thead>
<tbody>
{% for torrent in torrent_query.items %}
{% set torrents = torrent_query if use_elastic else torrent_query.items %}
{% for torrent in torrents %}
<tr class="{% if torrent.deleted %}deleted{% elif torrent.hidden %}warning{% elif torrent.remake %}danger{% elif torrent.trusted %}success{% else %}default{% endif %}">
{% set cat_id = (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
{% set cat_id = (torrent.main_category_id|string) + '_' + (torrent.sub_category_id|string) if use_elastic else (torrent.main_category.id|string) + '_' + (torrent.sub_category.id|string) %}
{% set icon_dir = config.SITE_FLAVOR %}
<td style="padding:0 4px;">
{% if use_elastic %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category_id }} - {{ torrent.sub_category_id }}">
{% else %}
<a href="/?c={{ cat_id }}" title="{{ torrent.main_category.name }} - {{ torrent.sub_category.name }}">
{% endif %}
<img src="/static/img/icons/{{ icon_dir }}/{{ cat_id }}.png">
</a>
</td>
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}">{{ torrent.display_name | escape }}</a></td>
{% if use_elastic %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.meta.id) }}" title="{{ torrent.display_name | escape }}">{%if "highlight" in torrent.meta %}{{ torrent.meta.highlight.display_name[0] | safe }}{% else %}{{torrent.display_name}}{%endif%}</a></td>
{% else %}
<td><a href="{{ url_for('view_torrent', torrent_id=torrent.id) }}" title="{{ torrent.display_name | escape }}">{{ torrent.display_name | escape }}</a></td>
{% endif %}
<td style="white-space: nowrap;text-align: center;">
{% if torrent.has_torrent %}<a href="{{ url_for('download_torrent', torrent_id=torrent.id) }}"><i class="fa fa-fw fa-download"></i></a>{% endif %}
{% if use_elastic %}
<a href="{{ create_magnet_from_info(torrent.display_name, torrent.info_hash) }}"><i class="fa fa-fw fa-magnet"></i></a>
{% else %}
<a href="{{ torrent.magnet_uri }}"><i class="fa fa-fw fa-magnet"></i></a>
{% endif %}
</td>
<td class="text-center">{{ torrent.filesize | filesizeformat(True) }}</td>
{% if use_elastic %}
<td class="text-center" data-timestamp="{{ torrent.created_time | utc_time }}">{{ torrent.created_time | display_time }}</td>
{% else %}
<td class="text-center" data-timestamp="{{ torrent.created_utc_timestamp|int }}">{{ torrent.created_time.strftime('%Y-%m-%d %H:%M') }}</td>
{% endif %}
{% if config.ENABLE_SHOW_STATS %}
{% if use_elastic %}
<td class="text-center" style="color: green;">{{ torrent.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.leech_count }}</td>
<td class="text-center">{{ torrent.download_count }}</td>
{% else %}
<td class="text-center" style="color: green;">{{ torrent.stats.seed_count }}</td>
<td class="text-center" style="color: red;">{{ torrent.stats.leech_count }}</td>
<td class="text-center">{{ torrent.stats.download_count }}</td>
{% endif %}
{% endif %}
</tr>
{% endfor %}
</tbody>
@ -76,6 +100,11 @@
{% endif %}
<center>
{% if use_elastic %}
{{ pagination.info }}
{{ pagination.links }}
{% else %}
{% from "bootstrap/pagination.html" import render_pagination %}
{{ render_pagination(torrent_query) }}
{% endif %}
</center>

View file

@ -13,7 +13,7 @@
<form method="POST" enctype="multipart/form-data">
{% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <i>{{config.MAIN_ANNOUNCE_URL}}</i> in your trackers</p>{% endif %}
{% if config.ENFORCE_MAIN_ANNOUNCE_URL %}<p><strong>Important:</strong> Please include <kbd>{{config.MAIN_ANNOUNCE_URL}}</kbd> in your trackers</p>{% endif %}
<div class="row">
<div class="form-group col-md-6">
{{ render_upload(form.torrent_file, accept=".torrent") }}

View file

@ -92,7 +92,7 @@
<i class="glyphicon glyphicon-folder-open"></i>&nbsp;&nbsp;<b>{{ key }}</b></td>
{{ loop(value.items()) }}
{%- else %}
<td style="padding-left: {{ loop.depth0 * 20 }}px">
<td{% if loop.depth0 is greaterthan 0 %} style="padding-left: {{ loop.depth0 * 20 }}px"{% endif %}>
<i class="glyphicon glyphicon-file"></i>&nbsp;{{ key }}</td>
<td class="col-md-2">{{ value | filesizeformat(True) }}</td>
{%- endif %}
@ -122,8 +122,11 @@
<script>
var target = document.getElementById('torrent-description');
var text = target.innerHTML;
var html = marked(text.trim(), { sanitize: true, breaks:true });
target.innerHTML = html;
var reader = new commonmark.Parser({safe: true});
var writer = new commonmark.HtmlRenderer({safe: true});
writer.softbreak = '<br />';
var parsed = reader.parse(text.trim());
target.innerHTML = writer.render(parsed);
</script>
{% endblock %}

View file

@ -3,6 +3,7 @@ import base64
import time
from urllib.parse import urlencode
from orderedset import OrderedSet
from nyaa import app
from nyaa import bencode
from nyaa import app
@ -54,9 +55,23 @@ def get_trackers(torrent):
return list(trackers)
def get_trackers_magnet():
trackers = OrderedSet()
# Our main one first
main_announce_url = app.config.get('MAIN_ANNOUNCE_URL')
if main_announce_url:
trackers.add(main_announce_url)
# and finally our tracker list
trackers.update(default_trackers())
return list(trackers)
def create_magnet(torrent, max_trackers=5, trackers=None):
if trackers is None:
trackers = get_trackers(torrent)
trackers = get_trackers_magnet()
magnet_parts = [
('dn', torrent.display_name)
@ -68,6 +83,24 @@ def create_magnet(torrent, max_trackers=5, trackers=None):
return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts)
# For processing ES links
@app.context_processor
def create_magnet_from_info():
def _create_magnet_from_info(display_name, info_hash, max_trackers=5, trackers=None):
if trackers is None:
trackers = get_trackers_magnet()
magnet_parts = [
('dn', display_name)
]
for tracker in trackers[:max_trackers]:
magnet_parts.append(('tr', tracker))
b32_info_hash = base64.b32encode(bytes.fromhex(info_hash)).decode('utf-8')
return 'magnet:?xt=urn:btih:' + b32_info_hash + '&' + urlencode(magnet_parts)
return dict(create_magnet_from_info=_create_magnet_from_info)
def create_default_metadata_base(torrent, trackers=None):
if trackers is None:
trackers = get_trackers(torrent)

View file

@ -24,11 +24,17 @@ pycodestyle==2.3.1
pycparser==2.17
pyparsing==2.2.0
six==1.10.0
SQLAlchemy>=1.1.9
SQLAlchemy==1.1.9
SQLAlchemy-FullText-Search==0.2.3
SQLAlchemy-Utils>=0.32.14
SQLAlchemy-Utils==0.32.14
uWSGI==2.0.15
visitor==0.1.3
webassets==0.12.1
Werkzeug==0.12.1
WTForms==2.1
## elasticsearch dependencies
elasticsearch==5.3.0
elasticsearch-dsl==5.2.0
progressbar2==3.20.0
mysql-replication==0.13
flask-paginate==0.4.5

185
sync_es.py Normal file
View file

@ -0,0 +1,185 @@
#!/usr/bin/env python
"""
stream changes in mysql (on the torrents and statistics table) into
elasticsearch as they happen on the binlog. This keeps elasticsearch in sync
with whatever you do to the database, including stuff like admin queries. Also,
because mysql keeps the binlog around for N days before deleting old stuff, you
can survive a hiccup of elasticsearch or this script dying and pick up where
you left off.
For that "picking up" part, this script depends on one piece of external state:
its last known binlog filename and position. This is saved off as a JSON file
to a configurable location on the filesystem periodically. If the file is not
present then you can initialize it with the values from `SHOW MASTER STATUS`
from the mysql repl, which will start the sync from current state.
In the case of catastrophic elasticsearch meltdown where you need to
reconstruct the index, you'll want to be a bit careful with coordinating
sync_es and import_to_es scripts. If you run import_to_es first than run
sync_es against SHOW MASTER STATUS, anything that changed the database between
when import_to_es and sync_es will be lost. Instead, you can run SHOW MASTER
STATUS _before_ you run import_to_es. That way you'll definitely pick up any
changes that happen while the import_to_es script is dumping stuff from the
database into es, at the expense of redoing a (small) amount of indexing.
"""
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent
from datetime import datetime
from nyaa.models import TorrentFlags
import sys
import json
import time
import logging
logging.basicConfig()
log = logging.getLogger('sync_es')
log.setLevel(logging.INFO)
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
# in prod want in /var/lib somewhere probably
SAVE_LOC = "/var/lib/sync_es_position.json"
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306
MYSQL_USER = 'test'
MYSQL_PW = 'test123'
NT_DB = 'nyaav2'
with open(SAVE_LOC) as f:
pos = json.load(f)
es = Elasticsearch(timeout=30)
stream = BinLogStreamReader(
# TODO parse out from config.py or something
connection_settings = {
'host': MYSQL_HOST,
'port': MYSQL_PORT,
'user': MYSQL_USER,
'passwd': MYSQL_PW
},
server_id=10, # arbitrary
# only care about this database currently
only_schemas=[NT_DB],
# these tables in the database
only_tables=["nyaa_torrents", "nyaa_statistics", "sukebei_torrents", "sukebei_statistics"],
# from our save file
resume_stream=True,
log_file=pos['log_file'],
log_pos=pos['log_pos'],
# skip the other stuff like table mapping
only_events=[UpdateRowsEvent, DeleteRowsEvent, WriteRowsEvent],
# if we're at the head of the log, block until something happens
# note it'd be nice to block async-style instead, but the mainline
# binlogreader is synchronous. there is an (unmaintained?) fork
# using aiomysql if anybody wants to revive that.
blocking=True)
def reindex_torrent(t, index_name):
# XXX annoyingly different from import_to_es, and
# you need to keep them in sync manually.
f = t['flags']
doc = {
"id": t['id'],
"display_name": t['display_name'],
"created_time": t['created_time'],
"updated_time": t['updated_time'],
"description": t['description'],
# not analyzed but included so we can render magnet links
# without querying sql again.
"info_hash": t['info_hash'].hex(),
"filesize": t['filesize'],
"uploader_id": t['uploader_id'],
"main_category_id": t['main_category_id'],
"sub_category_id": t['sub_category_id'],
# XXX all the bitflags are numbers
"anonymous": bool(f & TorrentFlags.ANONYMOUS),
"trusted": bool(f & TorrentFlags.TRUSTED),
"remake": bool(f & TorrentFlags.REMAKE),
"complete": bool(f & TorrentFlags.COMPLETE),
# TODO instead of indexing and filtering later
# could delete from es entirely. Probably won't matter
# for at least a few months.
"hidden": bool(f & TorrentFlags.HIDDEN),
"deleted": bool(f & TorrentFlags.DELETED),
"has_torrent": bool(t['has_torrent']),
}
# update, so we don't delete the stats if present
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(t['id']),
"doc": doc,
"doc_as_upsert": True
}
def reindex_stats(s, index_name):
# update the torrent at torrent_id, assumed to exist;
# this will always be the case if you're reading the binlog
# in order; the foreign key constraint on torrrent_id prevents
# the stats row rom existing if the torrent isn't around.
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(s['torrent_id']),
"doc": {
"stats_last_updated": s["last_updated"],
"download_count": s["download_count"],
"leech_count": s['leech_count'],
"seed_count": s['seed_count'],
}}
def delet_this(row, index_name):
return {
"_op_type": 'delete',
'_index': index_name,
'_type': 'torrent',
'_id': str(row['values']['id'])}
n = 0
last_save = time.time()
for event in stream:
if event.table == "nyaa_torrents" or event.table == "sukebei_torrents":
if event.table == "nyaa_torrents":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_torrent(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
# UpdateRowsEvent includes the old values too, but we don't care
bulk(es, (reindex_torrent(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# ok, bye
bulk(es, (delet_this(row, index_name) for row in event.rows))
else:
raise Exception(f"unknown event {type(event)}")
elif event.table == "nyaa_statistics" or event.table == "sukebei_statistics":
if event.table == "nyaa_statistics":
index_name = "nyaa"
else:
index_name = "sukebei"
if type(event) is WriteRowsEvent:
bulk(es, (reindex_stats(row['values'], index_name) for row in event.rows))
elif type(event) is UpdateRowsEvent:
bulk(es, (reindex_stats(row['after_values'], index_name) for row in event.rows))
elif type(event) is DeleteRowsEvent:
# uh ok. assume that the torrent row will get deleted later,
# which will clean up the entire es "torrent" document
pass
else:
raise Exception(f"unknown event {type(event)}")
else:
raise Exception(f"unknown table {s.table}")
n += 1
if n % 100 == 0 or time.time() - last_save > 30:
log.info(f"saving position {stream.log_file}/{stream.log_pos}")
with open(SAVE_LOC, 'w') as f:
json.dump({"log_file": stream.log_file, "log_pos": stream.log_pos}, f)

View file

@ -1,9 +1,13 @@
udp://tracker.internetwarriors.net:1337/announce
udp://tracker.leechers-paradise.org:6969/announce
udp://tracker.coppersurfer.tk:6969/announce
udp://exodus.desync.com:6969/announce
udp://tracker.sktorrent.net:6969/announce
udp://tracker.zer0day.to:1337/announce
udp://tracker.pirateparty.gr:6969/announce
udp://oscar.reyesleon.xyz:6969/announce
udp://tracker.cyberia.is:6969/announce
udp://tracker.doko.moe:6969
http://tracker.baka-sub.cf:80/announce
udp://tracker.coppersurfer.tk:6969/announce
udp://tracker.torrent.eu.org:451
udp://tracker.opentrackr.org:1337/announce
udp://tracker.zer0day.to:1337/announce
http://t.nyaatracker.com:80/announce
https://open.kickasstracker.com:443/announce
udp://tracker.safe.moe:6969/announce
udp://p4p.arenabg.ch:1337/announce
udp://tracker.justseed.it:1337/announce