Merge pull request #349 from nyaadevs/remove_info_mysql

Move bencoded info dicts from the MySQL torrents_info tables to files in the info_dicts directory.
This commit is contained in:
A nyaa developer 2018-02-03 20:22:03 +01:00 committed by GitHub
commit e7f412eb8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 152 additions and 204 deletions

9
.gitignore vendored
View File

@ -14,16 +14,15 @@ __pycache__
# Databases # Databases
*.sql *.sql
test.db /test.db
# Webserver # Webserver
uwsgi.sock /uwsgi.sock
# Application # Application
install/* /install/*
config.py /config.py
/test_torrent_batch /test_torrent_batch
torrents
# Other # Other
*.swp *.swp

2
info_dicts/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -0,0 +1,57 @@
"""Remove bencoded info dicts from mysql
Revision ID: b61e4f6a88cc
Revises: cf7bf6d0e6bd
Create Date: 2017-08-29 01:45:08.357936
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
import sys
# revision identifiers, used by Alembic.
revision = 'b61e4f6a88cc'
down_revision = 'cf7bf6d0e6bd'
branch_labels = None
depends_on = None
def upgrade():
    """Drop both ``*_torrents_info`` tables after the operator confirms.

    Prints a warning and then reads one line from stdin. The tables are
    dropped only when that line is exactly ``OKAY``. Any other answer,
    CTRL-C, or a closed / non-interactive stdin (EOF) aborts the migration
    via ``sys.exit(1)`` before anything is dropped.
    """
    print("--- WARNING ---")
    print("This migration drops the torrent_info tables.")
    print("You will lose all of your .torrent files if you have not converted them beforehand.")
    print("Use the migration script at utils/infodict_mysql2file.py")
    print("Type OKAY and hit Enter to continue, CTRL-C to abort.")
    print("--- WARNING ---")

    try:
        confirmed = input() == "OKAY"
    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin is closed or not interactive. No confirmation can
        # be given, so treat it like an explicit abort instead of crashing.
        confirmed = False

    if not confirmed:
        sys.exit(1)

    op.drop_table('sukebei_torrents_info')
    op.drop_table('nyaa_torrents_info')
def downgrade():
    """Recreate the ``nyaa_torrents_info`` and ``sukebei_torrents_info`` tables.

    Only the schema is restored (blob column, primary key, cascading foreign
    key to the matching ``*_torrents`` table); no rows are inserted here.
    """
    for flavor in ('nyaa', 'sukebei'):
        info_table = '{}_torrents_info'.format(flavor)
        parent_pk = '{}_torrents.id'.format(flavor)
        # Column objects belong to one table, so build fresh ones per flavor.
        op.create_table(
            info_table,
            sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True),
            sa.Column('torrent_id', mysql.INTEGER(display_width=11),
                      autoincrement=False, nullable=False),
            sa.ForeignKeyConstraint(['torrent_id'], [parent_pk],
                                    name=info_table + '_ibfk_1', ondelete='CASCADE'),
            sa.PrimaryKeyConstraint('torrent_id'),
            mysql_collate='utf8_bin',
            mysql_default_charset='utf8',
            mysql_engine='InnoDB',
            mysql_row_format='COMPRESSED'
        )

View File

@ -1,13 +1,11 @@
import binascii import binascii
import functools import functools
import json import json
import os.path
import re import re
import flask import flask
from nyaa import backend, bencode, forms, models, utils from nyaa import backend, forms, models
from nyaa.extensions import db
from nyaa.views.torrents import _create_upload_category_choices from nyaa.views.torrents import _create_upload_category_choices
api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api') api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api')
@ -120,142 +118,6 @@ def v2_api_upload():
return flask.jsonify({'errors': mapped_errors}), 400 return flask.jsonify({'errors': mapped_errors}), 400
# #################################### TEMPORARY ####################################
from orderedset import OrderedSet # noqa: E402 isort:skip
@api_blueprint.route('/ghetto_import', methods=['POST'])
def ghetto_import():
    ''' Attach an uploaded .torrent file to a torrent that already exists in the DB.

        Only requests whose remote address is 127.0.0.1 are accepted (403 otherwise).
        The uploaded 'torrent' file is decoded and validated, and its info hash is
        looked up. The matching torrent must exist and must not have a torrent yet.
        On success the bencoded info dict, the parsed file list and the tracker
        references are stored and committed.

        Returns 'success', or an (error message, 500) tuple on any failure. '''
    if flask.request.remote_addr != '127.0.0.1':
        # Flask has no `flask.error`; abort() raises the 403 response.
        flask.abort(403)

    torrent_file = flask.request.files.get('torrent')
    if torrent_file is None:
        # No file part was sent: report it like any other unreadable upload.
        return 'Malformed torrent file', 500

    try:
        torrent_dict = bencode.decode(torrent_file)
    except (bencode.MalformedBencodeException, UnicodeError):
        return 'Malformed torrent file', 500

    try:
        forms._validate_torrent_metadata(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent metadata ({})'.format(e.args[0]), 500

    try:
        forms._validate_trackers(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent trackers ({})'.format(e.args[0]), 500

    bencoded_info_dict = bencode.encode(torrent_dict['info'])
    info_hash = utils.sha1_hash(bencoded_info_dict)

    # Check if the info_hash exists already in the database
    torrent = models.Torrent.by_info_hash(info_hash)
    if not torrent:
        return 'This torrent does not exists', 500

    if torrent.has_torrent:
        return 'This torrent already has_torrent', 500

    # Torrent is legit, pass original filename and dict along
    torrent_data = forms.TorrentFileData(filename=os.path.basename(torrent_file.filename),
                                         torrent_dict=torrent_dict,
                                         info_hash=info_hash,
                                         bencoded_info_dict=bencoded_info_dict)

    # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
    # keys and values have been checked for (see UploadForm in forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict)

    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8')

    # Store bencoded info_dict
    torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict)
    torrent.has_torrent = True

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    used_path_encoding = 'utf-8' if changed_to_utf8 else torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from utf8-bytes
        path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']]

        filename = path_parts.pop()
        current_directory = file_tree_root

        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        # Don't add empty filenames (BitComet directory)
        if filename:
            current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)
            # Flush so the new tracker has an id before it is referenced below
            db.session.flush()

        db_trackers.add(tracker)

    # Store tracker refs in DB
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id, order=order)
        db.session.add(torrent_tracker)

    db.session.commit()

    return 'success'
# ####################################### INFO ####################################### # ####################################### INFO #######################################
ID_PATTERN = '^[0-9]+$' ID_PATTERN = '^[0-9]+$'
INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string

View File

@ -162,9 +162,10 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
# Delete exisiting torrent which is marked as deleted # Delete exisiting torrent which is marked as deleted
if torrent_data.db_id is not None: if torrent_data.db_id is not None:
models.Torrent.query.filter_by(id=torrent_data.db_id).delete() old_torrent = models.Torrent.by_id(torrent_data.db_id)
_delete_torrent_file(old_torrent)
db.session.delete(old_torrent)
db.session.commit() db.session.commit()
_delete_cached_torrent_file(torrent_data.db_id)
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
# keys and values have been checked for (see UploadForm in forms.py for details) # keys and values have been checked for (see UploadForm in forms.py for details)
@ -195,7 +196,15 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
uploader_ip=ip_address(flask.request.remote_addr).packed) uploader_ip=ip_address(flask.request.remote_addr).packed)
# Store bencoded info_dict # Store bencoded info_dict
torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict) info_dict_path = torrent.info_dict_path
info_dict_dir = os.path.dirname(info_dict_path)
if not os.path.exists(info_dict_dir):
os.makedirs(info_dict_dir)
with open(info_dict_path, 'wb') as out_file:
out_file.write(torrent_data.bencoded_info_dict)
torrent.stats = models.Statistic() torrent.stats = models.Statistic()
torrent.has_torrent = True torrent.has_torrent = True
@ -361,9 +370,7 @@ def tracker_api(info_hashes, method):
return True return True
def _delete_cached_torrent_file(torrent_id): def _delete_torrent_file(torrent):
# Note: obviously temporary info_dict_path = torrent.info_dict_path
cached_torrent = os.path.join(app.config['BASE_DIR'], if os.path.exists(info_dict_path):
'torrent_cache', str(torrent_id) + '.torrent') os.remove(info_dict_path)
if os.path.exists(cached_torrent):
os.remove(cached_torrent)

View File

@ -1,4 +1,5 @@
import base64 import base64
import os.path
import re import re
from datetime import datetime from datetime import datetime
from enum import Enum, IntEnum from enum import Enum, IntEnum
@ -170,11 +171,6 @@ class TorrentBase(DeclarativeHelperBase):
backref='torrents', lazy="joined", backref='torrents', lazy="joined",
primaryjoin=join_sql.format(cls.__flavor__)) primaryjoin=join_sql.format(cls.__flavor__))
@declarative.declared_attr
def info(cls):
return db.relationship(cls._flavor_prefix('TorrentInfo'), uselist=False,
cascade="all, delete-orphan", back_populates='torrent')
@declarative.declared_attr @declarative.declared_attr
def filelist(cls): def filelist(cls):
return db.relationship(cls._flavor_prefix('TorrentFilelist'), uselist=False, return db.relationship(cls._flavor_prefix('TorrentFilelist'), uselist=False,
@ -229,13 +225,21 @@ class TorrentBase(DeclarativeHelperBase):
# Escaped # Escaped
return escape_markup(self.information) return escape_markup(self.information)
@property
def info_dict_path(self):
    ''' Path of this torrent's info_dict file under BASE_DIR, sharded by the first
        two byte pairs of the hex info hash: 'info_dicts/aa/bb/aabbccddee...' '''
    hex_hash = self.info_hash_as_hex
    shard_a, shard_b = hex_hash[0:2], hex_hash[2:4]
    return os.path.join(app.config['BASE_DIR'], 'info_dicts', shard_a, shard_b, hex_hash)
@property @property
def info_hash_as_b32(self): def info_hash_as_b32(self):
return base64.b32encode(self.info_hash).decode('utf-8') return base64.b32encode(self.info_hash).decode('utf-8')
@property @property
def info_hash_as_hex(self): def info_hash_as_hex(self):
return self.info_hash.hex() return self.info_hash.hex().lower()
@property @property
def magnet_uri(self): def magnet_uri(self):
@ -290,22 +294,6 @@ class TorrentFilelistBase(DeclarativeHelperBase):
back_populates='filelist') back_populates='filelist')
class TorrentInfoBase(DeclarativeHelperBase):
    ''' Declarative base for the per-flavor 'torrents_info' tables: one row per
        torrent, holding that torrent's info_dict blob. '''
    __tablename_base__ = 'torrents_info'
    # Rows are stored with MySQL's COMPRESSED row format
    __table_args__ = {'mysql_row_format': 'COMPRESSED'}

    @declarative.declared_attr
    def torrent_id(cls):
        # Primary key that is also a foreign key to the flavor's torrents table;
        # deleting the torrent row cascades to this row.
        return db.Column(db.Integer, db.ForeignKey(
            cls._table_prefix('torrents.id'), ondelete="CASCADE"), primary_key=True)

    # The stored info dict blob (nullable)
    info_dict = db.Column(MediumBlobType, nullable=True)

    @declarative.declared_attr
    def torrent(cls):
        # One-to-one link back to the owning torrent (paired with its 'info' attribute)
        return db.relationship(cls._flavor_prefix('Torrent'), uselist=False, back_populates='info')
class StatisticBase(DeclarativeHelperBase): class StatisticBase(DeclarativeHelperBase):
__tablename_base__ = 'statistics' __tablename_base__ = 'statistics'
@ -806,15 +794,6 @@ class SukebeiTorrentFilelist(TorrentFilelistBase, db.Model):
__flavor__ = 'Sukebei' __flavor__ = 'Sukebei'
# TorrentInfo
class NyaaTorrentInfo(TorrentInfoBase, db.Model):
__flavor__ = 'Nyaa'
class SukebeiTorrentInfo(TorrentInfoBase, db.Model):
__flavor__ = 'Sukebei'
# Statistic # Statistic
class NyaaStatistic(StatisticBase, db.Model): class NyaaStatistic(StatisticBase, db.Model):
__flavor__ = 'Nyaa' __flavor__ = 'Nyaa'
@ -882,7 +861,6 @@ class SukebeiReport(ReportBase, db.Model):
if config['SITE_FLAVOR'] == 'nyaa': if config['SITE_FLAVOR'] == 'nyaa':
Torrent = NyaaTorrent Torrent = NyaaTorrent
TorrentFilelist = NyaaTorrentFilelist TorrentFilelist = NyaaTorrentFilelist
TorrentInfo = NyaaTorrentInfo
Statistic = NyaaStatistic Statistic = NyaaStatistic
TorrentTrackers = NyaaTorrentTrackers TorrentTrackers = NyaaTorrentTrackers
MainCategory = NyaaMainCategory MainCategory = NyaaMainCategory
@ -895,7 +873,6 @@ if config['SITE_FLAVOR'] == 'nyaa':
elif config['SITE_FLAVOR'] == 'sukebei': elif config['SITE_FLAVOR'] == 'sukebei':
Torrent = SukebeiTorrent Torrent = SukebeiTorrent
TorrentFilelist = SukebeiTorrentFilelist TorrentFilelist = SukebeiTorrentFilelist
TorrentInfo = SukebeiTorrentInfo
Statistic = SukebeiStatistic Statistic = SukebeiStatistic
TorrentTrackers = SukebeiTorrentTrackers TorrentTrackers = SukebeiTorrentTrackers
MainCategory = SukebeiMainCategory MainCategory = SukebeiMainCategory

View File

@ -118,7 +118,7 @@ def create_default_metadata_base(torrent, trackers=None, webseeds=None):
return metadata_base return metadata_base
def create_bencoded_torrent(torrent, metadata_base=None): def create_bencoded_torrent(torrent, bencoded_info, metadata_base=None):
''' Creates a bencoded torrent metadata for a given torrent, ''' Creates a bencoded torrent metadata for a given torrent,
optionally using a given metadata_base dict (note: 'info' key will be optionally using a given metadata_base dict (note: 'info' key will be
popped off the dict) ''' popped off the dict) '''
@ -135,7 +135,6 @@ def create_bencoded_torrent(torrent, metadata_base=None):
prefix = bencode.encode(prefixed_dict) prefix = bencode.encode(prefixed_dict)
suffix = bencode.encode(suffixed_dict) suffix = bencode.encode(suffixed_dict)
bencoded_info = torrent.info.info_dict
bencoded_torrent = prefix[:-1] + b'4:info' + bencoded_info + suffix[1:] bencoded_torrent = prefix[:-1] + b'4:info' + bencoded_info + suffix[1:]
return bencoded_torrent return bencoded_torrent

View File

@ -1,5 +1,4 @@
import json import json
import os.path
from ipaddress import ip_address from ipaddress import ip_address
from urllib.parse import quote from urllib.parse import quote
@ -319,7 +318,7 @@ def download_torrent(torrent_id):
if torrent.deleted and not (flask.g.user and flask.g.user.is_moderator): if torrent.deleted and not (flask.g.user and flask.g.user.is_moderator):
flask.abort(404) flask.abort(404)
torrent_file, torrent_file_size = _get_cached_torrent_file(torrent) torrent_file, torrent_file_size = _make_torrent_file(torrent)
disposition = 'inline; filename="{0}"; filename*=UTF-8\'\'{0}'.format( disposition = 'inline; filename="{0}"; filename*=UTF-8\'\'{0}'.format(
quote(torrent.torrent_name.encode('utf-8'))) quote(torrent.torrent_name.encode('utf-8')))
@ -472,18 +471,10 @@ def _create_upload_category_choices():
return choices return choices
def _get_cached_torrent_file(torrent): def _make_torrent_file(torrent):
# Note: obviously temporary with open(torrent.info_dict_path, 'rb') as in_file:
cached_torrent = os.path.join(app.config['BASE_DIR'], bencoded_info = in_file.read()
'torrent_cache', str(torrent.id) + '.torrent')
if not os.path.exists(cached_torrent):
with open(cached_torrent, 'wb') as out_file:
metadata_base = torrents.create_default_metadata_base(torrent)
# Replace the default comment with url to the torrent page
metadata_base['comment'] = flask.url_for('torrents.view',
torrent_id=torrent.id,
_external=True)
out_file.write(torrents.create_bencoded_torrent(torrent, metadata_base)) bencoded_torrent_data = torrents.create_bencoded_torrent(torrent, bencoded_info)
return open(cached_torrent, 'rb'), os.path.getsize(cached_torrent) return bencoded_torrent_data, len(bencoded_torrent_data)

View File

@ -1 +0,0 @@
*.torrent

2
torrents/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

53
utils/infodict_mysql2file.py Executable file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""Export info dicts from the MySQL ``*_torrents_info`` tables to files.

Usage: infodict_mysql2file.py <prefix(nyaa|sukebei)> <outdir> [offset]

Each row is written to ``<outdir>/aa/bb/<hex info hash>``, where ``aa`` and
``bb`` are the first two byte pairs of the hex info hash. A running row
count (starting at ``offset``) is printed after every file; pass the last
printed value back as ``offset`` to skip rows that were already exported.
"""
import os
import sys

import MySQLdb
import MySQLdb.cursors

VALID_PREFIXES = ('nyaa', 'sukebei')

# Effectively "no limit": MySQL requires a LIMIT clause in order to use OFFSET.
NO_LIMIT = 18446744073709551610

# NOTE(review): the query has no ORDER BY, so resuming with an offset relies
# on the server returning rows in the same order on every run - confirm.
QUERY_TEMPLATE = """SELECT
    id,
    info_hash,
    info_dict
FROM
    {0}_torrents
    JOIN {0}_torrents_info ON torrent_id = id
LIMIT {2} OFFSET {1}
"""


def usage_and_exit(program):
    """Print the usage line and terminate with exit status 1."""
    print("Usage: {0} <prefix(nyaa|sukebei)> <outdir> [offset]".format(program))
    sys.exit(1)


def main(argv):
    """Parse ``argv``, then stream every info dict of the chosen flavor to disk."""
    if len(argv) < 3 or len(argv) > 4:
        usage_and_exit(argv[0])

    prefix = argv[1]
    outdir = argv[2]

    # The prefix becomes part of a table name (which cannot be a bound
    # parameter), so only the two known flavors are accepted.
    if prefix not in VALID_PREFIXES:
        usage_and_exit(argv[0])

    ofs = 0
    if len(argv) == 4:
        try:
            ofs = int(argv[3])
        except ValueError:
            usage_and_exit(argv[0])
        if ofs < 0:
            usage_and_exit(argv[0])

    os.makedirs(outdir, exist_ok=True)

    # SSCursor streams rows from the server instead of buffering the whole
    # result set in memory.
    db = MySQLdb.connect(host='localhost',
                         user='test',
                         passwd='test123',
                         db='nyaav2',
                         cursorclass=MySQLdb.cursors.SSCursor)
    try:
        cur = db.cursor()
        cur.execute(QUERY_TEMPLATE.format(prefix, ofs, NO_LIMIT))

        for _torrent_id, raw_info_hash, info_dict in cur:
            info_hash = raw_info_hash.hex().lower()

            shard_dir = os.path.join(outdir, info_hash[0:2], info_hash[2:4])
            os.makedirs(shard_dir, exist_ok=True)

            with open(os.path.join(shard_dir, info_hash), 'wb') as fp:
                fp.write(info_dict)

            ofs += 1
            print(ofs)
    finally:
        db.close()


if __name__ == '__main__':
    main(sys.argv)