From fd0a02b95c2476ae816548078916bb351631d91a Mon Sep 17 00:00:00 2001 From: nyaadev Date: Tue, 29 Aug 2017 02:17:39 +0200 Subject: [PATCH] Move bencoded info dicts from mysql torrent_info table to info_dict directory. DB change! IMPORTANT!!! Make sure to run utils/infodict_mysql2file.py before upgrading the database. --- .gitignore | 9 +- info_dicts/.gitignore | 2 + .../b61e4f6a88cc_del_torrents_info.py | 57 +++++++ nyaa/api_handler.py | 140 +----------------- nyaa/backend.py | 28 ++-- nyaa/models.py | 34 +---- nyaa/torrents.py | 3 +- nyaa/views/torrents.py | 25 ++-- torrent_cache/.gitignore | 1 - torrents/.gitignore | 2 + utils/infodict_mysql2file.py | 53 +++++++ 11 files changed, 151 insertions(+), 203 deletions(-) create mode 100644 info_dicts/.gitignore create mode 100644 migrations/versions/b61e4f6a88cc_del_torrents_info.py delete mode 100644 torrent_cache/.gitignore create mode 100644 torrents/.gitignore create mode 100755 utils/infodict_mysql2file.py diff --git a/.gitignore b/.gitignore index 9f5059a..3e7e4a4 100644 --- a/.gitignore +++ b/.gitignore @@ -14,16 +14,15 @@ __pycache__ # Databases *.sql -test.db +/test.db # Webserver -uwsgi.sock +/uwsgi.sock # Application -install/* -config.py +/install/* +/config.py /test_torrent_batch -torrents # Other *.swp diff --git a/info_dicts/.gitignore b/info_dicts/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/info_dicts/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/migrations/versions/b61e4f6a88cc_del_torrents_info.py b/migrations/versions/b61e4f6a88cc_del_torrents_info.py new file mode 100644 index 0000000..cd491f7 --- /dev/null +++ b/migrations/versions/b61e4f6a88cc_del_torrents_info.py @@ -0,0 +1,57 @@ +"""Remove bencoded info dicts from mysql + +Revision ID: b61e4f6a88cc +Revises: 500117641608 +Create Date: 2017-08-29 01:45:08.357936 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql +import sys + +# revision identifiers, used by 
Alembic. +revision = 'b61e4f6a88cc' +down_revision = '500117641608' +branch_labels = None +depends_on = None + + +def upgrade(): + print("--- WARNING ---") + print("This migration drops the torrent_info tables.") + print("You will lose all of your .torrent files if you have not converted them beforehand.") + print("Use the migration script at utils/infodict_mysql2file.py") + print("Type OKAY and hit Enter to continue, CTRL-C to abort.") + print("--- WARNING ---") + try: + if input() != "OKAY": + sys.exit(1) + except KeyboardInterrupt: + sys.exit(1) + + op.drop_table('sukebei_torrents_info') + op.drop_table('nyaa_torrents_info') + + +def downgrade(): + op.create_table('nyaa_torrents_info', + sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True), + sa.Column('torrent_id', mysql.INTEGER(display_width=11), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['torrent_id'], ['nyaa_torrents.id'], name='nyaa_torrents_info_ibfk_1', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('torrent_id'), + mysql_collate='utf8_bin', + mysql_default_charset='utf8', + mysql_engine='InnoDB', + mysql_row_format='COMPRESSED' + ) + op.create_table('sukebei_torrents_info', + sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True), + sa.Column('torrent_id', mysql.INTEGER(display_width=11), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['torrent_id'], ['sukebei_torrents.id'], name='sukebei_torrents_info_ibfk_1', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('torrent_id'), + mysql_collate='utf8_bin', + mysql_default_charset='utf8', + mysql_engine='InnoDB', + mysql_row_format='COMPRESSED' + ) diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 10a8608..058b420 100644 --- a/nyaa/api_handler.py +++ b/nyaa/api_handler.py @@ -1,13 +1,11 @@ import binascii import functools import json -import os.path import re import flask -from nyaa import backend, bencode, forms, models, utils -from nyaa.extensions import db +from nyaa import backend, forms, models from 
nyaa.views.torrents import _create_upload_category_choices api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api') @@ -120,142 +118,6 @@ def v2_api_upload(): return flask.jsonify({'errors': mapped_errors}), 400 -# #################################### TEMPORARY #################################### - -from orderedset import OrderedSet # noqa: E402 isort:skip - - -@api_blueprint.route('/ghetto_import', methods=['POST']) -def ghetto_import(): - if flask.request.remote_addr != '127.0.0.1': - return flask.error(403) - - torrent_file = flask.request.files.get('torrent') - - try: - torrent_dict = bencode.decode(torrent_file) - # field.data.close() - except (bencode.MalformedBencodeException, UnicodeError): - return 'Malformed torrent file', 500 - - try: - forms._validate_torrent_metadata(torrent_dict) - except AssertionError as e: - return 'Malformed torrent metadata ({})'.format(e.args[0]), 500 - - try: - tracker_found = forms._validate_trackers(torrent_dict) # noqa F841 - except AssertionError as e: - return 'Malformed torrent trackers ({})'.format(e.args[0]), 500 - - bencoded_info_dict = bencode.encode(torrent_dict['info']) - info_hash = utils.sha1_hash(bencoded_info_dict) - - # Check if the info_hash exists already in the database - torrent = models.Torrent.by_info_hash(info_hash) - if not torrent: - return 'This torrent does not exists', 500 - - if torrent.has_torrent: - return 'This torrent already has_torrent', 500 - - # Torrent is legit, pass original filename and dict along - torrent_data = forms.TorrentFileData(filename=os.path.basename(torrent_file.filename), - torrent_dict=torrent_dict, - info_hash=info_hash, - bencoded_info_dict=bencoded_info_dict) - - # The torrent has been validated and is safe to access with ['foo'] etc - all relevant - # keys and values have been checked for (see UploadForm in forms.py for details) - info_dict = torrent_data.torrent_dict['info'] - - changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict) - - 
torrent_filesize = info_dict.get('length') or sum( - f['length'] for f in info_dict.get('files')) - - # In case no encoding, assume UTF-8. - torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8') - - # Store bencoded info_dict - torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict) - torrent.has_torrent = True - - # To simplify parsing the filelist, turn single-file torrent into a list - torrent_filelist = info_dict.get('files') - - used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding - - parsed_file_tree = dict() - if not torrent_filelist: - # If single-file, the root will be the file-tree (no directory) - file_tree_root = parsed_file_tree - torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}] - else: - # If multi-file, use the directory name as root for files - file_tree_root = parsed_file_tree.setdefault( - info_dict['name'].decode(used_path_encoding), {}) - - # Parse file dicts into a tree - for file_dict in torrent_filelist: - # Decode path parts from utf8-bytes - path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']] - - filename = path_parts.pop() - current_directory = file_tree_root - - for directory in path_parts: - current_directory = current_directory.setdefault(directory, {}) - - # Don't add empty filenames (BitComet directory) - if filename: - current_directory[filename] = file_dict['length'] - - parsed_file_tree = utils.sorted_pathdict(parsed_file_tree) - - json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8') - torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes) - - db.session.add(torrent) - db.session.flush() - - # Store the users trackers - trackers = OrderedSet() - announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii') - if announce: - trackers.add(announce) - - # List of lists with single item - announce_list = torrent_data.torrent_dict.get('announce-list', 
[]) - for announce in announce_list: - trackers.add(announce[0].decode('ascii')) - - # Remove our trackers, maybe? TODO ? - - # Search for/Add trackers in DB - db_trackers = OrderedSet() - for announce in trackers: - tracker = models.Trackers.by_uri(announce) - - # Insert new tracker if not found - if not tracker: - tracker = models.Trackers(uri=announce) - db.session.add(tracker) - db.session.flush() - - db_trackers.add(tracker) - - # Store tracker refs in DB - for order, tracker in enumerate(db_trackers): - torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) - db.session.add(torrent_tracker) - - db.session.commit() - - return 'success' - - # ####################################### INFO ####################################### ID_PATTERN = '^[0-9]+$' INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string diff --git a/nyaa/backend.py b/nyaa/backend.py index 523a8b6..95ecef8 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -162,9 +162,10 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): # Delete exisiting torrent which is marked as deleted if torrent_data.db_id is not None: - models.Torrent.query.filter_by(id=torrent_data.db_id).delete() + oldtorrent = models.Torrent.by_id(torrent_data.db_id) + _delete_torrent_file(oldtorrent) + db.session.delete(oldtorrent) db.session.commit() - _delete_cached_torrent_file(torrent_data.db_id) # The torrent has been validated and is safe to access with ['foo'] etc - all relevant # keys and values have been checked for (see UploadForm in forms.py for details) @@ -195,7 +196,15 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): uploader_ip=ip_address(flask.request.remote_addr).packed) # Store bencoded info_dict - torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict) + info_hash = torrent_data.info_hash.hex().lower() + path = os.path.join(app.config['BASE_DIR'], 'info_dicts', + info_hash[0:2], 
info_hash[2:4]) + if not os.path.exists(path): + os.makedirs(path) + path = os.path.join(path, info_hash) + with open(path, 'wb') as fp: + fp.write(torrent_data.bencoded_info_dict) + torrent.stats = models.Statistic() torrent.has_torrent = True @@ -361,9 +370,10 @@ def tracker_api(info_hashes, method): return True -def _delete_cached_torrent_file(torrent_id): - # Note: obviously temporary - cached_torrent = os.path.join(app.config['BASE_DIR'], - 'torrent_cache', str(torrent_id) + '.torrent') - if os.path.exists(cached_torrent): - os.remove(cached_torrent) +def _delete_torrent_file(torrent): + info_hash = torrent.info_hash_as_hex + path = os.path.join(app.config['BASE_DIR'], 'info_dicts', + info_hash[0:2], info_hash[2:4], info_hash) + + if os.path.exists(path): + os.remove(path) diff --git a/nyaa/models.py b/nyaa/models.py index 8508b7b..b2fc216 100644 --- a/nyaa/models.py +++ b/nyaa/models.py @@ -170,11 +170,6 @@ class TorrentBase(DeclarativeHelperBase): backref='torrents', lazy="joined", primaryjoin=join_sql.format(cls.__flavor__)) - @declarative.declared_attr - def info(cls): - return db.relationship(cls._flavor_prefix('TorrentInfo'), uselist=False, - cascade="all, delete-orphan", back_populates='torrent') - @declarative.declared_attr def filelist(cls): return db.relationship(cls._flavor_prefix('TorrentFilelist'), uselist=False, @@ -235,7 +230,7 @@ class TorrentBase(DeclarativeHelperBase): @property def info_hash_as_hex(self): - return self.info_hash.hex() + return self.info_hash.hex().lower() @property def magnet_uri(self): @@ -290,22 +285,6 @@ class TorrentFilelistBase(DeclarativeHelperBase): back_populates='filelist') -class TorrentInfoBase(DeclarativeHelperBase): - __tablename_base__ = 'torrents_info' - - __table_args__ = {'mysql_row_format': 'COMPRESSED'} - - @declarative.declared_attr - def torrent_id(cls): - return db.Column(db.Integer, db.ForeignKey( - cls._table_prefix('torrents.id'), ondelete="CASCADE"), primary_key=True) - info_dict = 
db.Column(MediumBlobType, nullable=True) - - @declarative.declared_attr - def torrent(cls): - return db.relationship(cls._flavor_prefix('Torrent'), uselist=False, back_populates='info') - - class StatisticBase(DeclarativeHelperBase): __tablename_base__ = 'statistics' @@ -806,15 +785,6 @@ class SukebeiTorrentFilelist(TorrentFilelistBase, db.Model): __flavor__ = 'Sukebei' -# TorrentInfo -class NyaaTorrentInfo(TorrentInfoBase, db.Model): - __flavor__ = 'Nyaa' - - -class SukebeiTorrentInfo(TorrentInfoBase, db.Model): - __flavor__ = 'Sukebei' - - # Statistic class NyaaStatistic(StatisticBase, db.Model): __flavor__ = 'Nyaa' @@ -882,7 +852,6 @@ class SukebeiReport(ReportBase, db.Model): if config['SITE_FLAVOR'] == 'nyaa': Torrent = NyaaTorrent TorrentFilelist = NyaaTorrentFilelist - TorrentInfo = NyaaTorrentInfo Statistic = NyaaStatistic TorrentTrackers = NyaaTorrentTrackers MainCategory = NyaaMainCategory @@ -895,7 +864,6 @@ if config['SITE_FLAVOR'] == 'nyaa': elif config['SITE_FLAVOR'] == 'sukebei': Torrent = SukebeiTorrent TorrentFilelist = SukebeiTorrentFilelist - TorrentInfo = SukebeiTorrentInfo Statistic = SukebeiStatistic TorrentTrackers = SukebeiTorrentTrackers MainCategory = SukebeiMainCategory diff --git a/nyaa/torrents.py b/nyaa/torrents.py index 1644123..1a8e277 100644 --- a/nyaa/torrents.py +++ b/nyaa/torrents.py @@ -118,7 +118,7 @@ def create_default_metadata_base(torrent, trackers=None, webseeds=None): return metadata_base -def create_bencoded_torrent(torrent, metadata_base=None): +def create_bencoded_torrent(torrent, bencoded_info, metadata_base=None): ''' Creates a bencoded torrent metadata for a given torrent, optionally using a given metadata_base dict (note: 'info' key will be popped off the dict) ''' @@ -135,7 +135,6 @@ def create_bencoded_torrent(torrent, metadata_base=None): prefix = bencode.encode(prefixed_dict) suffix = bencode.encode(suffixed_dict) - bencoded_info = torrent.info.info_dict bencoded_torrent = prefix[:-1] + b'4:info' + bencoded_info 
+ suffix[1:] return bencoded_torrent diff --git a/nyaa/views/torrents.py b/nyaa/views/torrents.py index 93c9243..ac377f0 100644 --- a/nyaa/views/torrents.py +++ b/nyaa/views/torrents.py @@ -319,7 +319,7 @@ def download_torrent(torrent_id): if torrent.deleted and not (flask.g.user and flask.g.user.is_moderator): flask.abort(404) - torrent_file, torrent_file_size = _get_cached_torrent_file(torrent) + torrent_file, torrent_file_size = _make_torrent_file(torrent) disposition = 'inline; filename="{0}"; filename*=UTF-8\'\'{0}'.format( quote(torrent.torrent_name.encode('utf-8'))) @@ -472,18 +472,15 @@ def _create_upload_category_choices(): return choices -def _get_cached_torrent_file(torrent): - # Note: obviously temporary - cached_torrent = os.path.join(app.config['BASE_DIR'], - 'torrent_cache', str(torrent.id) + '.torrent') - if not os.path.exists(cached_torrent): - with open(cached_torrent, 'wb') as out_file: - metadata_base = torrents.create_default_metadata_base(torrent) - # Replace the default comment with url to the torrent page - metadata_base['comment'] = flask.url_for('torrents.view', - torrent_id=torrent.id, - _external=True) - out_file.write(torrents.create_bencoded_torrent(torrent, metadata_base)) +def _make_torrent_file(torrent): + info_hash = torrent.info_hash_as_hex + path = os.path.join(app.config['BASE_DIR'], 'info_dicts', + info_hash[0:2], info_hash[2:4], info_hash) - return open(cached_torrent, 'rb'), os.path.getsize(cached_torrent) + with open(path, 'rb') as fp: + bencoded_info = fp.read() + + data = torrents.create_bencoded_torrent(torrent, bencoded_info) + + return data, len(data) diff --git a/torrent_cache/.gitignore b/torrent_cache/.gitignore deleted file mode 100644 index f85d4dd..0000000 --- a/torrent_cache/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.torrent diff --git a/torrents/.gitignore b/torrents/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/torrents/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git 
a/utils/infodict_mysql2file.py b/utils/infodict_mysql2file.py
new file mode 100755
index 0000000..3969935
--- /dev/null
+++ b/utils/infodict_mysql2file.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""Export bencoded info dicts from the MySQL torrents_info table to files.
+
+Usage: infodict_mysql2file.py <prefix> <outdir> [offset]
+
+prefix  table prefix; the query reads <prefix>_torrents joined with
+        <prefix>_torrents_info
+outdir  directory to write into (created if missing)
+offset  number of rows to skip, to resume an interrupted run
+
+Every info dict is written to <outdir>/<aa>/<bb>/<info_hash>, where
+<info_hash> is the lowercase hex info hash and <aa>/<bb> are its first and
+second pair of hex digits. The running row count is printed after each row;
+pass the last printed value as offset to resume.
+"""
+import os
+import re
+import sys
+
+import MySQLdb
+import MySQLdb.cursors
+
+# Connection settings of the database to export from; edit before running.
+DB_SETTINGS = {
+    'host': 'localhost',
+    'user': 'test',
+    'passwd': 'test123',
+    'db': 'nyaav2',
+}
+
+
+def main(argv):
+    """Run the export; return the process exit status."""
+    if len(argv) < 3 or len(argv) > 4:
+        print("Usage: {0} <prefix> <outdir> [offset]".format(argv[0]))
+        return 1
+
+    prefix = argv[1]
+    outdir = argv[2]
+    ofs = int(argv[3]) if len(argv) == 4 else 0
+
+    # Table names cannot be bound as query parameters, so the prefix ends up
+    # in the SQL text itself; accept plain identifier characters only.
+    if not re.fullmatch(r'[A-Za-z0-9_]+', prefix):
+        print("Invalid table prefix: {0!r}".format(prefix))
+        return 1
+
+    os.makedirs(outdir, exist_ok=True)
+
+    db = MySQLdb.connect(cursorclass=MySQLdb.cursors.SSCursor, **DB_SETTINGS)
+    try:
+        cur = db.cursor()
+        # ORDER BY keeps the row order stable between runs; without it the
+        # offset of a resumed run could skip or repeat rows. MySQL has no
+        # OFFSET without LIMIT, hence the huge LIMIT value.
+        cur.execute(
+            """SELECT
+                info_hash,
+                info_dict
+            FROM
+                {0}_torrents
+                JOIN {0}_torrents_info ON torrent_id = id
+            ORDER BY id
+            LIMIT 18446744073709551610 OFFSET {1:d}
+            """.format(prefix, ofs))
+
+        for raw_info_hash, info_dict in cur:
+            info_hash = raw_info_hash.hex().lower()
+
+            # info_dict is a nullable column; there is nothing to write then.
+            if info_dict is None:
+                print("Skipping {0}: no info dict stored".format(info_hash),
+                      file=sys.stderr)
+            else:
+                path = os.path.join(outdir, info_hash[0:2], info_hash[2:4])
+                os.makedirs(path, exist_ok=True)
+                with open(os.path.join(path, info_hash), 'wb') as fp:
+                    fp.write(info_dict)
+
+            ofs += 1
+            print(ofs)
+    finally:
+        db.close()
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))