Merge pull request #349 from nyaadevs/remove_info_mysql

Move bencoded info dicts from mysql torrent_info table to info_dict directory.
This commit is contained in:
A nyaa developer 2018-02-03 20:22:03 +01:00 committed by GitHub
commit e7f412eb8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 152 additions and 204 deletions

9
.gitignore vendored
View File

@ -14,16 +14,15 @@ __pycache__
# Databases
*.sql
test.db
/test.db
# Webserver
uwsgi.sock
/uwsgi.sock
# Application
install/*
config.py
/install/*
/config.py
/test_torrent_batch
torrents
# Other
*.swp

2
info_dicts/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -0,0 +1,57 @@
"""Remove bencoded info dicts from mysql
Revision ID: b61e4f6a88cc
Revises: cf7bf6d0e6bd
Create Date: 2017-08-29 01:45:08.357936
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
import sys
# revision identifiers, used by Alembic.
revision = 'b61e4f6a88cc'
down_revision = 'cf7bf6d0e6bd'
branch_labels = None
depends_on = None
def upgrade():
    """Drop both ``*_torrents_info`` tables after an explicit confirmation.

    A warning is printed and one line is read from stdin. The tables are
    dropped only when that line is exactly ``OKAY``. Any other answer,
    Ctrl-C, or an unreadable/closed stdin aborts with exit status 1
    before anything is dropped.
    """
    print("--- WARNING ---")
    print("This migration drops the torrent_info tables.")
    print("You will lose all of your .torrent files if you have not converted them beforehand.")
    print("Use the migration script at utils/infodict_mysql2file.py")
    print("Type OKAY and hit Enter to continue, CTRL-C to abort.")
    print("--- WARNING ---")

    try:
        confirmed = input() == "OKAY"
    except (KeyboardInterrupt, EOFError):
        # EOFError is what input() raises when stdin is closed or exhausted
        # (e.g. a non-interactive run). A destructive step must never be
        # taken without an answer, so treat it the same as Ctrl-C.
        confirmed = False

    if not confirmed:
        sys.exit(1)

    op.drop_table('sukebei_torrents_info')
    op.drop_table('nyaa_torrents_info')
def downgrade():
    """Recreate the ``nyaa_torrents_info`` and ``sukebei_torrents_info`` tables.

    Only the schema is recreated (blob column, primary key on
    ``torrent_id`` and a cascading foreign key to the flavor's torrents
    table); nothing in this function reloads the info dicts themselves.
    """
    # MySQL storage options shared by both flavors.
    mysql_table_options = {
        'mysql_collate': 'utf8_bin',
        'mysql_default_charset': 'utf8',
        'mysql_engine': 'InnoDB',
        'mysql_row_format': 'COMPRESSED',
    }

    # (table name, referenced torrents column, foreign key constraint name)
    info_tables = (
        ('nyaa_torrents_info', 'nyaa_torrents.id', 'nyaa_torrents_info_ibfk_1'),
        ('sukebei_torrents_info', 'sukebei_torrents.id', 'sukebei_torrents_info_ibfk_1'),
    )

    for table_name, torrents_id_column, fk_name in info_tables:
        # Column/constraint objects are built per table; they cannot be
        # shared between two table definitions.
        op.create_table(
            table_name,
            sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True),
            sa.Column('torrent_id', mysql.INTEGER(display_width=11),
                      autoincrement=False, nullable=False),
            sa.ForeignKeyConstraint(['torrent_id'], [torrents_id_column],
                                    name=fk_name, ondelete='CASCADE'),
            sa.PrimaryKeyConstraint('torrent_id'),
            **mysql_table_options
        )

View File

@ -1,13 +1,11 @@
import binascii
import functools
import json
import os.path
import re
import flask
from nyaa import backend, bencode, forms, models, utils
from nyaa.extensions import db
from nyaa import backend, forms, models
from nyaa.views.torrents import _create_upload_category_choices
api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api')
@ -120,142 +118,6 @@ def v2_api_upload():
return flask.jsonify({'errors': mapped_errors}), 400
# #################################### TEMPORARY ####################################
from orderedset import OrderedSet # noqa: E402 isort:skip
@api_blueprint.route('/ghetto_import', methods=['POST'])
def ghetto_import():
if flask.request.remote_addr != '127.0.0.1':
return flask.error(403)
torrent_file = flask.request.files.get('torrent')
try:
torrent_dict = bencode.decode(torrent_file)
# field.data.close()
except (bencode.MalformedBencodeException, UnicodeError):
return 'Malformed torrent file', 500
try:
forms._validate_torrent_metadata(torrent_dict)
except AssertionError as e:
return 'Malformed torrent metadata ({})'.format(e.args[0]), 500
try:
tracker_found = forms._validate_trackers(torrent_dict) # noqa F841
except AssertionError as e:
return 'Malformed torrent trackers ({})'.format(e.args[0]), 500
bencoded_info_dict = bencode.encode(torrent_dict['info'])
info_hash = utils.sha1_hash(bencoded_info_dict)
# Check if the info_hash exists already in the database
torrent = models.Torrent.by_info_hash(info_hash)
if not torrent:
return 'This torrent does not exists', 500
if torrent.has_torrent:
return 'This torrent already has_torrent', 500
# Torrent is legit, pass original filename and dict along
torrent_data = forms.TorrentFileData(filename=os.path.basename(torrent_file.filename),
torrent_dict=torrent_dict,
info_hash=info_hash,
bencoded_info_dict=bencoded_info_dict)
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant
# keys and values have been checked for (see UploadForm in forms.py for details)
info_dict = torrent_data.torrent_dict['info']
changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict)
torrent_filesize = info_dict.get('length') or sum(
f['length'] for f in info_dict.get('files'))
# In case no encoding, assume UTF-8.
torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8')
# Store bencoded info_dict
torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict)
torrent.has_torrent = True
# To simplify parsing the filelist, turn single-file torrent into a list
torrent_filelist = info_dict.get('files')
used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding
parsed_file_tree = dict()
if not torrent_filelist:
# If single-file, the root will be the file-tree (no directory)
file_tree_root = parsed_file_tree
torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}]
else:
# If multi-file, use the directory name as root for files
file_tree_root = parsed_file_tree.setdefault(
info_dict['name'].decode(used_path_encoding), {})
# Parse file dicts into a tree
for file_dict in torrent_filelist:
# Decode path parts from utf8-bytes
path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']]
filename = path_parts.pop()
current_directory = file_tree_root
for directory in path_parts:
current_directory = current_directory.setdefault(directory, {})
# Don't add empty filenames (BitComet directory)
if filename:
current_directory[filename] = file_dict['length']
parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)
json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8')
torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)
db.session.add(torrent)
db.session.flush()
# Store the users trackers
trackers = OrderedSet()
announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
if announce:
trackers.add(announce)
# List of lists with single item
announce_list = torrent_data.torrent_dict.get('announce-list', [])
for announce in announce_list:
trackers.add(announce[0].decode('ascii'))
# Remove our trackers, maybe? TODO ?
# Search for/Add trackers in DB
db_trackers = OrderedSet()
for announce in trackers:
tracker = models.Trackers.by_uri(announce)
# Insert new tracker if not found
if not tracker:
tracker = models.Trackers(uri=announce)
db.session.add(tracker)
db.session.flush()
db_trackers.add(tracker)
# Store tracker refs in DB
for order, tracker in enumerate(db_trackers):
torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
tracker_id=tracker.id, order=order)
db.session.add(torrent_tracker)
db.session.commit()
return 'success'
# ####################################### INFO #######################################
ID_PATTERN = '^[0-9]+$'
INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string

View File

@ -162,9 +162,10 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
# Delete existing torrent which is marked as deleted
if torrent_data.db_id is not None:
models.Torrent.query.filter_by(id=torrent_data.db_id).delete()
old_torrent = models.Torrent.by_id(torrent_data.db_id)
_delete_torrent_file(old_torrent)
db.session.delete(old_torrent)
db.session.commit()
_delete_cached_torrent_file(torrent_data.db_id)
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant
# keys and values have been checked for (see UploadForm in forms.py for details)
@ -195,7 +196,15 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
uploader_ip=ip_address(flask.request.remote_addr).packed)
# Store bencoded info_dict
torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict)
info_dict_path = torrent.info_dict_path
info_dict_dir = os.path.dirname(info_dict_path)
if not os.path.exists(info_dict_dir):
os.makedirs(info_dict_dir)
with open(info_dict_path, 'wb') as out_file:
out_file.write(torrent_data.bencoded_info_dict)
torrent.stats = models.Statistic()
torrent.has_torrent = True
@ -361,9 +370,7 @@ def tracker_api(info_hashes, method):
return True
def _delete_cached_torrent_file(torrent_id):
# Note: obviously temporary
cached_torrent = os.path.join(app.config['BASE_DIR'],
'torrent_cache', str(torrent_id) + '.torrent')
if os.path.exists(cached_torrent):
os.remove(cached_torrent)
def _delete_torrent_file(torrent):
info_dict_path = torrent.info_dict_path
if os.path.exists(info_dict_path):
os.remove(info_dict_path)

View File

@ -1,4 +1,5 @@
import base64
import os.path
import re
from datetime import datetime
from enum import Enum, IntEnum
@ -170,11 +171,6 @@ class TorrentBase(DeclarativeHelperBase):
backref='torrents', lazy="joined",
primaryjoin=join_sql.format(cls.__flavor__))
@declarative.declared_attr
def info(cls):
return db.relationship(cls._flavor_prefix('TorrentInfo'), uselist=False,
cascade="all, delete-orphan", back_populates='torrent')
@declarative.declared_attr
def filelist(cls):
return db.relationship(cls._flavor_prefix('TorrentFilelist'), uselist=False,
@ -229,13 +225,21 @@ class TorrentBase(DeclarativeHelperBase):
# Escaped
return escape_markup(self.information)
@property
def info_dict_path(self):
    ''' Path of this torrent's info_dict file under BASE_DIR, sharded by the
        first two byte pairs of the hex info hash:
        'info_dicts/aa/bb/aabbccddee...' '''
    hex_hash = self.info_hash_as_hex
    first_shard, second_shard = hex_hash[0:2], hex_hash[2:4]
    return os.path.join(app.config['BASE_DIR'], 'info_dicts',
                        first_shard, second_shard, hex_hash)
@property
def info_hash_as_b32(self):
return base64.b32encode(self.info_hash).decode('utf-8')
@property
def info_hash_as_hex(self):
return self.info_hash.hex()
return self.info_hash.hex().lower()
@property
def magnet_uri(self):
@ -290,22 +294,6 @@ class TorrentFilelistBase(DeclarativeHelperBase):
back_populates='filelist')
class TorrentInfoBase(DeclarativeHelperBase):
__tablename_base__ = 'torrents_info'
__table_args__ = {'mysql_row_format': 'COMPRESSED'}
@declarative.declared_attr
def torrent_id(cls):
return db.Column(db.Integer, db.ForeignKey(
cls._table_prefix('torrents.id'), ondelete="CASCADE"), primary_key=True)
info_dict = db.Column(MediumBlobType, nullable=True)
@declarative.declared_attr
def torrent(cls):
return db.relationship(cls._flavor_prefix('Torrent'), uselist=False, back_populates='info')
class StatisticBase(DeclarativeHelperBase):
__tablename_base__ = 'statistics'
@ -806,15 +794,6 @@ class SukebeiTorrentFilelist(TorrentFilelistBase, db.Model):
__flavor__ = 'Sukebei'
# TorrentInfo
class NyaaTorrentInfo(TorrentInfoBase, db.Model):
__flavor__ = 'Nyaa'
class SukebeiTorrentInfo(TorrentInfoBase, db.Model):
__flavor__ = 'Sukebei'
# Statistic
class NyaaStatistic(StatisticBase, db.Model):
__flavor__ = 'Nyaa'
@ -882,7 +861,6 @@ class SukebeiReport(ReportBase, db.Model):
if config['SITE_FLAVOR'] == 'nyaa':
Torrent = NyaaTorrent
TorrentFilelist = NyaaTorrentFilelist
TorrentInfo = NyaaTorrentInfo
Statistic = NyaaStatistic
TorrentTrackers = NyaaTorrentTrackers
MainCategory = NyaaMainCategory
@ -895,7 +873,6 @@ if config['SITE_FLAVOR'] == 'nyaa':
elif config['SITE_FLAVOR'] == 'sukebei':
Torrent = SukebeiTorrent
TorrentFilelist = SukebeiTorrentFilelist
TorrentInfo = SukebeiTorrentInfo
Statistic = SukebeiStatistic
TorrentTrackers = SukebeiTorrentTrackers
MainCategory = SukebeiMainCategory

View File

@ -118,7 +118,7 @@ def create_default_metadata_base(torrent, trackers=None, webseeds=None):
return metadata_base
def create_bencoded_torrent(torrent, metadata_base=None):
def create_bencoded_torrent(torrent, bencoded_info, metadata_base=None):
''' Creates a bencoded torrent metadata for a given torrent,
optionally using a given metadata_base dict (note: 'info' key will be
popped off the dict) '''
@ -135,7 +135,6 @@ def create_bencoded_torrent(torrent, metadata_base=None):
prefix = bencode.encode(prefixed_dict)
suffix = bencode.encode(suffixed_dict)
bencoded_info = torrent.info.info_dict
bencoded_torrent = prefix[:-1] + b'4:info' + bencoded_info + suffix[1:]
return bencoded_torrent

View File

@ -1,5 +1,4 @@
import json
import os.path
from ipaddress import ip_address
from urllib.parse import quote
@ -319,7 +318,7 @@ def download_torrent(torrent_id):
if torrent.deleted and not (flask.g.user and flask.g.user.is_moderator):
flask.abort(404)
torrent_file, torrent_file_size = _get_cached_torrent_file(torrent)
torrent_file, torrent_file_size = _make_torrent_file(torrent)
disposition = 'inline; filename="{0}"; filename*=UTF-8\'\'{0}'.format(
quote(torrent.torrent_name.encode('utf-8')))
@ -472,18 +471,10 @@ def _create_upload_category_choices():
return choices
def _get_cached_torrent_file(torrent):
# Note: obviously temporary
cached_torrent = os.path.join(app.config['BASE_DIR'],
'torrent_cache', str(torrent.id) + '.torrent')
if not os.path.exists(cached_torrent):
with open(cached_torrent, 'wb') as out_file:
metadata_base = torrents.create_default_metadata_base(torrent)
# Replace the default comment with url to the torrent page
metadata_base['comment'] = flask.url_for('torrents.view',
torrent_id=torrent.id,
_external=True)
def _make_torrent_file(torrent):
with open(torrent.info_dict_path, 'rb') as in_file:
bencoded_info = in_file.read()
out_file.write(torrents.create_bencoded_torrent(torrent, metadata_base))
bencoded_torrent_data = torrents.create_bencoded_torrent(torrent, bencoded_info)
return open(cached_torrent, 'rb'), os.path.getsize(cached_torrent)
return bencoded_torrent_data, len(bencoded_torrent_data)

View File

@ -1 +0,0 @@
*.torrent

2
torrents/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

53
utils/infodict_mysql2file.py Executable file
View File

@ -0,0 +1,53 @@
#!/usr/bin/python
import os
import sys
import MySQLdb
import MySQLdb.cursors
# Table-name prefixes this script knows how to export.
VALID_PREFIXES = ('nyaa', 'sukebei')


def _print_usage_and_exit(program_name):
    """Print the command-line usage and terminate with exit status 1."""
    print("Usage: {0} <prefix(nyaa|sukebei)> <outdir> [offset]".format(program_name))
    sys.exit(1)


def main(argv=None):
    """Dump every info dict of one site flavor from MySQL into files.

    Each row of ``<prefix>_torrents`` joined with ``<prefix>_torrents_info``
    is written to ``<outdir>/aa/bb/<info hash as lowercase hex>``, where
    ``aa`` and ``bb`` are the first two byte pairs of the hex hash. After
    every written row the running row offset is printed, so an interrupted
    run can be resumed by passing the last printed number as ``offset``.

    Args:
        argv: argument list in ``sys.argv`` layout
            (``[program, prefix, outdir, [offset]]``); defaults to
            ``sys.argv``.

    Exits with status 1 on a wrong argument count or an unknown prefix.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3 or len(argv) > 4:
        _print_usage_and_exit(argv[0])

    prefix = argv[1]
    outdir = argv[2]

    # The prefix is formatted into table names below (identifiers cannot be
    # passed as bound query parameters), so only the known flavors are
    # accepted; anything else would be arbitrary text spliced into the SQL.
    if prefix not in VALID_PREFIXES:
        _print_usage_and_exit(argv[0])

    os.makedirs(outdir, exist_ok=True)

    ofs = int(argv[3]) if len(argv) == 4 else 0

    db = MySQLdb.connect(host='localhost',
                         user='test',
                         passwd='test123',
                         db='nyaav2',
                         cursorclass=MySQLdb.cursors.SSCursor)
    try:
        cur = db.cursor()
        # ORDER BY makes the row order stable, which the offset-based resume
        # depends on. MySQL needs a LIMIT whenever OFFSET is used, hence the
        # effectively unbounded row count.
        cur.execute(
            """SELECT
                id,
                info_hash,
                info_dict
            FROM
                {0}_torrents
                JOIN {0}_torrents_info ON torrent_id = id
            ORDER BY id
            LIMIT 18446744073709551610 OFFSET {1}
            """.format(prefix, ofs))

        for _torrent_id, raw_info_hash, info_dict in cur:
            info_hash = raw_info_hash.hex().lower()

            shard_dir = os.path.join(outdir, info_hash[0:2], info_hash[2:4])
            os.makedirs(shard_dir, exist_ok=True)

            with open(os.path.join(shard_dir, info_hash), 'wb') as fp:
                fp.write(info_dict)

            ofs += 1
            print(ofs)
    finally:
        db.close()


if __name__ == '__main__':
    main()