mirror of
https://gitlab.com/SIGBUS/nyaa.git
synced 2025-01-25 00:35:12 +00:00
Merge pull request #349 from nyaadevs/remove_info_mysql
Move bencoded info dicts from mysql torrent_info table to info_dict directory.
This commit is contained in:
commit
e7f412eb8f
9
.gitignore
vendored
9
.gitignore
vendored
|
@ -14,16 +14,15 @@ __pycache__
|
|||
|
||||
# Databases
|
||||
*.sql
|
||||
test.db
|
||||
/test.db
|
||||
|
||||
# Webserver
|
||||
uwsgi.sock
|
||||
/uwsgi.sock
|
||||
|
||||
# Application
|
||||
install/*
|
||||
config.py
|
||||
/install/*
|
||||
/config.py
|
||||
/test_torrent_batch
|
||||
torrents
|
||||
|
||||
# Other
|
||||
*.swp
|
||||
|
|
2
info_dicts/.gitignore
vendored
Normal file
2
info_dicts/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
57
migrations/versions/b61e4f6a88cc_del_torrents_info.py
Normal file
57
migrations/versions/b61e4f6a88cc_del_torrents_info.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
"""Remove bencoded info dicts from mysql
|
||||
|
||||
Revision ID: b61e4f6a88cc
|
||||
Revises: cf7bf6d0e6bd
|
||||
Create Date: 2017-08-29 01:45:08.357936
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import mysql
|
||||
import sys
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'b61e4f6a88cc'
|
||||
down_revision = 'cf7bf6d0e6bd'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Drop both ``*_torrents_info`` tables after an explicit confirmation.

    A warning is printed and one line is read from stdin. Anything other than
    the exact string ``OKAY`` aborts the migration with ``sys.exit(1)``; so do
    CTRL-C and end-of-input (for example a non-interactive run with no stdin).
    """
    print("--- WARNING ---")
    print("This migration drops the torrent_info tables.")
    print("You will lose all of your .torrent files if you have not converted them beforehand.")
    print("Use the migration script at utils/infodict_mysql2file.py")
    print("Type OKAY and hit Enter to continue, CTRL-C to abort.")
    print("--- WARNING ---")

    try:
        answer = input()
    except (KeyboardInterrupt, EOFError):
        # EOFError means stdin is closed or exhausted: treat it as a refusal
        # instead of letting the migration crash with a traceback.
        sys.exit(1)

    if answer != "OKAY":
        sys.exit(1)

    op.drop_table('sukebei_torrents_info')
    op.drop_table('nyaa_torrents_info')
|
||||
|
||||
|
||||
def downgrade():
    """Recreate the ``nyaa_torrents_info`` and ``sukebei_torrents_info`` tables.

    Only the schema is restored (an ``info_dict`` blob keyed by ``torrent_id``
    with a cascading foreign key to the matching torrents table); no rows are
    inserted here.
    """
    # (table name, referenced parent column, foreign key constraint name)
    table_specs = (
        ('nyaa_torrents_info', 'nyaa_torrents.id', 'nyaa_torrents_info_ibfk_1'),
        ('sukebei_torrents_info', 'sukebei_torrents.id', 'sukebei_torrents_info_ibfk_1'),
    )

    for table_name, parent_column, fk_name in table_specs:
        op.create_table(
            table_name,
            sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True),
            sa.Column('torrent_id', mysql.INTEGER(display_width=11),
                      autoincrement=False, nullable=False),
            sa.ForeignKeyConstraint(['torrent_id'], [parent_column],
                                    name=fk_name, ondelete='CASCADE'),
            sa.PrimaryKeyConstraint('torrent_id'),
            mysql_collate='utf8_bin',
            mysql_default_charset='utf8',
            mysql_engine='InnoDB',
            mysql_row_format='COMPRESSED'
        )
|
|
@ -1,13 +1,11 @@
|
|||
import binascii
|
||||
import functools
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
|
||||
import flask
|
||||
|
||||
from nyaa import backend, bencode, forms, models, utils
|
||||
from nyaa.extensions import db
|
||||
from nyaa import backend, forms, models
|
||||
from nyaa.views.torrents import _create_upload_category_choices
|
||||
|
||||
api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api')
|
||||
|
@ -120,142 +118,6 @@ def v2_api_upload():
|
|||
return flask.jsonify({'errors': mapped_errors}), 400
|
||||
|
||||
|
||||
# #################################### TEMPORARY ####################################
|
||||
|
||||
from orderedset import OrderedSet # noqa: E402 isort:skip
|
||||
|
||||
|
||||
@api_blueprint.route('/ghetto_import', methods=['POST'])
def ghetto_import():
    ''' Attach an uploaded .torrent file to an already existing torrent row.

    Only requests coming from 127.0.0.1 are accepted (403 otherwise).
    The uploaded 'torrent' file is decoded and validated, its info hash is
    looked up in the database, and - when the matching torrent has no
    torrent data yet - its info dict, file list and trackers are stored.

    Returns 'success' on completion, or an (error message, 500) tuple when the
    file is missing/malformed, the torrent is unknown, or it already has
    torrent data. '''
    if flask.request.remote_addr != '127.0.0.1':
        # flask.abort raises the HTTP error; flask has no `error` helper
        flask.abort(403)

    torrent_file = flask.request.files.get('torrent')
    if torrent_file is None:
        # No upload at all: report it the same way as an undecodable file
        return 'Malformed torrent file', 500

    try:
        torrent_dict = bencode.decode(torrent_file)
    except (bencode.MalformedBencodeException, UnicodeError):
        return 'Malformed torrent file', 500

    try:
        forms._validate_torrent_metadata(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent metadata ({})'.format(e.args[0]), 500

    try:
        # Only the validation matters here; the returned flag is not used
        forms._validate_trackers(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent trackers ({})'.format(e.args[0]), 500

    bencoded_info_dict = bencode.encode(torrent_dict['info'])
    info_hash = utils.sha1_hash(bencoded_info_dict)

    # Check if the info_hash exists already in the database
    torrent = models.Torrent.by_info_hash(info_hash)
    if not torrent:
        return 'This torrent does not exists', 500

    if torrent.has_torrent:
        return 'This torrent already has_torrent', 500

    # Torrent is legit, pass original filename and dict along
    torrent_data = forms.TorrentFileData(filename=os.path.basename(torrent_file.filename),
                                         torrent_dict=torrent_dict,
                                         info_hash=info_hash,
                                         bencoded_info_dict=bencoded_info_dict)

    # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
    # keys and values have been checked for (see UploadForm in forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict)

    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8')

    # Store bencoded info_dict
    torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict)
    torrent.has_torrent = True

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    used_path_encoding = 'utf-8' if changed_to_utf8 else torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from utf8-bytes
        path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']]

        filename = path_parts.pop()
        current_directory = file_tree_root

        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        # Don't add empty filenames (BitComet directory)
        if filename:
            current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)
            # Flush so the new tracker gets an id before it is referenced below
            db.session.flush()

        db_trackers.add(tracker)

    # Store tracker refs in DB
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id, order=order)
        db.session.add(torrent_tracker)

    db.session.commit()

    return 'success'
|
||||
|
||||
|
||||
# ####################################### INFO #######################################
|
||||
ID_PATTERN = '^[0-9]+$'
|
||||
INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string
|
||||
|
|
|
@ -162,9 +162,10 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
|
|||
|
||||
# Delete existing torrent which is marked as deleted
|
||||
if torrent_data.db_id is not None:
|
||||
models.Torrent.query.filter_by(id=torrent_data.db_id).delete()
|
||||
old_torrent = models.Torrent.by_id(torrent_data.db_id)
|
||||
_delete_torrent_file(old_torrent)
|
||||
db.session.delete(old_torrent)
|
||||
db.session.commit()
|
||||
_delete_cached_torrent_file(torrent_data.db_id)
|
||||
|
||||
# The torrent has been validated and is safe to access with ['foo'] etc - all relevant
|
||||
# keys and values have been checked for (see UploadForm in forms.py for details)
|
||||
|
@ -195,7 +196,15 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
|
|||
uploader_ip=ip_address(flask.request.remote_addr).packed)
|
||||
|
||||
# Store bencoded info_dict
|
||||
torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict)
|
||||
info_dict_path = torrent.info_dict_path
|
||||
|
||||
info_dict_dir = os.path.dirname(info_dict_path)
|
||||
if not os.path.exists(info_dict_dir):
|
||||
os.makedirs(info_dict_dir)
|
||||
|
||||
with open(info_dict_path, 'wb') as out_file:
|
||||
out_file.write(torrent_data.bencoded_info_dict)
|
||||
|
||||
torrent.stats = models.Statistic()
|
||||
torrent.has_torrent = True
|
||||
|
||||
|
@ -361,9 +370,7 @@ def tracker_api(info_hashes, method):
|
|||
return True
|
||||
|
||||
|
||||
def _delete_cached_torrent_file(torrent_id):
    ''' Removes 'torrent_cache/<torrent_id>.torrent' under BASE_DIR, if it exists. '''
    # Note: obviously temporary
    cached_torrent = os.path.join(app.config['BASE_DIR'],
                                  'torrent_cache', str(torrent_id) + '.torrent')
    try:
        os.remove(cached_torrent)
    except FileNotFoundError:
        # Nothing cached, or the file vanished between requests - either way
        # there is nothing left to delete.
        pass
|
||||
def _delete_torrent_file(torrent):
    ''' Removes the info_dict file at torrent.info_dict_path, if it exists. '''
    info_dict_path = torrent.info_dict_path
    try:
        os.remove(info_dict_path)
    except FileNotFoundError:
        # Already gone (never written, or removed concurrently): a separate
        # exists() check would race with that, so just ignore the miss.
        pass
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import base64
|
||||
import os.path
|
||||
import re
|
||||
from datetime import datetime
|
||||
from enum import Enum, IntEnum
|
||||
|
@ -170,11 +171,6 @@ class TorrentBase(DeclarativeHelperBase):
|
|||
backref='torrents', lazy="joined",
|
||||
primaryjoin=join_sql.format(cls.__flavor__))
|
||||
|
||||
@declarative.declared_attr
|
||||
def info(cls):
|
||||
return db.relationship(cls._flavor_prefix('TorrentInfo'), uselist=False,
|
||||
cascade="all, delete-orphan", back_populates='torrent')
|
||||
|
||||
@declarative.declared_attr
|
||||
def filelist(cls):
|
||||
return db.relationship(cls._flavor_prefix('TorrentFilelist'), uselist=False,
|
||||
|
@ -229,13 +225,21 @@ class TorrentBase(DeclarativeHelperBase):
|
|||
# Escaped
|
||||
return escape_markup(self.information)
|
||||
|
||||
@property
def info_dict_path(self):
    ''' Path of this torrent's info_dict file below BASE_DIR, sharded by the
        first two character pairs of the hex info hash:
        'info_dicts/aa/bb/aabbccddee...' '''
    hex_hash = self.info_hash_as_hex
    first_shard, second_shard = hex_hash[0:2], hex_hash[2:4]
    return os.path.join(app.config['BASE_DIR'], 'info_dicts',
                        first_shard, second_shard, hex_hash)
|
||||
|
||||
@property
def info_hash_as_b32(self):
    ''' The info hash, base32-encoded and returned as text. '''
    encoded = base64.b32encode(self.info_hash)
    return str(encoded, 'utf-8')
|
||||
|
||||
@property
|
||||
def info_hash_as_hex(self):
|
||||
return self.info_hash.hex()
|
||||
return self.info_hash.hex().lower()
|
||||
|
||||
@property
|
||||
def magnet_uri(self):
|
||||
|
@ -290,22 +294,6 @@ class TorrentFilelistBase(DeclarativeHelperBase):
|
|||
back_populates='filelist')
|
||||
|
||||
|
||||
class TorrentInfoBase(DeclarativeHelperBase):
    ''' Declarative base for the per-flavor 'torrents_info' table, which holds
        one info_dict blob per torrent. '''
    __tablename_base__ = 'torrents_info'

    __table_args__ = {'mysql_row_format': 'COMPRESSED'}

    @declarative.declared_attr
    def torrent_id(cls):
        # Primary key doubles as the foreign key to the flavor's torrents table
        parent_key = db.ForeignKey(cls._table_prefix('torrents.id'), ondelete="CASCADE")
        return db.Column(db.Integer, parent_key, primary_key=True)

    info_dict = db.Column(MediumBlobType, nullable=True)

    @declarative.declared_attr
    def torrent(cls):
        # One-to-one back reference paired with the torrent's 'info' relationship
        return db.relationship(cls._flavor_prefix('Torrent'),
                               back_populates='info', uselist=False)
|
||||
|
||||
|
||||
class StatisticBase(DeclarativeHelperBase):
|
||||
__tablename_base__ = 'statistics'
|
||||
|
||||
|
@ -806,15 +794,6 @@ class SukebeiTorrentFilelist(TorrentFilelistBase, db.Model):
|
|||
__flavor__ = 'Sukebei'
|
||||
|
||||
|
||||
# TorrentInfo
|
||||
class NyaaTorrentInfo(TorrentInfoBase, db.Model):
    ''' TorrentInfoBase model bound to the 'Nyaa' flavor. '''
    __flavor__ = 'Nyaa'
|
||||
|
||||
|
||||
class SukebeiTorrentInfo(TorrentInfoBase, db.Model):
    ''' TorrentInfoBase model bound to the 'Sukebei' flavor. '''
    __flavor__ = 'Sukebei'
|
||||
|
||||
|
||||
# Statistic
|
||||
class NyaaStatistic(StatisticBase, db.Model):
|
||||
__flavor__ = 'Nyaa'
|
||||
|
@ -882,7 +861,6 @@ class SukebeiReport(ReportBase, db.Model):
|
|||
if config['SITE_FLAVOR'] == 'nyaa':
|
||||
Torrent = NyaaTorrent
|
||||
TorrentFilelist = NyaaTorrentFilelist
|
||||
TorrentInfo = NyaaTorrentInfo
|
||||
Statistic = NyaaStatistic
|
||||
TorrentTrackers = NyaaTorrentTrackers
|
||||
MainCategory = NyaaMainCategory
|
||||
|
@ -895,7 +873,6 @@ if config['SITE_FLAVOR'] == 'nyaa':
|
|||
elif config['SITE_FLAVOR'] == 'sukebei':
|
||||
Torrent = SukebeiTorrent
|
||||
TorrentFilelist = SukebeiTorrentFilelist
|
||||
TorrentInfo = SukebeiTorrentInfo
|
||||
Statistic = SukebeiStatistic
|
||||
TorrentTrackers = SukebeiTorrentTrackers
|
||||
MainCategory = SukebeiMainCategory
|
||||
|
|
|
@ -118,7 +118,7 @@ def create_default_metadata_base(torrent, trackers=None, webseeds=None):
|
|||
return metadata_base
|
||||
|
||||
|
||||
def create_bencoded_torrent(torrent, metadata_base=None):
|
||||
def create_bencoded_torrent(torrent, bencoded_info, metadata_base=None):
|
||||
''' Creates a bencoded torrent metadata for a given torrent,
|
||||
optionally using a given metadata_base dict (note: 'info' key will be
|
||||
popped off the dict) '''
|
||||
|
@ -135,7 +135,6 @@ def create_bencoded_torrent(torrent, metadata_base=None):
|
|||
prefix = bencode.encode(prefixed_dict)
|
||||
suffix = bencode.encode(suffixed_dict)
|
||||
|
||||
bencoded_info = torrent.info.info_dict
|
||||
bencoded_torrent = prefix[:-1] + b'4:info' + bencoded_info + suffix[1:]
|
||||
|
||||
return bencoded_torrent
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import json
|
||||
import os.path
|
||||
from ipaddress import ip_address
|
||||
from urllib.parse import quote
|
||||
|
||||
|
@ -319,7 +318,7 @@ def download_torrent(torrent_id):
|
|||
if torrent.deleted and not (flask.g.user and flask.g.user.is_moderator):
|
||||
flask.abort(404)
|
||||
|
||||
torrent_file, torrent_file_size = _get_cached_torrent_file(torrent)
|
||||
torrent_file, torrent_file_size = _make_torrent_file(torrent)
|
||||
disposition = 'inline; filename="{0}"; filename*=UTF-8\'\'{0}'.format(
|
||||
quote(torrent.torrent_name.encode('utf-8')))
|
||||
|
||||
|
@ -472,18 +471,10 @@ def _create_upload_category_choices():
|
|||
return choices
|
||||
|
||||
|
||||
def _get_cached_torrent_file(torrent):
|
||||
# Note: obviously temporary
|
||||
cached_torrent = os.path.join(app.config['BASE_DIR'],
|
||||
'torrent_cache', str(torrent.id) + '.torrent')
|
||||
if not os.path.exists(cached_torrent):
|
||||
with open(cached_torrent, 'wb') as out_file:
|
||||
metadata_base = torrents.create_default_metadata_base(torrent)
|
||||
# Replace the default comment with url to the torrent page
|
||||
metadata_base['comment'] = flask.url_for('torrents.view',
|
||||
torrent_id=torrent.id,
|
||||
_external=True)
|
||||
def _make_torrent_file(torrent):
|
||||
with open(torrent.info_dict_path, 'rb') as in_file:
|
||||
bencoded_info = in_file.read()
|
||||
|
||||
out_file.write(torrents.create_bencoded_torrent(torrent, metadata_base))
|
||||
bencoded_torrent_data = torrents.create_bencoded_torrent(torrent, bencoded_info)
|
||||
|
||||
return open(cached_torrent, 'rb'), os.path.getsize(cached_torrent)
|
||||
return bencoded_torrent_data, len(bencoded_torrent_data)
|
||||
|
|
1
torrent_cache/.gitignore
vendored
1
torrent_cache/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
*.torrent
|
2
torrents/.gitignore
vendored
Normal file
2
torrents/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
53
utils/infodict_mysql2file.py
Executable file
53
utils/infodict_mysql2file.py
Executable file
|
@ -0,0 +1,53 @@
|
|||
#!/usr/bin/env python3
"""Export info dicts from the MySQL ``*_torrents_info`` tables to files.

Usage: infodict_mysql2file.py <prefix(nyaa|sukebei)> <outdir> [offset]

Every row of ``<prefix>_torrents`` joined with ``<prefix>_torrents_info`` is
written to ``<outdir>/aa/bb/<hex info hash>``, where ``aa`` and ``bb`` are the
first two character pairs of the hex info hash. A running row counter
(starting at ``offset``) is printed after each file - presumably so that an
interrupted run can be resumed by passing the last printed value as offset.
"""
import os
import sys

# The prefix is interpolated into table names, so only known values are allowed.
VALID_PREFIXES = ('nyaa', 'sukebei')

USAGE = "Usage: {0} <prefix(nyaa|sukebei)> <outdir> [offset]"


def shard_path(outdir, info_hash):
    """Return ``<outdir>/<hash[0:2]>/<hash[2:4]>/<hash>`` for a hex info hash."""
    return os.path.join(outdir, info_hash[0:2], info_hash[2:4], info_hash)


def main(argv):
    """Run the export for the given argv list; return the process exit code."""
    if len(argv) < 3 or len(argv) > 4:
        print(USAGE.format(argv[0]))
        return 1

    prefix = argv[1]
    outdir = argv[2]
    if prefix not in VALID_PREFIXES:
        # Table names cannot be bound as query parameters, so refuse anything
        # that is not one of the two known table prefixes.
        print(USAGE.format(argv[0]))
        return 1

    ofs = int(argv[3]) if len(argv) == 4 else 0

    os.makedirs(outdir, exist_ok=True)

    # Imported here so that usage errors are reported even when the MySQL
    # driver is not installed.
    import MySQLdb
    import MySQLdb.cursors

    # NOTE: connection settings are hard-coded; adjust them before running.
    db = MySQLdb.connect(host='localhost',
                         user='test',
                         passwd='test123',
                         db='nyaav2',
                         cursorclass=MySQLdb.cursors.SSCursor)
    try:
        cur = db.cursor()

        # NOTE(review): there is no ORDER BY, so resuming with an offset relies
        # on the server returning rows in a stable order - confirm.
        cur.execute(
            """SELECT
                id,
                info_hash,
                info_dict
            FROM
                {0}_torrents
                JOIN {0}_torrents_info ON torrent_id = id
            LIMIT 18446744073709551610 OFFSET {1}
            """.format(prefix, ofs))

        for _torrent_id, raw_info_hash, info_dict in cur:
            info_hash = raw_info_hash.hex().lower()

            path = shard_path(outdir, info_hash)
            os.makedirs(os.path.dirname(path), exist_ok=True)

            with open(path, 'wb') as fp:
                fp.write(info_dict)

            ofs += 1
            print(ofs)

        cur.close()
    finally:
        db.close()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
Loading…
Reference in a new issue