import json import os import re from datetime import datetime, timedelta from ipaddress import ip_address import flask from werkzeug import secure_filename import sqlalchemy from orderedset import OrderedSet from nyaa import models, utils from nyaa.extensions import db app = flask.current_app # Blacklists for _validate_torrent_filenames # TODO: consider moving to config.py? CHARACTER_BLACKLIST = [ '\u202E', # RIGHT-TO-LEFT OVERRIDE ] FILENAME_BLACKLIST = [ # Windows reserved filenames 'con', 'nul', 'prn', 'aux', 'com0', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', 'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', ] # Invalid RSS characters regex, used to sanitize some strings ILLEGAL_XML_CHARS_RE = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') def sanitize_string(string, replacement='\uFFFD'): ''' Simply replaces characters based on a regex ''' return ILLEGAL_XML_CHARS_RE.sub(replacement, string) class TorrentExtraValidationException(Exception): def __init__(self, errors={}): self.errors = errors @utils.cached_function def get_category_id_map(): ''' Reads database for categories and turns them into a dict with ids as keys and name list as the value, ala {'1_0': ['Anime'], '1_2': ['Anime', 'English-translated'], ...} ''' cat_id_map = {} for main_cat in models.MainCategory.query: cat_id_map[main_cat.id_as_string] = [main_cat.name] for sub_cat in main_cat.sub_categories: cat_id_map[sub_cat.id_as_string] = [main_cat.name, sub_cat.name] return cat_id_map def _replace_utf8_values(dict_or_list): ''' Will replace 'property' with 'property.utf-8' and remove latter if it exists. Thanks, bitcomet! :/ ''' did_change = False if isinstance(dict_or_list, dict): for key in [key for key in dict_or_list.keys() if key.endswith('.utf-8')]: dict_or_list[key.replace('.utf-8', '')] = dict_or_list.pop(key) did_change = True for value in dict_or_list.values(): did_change = _replace_utf8_values(value) or did_change elif isinstance(dict_or_list, list): for item in dict_or_list: did_change = _replace_utf8_values(item) or did_change return did_change def _recursive_dict_iterator(source): ''' Iterates over a given dict, yielding (key, value) pairs, recursing inside any dicts. ''' # TODO Make a proper dict-filetree walker for key, value in source.items(): yield (key, value) if isinstance(value, dict): for kv in _recursive_dict_iterator(value): yield kv def _validate_torrent_filenames(torrent): ''' Checks path parts of a torrent's filetree against blacklisted characters and filenames, returning False on rejection ''' file_tree = json.loads(torrent.filelist.filelist_blob.decode('utf-8')) for path_part, value in _recursive_dict_iterator(file_tree): if path_part.rsplit('.', 1)[0].lower() in FILENAME_BLACKLIST: return False if any(True for c in CHARACTER_BLACKLIST if c in path_part): return False return True def validate_torrent_post_upload(torrent, upload_form=None): ''' Validates a Torrent instance before it's saved to the database. Enforcing user-and-such-based validations is more flexible here vs WTForm context ''' errors = { 'torrent_file': [] } # Encorce minimum size for userless uploads minimum_anonymous_torrent_size = app.config['MINIMUM_ANONYMOUS_TORRENT_SIZE'] if torrent.user is None and torrent.filesize < minimum_anonymous_torrent_size: errors['torrent_file'].append('Torrent too small for an anonymous uploader') if not _validate_torrent_filenames(torrent): errors['torrent_file'].append('Torrent has forbidden characters in filenames') # Remove keys with empty lists errors = {k: v for k, v in errors.items() if v} if errors: if upload_form: # Add error messages to the form fields for field_name, field_errors in errors.items(): getattr(upload_form, field_name).errors.extend(field_errors) # Clear out the wtforms dict to force a regeneration upload_form._errors = None raise TorrentExtraValidationException(errors) def check_uploader_ratelimit(user): ''' Figures out if user (or IP address from flask.request) may upload within upload ratelimit. Returns a tuple of current datetime, count of torrents uploaded within burst duration and timestamp for next allowed upload. ''' now = datetime.utcnow() next_allowed_time = now Torrent = models.Torrent def filter_uploader(query): if user: return query.filter(sqlalchemy.or_( Torrent.user == user, Torrent.uploader_ip == ip_address(flask.request.remote_addr).packed)) else: return query.filter(Torrent.uploader_ip == ip_address(flask.request.remote_addr).packed) time_range_start = datetime.utcnow() - timedelta(seconds=app.config['UPLOAD_BURST_DURATION']) # Count torrents uploaded by user/ip within given time period torrent_count_query = db.session.query(sqlalchemy.func.count(Torrent.id)) torrent_count = filter_uploader(torrent_count_query).filter( Torrent.created_time >= time_range_start).scalar() # If user has reached burst limit... if torrent_count >= app.config['MAX_UPLOAD_BURST']: # Check how long ago their latest torrent was (we know at least one will exist) last_torrent = filter_uploader(Torrent.query).order_by(Torrent.created_time.desc()).first() after_timeout = last_torrent.created_time + timedelta(seconds=app.config['UPLOAD_TIMEOUT']) if now < after_timeout: next_allowed_time = after_timeout return now, torrent_count, next_allowed_time def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): ''' Stores a torrent to the database. May throw TorrentExtraValidationException if the form/torrent fails post-WTForm validation! Exception messages will also be added to their relevant fields on the given form. ''' torrent_data = upload_form.torrent_file.parsed_data # Anonymous uploaders and non-trusted uploaders no_or_new_account = (not uploading_user or (uploading_user.age < app.config['RATELIMIT_ACCOUNT_AGE'] and not uploading_user.is_trusted)) if app.config['RATELIMIT_UPLOADS'] and no_or_new_account: now, torrent_count, next_time = check_uploader_ratelimit(uploading_user) if next_time > now: # This will flag the dialog in upload.html red and tell API users what's wrong upload_form.ratelimit.errors = ["You've gone over the upload ratelimit."] raise TorrentExtraValidationException() if not uploading_user: if app.config['RAID_MODE_LIMIT_UPLOADS']: # XXX TODO: rename rangebanned to something more generic upload_form.rangebanned.errors = [app.config['RAID_MODE_UPLOADS_MESSAGE']] raise TorrentExtraValidationException() elif models.RangeBan.is_rangebanned(ip_address(flask.request.remote_addr).packed): upload_form.rangebanned.errors = ["Your IP is banned from " "uploading anonymously."] raise TorrentExtraValidationException() # Delete existing torrent which is marked as deleted if torrent_data.db_id is not None: old_torrent = models.Torrent.by_id(torrent_data.db_id) db.session.delete(old_torrent) db.session.commit() # Delete physical file after transaction has been committed _delete_info_dict(old_torrent) # The torrent has been validated and is safe to access with ['foo'] etc - all relevant # keys and values have been checked for (see UploadForm in forms.py for details) info_dict = torrent_data.torrent_dict['info'] changed_to_utf8 = _replace_utf8_values(torrent_data.torrent_dict) # Use uploader-given name or grab it from the torrent display_name = upload_form.display_name.data.strip() or info_dict['name'].decode('utf8').strip() information = (upload_form.information.data or '').strip() description = (upload_form.description.data or '').strip() # Sanitize fields display_name = sanitize_string(display_name) information = sanitize_string(information) description = sanitize_string(description) torrent_filesize = info_dict.get('length') or sum( f['length'] for f in info_dict.get('files')) # In case no encoding, assume UTF-8. torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8') torrent = models.Torrent(id=torrent_data.db_id, info_hash=torrent_data.info_hash, display_name=display_name, torrent_name=torrent_data.filename, information=information, description=description, encoding=torrent_encoding, filesize=torrent_filesize, user=uploading_user, uploader_ip=ip_address(flask.request.remote_addr).packed) # Store bencoded info_dict info_dict_path = torrent.info_dict_path info_dict_dir = os.path.dirname(info_dict_path) os.makedirs(info_dict_dir, exist_ok=True) with open(info_dict_path, 'wb') as out_file: out_file.write(torrent_data.bencoded_info_dict) torrent.stats = models.Statistic() torrent.has_torrent = True # Fields with default value will be None before first commit, so set .flags torrent.flags = 0 torrent.anonymous = upload_form.is_anonymous.data if uploading_user else True torrent.hidden = upload_form.is_hidden.data torrent.remake = upload_form.is_remake.data torrent.complete = upload_form.is_complete.data # Copy trusted status from user if possible can_mark_trusted = uploading_user and uploading_user.is_trusted # To do, automatically mark trusted if user is trusted unless user specifies otherwise torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False # Only allow mods to upload locked torrents can_mark_locked = uploading_user and uploading_user.is_moderator torrent.comment_locked = upload_form.is_comment_locked.data if can_mark_locked else False # Set category ids torrent.main_category_id, torrent.sub_category_id = \ upload_form.category.parsed_data.get_category_ids() # To simplify parsing the filelist, turn single-file torrent into a list torrent_filelist = info_dict.get('files') used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding parsed_file_tree = dict() if not torrent_filelist: # If single-file, the root will be the file-tree (no directory) file_tree_root = parsed_file_tree torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}] else: # If multi-file, use the directory name as root for files file_tree_root = parsed_file_tree.setdefault( info_dict['name'].decode(used_path_encoding), {}) # Parse file dicts into a tree for file_dict in torrent_filelist: # Decode path parts from utf8-bytes path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']] filename = path_parts.pop() current_directory = file_tree_root for directory in path_parts: current_directory = current_directory.setdefault(directory, {}) # Don't add empty filenames (BitComet directory) if filename: current_directory[filename] = file_dict['length'] parsed_file_tree = utils.sorted_pathdict(parsed_file_tree) json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8') torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes) db.session.add(torrent) db.session.flush() # Store the users trackers trackers = OrderedSet() announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii') if announce: trackers.add(announce) # List of lists with single item announce_list = torrent_data.torrent_dict.get('announce-list', []) for announce in announce_list: trackers.add(announce[0].decode('ascii')) # Store webseeds # qBittorrent doesn't omit url-list but sets it as '' even when there are no webseeds webseed_list = torrent_data.torrent_dict.get('url-list') or [] if isinstance(webseed_list, bytes): webseed_list = [webseed_list] # qB doesn't contain a sole url in a list webseeds = OrderedSet(webseed.decode('utf-8') for webseed in webseed_list) # Remove our trackers, maybe? TODO ? # Search for/Add trackers in DB db_trackers = OrderedSet() for announce in trackers: tracker = models.Trackers.by_uri(announce) # Insert new tracker if not found if not tracker: tracker = models.Trackers(uri=announce) db.session.add(tracker) db.session.flush() elif tracker.is_webseed: # If we have an announce marked webseed (user error, malicy?), reset it. # Better to have "bad" announces than "hiding" proper announces in webseeds/url-list. tracker.is_webseed = False db.session.flush() db_trackers.add(tracker) # Same for webseeds for webseed_url in webseeds: webseed = models.Trackers.by_uri(webseed_url) if not webseed: webseed = models.Trackers(uri=webseed_url, is_webseed=True) db.session.add(webseed) db.session.flush() # Don't add trackers into webseeds if webseed.is_webseed: db_trackers.add(webseed) # Store tracker refs in DB for order, tracker in enumerate(db_trackers): torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, tracker_id=tracker.id, order=order) db.session.add(torrent_tracker) # Before final commit, validate the torrent again validate_torrent_post_upload(torrent, upload_form) # Add to tracker whitelist db.session.add(models.TrackerApi(torrent.info_hash, 'insert')) db.session.commit() # Store the actual torrent file as well torrent_file = upload_form.torrent_file.data if app.config.get('BACKUP_TORRENT_FOLDER'): torrent_file.seek(0, 0) torrent_dir = app.config['BACKUP_TORRENT_FOLDER'] os.makedirs(torrent_dir, exist_ok=True) torrent_path = os.path.join(torrent_dir, '{}.{}'.format( torrent.id, secure_filename(torrent_file.filename))) torrent_file.save(torrent_path) torrent_file.close() return torrent def _delete_info_dict(torrent): info_dict_path = torrent.info_dict_path if os.path.exists(info_dict_path): os.remove(info_dict_path)