Pad info_hash to a fixed width before hex-encoding it for Elasticsearch.

Adds an identical pad_bytes(in_bytes, size) helper to import_to_es.py and
sync_es.py. The helper right-pads in_bytes with NUL bytes up to `size` and
returns inputs that are already `size` bytes or longer unchanged. mk_es and
reindex_torrent now index pad_bytes(info_hash, 20).hex() instead of
info_hash.hex().

NOTE(review): 20 is presumably the SHA-1 info-hash length in bytes -- confirm.
NOTE(review): this patch had been collapsed onto a single line. Line breaks
were restored from the hunk headers, but the leading whitespace of the
context lines inside mk_es and reindex_torrent was reconstructed -- confirm
against the target files, or apply with whitespace-insensitive matching.

diff --git a/import_to_es.py b/import_to_es.py
index ee1ec72..c2ed46f 100755
--- a/import_to_es.py
+++ b/import_to_es.py
@@ -21,6 +21,9 @@ app = create_app('config')
 es = Elasticsearch(timeout=30)
 ic = IndicesClient(es)
 
+def pad_bytes(in_bytes, size):
+    return in_bytes + (b'\x00' * max(0, size - len(in_bytes)))
+
 # turn into thing that elasticsearch indexes. We flatten in
 # the stats (seeders/leechers) so we can order by them in es naturally.
 # we _don't_ dereference uploader_id to the user's display name however,
@@ -42,7 +45,7 @@ def mk_es(t, index_name):
             "created_time": t.created_time,
             # not analyzed but included so we can render magnet links
             # without querying sql again.
-            "info_hash": t.info_hash.hex(),
+            "info_hash": pad_bytes(t.info_hash, 20).hex(),
             "filesize": t.filesize,
             "uploader_id": t.uploader_id,
             "main_category_id": t.main_category_id,
diff --git a/sync_es.py b/sync_es.py
index 444c905..c4a9025 100755
--- a/sync_es.py
+++ b/sync_es.py
@@ -73,6 +73,9 @@ ES_CHUNK_SIZE = config.get('es_chunk_size', 10000)
 # interacts with es' refresh_interval setting.
 FLUSH_INTERVAL = config.get('flush_interval', 5)
 
+def pad_bytes(in_bytes, size):
+    return in_bytes + (b'\x00' * max(0, size - len(in_bytes)))
+
 def reindex_torrent(t, index_name):
     # XXX annoyingly different from import_to_es, and
     # you need to keep them in sync manually.
@@ -85,7 +88,7 @@ def reindex_torrent(t, index_name):
         "description": t['description'],
         # not analyzed but included so we can render magnet links
         # without querying sql again.
-        "info_hash": t['info_hash'].hex(),
+        "info_hash": pad_bytes(t['info_hash'], 20).hex(),
         "filesize": t['filesize'],
         "uploader_id": t['uploader_id'],
         "main_category_id": t['main_category_id'],