From 7d56d68728926896c7a78cadff1acb35d6495207 Mon Sep 17 00:00:00 2001
From: Bradlee Speice
Date: Mon, 29 Aug 2016 18:36:14 -0400
Subject: [PATCH] Add a `merge` command to do a simple left-right merge

I'm running this off a RPi at the moment, this is needed to get around
the 2GB limit on a 32-bit MongoDB
---
Revised after review (the blob hashes on the `index` lines still refer to
the originally submitted content):
- merge(): test for an existing document with find_one(); find() returns a
  cursor, never None, so no document was ever moved
- merge(): insert with insert_one() and skip `system.*` collections
- main(): `-h/--host-1` clashed with argparse's automatic `-h/--help`

 metrik/merge.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 setup.py        |  3 ++-
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 metrik/merge.py

diff --git a/metrik/merge.py b/metrik/merge.py
new file mode 100644
index 0000000..53265ff
--- /dev/null
+++ b/metrik/merge.py
@@ -0,0 +1,67 @@
+"""Move documents from a `left` MongoDB instance into a `right` one."""
+from pymongo import MongoClient
+import logging
+import argparse
+
+
+def open_connection(host, port):
+    """Return a `MongoClient` for the server at `host`:`port`."""
+    return MongoClient(host=host, port=port)
+
+
+def merge(con1, con2, database_name='metrik'):
+    """Move every document of `database_name` from `con1` to `con2`.
+
+    `con1` is the `left` (source) connection and `con2` the `right`
+    (destination) one. Each document is inserted on the right and only
+    then deleted on the left. A document whose `_id` already exists on
+    the right is left in place on both sides and a warning is logged.
+    """
+    database1 = con1[database_name]
+    database2 = con2[database_name]
+    for collection_name in database1.collection_names():
+        # `system.*` is MongoDB's reserved namespace for internal
+        # collections; only user collections are moved.
+        if collection_name.startswith('system.'):
+            continue
+        collection1 = database1[collection_name]
+        collection2 = database2[collection_name]
+        for item in collection1.find():
+            item_id = item['_id']
+            # `find()` always returns a cursor (never `None`), so the
+            # existence check has to use `find_one()`.
+            if collection2.find_one({'_id': item_id}) is None:
+                # `insert_one()` refuses to overwrite an existing `_id`,
+                # and deleting last means a failed insert loses nothing.
+                collection2.insert_one(item)
+                collection1.delete_one({'_id': item_id})
+            else:
+                logging.warning('Not moving item %s as the same ID already'
+                                ' exists in the `right` database.', item_id)
+
+
+def main():
+    """Parse the command line and merge the `left` host into the `right`."""
+    # `-h` selects the left host below, which clashes with argparse's
+    # automatic `-h/--help`; disable it and register `--help` by hand.
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument('--help', action='help',
+                        help='show this help message and exit')
+    parser.add_argument('-h', '--host-1', dest='host1',
+                        help='The `left` database to copy from')
+    parser.add_argument('-g', '--host-2', dest='host2',
+                        help='The `right` database to copy into')
+    parser.add_argument('-p', '--port-1', default=27017, dest='port1', type=int,
+                        help='The port number of the `left` database')
+    parser.add_argument('-o', '--port-2', default=27017, dest='port2', type=int,
+                        help='The port number of the `right` database')
+    parser.add_argument('-d', '--database', default='metrik',
+                        help='The database to merge from one host to the other')
+    args = parser.parse_args()
+    con1 = open_connection(args.host1, args.port1)
+    con2 = open_connection(args.host2, args.port2)
+    merge(con1, con2, args.database)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
index 6070625..df61d6d 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,8 @@ setup(
     ],
     entry_points={
         'console_scripts': [
-            'metrik = metrik.batch:handle_commandline'
+            'metrik = metrik.batch:handle_commandline',
+            'metrik-merge = metrik.merge:main'
         ]
     }
 )
\ No newline at end of file