From 76193599d09ad2986d9c15e9e113dbb3d7be6536 Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Thu, 25 Aug 2016 23:17:35 -0400 Subject: [PATCH] Be more stingy with connections I think the act of spinning up ~4000 MongoTargets for the Equities caused things to blow up; this way connections will only be created/used when operations are being performed. This is theoretically less efficient, but from what I've read in the documentation Mongo should have some pooling tricks to not sacrifice performance. --- metrik/__init__.py | 2 +- metrik/targets/mongo.py | 35 +++++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/metrik/__init__.py b/metrik/__init__.py index 602e0e8..c195454 100644 --- a/metrik/__init__.py +++ b/metrik/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.3.6' +__version__ = '0.3.7' __release__ = __version__ \ No newline at end of file diff --git a/metrik/targets/mongo.py b/metrik/targets/mongo.py index eef8d96..9b1045a 100644 --- a/metrik/targets/mongo.py +++ b/metrik/targets/mongo.py @@ -1,30 +1,41 @@ from luigi import Target from pymongo import MongoClient -from metrik.conf import get_config from datetime import datetime +from contextlib import contextmanager + +from metrik.conf import get_config class MongoTarget(Target): - def __init__(self, collection, id): + @contextmanager + def get_db(self): config = get_config() - self.connection = MongoClient( + client = MongoClient( host=config.get('metrik', 'mongo_host'), - port=config.getint('metrik', 'mongo_port'))[ - config.get('metrik', 'mongo_database') - ] - self.collection = self.connection[collection] + port=config.getint('metrik', 'mongo_port')) + + yield client[config.get('metrik', 'mongo_database')] + + client.close() + + def __init__(self, collection, id): + self.collection = collection self.id = id def exists(self): - return self.collection.find_one({ - '_id': self.id - }) is not None + with self.get_db() as db: + return db[self.collection].find_one({ 
'_id': self.id + }) is not None def persist(self, dict_object): id_dict = dict_object id_dict['_id'] = self.id - return self.collection.insert_one(id_dict).inserted_id + + with self.get_db() as db: + return db[self.collection].insert_one(id_dict).inserted_id def retrieve(self): - return self.collection.find_one({'_id': self.id}) + with self.get_db() as db: + return db[self.collection].find_one({'_id': self.id})