Source code for matador.db.connect

# coding: utf-8
# Distributed under the terms of the MIT License.

""" Some simple utilities for making DB connections. """

import pymongo as pm
from matador.config import load_custom_settings

[docs]def make_connection_to_collection(coll_names, check_collection=False, allow_changelog=False, mongo_settings=None, override=False, import_mode=False, quiet=True, debug=False): """ Connect to database of choice. Parameters: coll_names (str): name of collection. Keyword Arguments: check_collection (bool): check whether collections exist (forces connection) allow_changelog (bool): allow queries to collections with names prefixed by __ mongo_settings (dict): dict containing mongo and related config override (bool): don't ask for user input from stdin and assume all is well quiet (bool): don't print very much. Returns: client (MongoClient): the connection to the database db (Database): the database to query collections (dict): Collection objects indexed by name """ if mongo_settings is None: settings = load_custom_settings(no_quickstart=override) else: settings = mongo_settings if not quiet: print('Trying to connect to {host}:{port}/{db}'.format(**settings['mongo'])) client = pm.MongoClient( host=settings['mongo']['host'], port=settings['mongo']['port'], connect=False, maxIdleTimeMS=600000, # disconnect after 10 minutes idle socketTimeoutMS=3600000, # give up on database after 1 hr without results serverSelectionTimeoutMS=10000, # give up on server after 2 seconds without results connectTimeoutMS=10000) # give up trying to connect to new database after 2 seconds try: database_names = client.list_database_names() if not quiet: print('Success!') except pm.errors.ServerSelectionTimeoutError as exc: print('{}: {}'.format(type(exc).__name__, exc)) raise SystemExit('Unable to connect to {host}:{port}/{db}, exiting...'.format(**settings['mongo'])) if settings['mongo']['db'] not in database_names: if override: response = 'y' else: response = input('Database {db} does not exist at {host}:{port}/{db}, ' 'would you like to create it? (y/n) ' .format(**settings['mongo'])) if response.lower() != 'y': raise SystemExit('Exiting...') else: print('Creating database {}'.format(settings['mongo']['db'])) db = client[settings['mongo']['db']] possible_collections = [name for name in db.list_collection_names() if not name.startswith('__')] collections = dict() # allow lists of collections for backwards-compat, though normally # we only want to connect to one at a time if coll_names is not None: if not isinstance(coll_names, list): coll_names = [coll_names] if len(coll_names) > 1: raise NotImplementedError("Querying multiple collections is no longer supported.") for collection in coll_names: if not allow_changelog: if collection.startswith('__'): raise SystemExit('Queries to collections prefixed with __ are VERBOTEN!') if collection not in possible_collections: options = fuzzy_collname_match(collection, possible_collections) if not options and check_collection: client.close() raise SystemExit('Collection {} not found!'.format(collection)) else: print('Collection {} not found, did you mean one of these?'.format(collection)) for ind, value in enumerate(options[:10]): print('({}):\t{}'.format(ind, value)) if check_collection: try: choice = int(input('Please enter your choice: ')) collection = options[choice] except Exception: raise SystemExit('Invalid choice. Exiting...') elif import_mode: if override: choice = 'y' else: choice = input('Are you sure you want to make a new collection called {}? (y/n) ' .format(collection)) if choice.lower() != 'y' and choice.lower != 'yes': try: choice = int(input('Then please enter your choice from above: ')) collection = options[choice] except Exception: raise SystemExit('Invalid choice. Exiting...') collections[collection] = db[collection] else: default_collection = settings['mongo']['default_collection'] if default_collection not in possible_collections: if check_collection: client.close() raise SystemExit('Default collection {} not found!'.format(default_collection)) else: print('Creating new collection {}...'.format(default_collection)) collections['repo'] = db[default_collection] return client, db, collections
[docs]def fuzzy_collname_match(trial, targets): """ Do a noddy fuzzy match for bits between punctuation, e.g. matthews_cool_database will search for matthews, cool and database in the known collection names. Parameters: trial (str): database search name. targets (list): list of existing database names. Returns: list: list of roughly matching collection names ordered by occurence of tokens. """ split_chars = set([char for char in trial if (not char.isalpha() and not char.isdigit())]) tokens = trial for char in split_chars: tokens = tokens.replace(char, ' ') tokens = tokens.split() options = {} for token in tokens: for option in targets: if token in option: if option not in options: options[option] = 0 else: options[option] += 1 options_list = [item[0] for item in list(sorted(options.items(), reverse=True, key=lambda item: item[1]))] return options_list