From fed612143032d09d8770f559fd6cca47beabf7cd Mon Sep 17 00:00:00 2001 From: Paul McCarthy <pauldmccarthy@gmail.com> Date: Thu, 9 Mar 2023 19:38:21 +0000 Subject: [PATCH] ENH,RF: - Renamed "overrides" to "selectors". - Flatten dictionaries in main config file, e.g. "[struct_T1] k = v" is flattened to struct_T1_k = v - New gettuple method - Added ability to override values e.g. from command-line. - Basic type validation applied to overrides - error if their parsed type does not match the stored type --- bip/utils/config.py | 291 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 236 insertions(+), 55 deletions(-) diff --git a/bip/utils/config.py b/bip/utils/config.py index edaf295..5e47e8e 100644 --- a/bip/utils/config.py +++ b/bip/utils/config.py @@ -1,6 +1,23 @@ #!/usr/bin/env python +"""This module provides the BIP Config class. The Config class is used +throughout BIP for accessing settings stored in BIP configuration files. +# Configuration file format + +TODO + +# Selectors - alternate setting values for specific scenarios + +TODO + +# Type validation + +TODO +""" + + +import copy import glob import functools as ft import itertools as it @@ -24,6 +41,19 @@ import bip log = logging.getLogger(__name__) +MAIN_CONFIG_FILE_NAME = 'config.toml' +"""Name of the primary BIP configuration file. This file contains global +settings, and may contain pipeline-specific settings which take precedence +over equivalent settings in secondary configuration files. +""" + + +DEFAULT_CONFIG_DIRECTORY = op.join(op.dirname(bip.__file__), 'data', 'config') +"""Default configuration directory. Used when a Config object is created with +specifying a directory. +""" + + def nested_lookup(d, key): """Look up a value in a nested dictionary based on the given key. For example, imagine we have a dictionary d: @@ -46,7 +76,74 @@ def nested_lookup(d, key): return nested_lookup(d, key[1:]) +def flatten_dictionary(d, nlevels=1): + """Flattens a nested dictionary. + + Given a dictionary such as: + + { + 'k1' : 'v1', + 'k2' : { + 'sk1' : 'sv1', + 'sk2' : 'sv2' + } + } + + this function will adjust the dictionary to have structure: + { + 'k1' : 'v1', + 'k2_sk1' : 'sv1' + 'k2_sk2' : 'sv2' + } + + If nlevels is 1 (the default), the function will only flatten + sub-dictionaries that are one-level deep. + """ + + d = copy.deepcopy(d) + + for key, val in list(d.items()): + if not isinstance(val, dict): + continue + + if nlevels > 1: + val = flatten_dictionary(val, nlevels - 1) + + for subkey, subval in val.items(): + subkey = f'{key}_{subkey}' + d[subkey] = subval + + d.pop(key) + + return d + + +def parse_override_value(name, origval, val): + """Use tomllib to coerce a string to a suitable type.""" + + # If the configuration file value is a string, + # return the override value unchanged + if isinstance(origval, str): + return val + + # Otherwise use tomllib to convert the value. + # Leave as a string on failures, but emit + # a warning. + try: + return tomllib.loads(f'val = {val}')['val'] + except Exception: + log.warning('Cannot parse override value for %s (%s) - ' + 'leaving value as a string.', name, val) + return val + + class Config: + """The Config class is a dictionary containing BIP settings. A Config + can be created by specifying the directory which contains all BIP + configuration files, e.g.: + + cfg = Config('path/to/config/dir/') + """ @staticmethod @@ -67,7 +164,10 @@ class Config: BIP settings may be grouped according to the file identifier - the file prefix is used solely for enforcing a particular ordering. """ - base = op.basename(fname).removesuffix('.toml') + base = op.basename(fname) + if base == MAIN_CONFIG_FILE_NAME: + return 'config' + base = base.removesuffix('.toml') # Drop file prefix if present return base.split('.')[-1] @@ -80,17 +180,17 @@ class Config: A BIP configuration is a directory containing one or more .toml files. - BIP configuration files are intended to be loaded in reverse- - alphabetical order, with settings in later files overwriting settings - in earlier files. + BIP configuration files are intended to be loaded in alphabetical + order, with settings in later files overwriting settings in earlier + files. The "config.toml" file must be loaded last, so that settings contained within it take precedence over all other files. """ cfgfiles = glob.glob(op.join(cfgdir, '*.toml')) - cfgfiles = sorted(cfgfiles, key=str.lower, reverse=True) - maincfg = op.join(cfgdir, 'config.toml') + cfgfiles = sorted(cfgfiles, key=str.lower) + maincfg = op.join(cfgdir, MAIN_CONFIG_FILE_NAME) # make sure main config is loaded last if maincfg in cfgfiles: @@ -101,28 +201,28 @@ class Config: @staticmethod - def resolve_overrides(settings, **overrides): - """Resolves and applies any "override" configuration settings. + def resolve_selectors(settings, selectors): + """Resolves and applies any "selector" configuration settings. A BIP configuration file may contain one default value for a setting, but may also contain alternate values for that setting which should be used in certain scenarios. Each alternate value - is associated with a set of "override" parameters, which are + is associated with a set of "selector" parameters, which are just key-value pairs. For example, a file may contain a default value for a setting called "param1", and an alternate value for param1 to be used when the - "subject" override parameter is set to "12345": + "subject" selector parameter is set to "12345": param1 = 0.5 subject.12345.param1 = 0.4 - If resolve_overrides is given these settings along with subject=12345, - the default value for param1 will be replaced with the override value. + If resolve_selectors is given these settings along with subject=12345, + the default value for param1 will be replaced with the selector value. - Multiple override parameters may be specified - in the configuration - file, the override parameters must be ordered alphabetically. For - example, if we have override parameters "subject" and "visit", the + Multiple selector parameters may be specified - in the configuration + file, the selector parameters must be ordered alphabetically. For + example, if we have selector parameters "subject" and "visit", the alternate values for subject=123 and visit=2 must be specified as: param1 = 0.5 @@ -134,20 +234,21 @@ class Config: visit.2.subject.123.param1 = 0.3 """ - # Take the override parameters, and generate all possible + settings = copy.deepcopy(settings) + + # Take the selector parameters, and generate all possible # candidate keys - e.g. {'subject' : '123', 'visit' : '1'} # will result in: # # ['subject', '123'] # ['visit', '1'] # ['subject', '123', 'visit', '1'] - kvps = [[str(k), str(v)] for k, v in overrides.items()] + kvps = [[str(k), str(v)] for k, v in selectors.items()] patterns = [it.combinations(kvps, i) for i in range(1, len(kvps) + 1)] patterns = it.chain(*patterns) patterns = [ft.reduce(operator.add, p) for p in patterns] for pat in patterns: - try: val = nested_lookup(settings, pat) except KeyError: @@ -155,7 +256,7 @@ class Config: if not isinstance(val, dict): continue - log.debug('Updating settings from override: %s', pat) + log.debug('Updating settings from selector: %s', pat) for k, v in val.items(): # If we have a configuration like: @@ -163,14 +264,54 @@ class Config: # subject.123.visit.1.param = 0.5 # visit.2.param = 0.5 # - # and we are processing ['subject', '123'], - # we do not want to override 'visit'. - if k not in overrides: + # and we are processing ['subject', '123'], we + # do not want to clobber the original 'visit'. + if k not in selectors: settings[k] = v + return settings + @staticmethod - def load_config_file(cfgfile, **overrides): + def apply_overrides(settings, overrides=None): + """Override some settings with override values. + + Any value read from the configuration files can be overridden. + + If an override value has an incompatible type with the value from the + configuration file, a ValueError is raised. + """ + + def types_match(old, new): + """Return True if old and new are of compatible types.""" + if isinstance(old, (float, int)): + return isinstance(new, (float, int)) + else: + return isinstance(new, type(old)) + + settings = copy.deepcopy(settings) + + for key, val in overrides.items(): + origval = settings.get(key, None) + + # Coerce strings to a toml type + if isinstance(val, str): + val = parse_override_value(key, origval, val) + + # Reject the new value if it has a + # different type to the original value + if origval is not None and not types_match(origval, val): + raise ValueError(f'{key}: override value has wrong type (got ' + f'{type(val)}, expected {type(origval)}') + + log.debug('Overriding %s (%s -> %s)', key, origval, val) + settings[key] = val + + return settings + + + @staticmethod + def load_config_file(cfgfile, selectors=None): """Load a BIP configuration file. The file is assumed to be a TOML file, named as described in the config_file_identifier documentation. @@ -190,36 +331,53 @@ class Config: file. """ + if selectors is None: + selectors = {} + with open(cfgfile, 'rb') as f: settings = tomllib.load(f) ident = Config.config_file_identifier(cfgfile) - overrides = {k : overrides[k] for k in sorted(overrides.keys())} + selectors = {k : selectors[k] for k in sorted(selectors.keys())} + settings = Config.resolve_selectors(settings, selectors) - Config.resolve_overrides(settings, **overrides) - - # TODO if main file, unpack dictionaries - e.g. we want - # to be able to have things like this in config.yaml: + # Any tables in the main config file are relabelled + # to "<tablename>_<setting>. For example, if the + # main config.toml contains: # - # some_global_param = 75 + # some_global_param = 75 # - # [T1_struct] - # bet_f = 0.2 # should be accessible via "T1_struct_bet_f" - - if ident == 'config' : ident = '' - else: ident = f'{ident}_' + # [T1_struct] + # bet_f = 0.2 + # + # bet_f is relabelled to T1_struct_bet_f + if ident == 'config': + settings = flatten_dictionary(settings, 1) - settings = {f'{ident}{k}' : v for k, v in settings.items()} + # All settings in secondary configuration files + # are relabelled to "<ident>_<setting>". For example, + # if T1_struct.toml contains: + # + # bet_f = 0.5 + # + # bet_f is relabelled to T1_struct_bet_f + else: + ident = f'{ident}_' + settings = {f'{ident}{k}' : v for k, v in settings.items()} return settings - def __init__(self, cfgdir=None, **overrides): - """ + def __init__(self, cfgdir=None, selectors=None, overrides=None): + """Create a Config object. Read configuration files from cfgdir. + + Selectors are applied using Config.resolve_selectors. + Override values are applied using Config.apply_overrides. """ - if cfgdir is None: - cfgdir = op.join(op.dirname(bip.__file__), 'data', 'config') + if selectors is None: selectors = {} + if overrides is None: overrides = {} + if cfgdir is None: cfgdir = DEFAULT_CONFIG_DIRECTORY cfgfiles = Config.list_config_files(cfgdir) @@ -230,7 +388,9 @@ class Config: for fname in cfgfiles: log.debug('Loading settings from %s', fname) - settings.update(Config.load_config_file(fname, **overrides)) + settings.update(Config.load_config_file(fname, selectors)) + + settings = Config.apply_overrides(settings, overrides) self.__settings = settings self.__cfgdir = cfgdir @@ -238,43 +398,64 @@ class Config: @property def cfgdir(self): - """ - """ + """Return the directory that the config files were loaded from. """ return self.__cfgdir - def __contains__(self, name): - return name in self.__settings + def __contains__(self, key): + """Return True if a configuration item with key exists. """ + return key in self.__settings - def __getitem__(self, name): - """Return the configuration item with the specified name. """ - return self.__settings[name] + def __getitem__(self, key): + """Return the configuration item with the specified key. """ + return self.__settings[key] - def __getattr__(self, name): - """Return the configuration item with the specified name. """ - return self[name] + def __getattr__(self, key): + """Return the configuration item with the specified key. """ + return self[kwey] - def get(self, name, default=None): - if name not in self: + def get(self, key, default=None): + """Return value associated with key. + Return default if there is no value associated with key. + """ + if key not in self: return default - return self[name] + return self[key] + + def getall(self, prefix=None, **kwargs): + """Return all requested values with given key prefix. + The specified default value is used for any keys which are not present. + The values are returned as a dictionary, with the keys un-prefixed. + """ + if prefix is None: prefix = '' + else: prefix = f'{prefix}_' - def getall(self, prefix, **kwargs): values = {} for k, v in kwargs.items(): - values[k] = self.get(k, v) + values[k] = self.get(f'{prefix}{k}', v) + return values + def gettuple(self, prefix=None, **kwargs): + """Same as getall, but values are returned as a tuple. """ + return tuple(self.getall(prefix, **kwargs)) + + def keys(self): + """Return all keys present in the config. """ return self.__settings.keys() + def values(self): + """Return all values present in the config. """ return self.__settings.values() + def items(self): + """Return all key-value pairs present in the config. """ return self.__settings.items() -- GitLab