From fed612143032d09d8770f559fd6cca47beabf7cd Mon Sep 17 00:00:00 2001
From: Paul McCarthy <pauldmccarthy@gmail.com>
Date: Thu, 9 Mar 2023 19:38:21 +0000
Subject: [PATCH] ENH,RF: - Renamed "overrides" to "selectors".         -
 Flatten dictionaries in main config file, e.g. "[struct_T1] k = v"          
 is flattened to struct_T1_k = v         - New gettuple method         - Added
 ability to override values e.g. from command-line.         - Basic type
 validation applied to overrides - error if their           parsed type does
 not match the stored type

---
 bip/utils/config.py | 291 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 236 insertions(+), 55 deletions(-)

diff --git a/bip/utils/config.py b/bip/utils/config.py
index edaf295..5e47e8e 100644
--- a/bip/utils/config.py
+++ b/bip/utils/config.py
@@ -1,6 +1,23 @@
 #!/usr/bin/env python
+"""This module provides the BIP Config class. The Config class is used
+throughout BIP for accessing settings stored in BIP configuration files.
 
 
+# Configuration file format
+
+TODO
+
+# Selectors - alternate setting values for specific scenarios
+
+TODO
+
+# Type validation
+
+TODO
+"""
+
+
+import              copy
 import              glob
 import functools as ft
 import itertools as it
@@ -24,6 +41,19 @@ import bip
 log = logging.getLogger(__name__)
 
 
+MAIN_CONFIG_FILE_NAME = 'config.toml'
+"""Name of the primary BIP configuration file. This file contains global
+settings, and may contain pipeline-specific settings which take precedence
+over equivalent settings in secondary configuration files.
+"""
+
+
+DEFAULT_CONFIG_DIRECTORY = op.join(op.dirname(bip.__file__), 'data', 'config')
+"""Default configuration directory. Used when a Config object is created with
+specifying a directory.
+"""
+
+
 def nested_lookup(d, key):
     """Look up a value in a nested dictionary based on the given key.
     For example, imagine we have a dictionary d:
@@ -46,7 +76,74 @@ def nested_lookup(d, key):
     return nested_lookup(d, key[1:])
 
 
+def flatten_dictionary(d, nlevels=1):
+    """Flattens a nested dictionary.
+
+    Given a dictionary such as:
+
+        {
+            'k1' : 'v1',
+            'k2' : {
+                'sk1' : 'sv1',
+                'sk2' : 'sv2'
+            }
+        }
+
+    this function will adjust the dictionary to have structure:
+        {
+            'k1'     : 'v1',
+            'k2_sk1' : 'sv1'
+            'k2_sk2' : 'sv2'
+        }
+
+    If nlevels is 1 (the default), the function will only flatten
+    sub-dictionaries that are one-level deep.
+    """
+
+    d = copy.deepcopy(d)
+
+    for key, val in list(d.items()):
+        if not isinstance(val, dict):
+            continue
+
+        if nlevels > 1:
+            val = flatten_dictionary(val, nlevels - 1)
+
+        for subkey, subval in val.items():
+            subkey    = f'{key}_{subkey}'
+            d[subkey] = subval
+
+        d.pop(key)
+
+    return d
+
+
+def parse_override_value(name, origval, val):
+    """Use tomllib to coerce a string to a suitable type."""
+
+    # If the configuration file value is a string,
+    # return the override value unchanged
+    if isinstance(origval, str):
+        return val
+
+    # Otherwise use tomllib to convert the value.
+    # Leave as a string on failures, but emit
+    # a warning.
+    try:
+        return tomllib.loads(f'val = {val}')['val']
+    except Exception:
+        log.warning('Cannot parse override value for %s (%s) - '
+                    'leaving value as a string.', name, val)
+        return val
+
+
 class Config:
+    """The Config class is a dictionary containing BIP settings. A Config
+    can be created by specifying the directory which contains all BIP
+    configuration files, e.g.:
+
+        cfg = Config('path/to/config/dir/')
+    """
 
 
     @staticmethod
@@ -67,7 +164,10 @@ class Config:
         BIP settings may be grouped according to the file identifier - the file
         prefix is used solely for enforcing a particular ordering.
         """
-        base = op.basename(fname).removesuffix('.toml')
+        base = op.basename(fname)
+        if base == MAIN_CONFIG_FILE_NAME:
+            return 'config'
+        base = base.removesuffix('.toml')
         # Drop file prefix if present
         return base.split('.')[-1]
 
@@ -80,17 +180,17 @@ class Config:
 
         A BIP configuration is a directory containing one or more .toml files.
 
-        BIP configuration files are intended to be loaded in reverse-
-        alphabetical order, with settings in later files overwriting settings
-        in earlier files.
+        BIP configuration files are intended to be loaded in alphabetical
+        order, with settings in later files overwriting settings in earlier
+        files.
 
         The "config.toml" file must be loaded last, so that settings
         contained within it take precedence over all other files.
         """
 
         cfgfiles = glob.glob(op.join(cfgdir, '*.toml'))
-        cfgfiles = sorted(cfgfiles, key=str.lower, reverse=True)
-        maincfg  = op.join(cfgdir, 'config.toml')
+        cfgfiles = sorted(cfgfiles, key=str.lower)
+        maincfg  = op.join(cfgdir, MAIN_CONFIG_FILE_NAME)
 
         # make sure main config is loaded last
         if maincfg in cfgfiles:
@@ -101,28 +201,28 @@ class Config:
 
 
     @staticmethod
-    def resolve_overrides(settings, **overrides):
-        """Resolves and applies any "override" configuration settings.
+    def resolve_selectors(settings, selectors):
+        """Resolves and applies any "selector" configuration settings.
 
         A BIP configuration file may contain one default value for a
         setting, but may also contain alternate values for that setting
         which should be used in certain scenarios. Each alternate value
-        is associated with a set of "override" parameters, which are
+        is associated with a set of "selector" parameters, which are
         just key-value pairs.
 
         For example, a file may contain a default value for a setting called
         "param1", and an alternate value for param1 to be used when the
-        "subject" override parameter is set to "12345":
+        "subject" selector parameter is set to "12345":
 
             param1               = 0.5
             subject.12345.param1 = 0.4
 
-        If resolve_overrides is given these settings along with subject=12345,
-        the default value for param1 will be replaced with the override value.
+        If resolve_selectors is given these settings along with subject=12345,
+        the default value for param1 will be replaced with the selector value.
 
-        Multiple override parameters may be specified - in the configuration
-        file, the override parameters must be ordered alphabetically. For
-        example, if we have override parameters "subject" and "visit", the
+        Multiple selector parameters may be specified - in the configuration
+        file, the selector parameters must be ordered alphabetically. For
+        example, if we have selector parameters "subject" and "visit", the
         alternate values for subject=123 and visit=2 must be specified as:
 
             param1                     = 0.5
@@ -134,20 +234,21 @@ class Config:
             visit.2.subject.123.param1 = 0.3
         """
 
-        # Take the override parameters, and generate all possible
+        settings = copy.deepcopy(settings)
+
+        # Take the selector parameters, and generate all possible
         # candidate keys - e.g. {'subject' : '123', 'visit' : '1'}
         # will result in:
         #
         #   ['subject', '123']
         #   ['visit',   '1']
         #   ['subject', '123', 'visit', '1']
-        kvps     = [[str(k), str(v)] for k, v in overrides.items()]
+        kvps     = [[str(k), str(v)] for k, v in selectors.items()]
         patterns = [it.combinations(kvps, i) for i in range(1, len(kvps) + 1)]
         patterns = it.chain(*patterns)
         patterns = [ft.reduce(operator.add, p) for p in patterns]
 
         for pat in patterns:
-
             try:
                 val = nested_lookup(settings, pat)
             except KeyError:
@@ -155,7 +256,7 @@ class Config:
             if not isinstance(val, dict):
                 continue
 
-            log.debug('Updating settings from override: %s', pat)
+            log.debug('Updating settings from selector: %s', pat)
             for k, v in val.items():
 
                 # If we have a configuration like:
@@ -163,14 +264,54 @@ class Config:
                 #   subject.123.visit.1.param = 0.5
                 #   visit.2.param             = 0.5
                 #
-                # and we are processing ['subject', '123'],
-                # we do not want to override 'visit'.
-                if k not in overrides:
+                # and we are processing ['subject', '123'], we
+                # do not want to clobber the original 'visit'.
+                if k not in selectors:
                     settings[k] = v
 
+        return settings
+
 
     @staticmethod
-    def load_config_file(cfgfile, **overrides):
+    def apply_overrides(settings, overrides=None):
+        """Override some settings with override values.
+
+        Any value read from the configuration files can be overridden.
+
+        If an override value has an incompatible type with the value from the
+        configuration file, a ValueError is raised.
+        """
+
+        def types_match(old, new):
+            """Return True if old and new are of compatible types."""
+            if isinstance(old, (float, int)):
+                return isinstance(new, (float, int))
+            else:
+                return isinstance(new, type(old))
+
+        settings = copy.deepcopy(settings)
+
+        for key, val in overrides.items():
+            origval = settings.get(key, None)
+
+            # Coerce strings to a toml type
+            if isinstance(val, str):
+                val = parse_override_value(key, origval, val)
+
+            # Reject the new value if it has a
+            # different type to the original value
+            if origval is not None and not types_match(origval, val):
+                raise ValueError(f'{key}: override value has wrong type (got '
+                                 f'{type(val)}, expected {type(origval)}')
+
+            log.debug('Overriding %s (%s -> %s)', key, origval, val)
+            settings[key] = val
+
+        return settings
+
+
+    @staticmethod
+    def load_config_file(cfgfile, selectors=None):
         """Load a BIP configuration file. The file is assumed to be a TOML
         file, named as described in the config_file_identifier documentation.
 
@@ -190,36 +331,53 @@ class Config:
         file.
         """
 
+        if selectors is None:
+            selectors = {}
+
         with open(cfgfile, 'rb') as f:
             settings = tomllib.load(f)
 
         ident     = Config.config_file_identifier(cfgfile)
-        overrides = {k : overrides[k] for k in sorted(overrides.keys())}
+        selectors = {k : selectors[k] for k in sorted(selectors.keys())}
+        settings  = Config.resolve_selectors(settings, selectors)
 
-        Config.resolve_overrides(settings, **overrides)
-
-        # TODO if main file, unpack dictionaries - e.g. we want
-        # to be able to have things like this in config.yaml:
+        # Any tables in the main config file are relabelled
+        # to "<tablename>_<setting>.  For example, if the
+        # main config.toml contains:
         #
-        # some_global_param = 75
+        #     some_global_param = 75
         #
-        # [T1_struct]
-        # bet_f = 0.2  # should be accessible via "T1_struct_bet_f"
-
-        if ident == 'config' : ident = ''
-        else:                  ident = f'{ident}_'
+        #     [T1_struct]
+        #     bet_f = 0.2
+        #
+        # bet_f is relabelled to T1_struct_bet_f
+        if ident == 'config':
+            settings = flatten_dictionary(settings, 1)
 
-        settings = {f'{ident}{k}' : v for k, v in settings.items()}
+        # All settings in secondary configuration files
+        # are relabelled to "<ident>_<setting>". For example,
+        # if T1_struct.toml contains:
+        #
+        #     bet_f = 0.5
+        #
+        # bet_f is relabelled to T1_struct_bet_f
+        else:
+            ident    = f'{ident}_'
+            settings = {f'{ident}{k}' : v for k, v in settings.items()}
 
         return settings
 
 
-    def __init__(self, cfgdir=None, **overrides):
-        """
+    def __init__(self, cfgdir=None, selectors=None, overrides=None):
+        """Create a Config object. Read configuration files from cfgdir.
+
+        Selectors are applied using Config.resolve_selectors.
+        Override values are applied using Config.apply_overrides.
         """
 
-        if cfgdir is None:
-            cfgdir = op.join(op.dirname(bip.__file__), 'data', 'config')
+        if selectors is None: selectors = {}
+        if overrides is None: overrides = {}
+        if cfgdir    is None: cfgdir    = DEFAULT_CONFIG_DIRECTORY
 
         cfgfiles = Config.list_config_files(cfgdir)
 
@@ -230,7 +388,9 @@ class Config:
 
         for fname in cfgfiles:
             log.debug('Loading settings from %s', fname)
-            settings.update(Config.load_config_file(fname, **overrides))
+            settings.update(Config.load_config_file(fname, selectors))
+
+        settings = Config.apply_overrides(settings, overrides)
 
         self.__settings = settings
         self.__cfgdir   = cfgdir
@@ -238,43 +398,64 @@ class Config:
 
     @property
     def cfgdir(self):
-        """
-        """
+        """Return the directory that the config files were loaded from. """
         return self.__cfgdir
 
 
-    def __contains__(self, name):
-        return name in self.__settings
+    def __contains__(self, key):
+        """Return True if a configuration item with key exists. """
+        return key in self.__settings
 
 
-    def __getitem__(self, name):
-        """Return the configuration item with the specified name. """
-        return self.__settings[name]
+    def __getitem__(self, key):
+        """Return the configuration item with the specified key. """
+        return self.__settings[key]
 
 
-    def __getattr__(self, name):
-        """Return the configuration item with the specified name. """
-        return self[name]
+    def __getattr__(self, key):
+        """Return the configuration item with the specified key. """
+        return self[kwey]
 
 
-    def get(self, name, default=None):
-        if name not in self:
+    def get(self, key, default=None):
+        """Return value associated with key.
+        Return default if there is no value associated with key.
+        """
+        if key not in self:
             return default
-        return self[name]
+        return self[key]
+
 
+    def getall(self, prefix=None, **kwargs):
+        """Return all requested values with given key prefix.
+        The specified default value is used for any keys which are not present.
+        The values are returned as a dictionary, with the keys un-prefixed.
+        """
+        if prefix is None: prefix = ''
+        else:              prefix = f'{prefix}_'
 
-    def getall(self, prefix, **kwargs):
         values = {}
         for k, v in kwargs.items():
-            values[k] = self.get(k, v)
+            values[k] = self.get(f'{prefix}{k}', v)
+
         return values
 
 
+    def gettuple(self, prefix=None, **kwargs):
+        """Same as getall, but values are returned as a tuple. """
+        return tuple(self.getall(prefix, **kwargs))
+
+
     def keys(self):
+        """Return all keys present in the config. """
         return self.__settings.keys()
 
+
     def values(self):
+        """Return all values present in the config. """
         return self.__settings.values()
 
+
     def items(self):
+        """Return all key-value pairs present in the config. """
         return self.__settings.items()
-- 
GitLab