Skip to content
Snippets Groups Projects
Commit fed61214 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

ENH,RF: - Renamed "overrides" to "selectors".

        - Flatten dictionaries in main config file, e.g. "[struct_T1] k = v"
          is flattened to struct_T1_k = v
        - New gettuple method
        - Added ability to override values e.g. from command-line.
        - Basic type validation applied to overrides - error if their
          parsed type does not match the stored type
parent 9dc06559
No related branches found
No related tags found
1 merge request!3BIP configuration system
#!/usr/bin/env python #!/usr/bin/env python
"""This module provides the BIP Config class. The Config class is used
throughout BIP for accessing settings stored in BIP configuration files.
# Configuration file format
TODO
# Selectors - alternate setting values for specific scenarios
TODO
# Type validation
TODO
"""
import copy
import glob import glob
import functools as ft import functools as ft
import itertools as it import itertools as it
...@@ -24,6 +41,19 @@ import bip ...@@ -24,6 +41,19 @@ import bip
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
MAIN_CONFIG_FILE_NAME = 'config.toml'
"""Name of the primary BIP configuration file. This file contains global
settings, and may contain pipeline-specific settings which take precedence
over equivalent settings in secondary configuration files.
"""
DEFAULT_CONFIG_DIRECTORY = op.join(op.dirname(bip.__file__), 'data', 'config')
"""Default configuration directory. Used when a Config object is created
without specifying a directory.
"""
def nested_lookup(d, key): def nested_lookup(d, key):
"""Look up a value in a nested dictionary based on the given key. """Look up a value in a nested dictionary based on the given key.
For example, imagine we have a dictionary d: For example, imagine we have a dictionary d:
...@@ -46,7 +76,74 @@ def nested_lookup(d, key): ...@@ -46,7 +76,74 @@ def nested_lookup(d, key):
return nested_lookup(d, key[1:]) return nested_lookup(d, key[1:])
def flatten_dictionary(d, nlevels=1):
    """Return a flattened copy of a nested dictionary.

    Each value which is itself a dictionary is removed, and its entries are
    stored directly in the parent under keys of the form "<key>_<subkey>".
    For example, this dictionary:

        {
            'k1' : 'v1',
            'k2' : {
                'sk1' : 'sv1',
                'sk2' : 'sv2'
            }
        }

    is turned into:

        {
            'k1'     : 'v1',
            'k2_sk1' : 'sv1',
            'k2_sk2' : 'sv2'
        }

    With nlevels=1 (the default) only the first level of nesting is
    collapsed - dictionaries nested more deeply are left as they are. Larger
    values of nlevels collapse that many levels.

    The input dictionary is not modified; a deep copy is made and returned.
    """
    result = copy.deepcopy(d)

    # Snapshot the nested entries before touching
    # the result, as it is modified in the loop.
    nested = [(k, v) for k, v in result.items() if isinstance(v, dict)]

    for parent, children in nested:

        # Collapse deeper levels first, so that the
        # children are already flat when merged in.
        if nlevels > 1:
            children = flatten_dictionary(children, nlevels - 1)

        result.update({f'{parent}_{child}': value
                       for child, value in children.items()})
        del result[parent]

    return result
def parse_override_value(name, origval, val):
"""Use tomllib to coerce a string to a suitable type."""
# If the configuration file value is a string,
# return the override value unchanged
if isinstance(origval, str):
return val
# Otherwise use tomllib to convert the value.
# Leave as a string on failures, but emit
# a warning.
try:
return tomllib.loads(f'val = {val}')['val']
except Exception:
log.warning('Cannot parse override value for %s (%s) - '
'leaving value as a string.', name, val)
return val
class Config: class Config:
"""The Config class is a dictionary containing BIP settings. A Config
can be created by specifying the directory which contains all BIP
configuration files, e.g.:
cfg = Config('path/to/config/dir/')
"""
@staticmethod @staticmethod
...@@ -67,7 +164,10 @@ class Config: ...@@ -67,7 +164,10 @@ class Config:
BIP settings may be grouped according to the file identifier - the file BIP settings may be grouped according to the file identifier - the file
prefix is used solely for enforcing a particular ordering. prefix is used solely for enforcing a particular ordering.
""" """
base = op.basename(fname).removesuffix('.toml') base = op.basename(fname)
if base == MAIN_CONFIG_FILE_NAME:
return 'config'
base = base.removesuffix('.toml')
# Drop file prefix if present # Drop file prefix if present
return base.split('.')[-1] return base.split('.')[-1]
...@@ -80,17 +180,17 @@ class Config: ...@@ -80,17 +180,17 @@ class Config:
A BIP configuration is a directory containing one or more .toml files. A BIP configuration is a directory containing one or more .toml files.
BIP configuration files are intended to be loaded in reverse- BIP configuration files are intended to be loaded in alphabetical
alphabetical order, with settings in later files overwriting settings order, with settings in later files overwriting settings in earlier
in earlier files. files.
The "config.toml" file must be loaded last, so that settings The "config.toml" file must be loaded last, so that settings
contained within it take precedence over all other files. contained within it take precedence over all other files.
""" """
cfgfiles = glob.glob(op.join(cfgdir, '*.toml')) cfgfiles = glob.glob(op.join(cfgdir, '*.toml'))
cfgfiles = sorted(cfgfiles, key=str.lower, reverse=True) cfgfiles = sorted(cfgfiles, key=str.lower)
maincfg = op.join(cfgdir, 'config.toml') maincfg = op.join(cfgdir, MAIN_CONFIG_FILE_NAME)
# make sure main config is loaded last # make sure main config is loaded last
if maincfg in cfgfiles: if maincfg in cfgfiles:
...@@ -101,28 +201,28 @@ class Config: ...@@ -101,28 +201,28 @@ class Config:
@staticmethod @staticmethod
def resolve_overrides(settings, **overrides): def resolve_selectors(settings, selectors):
"""Resolves and applies any "override" configuration settings. """Resolves and applies any "selector" configuration settings.
A BIP configuration file may contain one default value for a A BIP configuration file may contain one default value for a
setting, but may also contain alternate values for that setting setting, but may also contain alternate values for that setting
which should be used in certain scenarios. Each alternate value which should be used in certain scenarios. Each alternate value
is associated with a set of "override" parameters, which are is associated with a set of "selector" parameters, which are
just key-value pairs. just key-value pairs.
For example, a file may contain a default value for a setting called For example, a file may contain a default value for a setting called
"param1", and an alternate value for param1 to be used when the "param1", and an alternate value for param1 to be used when the
"subject" override parameter is set to "12345": "subject" selector parameter is set to "12345":
param1 = 0.5 param1 = 0.5
subject.12345.param1 = 0.4 subject.12345.param1 = 0.4
If resolve_overrides is given these settings along with subject=12345, If resolve_selectors is given these settings along with subject=12345,
the default value for param1 will be replaced with the override value. the default value for param1 will be replaced with the selector value.
Multiple override parameters may be specified - in the configuration Multiple selector parameters may be specified - in the configuration
file, the override parameters must be ordered alphabetically. For file, the selector parameters must be ordered alphabetically. For
example, if we have override parameters "subject" and "visit", the example, if we have selector parameters "subject" and "visit", the
alternate values for subject=123 and visit=2 must be specified as: alternate values for subject=123 and visit=2 must be specified as:
param1 = 0.5 param1 = 0.5
...@@ -134,20 +234,21 @@ class Config: ...@@ -134,20 +234,21 @@ class Config:
visit.2.subject.123.param1 = 0.3 visit.2.subject.123.param1 = 0.3
""" """
# Take the override parameters, and generate all possible settings = copy.deepcopy(settings)
# Take the selector parameters, and generate all possible
# candidate keys - e.g. {'subject' : '123', 'visit' : '1'} # candidate keys - e.g. {'subject' : '123', 'visit' : '1'}
# will result in: # will result in:
# #
# ['subject', '123'] # ['subject', '123']
# ['visit', '1'] # ['visit', '1']
# ['subject', '123', 'visit', '1'] # ['subject', '123', 'visit', '1']
kvps = [[str(k), str(v)] for k, v in overrides.items()] kvps = [[str(k), str(v)] for k, v in selectors.items()]
patterns = [it.combinations(kvps, i) for i in range(1, len(kvps) + 1)] patterns = [it.combinations(kvps, i) for i in range(1, len(kvps) + 1)]
patterns = it.chain(*patterns) patterns = it.chain(*patterns)
patterns = [ft.reduce(operator.add, p) for p in patterns] patterns = [ft.reduce(operator.add, p) for p in patterns]
for pat in patterns: for pat in patterns:
try: try:
val = nested_lookup(settings, pat) val = nested_lookup(settings, pat)
except KeyError: except KeyError:
...@@ -155,7 +256,7 @@ class Config: ...@@ -155,7 +256,7 @@ class Config:
if not isinstance(val, dict): if not isinstance(val, dict):
continue continue
log.debug('Updating settings from override: %s', pat) log.debug('Updating settings from selector: %s', pat)
for k, v in val.items(): for k, v in val.items():
# If we have a configuration like: # If we have a configuration like:
...@@ -163,14 +264,54 @@ class Config: ...@@ -163,14 +264,54 @@ class Config:
# subject.123.visit.1.param = 0.5 # subject.123.visit.1.param = 0.5
# visit.2.param = 0.5 # visit.2.param = 0.5
# #
# and we are processing ['subject', '123'], # and we are processing ['subject', '123'], we
# we do not want to override 'visit'. # do not want to clobber the original 'visit'.
if k not in overrides: if k not in selectors:
settings[k] = v settings[k] = v
return settings
@staticmethod
def apply_overrides(settings, overrides=None):
    """Override some settings with override values.

    Any value read from the configuration files can be overridden. settings
    is a dictionary of settings, and overrides is a dictionary of
    {key : value} replacements. Passing overrides=None is equivalent to
    passing an empty dictionary.

    Override values which are strings are first converted with
    parse_override_value.

    If an override value has an incompatible type with the value from the
    configuration file, a ValueError is raised. Overrides for keys which
    have no existing value are accepted without a type check.

    The input settings are not modified - an updated deep copy is returned.
    """

    def types_match(old, new):
        """Return True if old and new are of compatible types."""
        # bool is a subclass of int, so booleans must be
        # handled before the numeric check - otherwise a
        # boolean would be accepted for a numeric setting
        # (and a number for a boolean setting).
        if isinstance(old, bool) or isinstance(new, bool):
            return isinstance(old, bool) and isinstance(new, bool)
        if isinstance(old, (float, int)):
            return isinstance(new, (float, int))
        return isinstance(new, type(old))

    if overrides is None:
        overrides = {}

    settings = copy.deepcopy(settings)

    for key, val in overrides.items():

        origval = settings.get(key, None)

        # Coerce strings to a toml type
        if isinstance(val, str):
            val = parse_override_value(key, origval, val)

        # Reject the new value if it has a
        # different type to the original value
        if origval is not None and not types_match(origval, val):
            raise ValueError(f'{key}: override value has wrong type (got '
                             f'{type(val)}, expected {type(origval)})')

        log.debug('Overriding %s (%s -> %s)', key, origval, val)
        settings[key] = val

    return settings
@staticmethod
def load_config_file(cfgfile, selectors=None):
"""Load a BIP configuration file. The file is assumed to be a TOML """Load a BIP configuration file. The file is assumed to be a TOML
file, named as described in the config_file_identifier documentation. file, named as described in the config_file_identifier documentation.
...@@ -190,36 +331,53 @@ class Config: ...@@ -190,36 +331,53 @@ class Config:
file. file.
""" """
if selectors is None:
selectors = {}
with open(cfgfile, 'rb') as f: with open(cfgfile, 'rb') as f:
settings = tomllib.load(f) settings = tomllib.load(f)
ident = Config.config_file_identifier(cfgfile) ident = Config.config_file_identifier(cfgfile)
overrides = {k : overrides[k] for k in sorted(overrides.keys())} selectors = {k : selectors[k] for k in sorted(selectors.keys())}
settings = Config.resolve_selectors(settings, selectors)
Config.resolve_overrides(settings, **overrides) # Any tables in the main config file are relabelled
# to "<tablename>_<setting>. For example, if the
# TODO if main file, unpack dictionaries - e.g. we want # main config.toml contains:
# to be able to have things like this in config.yaml:
# #
# some_global_param = 75 # some_global_param = 75
# #
# [T1_struct] # [T1_struct]
# bet_f = 0.2 # should be accessible via "T1_struct_bet_f" # bet_f = 0.2
#
if ident == 'config' : ident = '' # bet_f is relabelled to T1_struct_bet_f
else: ident = f'{ident}_' if ident == 'config':
settings = flatten_dictionary(settings, 1)
settings = {f'{ident}{k}' : v for k, v in settings.items()} # All settings in secondary configuration files
# are relabelled to "<ident>_<setting>". For example,
# if T1_struct.toml contains:
#
# bet_f = 0.5
#
# bet_f is relabelled to T1_struct_bet_f
else:
ident = f'{ident}_'
settings = {f'{ident}{k}' : v for k, v in settings.items()}
return settings return settings
def __init__(self, cfgdir=None, **overrides): def __init__(self, cfgdir=None, selectors=None, overrides=None):
""" """Create a Config object. Read configuration files from cfgdir.
Selectors are applied using Config.resolve_selectors.
Override values are applied using Config.apply_overrides.
""" """
if cfgdir is None: if selectors is None: selectors = {}
cfgdir = op.join(op.dirname(bip.__file__), 'data', 'config') if overrides is None: overrides = {}
if cfgdir is None: cfgdir = DEFAULT_CONFIG_DIRECTORY
cfgfiles = Config.list_config_files(cfgdir) cfgfiles = Config.list_config_files(cfgdir)
...@@ -230,7 +388,9 @@ class Config: ...@@ -230,7 +388,9 @@ class Config:
for fname in cfgfiles: for fname in cfgfiles:
log.debug('Loading settings from %s', fname) log.debug('Loading settings from %s', fname)
settings.update(Config.load_config_file(fname, **overrides)) settings.update(Config.load_config_file(fname, selectors))
settings = Config.apply_overrides(settings, overrides)
self.__settings = settings self.__settings = settings
self.__cfgdir = cfgdir self.__cfgdir = cfgdir
...@@ -238,43 +398,64 @@ class Config: ...@@ -238,43 +398,64 @@ class Config:
@property @property
def cfgdir(self): def cfgdir(self):
""" """Return the directory that the config files were loaded from. """
"""
return self.__cfgdir return self.__cfgdir
def __contains__(self, name): def __contains__(self, key):
return name in self.__settings """Return True if a configuration item with key exists. """
return key in self.__settings
def __getitem__(self, name): def __getitem__(self, key):
"""Return the configuration item with the specified name. """ """Return the configuration item with the specified key. """
return self.__settings[name] return self.__settings[key]
def __getattr__(self, name): def __getattr__(self, key):
"""Return the configuration item with the specified name. """ """Return the configuration item with the specified key. """
return self[name] return self[kwey]
def get(self, name, default=None): def get(self, key, default=None):
if name not in self: """Return value associated with key.
Return default if there is no value associated with key.
"""
if key not in self:
return default return default
return self[name] return self[key]
def getall(self, prefix=None, **kwargs):
"""Return all requested values with given key prefix.
The specified default value is used for any keys which are not present.
The values are returned as a dictionary, with the keys un-prefixed.
"""
if prefix is None: prefix = ''
else: prefix = f'{prefix}_'
def getall(self, prefix, **kwargs):
values = {} values = {}
for k, v in kwargs.items(): for k, v in kwargs.items():
values[k] = self.get(k, v) values[k] = self.get(f'{prefix}{k}', v)
return values return values
def gettuple(self, prefix=None, **kwargs):
"""Same as getall, but values are returned as a tuple. """
return tuple(self.getall(prefix, **kwargs))
def keys(self): def keys(self):
"""Return all keys present in the config. """
return self.__settings.keys() return self.__settings.keys()
def values(self): def values(self):
"""Return all values present in the config. """
return self.__settings.values() return self.__settings.values()
def items(self): def items(self):
"""Return all key-value pairs present in the config. """
return self.__settings.items() return self.__settings.items()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment