Commit a46df4a2 authored by Paul McCarthy's avatar Paul McCarthy 🚵
Browse files

ENH: New "metaproc" plugin type, for manipulating column metadata. Used to

generate descriptions for binarised ICD10 columns
parent 643e9195
......@@ -12,16 +12,18 @@ and for cleaning and processing.
The following plugin types currently exist:
+-------------------+------------------------------------------------------+
| Plugin type | |
+-------------------+------------------------------------------------------|
| ``sniffer`` | Return information about the columns in a file |
| ``loader`` | Load data from a file |
| ``cleaner`` | Run a cleaning function on a single column |
| ``processor`` | Run a processing fnction on one or more data columns |
| ``formatter`` | Format a column for output |
| ``exporter`` | Export the processed data set |
+-------------------+------------------------------------------------------+
+-------------------+-------------------------------------------------------+
| Plugin type | |
+-------------------+-------------------------------------------------------+
| ``sniffer`` | Return information about the columns in a file |
| ``loader`` | Load data from a file |
| ``cleaner`` | Run a cleaning function on a single column |
| ``processor`` | Run a processing function on one or more data columns |
| ``metaproc`` | Run a function on a :class:`.Column` ``metadata`` |
| | value |
| ``formatter`` | Format a column for output |
| ``exporter`` | Export the processed data set |
+-------------------+-------------------------------------------------------+
To ensure that the ``ukbparse`` command line help is nicely formatted, all
......@@ -66,6 +68,7 @@ PLUGIN_TYPES = ['loader',
'formatter',
'cleaner',
'processor',
'metaproc',
'exporter']
......
......@@ -36,8 +36,10 @@ import collections
import pyparsing as pp
import ukbparse.util as util
import ukbparse.custom as custom
from . import util
from . import icd10
from . import custom
from . import hierarchy
log = logging.getLogger(__name__)
......@@ -202,10 +204,11 @@ class Process(object):
# cleaner functions are not
# defined in processing_functions,
# so in this case func will be None.
self.__ptype = ptype
self.__name = name
self.__args = args
self.__kwargs = kwargs
self.__ptype = ptype
self.__name = name
self.__args = args
self.__kwargs = kwargs
self.__metaproc = kwargs.pop('metaproc', None)
def __repr__(self):
......@@ -242,11 +245,27 @@ class Process(object):
"""Run the process on the data, passing it the given arguments,
and any arguments that were passed to :meth:`__init__`.
"""
return custom.run(self.__ptype,
self.__name,
*args,
*self.__args,
**self.__kwargs)
result = custom.run(self.__ptype,
self.__name,
*args,
*self.__args,
**self.__kwargs)
if self.__metaproc is not None and \
isinstance(result, tuple) and \
len(result) == 4:
meta = result[3]
mproc = self.__metaproc
try:
meta = [custom.runMetaproc(mproc, m) for m in meta]
except Exception as e:
log.warning('Metadata processing function failed: %s', e)
result = tuple(list(result[:3]) + [meta])
return result
def parseProcesses(procs, ptype):
......@@ -333,3 +352,23 @@ def makeParser():
function = funcName + pp.Optional(allargs)
return function
@custom.metaproc('icd10.numdesc')
def icd10DescriptionFromNumeric(val):
    """Builds a descriptive label for a numeric ICD10 code.

    The numeric value is first converted to its ICD10 code with
    ``icd10.numericToCode``, and that code is then looked up in the
    ``icd10`` hierarchy file.

    :arg val: Numeric representation of an ICD10 code.
    :returns: A string of the form ``'<code> - <description>'``.
    """
    code = icd10.numericToCode(val)

    # The hierarchy file is located and loaded on every call; any
    # caching is presumably handled inside the hierarchy module
    # (not visible here - TODO confirm).
    hierFile = hierarchy.getHierarchyFilePath(name='icd10')
    icd10Hier = hierarchy.loadHierarchyFile(hierFile)

    return '{} - {}'.format(code, icd10Hier.description(code))
@custom.metaproc('icd10.codedesc')
def icd10DescriptionFromCode(val):
    """Generates a description for an ICD10 code.

    :arg val: ICD10 code to describe.
    :returns: A string of the form ``'<code> - <description>'``.
    """
    # ``val`` is already an ICD10 code, so it is used for the hierarchy
    # lookup as-is. icd10DescriptionFromNumeric queries the same hierarchy
    # with a *code* (after converting its numeric input), so converting the
    # code to its numeric form here would look up - and report - the wrong
    # kind of value.
    hier = hierarchy.getHierarchyFilePath(name='icd10')
    hier = hierarchy.loadHierarchyFile(hier)
    desc = hier.description(val)
    return '{} - {}'.format(val, desc)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment