New job dependency logic causes an error when passing an empty sequence for jobhold
Hi @duncan (also pinging @ndcn0236, as fsl-pipe
is affected by this),
The recent updates to fsl_sub_plugin_slurm have caused a minor issue - with version 1.5.2, it is possible to call fsl_sub.submit
with an empty sequence, e.g.:
from fsl_sub import submit
submit('some_command', name='JOB_NAME', jobhold=())
However, with fsl_sub_plugin_slurm
this raises an error (click for full output with fsl_sub
debugging enabled):
DEBUG:fsl_sub:Submit called with:
DEBUG:fsl_sub:ls SOME_NAME 1 None () False None None None None None None None True None True None mza328@rescomp1.hpc.in.bmrc.ox.ac.uk N
one None None None False 1 False None True False None None
DEBUG:fsl_sub.config:Validating config {'method': 'slurm', 'modulecmd': '/usr/bin/modulecmd', 'export_vars': ['SUBJECTS_DIR'], 'thread_c
ontrol': ['OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'MKL_DOMAIN_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'GOTO_NUM_THREADS', 'FSLSUB_PARALLEL']
, 'silence_warnings': ['cuda'], 'method_opts': {'shell': {'queues': False, 'mail_support': False, 'has_parallel_envs': False, 'map_ram':
False, 'job_priorities': False, 'array_holds': False, 'architecture': False, 'job_resources': False, 'script_conf': False, 'projects':
False, 'run_parallel': True, 'parallel_disable_matches': ['*_gpu'], 'log_to_file': True}, 'slurm': {'memory_in_gb': False, 'queues': Tru
e, 'copy_environment': False, 'has_parallel_envs': False, 'script_conf': True, 'mail_support': False, 'mail_modes': {'b': ['BEGIN'], 'e'
: ['END'], 'a': ['FAIL', 'REQUEUE'], 'f': ['ALL'], 'n': ['NONE']}, 'mail_mode': 'n', 'notify_ram_usage': True, 'set_time_limit': False,
'array_holds': True, 'array_limit': True, 'projects': True, 'keep_jobscript': False, 'preserve_modules': True, 'add_module_paths': ['/we
ll/win/software/modules'], 'strict_dependencies': False}}, 'queues': {'gpu_short': {'time': 14400, 'slot_size': 60.8, 'map_ram': False,
'max_size': 1500, 'max_slots': 4, 'copros': {'cuda_all': {'max_quantity': 4, 'classes': ['R', 'P', 'V', 'V-16', 'V-32', 'A', 'A-40', 'A-
80']}, 'cuda_ml': {'max_quantity': 4, 'classes': ['R', 'V', 'V-16', 'V-32', 'A', 'A-40', 'A-80']}, 'cuda': {'max_quantity': 4, 'classes'
: ['P', 'V', 'V-16', 'V-32', 'A', 'A-40', 'A-80']}}}, 'gpu_long': {'time': 216000, 'slot_size': 60.8, 'map_ram': False, 'max_size': 1500
, 'max_slots': 4, 'copros': {'cuda_all': {'max_quantity': 4, 'classes': ['R', 'P', 'V', 'V-16', 'V-32', 'A', 'A-40', 'A-80']}, 'cuda_ml'
: {'max_quantity': 4, 'classes': ['R', 'V', 'V-16', 'V-32', 'A', 'A-40', 'A-80']}, 'cuda': {'max_quantity': 4, 'classes': ['P', 'V', 'V-
16', 'V-32', 'A', 'A-40', 'A-80']}}}, 'long': {'time': 14400, 'max_slots': 24, 'max_size': 385, 'ram_specifier': 'mem-per-cpu', 'map_ram
': False, 'slot_size': None}, 'short': {'default': True, 'time': 1800, 'max_slots': 24, 'max_size': 385, 'map_ram': False, 'slot_size':
None, 'group': 0, 'priority': 1}, 'epyc': {'time': 14400, 'max_slots': 32, 'max_size': 515, 'map_ram': False, 'slot_size': None}, 'win':
{'time': 14400, 'max_slots': 56, 'max_size': 1546, 'map_ram': False, 'slot_size': None}}, 'coproc_opts': {'cuda': {'presence_test': 'nv
idia-smi', 'uses_modules': True, 'module_parent': 'cuda', 'class_constraint': True, 'resource': 'gpu', 'classes': True, 'include_more_ca
pable': True, 'class_types': {'P': {'resource': 'p100', 'doc': 'Pascal. ECC, double, single, and half precision workloads - 16GB RAM - 6
CPUs', 'capability': 2}, 'V': {'resource': 'v100', 'doc': 'Volta. ECC, double, single, half and quarter precision workloads - 16 or 32G
B RAM - 6 CPUs', 'capability': 3}, 'V-16': {'resource': 'v100-sxm2-16gb', 'doc': 'Volta. ECC, double, single, half and quarter precision
workloads - 16GB RAM - 12 CPUs', 'capability': 4}, 'V-32': {'resource': 'v100-pcie-32gb', 'doc': 'Volta. ECC, double, single, half and
quarter precision workloads - 32GB RAM - 6 CPUs', 'capability': 5}, 'A': {'resource': 'a100', 'doc': 'Ampere. ECC, double, single, half
precision FP and Tensor - single, half, quarter and eigth-precision workloads - 40 or 80GB RAM - 8 CPUs', 'capability': 6}, 'A-40': {'re
source': 'a100-pcie-40gb', 'doc': 'Ampere. ECC, double, single, half precision FP and Tensor - single, half, quarter and eigth-precision
workloads - 40GB RAM - 8 CPUs', 'capability': 7}, 'A-80': {'resource': 'a100-pcie-40gb', 'doc': 'Ampere. ECC, double, single, half prec
ision FP and Tensor - single, half, quarter and eigth-precision workloads - 80GB RAM - 8 CPUs', 'capability': 8}}, 'default_class': 'P'}
, 'cuda_ml': {'presence_test': 'nvidia-smi', 'uses_modules': True, 'module_parent': 'cuda', 'class_constraint': True, 'resource': 'gpu',
'classes': True, 'include_more_capable': True, 'class_types': {'R': {'resource': 'rtx8000', 'doc': 'RTX8000. ECC, single-precision or t
ensor workloads - 48GB RAM - 8 CPUs', 'capability': 1}, 'V': {'resource': 'v100', 'doc': 'Volta. ECC, double, single, half and quarter p
recision workloads - 16 or 32GB RAM - 6 CPUs', 'capability': 2}, 'V-16': {'resource': 'v100-sxm2-16gb', 'doc': 'Volta. ECC, double, sing
le, half and quarter precision workloads - 16GB RAM - 12 CPUs', 'capability': 3}, 'V-32': {'resource': 'v100-pcie-32gb', 'doc': 'Volta.
ECC, double, single, half and quarter precision workloads - 32GB RAM - 6 CPUs', 'capability': 4}, 'A': {'resource': 'a100', 'doc': 'Ampe
re. ECC, double, single, half precision FP and Tensor - single, half, quarter and eigth-precision workloads - 40 or 80GB RAM - 8 CPUs',
'capability': 5}, 'A-40': {'resource': 'a100-pcie-40gb', 'doc': 'Ampere. ECC, double, single, half precision FP and Tensor - single, hal
f, quarter and eigth-precision workloads - 40GB RAM - 8 CPUs', 'capability': 6}, 'A-80': {'resource': 'a100-pcie-40gb', 'doc': 'Ampere.
ECC, double, single, half precision FP and Tensor - single, half, quarter and eigth-precision workloads - 80GB RAM - 8 CPUs', 'capabilit
y': 7}}, 'default_class': 'R'}, 'cuda_all': {'presence_test': 'nvidia-smi', 'uses_modules': True, 'module_parent': 'cuda', 'class_constr
aint': True, 'resource': 'gpu', 'classes': True, 'include_more_capable': True, 'class_types': {'R': {'resource': 'rtx8000', 'doc': 'RTX8
000. ECC, single-precision or tensor workloads - 48GB RAM - 8 CPUs', 'capability': 1}, 'P': {'resource': 'p100', 'doc': 'Pascal. ECC, do
uble, single, and half precision workloads - 16GB RAM - 6 CPUs', 'capability': 2}, 'V': {'resource': 'v100', 'doc': 'Volta. ECC, double,
single, half and quarter precision workloads - 16 or 32GB RAM - 6 CPUs', 'capability': 3}, 'V-16': {'resource': 'v100-sxm2-16gb', 'doc'
: 'Volta. ECC, double, single, half and quarter precision workloads - 16GB RAM - 12 CPUs', 'capability': 4}, 'V-32': {'resource': 'v100-
pcie-32gb', 'doc': 'Volta. ECC, double, single, half and quarter precision workloads - 32GB RAM - 6 CPUs', 'capability': 5}, 'A': {'reso
urce': 'a100', 'doc': 'Ampere. ECC, double, single, half precision FP and Tensor - single, half, quarter and eigth-precision workloads -
40 or 80GB RAM - 8 CPUs', 'capability': 6}, 'A-40': {'resource': 'a100-pcie-40gb', 'doc': 'Ampere. ECC, double, single, half precision
FP and Tensor - single, half, quarter and eigth-precision workloads - 40GB RAM - 8 CPUs', 'capability': 7}, 'A-80': {'resource': 'a100-p
cie-40gb', 'doc': 'Ampere. ECC, double, single, half precision FP and Tensor - single, half, quarter and eigth-precision workloads - 80G
B RAM - 8 CPUs', 'capability': 8}}, 'default_class': 'R'}}, 'qtest': '/usr/bin/sinfo'}
DEBUG:fsl_sub.config:Checking for empty queue definitions
DEBUG:fsl_sub.config:Checking for RAM split/PE configuration
DEBUG:fsl_sub.config:large_job_split_pe set to None
DEBUG:fsl_sub:Configuring plugin slurm
DEBUG:fsl_sub:Simple string or single element list passed
DEBUG:fsl_sub:String being shlex split
DEBUG:fsl_sub:['ls']
DEBUG:fsl_sub:Loading configuration for slurm
DEBUG:fsl_sub:Method configuration is {'memory_in_gb': False, 'queues': True, 'copy_environment': False, 'has_parallel_envs': False, 'sc
ript_conf': True, 'mail_support': False, 'mail_modes': {'b': ['BEGIN'], 'e': ['END'], 'a': ['FAIL', 'REQUEUE'], 'f': ['ALL'], 'n': ['NON
E']}, 'mail_mode': 'n', 'notify_ram_usage': True, 'set_time_limit': False, 'array_holds': True, 'array_limit': True, 'projects': True, '
keep_jobscript': False, 'preserve_modules': True, 'add_module_paths': ['/well/win/software/modules'], 'strict_dependencies': False}
DEBUG:fsl_sub:Adding export_vars from config to provided list ['FSLSUB_CONF=/well/win/software/packages/fsl_sub_slurm/fsl_sub.yml']['SUB
JECTS_DIR']
INFO:fsl_sub:METHOD=slurm : TYPE=single : args=ls
DEBUG:fsl_sub:Calc slots based on JR:SS:JT - 0:None:1
INFO:fsl_sub:Estimated RAM was 0 GBm, runtime was 0 minutes.
INFO:fsl_sub:Appropriate queue is short
DEBUG:fsl_sub:Automatic queue selection:
DEBUG:fsl_sub:('short', 1)
DEBUG:fsl_sub:Calling queue_submit fsl_sub_plugin_slurm with:
DEBUG:fsl_sub:['ls'], SOME_NAME, short, (), False, None, None, None, None, None, None, None, True, None, None, mza328@rescomp1.hpc.in.bm
rc.ox.ac.uk, None, None, None, None, False, 1, False, None, True, None
INFO:fsl_sub.fsl_sub_plugin_slurm:slurm_args: --export=FSLSUB_CONF=/well/win/software/packages/fsl_sub_slurm/fsl_sub.yml,SUBJECTS_DIR,OM
P_NUM_THREADS=1,MKL_NUM_THREADS=1,MKL_DOMAIN_NUM_THREADS=1,OPENBLAS_NUM_THREADS=1,GOTO_NUM_THREADS=1,FSLSUB_PARALLEL=1,FSLSUB_JOB_ID_VAR
=SLURM_JOB_ID,FSLSUB_ARRAYTASKID_VAR=SLURM_ARRAY_TASK_ID,FSLSUB_ARRAYSTARTID_VAR=SLURM_ARRAY_TASK_MIN,FSLSUB_ARRAYENDID_VAR=SLURM_ARRAY_
TASK_MAX,FSLSUB_ARRAYSTEPSIZE_VAR=SLURM_ARRAY_TASK_STEP,FSLSUB_ARRAYCOUNT_VAR=SLURM_ARRAY_TASK_COUNT,FSLSUB_NSLOTS=SLURM_NPROCS ['-o', '
/gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.o%j'] ['-e', '/gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.e%j'] --dependency=after
any: --job-name=SOME_NAME --chdir=/gpfs3/users/win-fmrib-analysis/mza328 ['-p', 'short'] --parsable --requeue --ntasks=1
INFO:fsl_sub.fsl_sub_plugin_slurm:executing single task
INFO:fsl_sub.fsl_sub_plugin_slurm:--export=FSLSUB_CONF=/well/win/software/packages/fsl_sub_slurm/fsl_sub.yml,SUBJECTS_DIR,OMP_NUM_THREAD
S=1,MKL_NUM_THREADS=1,MKL_DOMAIN_NUM_THREADS=1,OPENBLAS_NUM_THREADS=1,GOTO_NUM_THREADS=1,FSLSUB_PARALLEL=1,FSLSUB_JOB_ID_VAR=SLURM_JOB_I
D,FSLSUB_ARRAYTASKID_VAR=SLURM_ARRAY_TASK_ID,FSLSUB_ARRAYSTARTID_VAR=SLURM_ARRAY_TASK_MIN,FSLSUB_ARRAYENDID_VAR=SLURM_ARRAY_TASK_MAX,FSL
SUB_ARRAYSTEPSIZE_VAR=SLURM_ARRAY_TASK_STEP,FSLSUB_ARRAYCOUNT_VAR=SLURM_ARRAY_TASK_COUNT,FSLSUB_NSLOTS=SLURM_NPROCS ['-o', '/gpfs3/users
/win-fmrib-analysis/mza328/SOME_NAME.o%j'] ['-e', '/gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.e%j'] --dependency=afterany: --job-n
ame=SOME_NAME --chdir=/gpfs3/users/win-fmrib-analysis/mza328 ['-p', 'short'] --parsable --requeue --ntasks=1
DEBUG:fsl_sub.fsl_sub_plugin_slurm:<class 'list'>
DEBUG:fsl_sub.fsl_sub_plugin_slurm:['--export=FSLSUB_CONF=/well/win/software/packages/fsl_sub_slurm/fsl_sub.yml,SUBJECTS_DIR,OMP_NUM_THR
EADS=1,MKL_NUM_THREADS=1,MKL_DOMAIN_NUM_THREADS=1,OPENBLAS_NUM_THREADS=1,GOTO_NUM_THREADS=1,FSLSUB_PARALLEL=1,FSLSUB_JOB_ID_VAR=SLURM_JO
B_ID,FSLSUB_ARRAYTASKID_VAR=SLURM_ARRAY_TASK_ID,FSLSUB_ARRAYSTARTID_VAR=SLURM_ARRAY_TASK_MIN,FSLSUB_ARRAYENDID_VAR=SLURM_ARRAY_TASK_MAX,
FSLSUB_ARRAYSTEPSIZE_VAR=SLURM_ARRAY_TASK_STEP,FSLSUB_ARRAYCOUNT_VAR=SLURM_ARRAY_TASK_COUNT,FSLSUB_NSLOTS=SLURM_NPROCS', ['-o', '/gpfs3/
users/win-fmrib-analysis/mza328/SOME_NAME.o%j'], ['-e', '/gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.e%j'], '--dependency=afterany:
', '--job-name=SOME_NAME', '--chdir=/gpfs3/users/win-fmrib-analysis/mza328', ['-p', 'short'], '--parsable', '--requeue', '--ntasks=1']
DEBUG:fsl_sub.fsl_sub_plugin_slurm:Found following loaded modules
DEBUG:fsl_sub.fsl_sub_plugin_slurm:['fsl_sub/2.7']
DEBUG:fsl_sub.fsl_sub_plugin_slurm:Creating module load lines
DEBUG:fsl_sub.fsl_sub_plugin_slurm:Adding modules paths
DEBUG:fsl_sub.fsl_sub_plugin_slurm:Module list is ['fsl_sub/2.7']
DEBUG:fsl_sub.fsl_sub_plugin_slurm:#!/usr/bin/bash
#SBATCH --export=FSLSUB_CONF=/well/win/software/packages/fsl_sub_slurm/fsl_sub.yml,SUBJECTS_DIR,OMP_NUM_THREADS=1,MKL_NUM_THREADS=1,MKL_
DOMAIN_NUM_THREADS=1,OPENBLAS_NUM_THREADS=1,GOTO_NUM_THREADS=1,FSLSUB_PARALLEL=1,FSLSUB_JOB_ID_VAR=SLURM_JOB_ID,FSLSUB_ARRAYTASKID_VAR=S
LURM_ARRAY_TASK_ID,FSLSUB_ARRAYSTARTID_VAR=SLURM_ARRAY_TASK_MIN,FSLSUB_ARRAYENDID_VAR=SLURM_ARRAY_TASK_MAX,FSLSUB_ARRAYSTEPSIZE_VAR=SLUR
M_ARRAY_TASK_STEP,FSLSUB_ARRAYCOUNT_VAR=SLURM_ARRAY_TASK_COUNT,FSLSUB_NSLOTS=SLURM_NPROCS
#SBATCH -o /gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.o%j
#SBATCH -e /gpfs3/users/win-fmrib-analysis/mza328/SOME_NAME.e%j
#SBATCH --dependency=afterany:
#SBATCH --job-name=SOME_NAME
#SBATCH --chdir=/gpfs3/users/win-fmrib-analysis/mza328
#SBATCH -p short
#SBATCH --parsable
#SBATCH --requeue
#SBATCH --ntasks=1
MODULEPATH=/well/win/software/modules:$MODULEPATH
module load fsl_sub/2.7
# Built by fsl_sub v.2.8.3 and fsl_sub_plugin_slurm v.1.6.0
# Command line:
# Submission time (H:M:S DD/MM/YYYY): 17:12:45 20/06/2023
ls
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/well/win/software/packages/fsl_sub_slurm/miniforge3/envs/fsl_sub/lib/python3.10/site-packages/fsl_sub/__init__.py", line 575, i
n submit
job_id = queue_submit(
File "/well/win/software/packages/fsl_sub_slurm/miniforge3/envs/fsl_sub/lib/python3.10/site-packages/fsl_sub_plugin_slurm/__init__.py"
, line 654, in submit
raise BadSubmission(result.stderr)
fsl_sub.exceptions.BadSubmission: sbatch: error: Batch job submission failed: Job dependency problem
The call does work if jobhold
is set to None
, e.g.:
from fsl_sub import submit
submit('some_command', name='JOB_NAME', jobhold=None)
It looks to me like the sbatch afterany:
dependency option is being added, but without any job IDs, which is causing sbatch
to reject the submission.
As this is quite a minor issue, I'm not sure what you would prefer - would you like the fsl_sub.submit
function to accept both jobhold=None
and jobhold=<empty-sequence>
, or would you like to require calling code to set jobhold=None
for jobs with no dependencies?