# defaults.yaml
# set in config
# Default configuration settings for merging
# Please create your own config files instead of modifying this one directly!
# Input selection: how the files to merge are specified and filtered.
# NOTE(review): nesting was restored from a copy that had lost its indentation;
# confirm that 'tag' and 'comment' belong under 'inputs' and not at top level.
inputs:
  mode: dids              # Options: query, dids, files
  namespace: "usertests"  # Default namespace for local input files
  inputs: []              # List of inputs
  search_dirs: []         # List of directories to search for metadata files
  skip: null              # Skip a number of input files, overridden by '--skip #'
  limit: null             # Limit the number of input files, overridden by '--limit #'
  tag: null               # Specify a tag to identify outputs, overridden by '--tag TAG'
  comment: null           # Add a comment to output metadata, overridden by '--comment COMMENT'
# Output settings: job script location, output mode, naming and lifetime.
# NOTE(review): nesting was restored from a copy that had lost its indentation;
# 'scratch' is assumed to be a child of 'output' -- confirm against the consumer.
output:
  scripts: "tmp"          # Directory for generated job scripts (relative to package dir)
  mode: justin            # Options: dids, replicas, pfns, local, justin
  namespace: "usertests"  # Optionally specify a namespace different from the parents
  name: "{core.run_type}_{dune.campaign}_{dune.config_file}_{core.application.name}"
  # Alternative naming pattern, kept for reference:
  # name: "{core.run_type}_{core.file_type}_{dune.campaign}_{core.data_stream}_{dune.config_file}_{core.data_tier}"
  grandparents: false     # List the parents of the input files as the parents of the merged file
  dir: "/pnfs/dune/scratch/users/${USER}/merge_test"  # Directory for local output files
  lifetime: 30            # Lifetime of output files (in days)
  scratch:                # Settings for temporary files from 2-stage merging
    namespace: "usertests"  # Optionally specify a namespace different from the parents
    lifetime: 30            # Lifetime of output files (in days)
# Metadata key settings.
metadata:
  # These metadata keys are optional (overrides required and conditional keys)
  optional:
    - "dune_mc.geometry_version"
    - "dune_mc.gen_fcl_filename"
# Validation settings: batching, failure policy and accepted checksums.
# NOTE(review): nesting was restored from a copy that had lost its indentation;
# 'checksums' is assumed to be a child of 'validation' -- confirm.
validation:
  batch_size: 100         # Number of files to query metacat about at once
  fast_fail: true         # Stop processing files as soon as one batch fails validation
  skip:                   # Continue processing files even if some fail validation
    missing: false        # Skip files with missing metadata
    duplicate: false      # Skip duplicated files
    unreachable: false    # Skip files that are not accessible
    invalid: false        # Skip files with invalid metadata
    inconsistent: false   # Skip files with inconsistent metadata
  checksums:
    - "adler32"           # Adler32 should be the default checksum
# Site settings: where merging may run and the distance values used for RSEs.
# NOTE(review): nesting was restored from a copy that had lost its indentation;
# 'streaming' is assumed to be a child of 'sites' -- confirm.
sites:
  justin_url: "https://justin-ui-fnal.dune.hep.ac.uk"
  local: "US_FNAL-FermiGrid"    # Local site name
  default: "US_FNAL-FermiGrid"  # Default site (eg for stage 2 jobs)
  allowed_sites:                # Sites where merging is allowed
    - "US_FNAL-FermiGrid"
    - "CERN"
  max_distance: 1000.0          # Distances range from 0 to 101
  rse_distances:                # Distance offsets for specific RSEs
    "DUNE_US_FNAL_DISK_STAGE": -5.0  # Increase priority
  nearline_distance:            # Extra distance to account for staging
    default: 100.0              # Default distance for all RSEs
    "FNAL_DCACHE": 10.0
  streaming: true               # Stream files from remote sites instead of making a local copy
# Merging settings: target chunking, software version, and the merge method.
# NOTE(review): nesting was restored from a copy that had lost its indentation;
# 'methods' is assumed to be a child of 'merging' -- confirm against the consumer.
merging:
  target_mode: size       # Options: size, count
  target_size: 10.0       # Target size (in GB) or number of files
  equalize: true          # Try to equalize the size of the merged files
  chunk_min: 2            # Minimum number of files to merge at once
  chunk_max: 1000         # Maximum number of files to merge at once
  dune_version: null      # Optionally specify DUNE software version, defaults to DUNE_VERSION env var
  dune_qualifier: null    # Optionally specify DUNE software qualifier, defaults to DUNE_QUALIFIER env var
  method:
    name: auto            # Can be auto, a specific default method, or the path to a custom script
    cmd: null             # Optionally specify merging command, eg. '{script} {cfg} {output} {inputs}'
    cfg: null             # Optionally specify a config file for merging
    script: null          # Optionally specify a custom merging script
    outputs: []           # List of output files produced by the merging method
    dependencies: []      # Optionally specify additional files required for merging
  # Default settings for built-in merging methods, matched using 'cond' in reverse order
  methods:
    - name: "tar"
      cond: "True"        # Always matches if no other method matches first
      script: "merge_tar.py"
      outputs:
        - name: "{NAME}_merged_{UUID}.tar"
          metadata:
            core.file_format: "tar"
    - name: "hadd"
      cond: "'{core.file_format}' in ['root', 'rootntuple', 'tfile']"
      cmd: "hadd -f {output} {inputs}"
      outputs:
        - name: "{NAME}_merged_{UUID}.root"
    - name: "lar"
      cond: "'{core.file_format}' in ['artroot']"
      cmd: "lar -c {cfg} -n 1000000 -o {output} {inputs}"
      cfg: "artcat.fcl"
      outputs:
        - name: "{NAME}_merged_{UUID}.root"
    - name: "hdf5"
      cond: "'{core.file_format}' in ['hdf5']"
      script: "merge_hdf5.py"
      cfg: "hdf5.yaml"
      outputs:
        - name: "{NAME}_merged_{UUID}.hdf5"