# defaults.yaml
#
# set in config

# Default configuration settings for merging
# Please create your own config files instead of modifying this one directly!

# Input selection settings.
inputs:
    # Input mode. Options: query, dids, files
    mode: dids
    # Default namespace for local input files
    namespace: "usertests"
    # List of inputs
    inputs: []
    # List of directories to search for metadata files
    search_dirs: []
    # Skip a number of input files; overridden by '--skip #'
    skip: null
    # Limit the number of input files; overridden by '--limit #'
    limit: null
    # Tag used to identify outputs; overridden by '--tag TAG'
    tag: null
    # Comment added to output metadata; overridden by '--comment COMMENT'
    comment: null

# Output settings for merged files.
output:
    # Directory for generated job scripts (relative to package dir)
    scripts: "tmp"
    # Output mode. Options: dids, replicas, pfns, local, justin
    mode: justin
    # Optionally specify a namespace different from the parents
    namespace: "usertests"
    # Output name pattern; the {...} placeholders look like metadata keys -- TODO confirm
    name: "{core.run_type}_{dune.campaign}_{dune.config_file}_{core.application.name}"
    #name: "{core.run_type}_{core.file_type}_{dune.campaign}_{core.data_stream}_{dune.config_file}_{core.data_tier}"
    # List the parents of the input files as the parents of the merged file
    grandparents: false
    # Directory for local output files
    dir: "/pnfs/dune/scratch/users/${USER}/merge_test"
    # Lifetime of output files (in days)
    lifetime: 30
    # Settings for temporary files from 2-stage merging
    scratch:
        # Optionally specify a namespace different from the parents
        namespace: "usertests"
        # Lifetime of output files (in days)
        lifetime: 30

# Metadata settings.
metadata:
    # Metadata keys listed here are optional
    # (overrides required and conditional keys)
    optional:
      - "dune_mc.geometry_version"
      - "dune_mc.gen_fcl_filename"

# File validation settings.
validation:
    # Number of files to query metacat about at once
    batch_size: 100
    # Stop processing files as soon as one batch fails validation
    fast_fail: true
    # Continue processing files even if some fail validation
    skip:
        # Skip files with missing metadata
        missing: false
        # Skip duplicated files
        duplicate: false
        # Skip files that are not accessible
        unreachable: false
        # Skip files with invalid metadata
        invalid: false
        # Skip files with inconsistent metadata
        inconsistent: false
    checksums:
      # Adler32 should be the default checksum
      - "adler32"

# Site and distance settings.
sites:
    justin_url: "https://justin-ui-fnal.dune.hep.ac.uk"
    # Local site name
    local: "US_FNAL-FermiGrid"
    # Default site (eg for stage 2 jobs)
    default: "US_FNAL-FermiGrid"
    # Sites where merging is allowed
    allowed_sites:
      - "US_FNAL-FermiGrid"
      - "CERN"
    # Distances range from 0 to 101
    # NOTE(review): 1000.0 lies above that stated range; presumably this
    # default is meant to impose no effective cutoff -- confirm.
    max_distance: 1000.0
    # Distance offsets for specific RSEs
    rse_distances:
        # Increase priority
        DUNE_US_FNAL_DISK_STAGE: -5.0
    # Extra distance to account for staging
    nearline_distance:
        # Default distance for all RSEs
        default: 100.0
        FNAL_DCACHE: 10.0
    # Stream files from remote sites instead of making a local copy
    streaming: true

# Merging settings: chunk targets, software version and merge methods.
merging:
    # Options: size, count
    target_mode: size
    # Target size (in GB) or number of files
    target_size: 10.0
    # Try to equalize the size of the merged files
    equalize: true
    # Minimum number of files to merge at once
    chunk_min: 2
    # Maximum number of files to merge at once
    chunk_max: 1000
    # Optionally specify DUNE software version, defaults to DUNE_VERSION env var
    dune_version: null
    # Optionally specify DUNE software qualifier, defaults to DUNE_QUALIFIER env var
    dune_qualifier: null
    method:
        # Can be auto, a specific default method, or the path to a custom script
        name: auto
        # Optionally specify merging command, eg. '{script} {cfg} {output} {inputs}'
        cmd: null
        # Optionally specify a config file for merging
        cfg: null
        # Optionally specify a custom merging script
        script: null
        # List of output files produced by the merging method
        outputs: []
        # Optionally specify additional files required for merging
        dependencies: []
    # Default settings for built-in merging methods, matched using 'cond' in
    # reverse order (so keep the order of the entries below as-is).
    methods:
      - name: "tar"
        # Always matches if no other method matches first
        cond: "True"
        script: "merge_tar.py"
        outputs:
          - name: "{NAME}_merged_{UUID}.tar"
            metadata:
                core.file_format: "tar"
      - name: "hadd"
        cond: "'{core.file_format}' in ['root', 'rootntuple', 'tfile']"
        cmd: "hadd -f {output} {inputs}"
        outputs:
          - name: "{NAME}_merged_{UUID}.root"
      - name: "lar"
        cond: "'{core.file_format}' in ['artroot']"
        cmd: "lar -c {cfg} -n 1000000 -o {output} {inputs}"
        cfg: "artcat.fcl"
        outputs:
          - name: "{NAME}_merged_{UUID}.root"
      - name: "hdf5"
        cond: "'{core.file_format}' in ['hdf5']"
        script: "merge_hdf5.py"
        cfg: "hdf5.yaml"
        outputs:
          - name: "{NAME}_merged_{UUID}.hdf5"