defaults

The defaults reside in the file config/defaults.yaml.

defaults.yaml

# Default configuration settings for merging
# Please create your own config files instead of modifying this one directly!

# Settings for validating the input files before merging
validation:
    batch_size: 100   # Number of files to query metacat about at once
    fast_fail: true   # Stop processing files as soon as one batch fails validation
    skip:             # Continue processing files even if some fail validation
        missing:      false # Skip files with missing metadata
        duplicate:    false # Skip duplicated files
        unreachable:  false # Skip files that are not accessible
        invalid:      false # Skip files with invalid metadata
        inconsistent: false # Skip files with inconsistent metadata
    # Checksum algorithms used during validation
    # NOTE(review): confirm exactly how this list is applied by the validator
    checksums:
      - "adler32"     # Adler32 should be the default checksum
    consistent:       # These metadata keys must be the same for all input files
      - "core.run_type"
      - "core.file_type"
      - "core.file_format"
      - "core.data_tier"
      - "core.data_stream"
      - "core.application.name"
      - "dune.campaign"
      - "dune.requestid"
      - "dune.config_file"          # Not checked in old merging
      - "core.application.version"  # Not checked in old merging
    # No keys are listed under "required" by default, so the value loads as null
    required:       # These metadata keys must be present in all input files
    optional:       # These metadata keys are optional (overrides required and conditional keys)
      - "dune_mc.geometry_version"

# Site and RSE distance settings
sites:
    # NOTE(review): presumably an upper limit on usable distance; the default
    # is above the whole stated range, so it would not exclude anything - confirm
    max_distance: 1000        # Distances range from 0 to 101
    nearline_distance:        # Extra distance to account for staging
        default:        100   # Default distance for all RSEs
        # NOTE(review): presumably a per-RSE value that replaces the default - confirm
        "FNAL_DCACHE":  10
    # NOTE(review): presumably the sites where merging is allowed to run - confirm
    allowed_sites:
      - "US_FNAL-FermiGrid"
      - "CERN"

# Settings that control how input files are grouped and merged
merging:
    method: auto        # Can be auto or a specific method from the list below
    target_mode: size   # Options: size, count
    target_size: 10     # Target size (in GB) or number of files, depending on target_mode
    equalize: true      # Try to equalize the size of the merged files
    chunk_min: 5        # Minimum number of files to merge at once
    chunk_max: 100      # Maximum number of files to merge at once
    metadata:   # Special handling for metadata keys, options are:
        # unique   only save key if all values are the same
        # all      save a list of all values
        # min      save the minimum value
        # max      save the maximum value
        # sum      save the sum of all values
        # union    save the union of all values
        # skip     ignore the key
        default:                    unique
        "core.first_event_number":  min
        "core.last_event_number":   max
        "core.event_count":         sum
        "core.events":              union
        "core.runs":                union
        "core.runs_subruns":        union
        # Skip values that don't make sense to merge
        "core.start_time":          skip
        "core.end_time":            skip
        "Offline.options":          skip
        "Offline.machine":          skip
        overrides: # Set keys to specific values
            "retention.status":     "active"
    # Available merging methods. Each entry lists the file formats associated
    # with it (file_format) and the file extension it uses (ext).
    # NOTE(review): presumably "auto" picks the method whose file_format list
    # contains the inputs' core.file_format value - confirm
    methods:
        "hadd":
            file_format:
              - "root"
              - "rootntuple"
              - "tfile"
            ext: ".root"
        "lar":
            file_format:
              - "artroot"
            ext: ".root"
            # NOTE(review): presumably the FHiCL configuration used for the lar merge - confirm
            fcl: "artcat.fcl"
        "hdf5":
            file_format:
              - "hdf5"
            ext: ".hdf5"
        "tar":
            file_format:
              - "binary"
              - "tar"
              - "unknown"
            # NOTE(review): presumably the file format recorded for the merged output - confirm
            output_format: "tar"
            ext: ".tar"

# Settings for the merged output files
output:
    dir: "~/scratch/merge_test" # Directory to save local merged files
    namespace: ~ # Optionally specify a namespace different from the parents (~ is YAML null, i.e. not set)
    # Name template built from {metadata key} placeholders
    # NOTE(review): presumably each placeholder is replaced by that key's value - confirm
    name: "{core.run_type}_{dune.campaign}_{dune.config_file}_{core.application.name}"
    # Alternative, longer name template (disabled):
    #name: "{core.run_type}_{core.file_type}_{dune.campaign}_{core.data_stream}_{dune.config_file}_{core.data_tier}"
    grandparents: false # List the parents of the input files as the parents of the merged file
    # Short forms for metadata values, grouped by metadata key (full value: abbreviation)
    # NOTE(review): presumably applied when filling in the name template - confirm
    abbreviations:
        "core.run_type":
            "protodune-sp":             "pd-sp"
            "protodune-dp":             "pd-dp"
            "hd-coldbox":               "cb-hd"
            "vd-coldbox":               "cb-vd"
            "vd-coldbox-bottom":        "cb-vd-b"
            "vd-coldbox-top":           "cb-vd-t"
            # "protodune-hd" and "hd-protodune" both abbreviate to "pd-hd"
            "protodune-hd":             "pd-hd"
            "hd-protodune":             "pd-hd"
            "vd-protodune":             "pd-vd"
            "vd-protodune-pds":         "pd-vd-pds"
            "vd-protodune-arapucas":    "pd-vd-arapucas"
            "dc4-vd-coldbox-bottom":    "cb-dc4-vd-b"
            "dc4-vd-coldbox-top":       "cb-dc4-vd-t"
            "dc4-hd-protodune":         "pd-dc4-hd"
            "neardet":                  "nd"
            "neardet-lar":              "nd-lar"
            "neardet-2x2":              "nd-2x2"
            "neardet-2x2-lar":          "nd-2x2-lar"
            "neardet-2x2-lar-charge":   "nd-2x2-lar-charge"
            "neardet-2x2-lar-light":    "nd-2x2-lar-light"
            "neardet-2x2-minerva":      "nd-2x2-minerva"
            "fardet":                   "fd"
            "fardet-sp":                "fd-sp"
            "fardet-hd":                "fd-hd"
            "fardet-vd":                "fd-vd"
            "fardet-dp":                "fd-dp"
            "fardet-moo":               "fd-moo"
            "311_dp_light":             "311-dp-light"
            "physics":                  "phys"
            "protodune":                "pd"
            "protodune-vst":            "pd-vst"
        "core.file_type":
            "detector":                 "det"
            "importedDetector":         "imp-det"
            "binary":                   "bin"
            "photon_detector":          "photo-det"
        "core.data_stream":
            "calibration":              "calib"
            "physics":                  "phys"
            "commissioning":            "comm"
            "pedestal":                 "pdstl"
            "g4beamline":               "g4"
        "core.data_tier":
            "simulated":                "sim"
            "hit-reconstructed":        "hit-reco"
            "full-reconstructed":       "full-reco"
            "generated":                "gen"
            "detector-simulated":       "det-sim"
            "root-tuple":               "r-tuple"
            "root-hist":                "r-hist"
            "decoded-raw":              "dec-raw"
            "pandora-info":             "pandora"
            "reco-recalibrated":        "reco-recal"
            "root-tuple-virtual":       "r-tuple-v"
            "binary-raw":               "binary"
            "sam-user":                 "sam"
        "core.file_format":
            "artroot":                  "art"
            "binary":                   "bin"
            "rootntuple":               "tuple"