Source code for MetaValidator
"""Check metadata against a template"""
import os,sys,json
#from CheckConfiguration import known_fields
[docs]
def MetaValidator(filemd=None, errfile=None, verbose=False):
" check for type and missing required fields in metadata"
f = open("../../config/valid_values.json")
known_fields = json.load(f)
f.close()
DEBUG=False
# define types
STRING = type("")
FLOAT = type(1.0)
INT = type(1)
LIST = type([])
DICT = type({})
# list defaults for metadata fields
basetypes = {
"name": STRING,
"namespace": STRING,
"checksums": DICT,
"size":INT,
"metadata":{
"core.application.family": STRING,
"core.application.name": STRING,
"core.application.version": STRING,
"core.data_stream":STRING,
"core.data_tier": STRING,
"core.end_time": FLOAT,
"core.event_count": INT,
"core.events": LIST,
"core.file_content_status": STRING,
"core.file_format": STRING,
"core.file_type": STRING,
"core.first_event_number": INT,
"core.last_event_number": INT,
"core.run_type": STRING,
"core.runs": LIST,
"core.runs_subruns": LIST,
"core.start_time": FLOAT,
"dune.daq_test": STRING,
"dune.config_file": STRING,
"dune_mc.gen_fcl_filename": STRING,
"dune_mc.geometry_version":STRING,
"retention.status": STRING,
"retention.class": STRING
}
}
# set default values for fields that are often missing but needed
fixDefaults = {
"core.file_content_status":"good",
"retention.status":"active",
"retention.class":"unknown"
}
# place to put optional fields: all is optional for all, otherwise you need to tell it data_tier
optional = {
"all":["core.events","dune.daq_test"],
"root-tuple":["core.event_count","core.first_event_number","core.last_event_number"],
"raw":["dune.config_file", "dune_mc.gen_fcl_filename","dune_mc.geometry_version","core.application.family","core.application.name","core.application.version"],
"binary-raw":["dune.config_file", "dune_mc.gen_fcl_filename","dune_mc.geometry_version","core.application.family","core.application.name","core.application.version"],
"trigprim":["dune.config_file", "dune_mc.gen_fcl_filename","dune_mc.geometry_version","core.application.family","core.application.name","core.application.version"],
"root-tuple-virtual":["core.event_count","core.first_event_number","core.last_event_number"],
"root-tuple-virtual-tar":["core.event_count","core.first_event_number","core.last_event_number"]
}
valid = True
fixes = {}
if "name" in filemd:
did = filemd["namespace"]+":"+filemd["name"]
else:
print ("ERROR: name not in metadata, continuing but this is invalid and I cannot fix it from metadata only")
tempname = "UNKNOWN"
did = "UNKNOWN:UNKNOWN"
valid = False
# do this as file may not have an fid yet, but fid makes shorter error messages.
if "fid" in filemd:
fid = filemd["fid"]
else:
fid = did
# start out with valid and no fixes needed
# loop over default md keys
for xkey, xtype in basetypes.items():
if verbose: print (x,xtype)
x = "core."+xkey
if x in optional["all"]: continue
# check required
if x not in filemd.keys():
error = x+" is missing from "+ fid + "\n"
print (error)
if errfile is not None: errfile.write(error)
valid *= False
print (filemd.keys())
continue
# check type
if xtype != type(filemd[x]) and x != "metadata":
if xtype == FLOAT and type(filemd[x]) == INT: continue
error = "%s has wrong type in %s \n"%(x,fid)
print (error)
if errfile is not None: errfile.write(error)
valid *= False
# now do the metadata
md = filemd["metadata"]
for x, xtype in basetypes["metadata"].items():
if verbose: print (x,xtype)
if x in optional["all"]: continue # skip optional items
if "core.run_type" in md and md["core.run_type"] != "mc" and "mc" in x:
if DEBUG: print ("skipping mc only",x)
continue
# check required keys
if x not in md.keys():
if "core.data_tier" in md and md["core.data_tier"] in optional and x in optional[md["core.data_tier"]]: # skip optional items by data_tier
if DEBUG: print ("skipping optional missing field for data_tier",md["core.data_tier"],x)
continue
error = x+ " is missing from " + fid + "\n"
print (error)
if errfile is not None: errfile.write(error)
valid *= False
if x in fixDefaults:
fixes[x]=fixDefaults[x]
continue
# check for type
if xtype != type(md[x]):
if xtype == FLOAT and type(md[x]) == INT: continue
error = "%s has wrong type in %s\n "%(x,fid)
print (error)
if errfile is not None: errfile.write(error+"\n")
valid *= False
for x,core in known_fields.items():
if x not in md:
print ("required field",x,"not present")
valid *=False
if md[x] not in core and md[x][-4:]!="-tar":
print ("unknown required metadata field",x,"=",md[x])
valid *= False
for x,v in md.items():
if x != x.lower() and x.lower() not in md.keys():
valid *=False
print ("OOPS upper case",x)
fixes[x.lower()]=v
if not valid:
print (did, " fails basic metadata tests")
if len(fixes) !=0:
print ("you could fix this by applying this fix")
print (json.dumps(fixes,indent=4))
# look for upper case in keys
return valid, fixes
if __name__ == '__main__':
if len(sys.argv) < 2:
print ("please provide a json file to check")
sys.exit(1)
jsonname = sys.argv[1]
if not os.path.exists(jsonname):
print ("input file does not exist",jsonname)
sys.exit(1)
jsonfile = open(jsonname,'r')
filemd = json.load(jsonfile)
errfile = open(jsonname+".err",'w')
status,fixes = MetaValidator(filemd=filemd,errfile=errfile,verbose=False)
errfile.close()