#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0
#
# Project:
# glideinWMS
#
# File Version:
#
# Description:
# This program allows adding announced (scheduled) downtimes
# as well as handling unexpected downtimes
#
import os
import sys
import time
from glideinwms.creation.lib import cgWConsts, cgWDictFile
from glideinwms.factory import glideFactoryConfig, glideFactoryDowntimeLib
from glideinwms.lib import condorMonitor
[docs]
def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Usage:",
        " manageFactoryDowntimes.py -dir factory_dir -entry ['all'|'factory'|'entries'|entry_name] -cmd [command] [options]",
        "where command is one of:",
        " add - Add a scheduled downtime period",
        " down - Put the factory down now(+delay)",
        " up - Get the factory back up now(+delay)",
        " ress - Set the up/down based on RESS status",
        " check - Report if the factory is in downtime now(+delay)",
        " vacuum - Remove all expired downtime info",
        "Other options:",
        " -start [[[YYYY-]MM-]DD-]HH:MM[:SS] (start time for adding a downtime)",
        " -end [[[YYYY-]MM-]DD-]HH:MM[:SS] (end time for adding a downtime)",
        " -delay [HHh][MMm][SS[s]] (delay a downtime for down, up, and check cmds)",
        " -ISinfo 'CEStatus' (attribute used in ress for creating downtimes)",
        " -security SECURITY_CLASS (restricts a downtime to users of that security class)",
        " (If not specified, the downtime is for all users.)",
        " -frontend SECURITY_NAME (Limits a downtime to one frontend)",
        ' -comment "Comment here" (user comment for the downtime. Not used by WMS.)',
        "",  # trailing blank line
    )
    for help_line in help_lines:
        print(help_line)
# [[[YYYY-]MM-]DD-]HH:MM[:SS]
[docs]
def strtxt2time(timeStr):
    """Convert a ``[[[YYYY-]MM-]DD-]HH:MM[:SS]`` string to a Unix timestamp.

    Date fields that are omitted default to today's local date; omitted
    seconds default to 0.

    Args:
        timeStr: Text in the format above. The ``HH:MM`` part is mandatory.

    Returns:
        The value produced by ``time.mktime`` for the resulting local time.

    Raises:
        ValueError: If a field is not an integer.
        IndexError: If the minutes field is missing.
    """
    today = time.localtime(time.time())
    year, month, day = today[0], today[1], today[2]

    # Everything before the last "-" is (part of) the date, read right to left.
    *date_fields, clock = timeStr.split("-")
    if date_fields:
        day = int(date_fields[-1])
        if len(date_fields) > 1:
            month = int(date_fields[-2])
            if len(date_fields) > 2:
                year = int(date_fields[-3])

    clock_fields = clock.split(":")
    hours = int(clock_fields[0])
    minutes = int(clock_fields[1])
    seconds = int(clock_fields[2]) if len(clock_fields) > 2 else 0

    # The trailing -1 lets mktime decide whether DST is in effect.
    return time.mktime((year, month, day, hours, minutes, seconds, 0, 0, -1))
# [[[YYYY-]MM-]DD-]HH:MM[:SS]
# or
# unix_time
[docs]
def str2time(timeStr):
    """Parse a time given either as text or as a plain Unix timestamp.

    Args:
        timeStr: Either ``[[[YYYY-]MM-]DD-]HH:MM[:SS]`` or an integer in
            string form.

    Returns:
        The result of ``strtxt2time`` when the string contains a colon,
        otherwise ``int(timeStr)``.

    Raises:
        ValueError: If the string has no colon and is not an integer.
    """
    if ":" in timeStr:
        # A colon means this is the textual representation.
        return strtxt2time(timeStr)

    # No colon: should be a simple number (the raw value is echoed to stdout).
    print(timeStr)
    return int(timeStr)
# Create an array for each value in the frontend descript file
[docs]
def get_security_classes(factory_dir):
    """List every security class found in the frontend descript file.

    Args:
        factory_dir: Directory that contains ``frontend.descript``.

    Returns:
        A list with the items of each frontend's ``"usermap"`` value, in
        frontend order. A class appearing under several frontends is repeated.
    """
    descript_path = factory_dir + "/frontend.descript"
    frontend_descript = glideFactoryConfig.ConfigFile(descript_path, lambda s: s)
    return [
        sec_class
        for frontend_name in frontend_descript.data.keys()
        for sec_class in frontend_descript.data[frontend_name]["usermap"]
    ]
# Create an array for each frontend in the frontend descript file
[docs]
def get_frontends(factory_dir):
    """List the frontend names (top-level keys) of the frontend descript file.

    Args:
        factory_dir: Directory that contains ``frontend.descript``.

    Returns:
        A list of the keys of the loaded descript data.
    """
    descript_path = factory_dir + "/frontend.descript"
    frontend_descript = glideFactoryConfig.ConfigFile(descript_path, lambda s: s)
    frontend_names = list(frontend_descript.data.keys())
    return frontend_names
# Create an array for each entry in the glidein descript file
[docs]
def get_entries(factory_dir):
    """List the entry names declared in the glidein descript.

    Args:
        factory_dir: Not used by this function; ``GlideinDescript`` is built
            without arguments (presumably it reads from the current working
            directory -- verify against glideFactoryConfig).

    Returns:
        The comma-separated ``"Entries"`` value split into a list.
    """
    entries_csv = glideFactoryConfig.GlideinDescript().data["Entries"]
    return entries_csv.split(",")
#
#
[docs]
def get_downtime_fd(entry_name, cmdname):
    """Open the downtimes file named by the glidein descript.

    Args:
        entry_name: Entry the caller is working on; only used in the error
            message, since the configuration lives in the factory file.
        cmdname: Not used by this function.

    Returns:
        A ``glideFactoryDowntimeLib.DowntimeFile`` for the ``"DowntimesFile"``
        path from the descript.

    Raises:
        RuntimeError: If loading the glidein descript raises ``OSError``.
    """
    try:
        # New style has config all in the factory file
        glidein_descript = glideFactoryConfig.GlideinDescript()
    except OSError:
        raise RuntimeError("Failed to load config for %s" % entry_name)

    downtimes_path = glidein_descript.data["DowntimesFile"]
    return glideFactoryDowntimeLib.DowntimeFile(downtimes_path)
[docs]
def get_downtime_fd_dict(entry_or_id, cmdname, opt_dict):
    """Build a mapping from entry name to its downtime file object.

    Args:
        entry_or_id: ``"entries"`` or ``"All"`` to cover every entry in the
            glidein descript, otherwise a single name.
        cmdname: Forwarded to ``get_downtime_fd``.
        opt_dict: Parsed options; the presence of an ``"entries"`` key
            suppresses the extra ``"factory"`` item for ``"All"``.

    Returns:
        A dict of name -> downtime file object.
    """
    if entry_or_id not in ("entries", "All"):
        # Single target
        return {entry_or_id: get_downtime_fd(entry_or_id, cmdname)}

    entry_names = glideFactoryConfig.GlideinDescript().data["Entries"].split(",")
    fds_by_name = {name: get_downtime_fd(name, cmdname) for name in entry_names}

    # "All" also covers the factory itself, unless only entries were requested
    if entry_or_id == "All" and "entries" not in opt_dict:
        fds_by_name["factory"] = get_downtime_fd("factory", cmdname)
    return fds_by_name
[docs]
def add(entry_name, opt_dict):
    """Record a downtime period for an entry.

    Args:
        entry_name: Entry the period applies to.
        opt_dict: Parsed options; reads ``"dir"``, ``"start"``, ``"end"``,
            ``"sec"``, ``"frontend"`` and ``"comment"``.

    Returns:
        Always 0.
    """
    downtime_file = get_downtime_fd(entry_name, opt_dict["dir"])

    period_start = str2time(opt_dict["start"])
    period_end = str2time(opt_dict["end"])
    security_class = opt_dict["sec"]
    frontend_name = opt_dict["frontend"]

    downtime_file.addPeriod(
        start_time=period_start,
        end_time=period_end,
        entry=entry_name,
        frontend=frontend_name,
        security_class=security_class,
        comment=opt_dict["comment"],
    )
    return 0
# [HHh][MMm][SS[s]]
[docs]
def delay2time(delayStr):
    """Convert a ``[HHh][MMm][SS[s]]`` delay string into seconds.

    Args:
        delayStr: Delay text such as ``"1h30m"``, ``"45s"`` or ``"90"``.
            An empty string means no delay.

    Returns:
        The delay as an integer number of seconds.

    Raises:
        ValueError: If any numeric part is not an integer.
    """
    hours = minutes = seconds = 0

    hour_text, found_h, remainder = delayStr.partition("h")
    if found_h:
        hours = int(hour_text)
        delayStr = remainder

    minute_text, found_m, remainder = delayStr.partition("m")
    if found_m:
        minutes = int(minute_text)
        delayStr = remainder

    # The trailing "s" on the seconds part is optional.
    if delayStr.endswith("s"):
        delayStr = delayStr[:-1]
    if delayStr:
        seconds = int(delayStr)

    return seconds + 60 * (minutes + 60 * hours)
[docs]
def down(entry_name, opt_dict):
    """Start a downtime for an entry unless one already covers the start time.

    The start time is ``-start`` (or now when it is ``"None"``) plus the
    ``-delay``; the end time is ``-end``, or open-ended when it is ``"None"``.

    Args:
        entry_name: Entry to put in downtime.
        opt_dict: Parsed options; reads ``"dir"``, ``"delay"``, ``"start"``,
            ``"end"``, ``"frontend"``, ``"sec"`` and ``"comment"``.

    Returns:
        The result of ``startDowntime`` when a downtime is started, or 0 when
        the entry is already down at that time.
    """
    downtime_file = get_downtime_fd(entry_name, opt_dict["dir"])

    start_at = delay2time(opt_dict["delay"])
    start_at += int(time.time()) if opt_dict["start"] == "None" else str2time(opt_dict["start"])
    stop_at = None if opt_dict["end"] == "None" else str2time(opt_dict["end"])

    frontend_name = opt_dict["frontend"]
    security_class = opt_dict["sec"]

    already_down = downtime_file.checkDowntime(
        entry=entry_name, frontend=frontend_name, security_class=security_class, check_time=start_at
    )
    if already_down:
        print("Entry is already down. (%s)" % downtime_file.downtime_comment)
        return 0

    # only add a new line if not in downtime at that time
    return downtime_file.startDowntime(
        start_time=start_at,
        end_time=stop_at,
        frontend=frontend_name,
        security_class=security_class,
        entry=entry_name,
        comment=opt_dict["comment"],
    )
[docs]
def up(entry_name, opt_dict):
    """End the downtime of an entry.

    The end time is ``-end`` (or now when it is ``"None"``) plus the
    ``-delay``.

    Args:
        entry_name: Entry to bring back up.
        opt_dict: Parsed options; reads ``"dir"``, ``"delay"``, ``"sec"``,
            ``"frontend"``, ``"comment"`` and ``"end"``.

    Returns:
        0 when ``endDowntime`` reports a positive count, otherwise 1.
    """
    downtime_file = get_downtime_fd(entry_name, opt_dict["dir"])
    end_at = delay2time(opt_dict["delay"])
    security_class = opt_dict["sec"]
    frontend_name = opt_dict["frontend"]
    user_comment = opt_dict["comment"]

    if opt_dict["end"] == "None":
        base_time = int(time.time())
    else:
        base_time = str2time(opt_dict["end"])
    end_at += base_time

    # No checkDowntime guard here on purpose: the entry may be down only for
    # some security classes/frontends, and "-cmd up" with "-security All"
    # should clear out all of those downtimes.
    closed_count = downtime_file.endDowntime(
        end_time=end_at,
        entry=entry_name,
        frontend=frontend_name,
        security_class=security_class,
        comment=user_comment,
    )
    if closed_count > 0:
        return 0

    print("Entry is not in downtime.")
    return 1
# This function replaces "check", which does not take into account
# security classes. This function will read the downtimes file
# and parse it to determine whether the downtime is relevant to the
# security class
[docs]
def printtimes(entry_or_id, opt_dict):
    """Print downtime information for the selected entries at now + delay.

    Args:
        entry_or_id: Entry selector passed on to ``get_downtime_fd_dict``.
        opt_dict: Parsed options; reads ``"dir"`` and ``"delay"``.

    Returns:
        None.
    """
    fds_by_entry = get_downtime_fd_dict(entry_or_id, opt_dict["dir"], opt_dict)
    check_time = delay2time(opt_dict["delay"]) + int(time.time())

    # Report in alphabetical entry order
    for entry in sorted(fds_by_entry):
        fds_by_entry[entry].printDowntime(entry=entry, check_time=check_time)
# This function is now deprecated, replaced by printtimes
# as it does not take into account that an entry can be down for
# only some security classes.
[docs]
def check(entry_or_id, opt_dict):
    """Print an Up/Down line for each selected entry at now + delay.

    Deprecated in favour of ``printtimes``.

    Args:
        entry_or_id: Entry selector passed on to ``get_downtime_fd_dict``.
        opt_dict: Parsed options; reads ``"dir"``, ``"delay"`` and ``"sec"``.

    Returns:
        Always 0.
    """
    fds_by_entry = get_downtime_fd_dict(entry_or_id, opt_dict["dir"], opt_dict)
    check_time = delay2time(opt_dict["delay"])
    security_class = opt_dict["sec"]
    check_time += int(time.time())

    for entry in sorted(fds_by_entry):
        is_down = fds_by_entry[entry].checkDowntime(
            entry=entry, security_class=security_class, check_time=check_time
        )
        line_format = "%s\tDown" if is_down else "%s\tUp"
        print(line_format % entry)
    return 0
[docs]
def vacuum(entry_or_id, opt_dict):
    """Call ``purgeOldPeriods`` on the downtime file of every selected entry.

    Args:
        entry_or_id: Entry selector passed on to ``get_downtime_fd_dict``.
        opt_dict: Parsed options; reads ``"dir"``.

    Returns:
        Always 0.
    """
    fds_by_entry = get_downtime_fd_dict(entry_or_id, opt_dict["dir"], opt_dict)
    for entry in sorted(fds_by_entry):
        fds_by_entry[entry].purgeOldPeriods()
    return 0
[docs]
def get_production_ress_entries(server, ref_dict_list):
    """Select the entries whose reference is returned by a RESS status query.

    Args:
        server: Pool name handed to ``condorMonitor.CondorStatus``.
        ref_dict_list: Iterable of dicts with ``"ref"`` and ``"entry_name"``
            keys.

    Returns:
        The ``"entry_name"`` of every item whose ``"ref"`` is among the keys
        of the stored query result, in input order.
    """
    status_query = condorMonitor.CondorStatus(pool_name=server)
    status_query.load(
        constraint='(GlueCEInfoContactString=!=UNDEFINED)&&(GlueCEStateStatus=?="Production")', format_list=[]
    )
    production_refs = list(status_query.fetchStored().keys())

    return [item["entry_name"] for item in ref_dict_list if item["ref"] in production_refs]
[docs]
def infosys_based(entry_name, opt_dict, infosys_types):
    """Bring entries up or down according to what the information system reports.

    Entries with no infosys record of a supported type are ignored. Of the
    rest, those reported as production are passed to ``up`` and all others
    to ``down``.

    Args:
        entry_name: ``"factory"`` (nothing to do), ``"entries"``/``"all"`` in
            any letter case for every entry in the glidein descript, or a
            single entry name.
        opt_dict: Parsed command-line options, forwarded unchanged to ``up``
            and ``down``.
        infosys_types: Collection of supported infosys type names. Only
            ``"RESS"`` has a handler here.

    Returns:
        Always 0.

    Raises:
        RuntimeError: If ``infosys_types`` allows a type without a handler.
    """
    if entry_name == "factory":
        return 0  # nothing to do... the whole factory cannot be controlled by infosys

    # find out which entries I need to look at
    config_els = {}
    # Match case-insensitively: get_args/main normalise "all" and "entries"
    # to "All", which the previous exact match on ("entries", "all") missed.
    if entry_name.lower() in ("entries", "all"):
        # all==entries in this case, since there is nothing to do for the factory
        glideinDescript = glideFactoryConfig.GlideinDescript()
        for entry in glideinDescript.data["Entries"].split(","):
            config_els[entry] = {}
    else:
        config_els[entry_name] = {}

    # load the infosys info (iterate over a copy, entries may be dropped)
    for entry in list(config_els.keys()):
        infosys_fd = cgWDictFile.InfoSysDictFile(cgWConsts.get_entry_submit_dir(".", entry), cgWConsts.INFOSYS_FILE)
        infosys_fd.load()

        if len(infosys_fd.keys) == 0:
            # entry not associated with any infosys, cannot be managed, ignore
            del config_els[entry]
            continue

        compatible_infosys = any(infosys_fd[k][0] in infosys_types for k in infosys_fd.keys)
        if not compatible_infosys:
            # entry not associated with a compatible infosys, cannot be managed, ignore
            del config_els[entry]
            continue

        config_els[entry]["infosys_fd"] = infosys_fd

    if not config_els:
        return 0  # nothing to do

    # all the remaining entries are handled by one of the supported infosys
    # summarize as infosys type -> server -> list of {"ref", "entry_name"}
    infosys_data = {}
    for entry in list(config_els.keys()):
        infosys_fd = config_els[entry]["infosys_fd"]
        for k in infosys_fd.keys:
            infosys_type = infosys_fd[k][0]
            server = infosys_fd[k][1]
            ref = infosys_fd[k][2]
            servers = infosys_data.setdefault(infosys_type, {})
            servers.setdefault(server, []).append({"ref": ref, "entry_name": entry})

    # get production entries
    production_entries = []
    for infosys_type, infosys_data_type in infosys_data.items():
        if infosys_type not in infosys_types:
            continue
        for server, infosys_data_server in infosys_data_type.items():
            if infosys_type == "RESS":
                production_entries += get_production_ress_entries(server, infosys_data_server)
            else:
                raise RuntimeError("Unknown infosys type '%s'" % infosys_type)  # should never get here

    # Use the info to put the entries up or down.
    # up()/down() index their second argument as the option dict, so pass
    # opt_dict (the former ["up"]/["down"] lists raised TypeError).
    for entry in sorted(config_els.keys()):
        if entry in production_entries:
            print("%s up" % entry)
            up(entry, opt_dict)
        else:
            print("%s down" % entry)
            down(entry, opt_dict)

    return 0
[docs]
def get_args(argv):
    """Parse the command line into an options dictionary.

    Each recognised ``-flag`` takes the following argument as its value. A
    value equal to "all" in any letter case is stored as ``"All"`` so later
    comparisons against ``"All"`` work. ``-factory`` takes no value and
    selects the ``"factory"`` entry. A flag given as the very last argument
    has no value and is ignored.

    The caller's ``argv`` list is not modified.

    Args:
        argv: Sequence of command-line argument strings.

    Returns:
        A dict with the defaults ``comment=""``, ``sec="All"``, ``delay="0"``,
        ``end="None"``, ``start="None"``, ``frontend="All"``, plus whichever of
        ``cmd``, ``dir``, ``entry`` and ``ISinfo`` were supplied.
    """
    # defaults
    opt_dict = {"comment": "", "sec": "All", "delay": "0", "end": "None", "start": "None", "frontend": "All"}

    # command-line flag -> key in opt_dict
    value_flags = {
        "-cmd": "cmd",
        "-dir": "dir",
        "-entry": "entry",
        "-comment": "comment",
        "-start": "start",
        "-end": "end",
        "-delay": "delay",
        "-ISinfo": "ISinfo",
        "-security": "sec",
        "-frontend": "frontend",
    }

    last_index = len(argv) - 1
    for index, arg in enumerate(argv):
        if arg == "-factory":
            opt_dict["entry"] = "factory"
            continue
        if arg not in value_flags or index >= last_index:
            continue

        value = argv[index + 1]
        # Change lowercase all to All so checks for "All" work
        if value.lower() == "all":
            value = "All"
        opt_dict[value_flags[arg]] = value

    return opt_dict
[docs]
def main(argv):
    """Validate the command line and run the requested downtime command.

    Args:
        argv: Full argument vector, program name included.

    Returns:
        1 on a usage or validation error; otherwise the return value of the
        selected command handler.
    """
    if len(argv) < 3:
        usage()
        return 1

    # Get the command line arguments
    opt_dict = get_args(argv)

    mandatory_comments = os.environ.get("GLIDEIN_MANDATORY_COMMENTS", "").lower() in ("on", "true", "1")
    # Use .get(): a missing -cmd must fall through to the usage message below
    # rather than raise an uncaught KeyError here.
    if opt_dict.get("cmd") in ("check", "vacuum"):
        mandatory_comments = False

    try:
        factory_dir = opt_dict["dir"]
        entry_name = opt_dict["entry"]
        cmd = opt_dict["cmd"]
        if mandatory_comments and opt_dict["comment"] == "":
            raise KeyError("comment")
    except KeyError:
        usage()
        print("-cmd -dir and -entry arguments are required.")
        if mandatory_comments:
            print("Mandatory comments are enabled. add -comment.")
        return 1

    if opt_dict["sec"] != "All":
        valid_sec_classes = get_security_classes(factory_dir)
        if opt_dict["sec"] not in valid_sec_classes:
            print("Invalid security class")
            print("Valid security classes are: ")
            for sec_class in valid_sec_classes:
                print(sec_class)
            return 1

    if opt_dict["frontend"] != "All":
        valid_frontends = get_frontends(factory_dir)
        if opt_dict["frontend"] not in valid_frontends:
            print("Invalid frontend identity:")
            print("Valid frontends are: ")
            for fe in valid_frontends:
                print(fe)
            return 1

    # The entry check and the command handlers below run from inside the
    # factory directory.
    try:
        os.chdir(factory_dir)
    except OSError as e:
        usage()
        print("Failed to locate factory %s" % factory_dir)
        print("%s" % e)
        return 1

    # "entries" means all entries without the factory itself; remember that
    # with an "entries" marker key and then treat it as "All".
    if opt_dict["entry"].lower() == "entries":
        opt_dict["entries"] = "true"
        opt_dict["entry"] = "All"
        entry_name = "All"

    # Verify Entry is an actual entry
    if opt_dict["entry"] not in ("All", "factory"):
        valid_entries = get_entries(factory_dir)
        if opt_dict["entry"] not in valid_entries:
            print("Invalid entry name")
            print("Valid entries are:")
            for entry in valid_entries:
                print(entry)
            return 1

    if cmd == "add":
        return add(entry_name, opt_dict)
    elif cmd == "down":
        return down(entry_name, opt_dict)
    elif cmd == "up":
        return up(entry_name, opt_dict)
    elif cmd == "check":
        return printtimes(entry_name, opt_dict)
    elif cmd == "ress":
        return infosys_based(entry_name, opt_dict, ["RESS"])
    elif cmd == "vacuum":
        return vacuum(entry_name, opt_dict)
    else:
        usage()
        print("Invalid command %s" % cmd)
        return 1
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)