Source code for glideinwms.factory.tools.lib.gWftLogParser

# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

#
# Project:
#   glideinWMS
#
# File Version:
#
# Description:
#   factory/tool specific condorLogs helper
#

import binascii
import gzip
import io
import mmap
import os.path
import re
import time

from glideinwms.factory import glideFactoryLogParser
from glideinwms.lib import condorLogParser
from glideinwms.lib.defaults import BINARY_ENCODING, force_bytes


# get the list of jobs that were active at a certain time
def get_glideins(log_dir_name, date_arr, time_arr):
    """Return the ids of completed glideins that were active at a given time.

    Args:
        log_dir_name: Directory handed to
            ``glideFactoryLogParser.dirSummaryTimingsOutFull``.
        date_arr: Tuple forming the first part of the ``time.mktime`` argument
            (presumably ``(year, month, day)`` -- confirm against callers).
        time_arr: Tuple forming the second part of the ``time.mktime`` argument
            (presumably ``(hour, minute, second)`` -- confirm against callers).

    Returns:
        list: The glidein ids, in the order found in the "Completed" data,
        whose running-start time is not after the reference time and whose
        last time is not before it.
    """
    summary = glideFactoryLogParser.dirSummaryTimingsOutFull(log_dir_name, cache_dir=None)
    summary.load(active_only=False)
    # Only the completed glideins are of interest here
    completed = summary.data["Completed"]

    # date and time are concatenated with (weekday, yearday, isdst) filler;
    # the trailing -1 is the isdst field of the 9-item time tuple
    reference = time.mktime(date_arr + time_arr + (0, 0, -1))

    def was_active(start_raw, end_raw):
        # The end time is only converted when the start time qualifies
        if condorLogParser.rawTime2cTimeLastYear(start_raw) > reference:
            return False  # started after the reference time
        if condorLogParser.rawTime2cTimeLastYear(end_raw) < reference:
            return False  # ended before the reference time
        return True

    return [
        job_id
        for job_id, _first_raw, start_raw, end_raw in completed
        if was_active(start_raw, end_raw)
    ]
# get the list of log files for an entry that were active at a certain time
def get_glidein_logs_entry(factory_dir, entry, date_arr, time_arr, ext="err"):
    """Return the existing log files of one entry's glideins active at a given time.

    Args:
        factory_dir: Factory directory; logs are looked up in
            ``<factory_dir>/entry_<entry>/log``.
        entry: Entry name used to build the log directory name.
        date_arr: Date tuple forwarded to ``get_glideins``.
        time_arr: Time tuple forwarded to ``get_glideins``.
        ext: File extension appended to ``job.<n>.<m>.`` (default ``"err"``).

    Returns:
        list: Paths of the log files that exist on disk.
    """
    entry_log_dir = os.path.join(factory_dir, "entry_%s/log" % entry)

    # Build the candidate file path of every glidein active at the given time
    candidates = (
        os.path.join(entry_log_dir, ("job.%i.%i." % condorLogParser.rawJobId2Nr(job_id)) + ext)
        for job_id in get_glideins(entry_log_dir, date_arr, time_arr)
    )

    # Keep only the files that are really there
    return [path for path in candidates if os.path.exists(path)]
# get the list of log files for a list of entries that were active at a certain time
def get_glidein_logs(factory_dir, entries, date_arr, time_arr, ext="err"):
    """Return the log files of several entries' glideins active at a given time.

    Args:
        factory_dir: Factory directory forwarded to ``get_glidein_logs_entry``.
        entries: Iterable of entry names, processed in order.
        date_arr: Date tuple forwarded to ``get_glidein_logs_entry``.
        time_arr: Time tuple forwarded to ``get_glidein_logs_entry``.
        ext: Log file extension forwarded to ``get_glidein_logs_entry``
            (default ``"err"``).

    Returns:
        list: Concatenation of the per-entry log file lists.
    """
    collected = []
    for entry_name in entries:
        collected.extend(get_glidein_logs_entry(factory_dir, entry_name, date_arr, time_arr, ext))
    return collected
# extract the blob from a glidein log file starting from position
def get_Compressed_raw(log_fname, start_str, start_pos=0):
    """Extract a still-encoded blob from a glidein log file.

    The blob starts right after the first match of ``start_str`` followed by a
    ``begin-base64 644 -`` line and ends just before the next line starting
    with ``====`` (or at the end of the file when there is none).

    Args:
        log_fname: Path of the glidein log file.
        start_str: Regular expression for the header preceding the
            ``begin-base64`` line; it is converted with ``force_bytes``
            before being compiled.
        start_pos: Offset in the file where the header search starts
            (default 0). A ``^`` in ``start_str`` still only matches at a
            real line start, not at ``start_pos`` itself.

    Returns:
        str: The blob decoded with ``BINARY_ENCODING``, or ``""`` when the
        file is empty or the header is not found.
    """
    header_re = re.compile(b"%s\nbegin-base64 644 -\n" % force_bytes(start_str, BINARY_ENCODING), re.M | re.DOTALL)
    size = os.path.getsize(log_fname)
    if size == 0:
        return ""  # mmap would fail on an empty file, and there is nothing to find anyhow
    # Binary mode: only the file descriptor is used, no text decoding is wanted
    with open(log_fname, "rb") as fd:
        buf = mmap.mmap(fd.fileno(), size, access=mmap.ACCESS_READ)
        try:
            # First find the header that delimits the blob, honoring start_pos
            start_re = header_re.search(buf, start_pos)
            if start_re is None:
                return ""  # no such section
            log_start_idx = start_re.end()

            # Then find where the blob ends
            log_end_idx = buf.find(b"\n====", log_start_idx)
            if log_end_idx < 0:
                # No terminator: take everything up to the end of the file
                return buf[log_start_idx:].decode(BINARY_ENCODING)
            return buf[log_start_idx:log_end_idx].decode(BINARY_ENCODING)
        finally:
            buf.close()
# extract the blob from a glidein log file
def get_Compressed(log_fname, start_str):
    """Extract and decode a base64-encoded, gzip-compressed blob from a glidein log file.

    Args:
        log_fname: Path of the glidein log file.
        start_str: Header regular expression forwarded to
            ``get_Compressed_raw``.

    Returns:
        str: The decompressed content decoded with ``BINARY_ENCODING``, or
        ``""`` when ``get_Compressed_raw`` found nothing.
    """
    encoded = get_Compressed_raw(log_fname, start_str)
    if encoded == "":
        # Nothing was found, hand back the empty string as is
        return encoded

    compressed = binascii.a2b_base64(encoded)
    del encoded  # the base64 text is not needed any more
    unzipper = gzip.GzipFile(fileobj=io.BytesIO(compressed))
    return unzipper.read().decode(BINARY_ENCODING)
# extract an uncompressed section, delimited by start_str and end_str, from a glidein log file
def get_Simple(log_fname, start_str, end_str):
    """Extract an uncompressed section from a glidein log file.

    The section starts right after the first match of ``start_str`` followed
    by a newline and ends where ``end_str`` first matches after that (or at
    the end of the file when it never does).

    Args:
        log_fname: Path of the glidein log file.
        start_str: Regular expression for the line preceding the section.
        end_str: Regular expression marking the end of the section.

    Both patterns are converted with ``force_bytes`` before being compiled,
    because the search runs on the bytes-like mmap buffer and a str pattern
    would raise ``TypeError`` there.

    Returns:
        str: The section decoded with ``BINARY_ENCODING``, or ``""`` when the
        file is empty or the start marker is not found.
    """
    start_marker_re = re.compile(force_bytes(start_str, BINARY_ENCODING) + b"\n", re.M | re.DOTALL)
    # The end pattern must be bytes too, it is matched against the mmap buffer
    end_marker_re = re.compile(force_bytes(end_str, BINARY_ENCODING), re.M | re.DOTALL)
    size = os.path.getsize(log_fname)
    if size == 0:
        return ""  # mmap would fail on an empty file, and there is nothing to find anyhow
    # Binary mode: only the file descriptor is used, no text decoding is wanted
    with open(log_fname, "rb") as fd:
        buf = mmap.mmap(fd.fileno(), size, access=mmap.ACCESS_READ)
        try:
            # First find the header that delimits the section in the file
            start_re = start_marker_re.search(buf, 0)
            if start_re is None:
                return ""  # no such section
            log_start_idx = start_re.end()

            # Then find where the section ends
            end_re = end_marker_re.search(buf, log_start_idx)
            if end_re is None:
                # No end marker: take everything up to the end of the file
                return buf[log_start_idx:].decode(BINARY_ENCODING)
            return buf[log_start_idx : end_re.start()].decode(BINARY_ENCODING)
        finally:
            buf.close()
# extract the Condor Log from a glidein log file
# condor_log_id should be something like "StartdLog"
def get_CondorLog(log_fname, condor_log_id):
    """Extract one Condor log from a glidein log file.

    Args:
        log_fname: Path of the glidein log file.
        condor_log_id: Name of the log to extract; it is interpolated into
            the header regular expression passed to ``get_Compressed``.

    Returns:
        str: Whatever ``get_Compressed`` returns for that header.
    """
    # The log name sits on its own line, right above the gzip/uuencode banner
    header_template = "^%s\n======== gzip . uuencode ============="
    return get_Compressed(log_fname, header_template % condor_log_id)
# extract the XML Result from a glidein log file
def get_XMLResult(log_fname):
    """Extract the XML description of the glidein activity from a glidein log file.

    The encoded section is tried first through ``get_Compressed``; when that
    returns ``""``, the plain section is read through ``get_Simple``.

    Args:
        log_fname: Path of the glidein log file.

    Returns:
        str: The result of ``get_Compressed`` when it is not empty, otherwise
        the result of ``get_Simple``.
    """
    encoded_result = get_Compressed(log_fname, "^=== Encoded XML description of glidein activity ===")
    if encoded_result != "":
        return encoded_result

    # Encoded section not found, fall back to the uncompressed version
    plain_begin = "^=== XML description of glidein activity ==="
    plain_end = "^=== End XML description of glidein activity ==="
    return get_Simple(log_fname, plain_begin, plain_end)
# extract slot names
def get_StarterSlotNames(log_fname, condor_log_id="(StarterLog.slot[0-9]*[_]*[0-9]*)"):
    """List the starter log names found in a glidein log file.

    Args:
        log_fname: Path of the glidein log file.
        condor_log_id: Regular expression for the log name, interpolated into
            the section header pattern. The default holds one capturing
            group, so ``findall`` yields the matched names.
            NOTE(review): a pattern with several groups would make
            ``findall`` yield tuples, which cannot be decoded -- confirm
            callers only pass single-group patterns.

    Returns:
        list: The ``findall`` results decoded with ``BINARY_ENCODING``; note
        that ``""`` (not an empty list) is returned when the file is empty.
    """
    section_header = "^%s\n======== gzip . uuencode =============" % condor_log_id
    slot_header_re = re.compile(
        b"%s\nbegin-base64 644 -\n" % force_bytes(section_header, BINARY_ENCODING), re.M | re.DOTALL
    )

    file_size = os.path.getsize(log_fname)
    if file_size == 0:
        return ""  # mmap would fail on an empty file, and there is nothing to find anyhow

    # The mmap is closed when its with-block exits, whatever happens inside
    with open(log_fname) as log_fd, mmap.mmap(log_fd.fileno(), file_size, access=mmap.ACCESS_READ) as mapped:
        slot_names = []
        for found in slot_header_re.findall(mapped, 0):
            slot_names.append(found.decode(BINARY_ENCODING))
        return slot_names