Source code for glideinwms.lib.rrdSupport

# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

"""This module implements the basic functions needed to interface to rrdtool
"""

import os
import shutil
import tempfile
import time

from . import defaults, subprocessSupport

try:
    import rrdtool  # pylint: disable=import-error
except ImportError:
    # Will use the binary tools if the Python library is not available
    pass



[docs]
class BaseRRDSupport:
    #############################################################
    def __init__(self, rrd_obj):
        self.rrd_obj = rrd_obj


[docs]
    def isDummy(self):
        return self.rrd_obj is None


    #############################################################
    # The default will do nothing
    # Children should override it, if needed

[docs]
    def get_disk_lock(self, fname):
        """Return the lock object used while an RRD file is being written.

        This default implementation ignores fname and returns whatever
        dummy_disk_lock() provides.

        Arguments:
          fname - File path name about to be accessed (unused here)

        Returns:
          an object with a close() method; callers close it when done
        """
        lock = dummy_disk_lock()
        return lock


    #############################################################
    # The default will do nothing
    # Children should override it, if needed

[docs]
    def get_graph_lock(self, fname):
        """Return the lock object used while a graph file is being produced.

        This default implementation ignores fname and returns whatever
        dummy_disk_lock() provides.

        Arguments:
          fname - File path name of the graph about to be written (unused here)

        Returns:
          an object with a close() method; callers close it when done
        """
        lock = dummy_disk_lock()
        return lock


    #############################################################

[docs]
    def create_rrd(self, rrdfname, rrd_step, rrd_archives, rrd_ds):
        """
        Create a new RRD archive

        Arguments:
          rrdfname     - File path name of the RRD archive
          rrd_step     - base interval in seconds
          rrd_archives - list of tuples, each containing the following fileds (in order)
            CF    - consolidation function (usually AVERAGE)
            xff   - xfiles factor (fraction that can be unknown)
            steps - how many of these primary data points are used to build a consolidated data point
            rows  - how many generations of data values are kept
          rrd_ds       - a tuple containing the following fields (in order)
            ds-name   - attribute name
            DST       - Data Source Type (usually GAUGE)
            heartbeat - the maximum number of seconds that may pass between two updates before it becomes unknown
            min       - min value
            max       - max value

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        self.create_rrd_multi(rrdfname, rrd_step, rrd_archives, (rrd_ds,))
        return


    #############################################################

[docs]
    def create_rrd_multi(self, rrdfname, rrd_step, rrd_archives, rrd_ds_arr):
        """
        Create a new RRD archive

        Arguments:
          rrdfname     - File path name of the RRD archive
          rrd_step     - base interval in seconds
          rrd_archives - list of tuples, each containing the following fileds (in order)
            CF    - consolidation function (usually AVERAGE)
            xff   - xfiles factor (fraction that can be unknown)
            steps - how many of these primary data points are used to build a consolidated data point
            rows  - how many generations of data values are kept
          rrd_ds_arr   - list of tuples, each containing the following fields (in order)
            ds-name   - attribute name
            DST       - Data Source Type (usually GAUGE)
            heartbeat - the maximum number of seconds that may pass between two updates before it becomes unknown
            min       - min value
            max       - max value

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        if self.rrd_obj is None:
            return  # nothing to do in this case

        # make the start time to be aligned on the rrd_step boundary
        # This is needed for optimal resoultion selection
        start_time = (int(time.time() - 1) / rrd_step) * rrd_step
        # print (rrdfname,start_time,rrd_step)+rrd_ds
        args = [str(rrdfname), "-b", "%li" % start_time, "-s", "%i" % rrd_step]
        for rrd_ds in rrd_ds_arr:
            args.append("DS:%s:%s:%i:%s:%s" % rrd_ds)
        for archive in rrd_archives:
            args.append("RRA:%s:%g:%i:%i" % archive)

        lck = self.get_disk_lock(rrdfname)
        try:
            self.rrd_obj.create(*args)
        finally:
            lck.close()
        return


    #############################################################

[docs]
    def update_rrd(self, rrdfname, time, val):
        """
        Create an RRD archive with a new value

        Arguments:
          rrdfname - File path name of the RRD archive
          time     - When was the value taken
          val      - What vas the value
        """
        if self.rrd_obj is None:
            # nothing to do in this case
            return

        lck = self.get_disk_lock(rrdfname)
        try:
            self.rrd_obj.update(str(rrdfname), "%li:%s" % (time, val))
        finally:
            lck.close()

        return


    #############################################################

[docs]
    def update_rrd_multi(self, rrdfname, time, val_dict):
        """
        Create an RRD archive with a set of values (possibly all of the supported)

        Arguments:
          rrdfname - File path name of the RRD archive
          time     - When was the value taken
          val_dict - What was the value
        """
        if self.rrd_obj is None:
            return  # nothing to do in this case

        args = [str(rrdfname)]
        ds_names = sorted(val_dict.keys())

        ds_names_real = []
        ds_vals = []
        for ds_name in ds_names:
            if val_dict[ds_name] is not None:
                ds_vals.append("%s" % val_dict[ds_name])
                ds_names_real.append(ds_name)

        if len(ds_names_real) == 0:
            return

        args.append("-t")
        args.append(":".join(ds_names_real))
        args.append(("%li:" % time) + ":".join(ds_vals))

        lck = self.get_disk_lock(rrdfname)
        try:
            # print args
            self.rrd_obj.update(*args)
        finally:
            lck.close()

        return


    #############################################################

[docs]
    def rrd2graph(
        self,
        fname,
        rrd_step,
        ds_name,
        ds_type,
        start,
        end,
        width,
        height,
        title,
        rrd_files,
        cdef_arr=None,
        trend=None,
        img_format="PNG",
    ):
        """
        Create a graph file out of a set of RRD files

        Arguments:
          fname         - File path name of the graph file
          rrd_step      - Which step should I use in the RRD files
          ds_name       - Which attribute should I use in the RRD files
          ds_type       - Which type should I use in the RRD files
          start,end     - Time points in utime format
          width,height  - Size of the graph
          title         - Title to put in the graph
          rrd_files     - list of RRD files, each being a tuple of (in order)
                rrd_id      - logical name of the RRD file (will be the graph label)
                rrd_fname   - name of the RRD file
                graph_type  - Graph type (LINE, STACK, AREA)
                grpah_color - Graph color in rrdtool format
          cdef_arr      - list of derived RRD values
                          if present, only the cdefs will be plotted
                          each elsement is a tuple of (in order)
                rrd_id        - logical name of the RRD file (will be the graph label)
                cdef_formula  - Derived formula in rrdtool format
                graph_type    - Graph type (LINE, STACK, AREA)
                grpah_color   - Graph color in rrdtool format
          trend         - Trend value in seconds (if desired, None else)

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        if self.rrd_obj is None:
            return  # nothing to do in this case

        multi_rrd_files = []
        for rrd_file in rrd_files:
            multi_rrd_files.append((rrd_file[0], rrd_file[1], ds_name, ds_type, rrd_file[2], rrd_file[3]))
        return self.rrd2graph_multi(
            fname, rrd_step, start, end, width, height, title, multi_rrd_files, cdef_arr, trend, img_format
        )


    #############################################################

[docs]
    def rrd2graph_now(
        self,
        fname,
        rrd_step,
        ds_name,
        ds_type,
        period,
        width,
        height,
        title,
        rrd_files,
        cdef_arr=None,
        trend=None,
        img_format="PNG",
    ):
        """
        Create a graph file out of a set of RRD files

        Arguments:
          fname         - File path name of the graph file
          rrd_step      - Which step should I use in the RRD files
          ds_name       - Which attribute should I use in the RRD files
          ds_type       - Which type should I use in the RRD files
          period        - start=now-period, end=now
          width,height  - Size of the graph
          title         - Title to put in the graph
          rrd_files     - list of RRD files, each being a tuple of (in order)
                rrd_id      - logical name of the RRD file (will be the graph label)
                rrd_fname   - name of the RRD file
                graph_type  - Graph type (LINE, STACK, AREA)
                grpah_color - Graph color in rrdtool format
          cdef_arr      - list of derived RRD values
                          if present, only the cdefs will be plotted
                          each elsement is a tuple of (in order)
                rrd_id        - logical name of the RRD file (will be the graph label)
                cdef_formula  - Derived formula in rrdtool format
                graph_type    - Graph type (LINE, STACK, AREA)
                grpah_color   - Graph color in rrdtool format
          trend         - Trend value in seconds (if desired, None else)

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        now = int(time.time())
        start = ((now - period) / rrd_step) * rrd_step
        end = ((now - 1) / rrd_step) * rrd_step
        return self.rrd2graph(
            fname, rrd_step, ds_name, ds_type, start, end, width, height, title, rrd_files, cdef_arr, trend, img_format
        )


    #############################################################

[docs]
    def rrd2graph_multi(
        self, fname, rrd_step, start, end, width, height, title, rrd_files, cdef_arr=None, trend=None, img_format="PNG"
    ):
        """
        Create a graph file out of a set of RRD files

        Arguments:
          fname         - File path name of the graph file
          rrd_step      - Which step should I use in the RRD files
          start,end     - Time points in utime format
          width,height  - Size of the graph
          title         - Title to put in the graph
          rrd_files     - list of RRD files, each being a tuple of (in order)
                rrd_id      - logical name of the RRD file (will be the graph label)
                rrd_fname   - name of the RRD file
                ds_name     - Which attribute should I use in the RRD files
                ds_type     - Which type should I use in the RRD files
                graph_type  - Graph type (LINE, STACK, AREA)
                graph_color - Graph color in rrdtool format
          cdef_arr      - list of derived RRD values
                          if present, only the cdefs will be plotted
                          each elsement is a tuple of (in order)
                rrd_id        - logical name of the RRD file (will be the graph label)
                cdef_formula  - Derived formula in rrdtool format
                graph_type    - Graph type (LINE, STACK, AREA)
                grpah_color   - Graph color in rrdtool format
          trend         - Trend value in seconds (if desired, None else)
          img_format    - format of the graph file (default PNG)

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        if self.rrd_obj is None:
            return  # nothing to do in this case

        args = [
            str(fname),
            "-s",
            "%li" % start,
            "-e",
            "%li" % end,
            "--step",
            "%i" % rrd_step,
            "-l",
            "0",
            "-w",
            "%i" % width,
            "-h",
            "%i" % height,
            "--imgformat",
            str(img_format),
            "--title",
            str(title),
        ]
        for rrd_file in rrd_files:
            ds_id = rrd_file[0]
            ds_fname = rrd_file[1]
            ds_name = rrd_file[2]
            ds_type = rrd_file[3]
            if trend is None:
                args.append(str(f"DEF:{ds_id}={ds_fname}:{ds_name}:{ds_type}"))
            else:
                args.append(str(f"DEF:{ds_id}_inst={ds_fname}:{ds_name}:{ds_type}"))
                args.append(str("CDEF:%s=%s_inst,%i,TREND" % (ds_id, ds_id, trend)))

        plot_arr = rrd_files
        if cdef_arr is not None:
            # plot the cdefs not the files themselves, when we have them
            plot_arr = cdef_arr

            for cdef_el in cdef_arr:
                ds_id = cdef_el[0]
                cdef_formula = cdef_el[1]
                ds_graph_type = rrd_file[2]
                ds_color = rrd_file[3]
                args.append(str(f"CDEF:{ds_id}={cdef_formula}"))
        else:
            plot_arr = []
            for rrd_file in rrd_files:
                plot_arr.append((rrd_file[0], None, rrd_file[4], rrd_file[5]))

        if plot_arr[0][2] == "STACK":
            # add an invisible baseline to stack upon
            args.append("AREA:0")

        for plot_el in plot_arr:
            ds_id = plot_el[0]
            ds_graph_type = plot_el[2]
            ds_color = plot_el[3]
            args.append(f"{ds_graph_type}:{ds_id}#{ds_color}:{ds_id}")

        args.append("COMMENT:Created on %s" % time.strftime(r"%b %d %H\:%M\:%S %Z %Y"))

        try:
            lck = self.get_graph_lock(fname)
            try:
                self.rrd_obj.graph(*args)
            finally:
                lck.close()
        except Exception:
            print("Failed graph: %s" % str(args))

        return args


    #############################################################

[docs]
    def rrd2graph_multi_now(
        self, fname, rrd_step, period, width, height, title, rrd_files, cdef_arr=None, trend=None, img_format="PNG"
    ):
        """
        Create a graph file out of a set of RRD files

        Arguments:
          fname         - File path name of the graph file
          rrd_step      - Which step should I use in the RRD files
          period        - start=now-period, end=now
          width,height  - Size of the graph
          title         - Title to put in the graph
          rrd_files     - list of RRD files, each being a tuple of (in order)
                rrd_id      - logical name of the RRD file (will be the graph label)
                rrd_fname   - name of the RRD file
                ds_name     - Which attribute should I use in the RRD files
                ds_type     - Which type should I use in the RRD files
                graph_type  - Graph type (LINE, STACK, AREA)
                graph_color - Graph color in rrdtool format
          cdef_arr      - list of derived RRD values
                          if present, only the cdefs will be plotted
                          each elsement is a tuple of (in order)
                rrd_id        - logical name of the RRD file (will be the graph label)
                cdef_formula  - Derived formula in rrdtool format
                graph_type    - Graph type (LINE, STACK, AREA)
                grpah_color   - Graph color in rrdtool format
          trend         - Trend value in seconds (if desired, None else)
          img_format    - format of the graph file (default PNG)

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        now = int(time.time())
        start = ((now - period) / rrd_step) * rrd_step
        end = ((now - 1) / rrd_step) * rrd_step
        return self.rrd2graph_multi(
            fname, rrd_step, start, end, width, height, title, rrd_files, cdef_arr, trend, img_format
        )


    ###################################################

[docs]
    def fetch_rrd(self, filename, CF, resolution=None, start=None, end=None, daemon=None):
        """
        Fetch will analyze the RRD and try to retrieve the data in the
        resolution requested.

        Arguments:
          filename      -the name of the RRD you want to fetch data from
          CF            -the consolidation function that is applied to the data
                         you want to fetch (AVERAGE, MIN, MAX, LAST)
          resolution    -the interval you want your values to have
                         (default 300 sec)
          start         -start of the time series (default end - 1day)
          end           -end of the time series (default now)
          daemon        -Address of the rrdcached daemon. If specified, a flush
                         command is sent to the server before reading the RRD
                         files. This allows rrdtool to return fresh data even
                         if the daemon is configured to cache values for a long
                         time.

        For more details see
          http://oss.oetiker.ch/rrdtool/doc/rrdcreate.en.html
        """
        if self.rrd_obj is None:
            return  # nothing to do in this case

        if CF in ("AVERAGE", "MIN", "MAX", "LAST"):
            consolFunc = str(CF)
        else:
            raise RuntimeError("Invalid consolidation function %s" % CF)
        args = [str(filename), consolFunc]
        if resolution is not None:
            args.append("-r")
            args.append(str(resolution))
        if end is not None:
            args.append("-e")
            args.append(str(end))
        if start is not None:
            args.append("-s")
            args.append(str(start))
        if daemon is not None:
            args.append("--daemon")
            args.append(str(daemon))

        if os.path.exists(filename):
            try:
                return self.rrd_obj.fetch(*args)
            except Exception as e:
                raise RuntimeError("Error when running rrdtool.fetch") from e
        else:
            raise RuntimeError(f"RRD file '{filename}' does not exist. Failing fetch_rrd.")



[docs]
    def verify_rrd(self, filename, expected_dict):
        """
        Verifies that an rrd matches a dictionary of datastores.
        This will return a tuple of arrays ([missing],[extra]) attributes

        @param filename: filename of the rrd to verify
        @param expected_dict: dictionary of expected values
        @return: A two-tuple of arrays ([missing attrs],[extra attrs])

        """
        rrd_info = self.rrd_obj.info(filename)
        rrd_dict = {}
        for key in list(rrd_info.keys()):
            # rrdtool 1.3
            if key[:3] == "ds[":
                rrd_dict[key[3:].split("]")[0]] = None
            # rrdtool 1.2
            if key == "ds":
                for dskey in list(rrd_info[key].keys()):
                    rrd_dict[dskey] = None
        missing = []
        extra = []
        for t in list(expected_dict.keys()):
            if t not in list(rrd_dict.keys()):
                missing.append(t)
        for t in list(rrd_dict.keys()):
            if t not in list(expected_dict.keys()):
                extra.append(t)
        return (missing, extra)




# This class uses the rrdtool module for rrd_obj

[docs]
class ModuleRRDSupport(BaseRRDSupport):
    """RRD support backed by the rrdtool Python module.

    The module is imported optionally at the top of the file, so instantiating
    this class raises NameError when that import failed.
    """

    def __init__(self):
        super().__init__(rrdtool)



# This class uses rrdtool cmdline for rrd_obj

[docs]
class ExeRRDSupport(BaseRRDSupport):
    """RRD support backed by the rrdtool command line, through rrdtool_exe."""

    def __init__(self):
        cmdline_backend = rrdtool_exe()
        super().__init__(cmdline_backend)



# This class tries to use the rrdtool module for rrd_obj
# then tries the rrdtool cmdline
# will use None if needed

[docs]
class rrdSupport(BaseRRDSupport):
    """RRD support that picks the best available backend.

    Preference order: the rrdtool Python module, then the rrdtool command
    line (rrdtool_exe), then None (no backend) if neither is usable.
    """

    def __init__(self):
        super().__init__(self._select_backend())

    @staticmethod
    def _select_backend():
        """Return the first usable backend, or None when there is none."""
        try:
            return rrdtool
        except NameError:
            pass  # the optional import failed, try the binary next
        try:
            return rrdtool_exe()
        except Exception:
            return None



##################################################################
# INTERNAL, do not use directly
##################################################################



[docs]
class DummyDiskLock:
    """Dummy, do nothing. Used just to get a object"""


[docs]
    def close(self):
        return





[docs]
def dummy_disk_lock():
    """Return a new DummyDiskLock, a lock object whose close() does nothing."""
    lock = DummyDiskLock()
    return lock



#################################

[docs]
def string_quote_join(arglist):
    """Wrap each argument in double quotes and join them with single spaces.

    NOTE(review): characters inside the arguments are not escaped, so an
    argument that itself contains a double quote is not protected.

    Args:
        arglist: iterable of arguments, each formatted with %s

    Returns:
        str: the quoted arguments separated by spaces, "" for no arguments
    """
    return " ".join('"%s"' % arg for arg in arglist)




[docs]
class rrdtool_exe:
    """This class is a wrapper around the rrdtool client (binary) and
    is used in place of the rrdtool python module, if that one is not available

    It provides also extra functions:
    dump: returns an array of lines with the content instead of saving the RRD in an XML file
    restore: allows the restore of a DB
    """

    def __init__(self):
        """Locate the rrdtool binary by running ``which rrdtool``.

        The first line of the command output, stripped, is stored in rrd_bin.
        """
        which_output = subprocessSupport.iexe_cmd("which rrdtool")
        first_line = which_output.partition("\n")[0]
        self.rrd_bin = first_line.strip()


[docs]
    def create(self, *args):
        """Run ``rrdtool create`` with the given arguments.

        Args:
            *args: rrdtool create arguments, each one quoted and joined on the command line
        """
        quoted_args = string_quote_join(args)
        subprocessSupport.iexe_cmd(f"{self.rrd_bin} create {quoted_args}")



[docs]
    def update(self, *args):
        """Run ``rrdtool update`` with the given arguments.

        Args:
            *args: rrdtool update arguments, each one quoted and joined on the command line
        """
        quoted_args = string_quote_join(args)
        subprocessSupport.iexe_cmd(f"{self.rrd_bin} update {quoted_args}")



[docs]
    def info(self, *args):
        """Run ``rrdtool info`` and parse its ``key = value`` output.

        Lines without an equal sign are ignored. Each remaining line is split
        on the first equal sign only, so a value that itself contains "="
        is kept whole. Keys and values are stripped of surrounding
        whitespace and values are left as strings.

        Args:
            *args: rrdtool info arguments, each one quoted and joined on the command line

        Returns:
            dict: mapping of the info keys to their string values
        """
        cmdline = f"{self.rrd_bin} info {string_quote_join(args)}"
        outarr = {}
        for line in subprocessSupport.iexe_cmd(cmdline).split("\n"):
            key, separator, value = line.partition("=")
            if separator:
                outarr[key.strip()] = value.strip()
        return outarr



[docs]
    def dump(self, *args):
        """Run ``rrdtool dump`` and return its output split in lines

        Input is usually just the file name.
        This is different from the `dump` method provided by the `rrdtool` package (Python binding)
        which outputs to a file or stdout

        Args:
            *args: rrdtool dump arguments, each one quoted and joined on the command line

        Returns:
            list: lines of the rrdtool dump output, without the newline characters

        """
        quoted_args = string_quote_join(args)
        dump_output = subprocessSupport.iexe_cmd(f"{self.rrd_bin} dump {quoted_args}")
        return dump_output.split("\n")



[docs]
    def restore(self, *args):
        """Run ``rrdtool restore`` with the given arguments.

        Args:
            *args: rrdtool restore arguments, each one quoted and joined on the command line
        """
        quoted_args = string_quote_join(args)
        subprocessSupport.iexe_cmd(f"{self.rrd_bin} restore {quoted_args}")



[docs]
    def graph(self, *args):
        """Run ``rrdtool graph`` with the given arguments.

        Args:
            *args: rrdtool graph arguments, each one quoted and joined on the command line
        """
        quoted_args = string_quote_join(args)
        subprocessSupport.iexe_cmd(f"{self.rrd_bin} graph {quoted_args}")



[docs]
    def fetch(self, *args):
        """Run ``rrdtool fetch`` and parse its text output.

        The first output line provides the headers. Every other non-empty line
        is read as ``<timestamp>: <value> <value> ...``, with "-nan" values
        turned into None. The time information is taken from fixed positions
        of the output (2nd and 3rd line after the headers, and the
        second-to-last line), so at least two data rows are expected.

        Args:
            *args: rrdtool fetch arguments, each one quoted and joined on the command line

        Returns:
            tuple: ((first_time, last_time, step), headers, rows) where headers is a
                tuple of strings and rows is a list of tuples of float or None
        """
        cmdline = f"{self.rrd_bin} fetch {string_quote_join(args)}"
        raw_lines = subprocessSupport.iexe_cmd(cmdline).split("\n")
        headers = tuple(raw_lines.pop(0).split())

        def timestamp_of(row):
            # the timestamp is whatever precedes the first colon
            return int(row.split(":")[0])

        lines = [
            tuple(None if token == "-nan" else float(token) for token in row.split()[1:])
            for row in raw_lines
            if len(row) != 0
        ]
        tstep = timestamp_of(raw_lines[2]) - timestamp_of(raw_lines[1])
        ftime = timestamp_of(raw_lines[1]) - tstep
        ltime = timestamp_of(raw_lines[-2])
        return ((ftime, ltime, tstep), headers, lines)





[docs]
def addDataStore(filenamein, filenameout, attrlist):
    """Add a list of data stores to a rrd export file
    This will essentially add attributes to the end of a rrd row

    The input is copied line by line to the output with these additions:
      - a <ds> definition per attribute, right before the first <rra> line
      - an empty <ds> entry per attribute, right before each </cdp_prep> line
      - a NaN <v> value per attribute at the end of each row found between
        the <database> and </database> lines
    Both files are closed before returning, so the output is completely
    written to disk and can be read back right away.

    @param filenamein: filename path of a rrd exported with rrdtool dump
    @param filenameout: filename path of output xml with datastores added
    @param attrlist: array of datastores to add
    """
    parse = False  # True while reading the rows of a <database> section
    writenDS = False  # True once the new <ds> definitions have been written
    with open(filenamein) as f, open(filenameout, "w") as out:
        for line in f:
            if ("<rra>" in line) and (not writenDS):
                for a in attrlist:
                    out.write("<ds>\n")
                    out.write("<name> %s </name>\n" % a)
                    out.write("<type> GAUGE </type>\n")
                    out.write("<minimal_heartbeat> 1800 </minimal_heartbeat>\n")
                    out.write("<min> NaN </min>\n")
                    out.write("<max> NaN </max>\n")
                    out.write("<!-- PDP Status -->\n")
                    out.write("<last_ds> UNKN </last_ds>\n")
                    out.write("<value> 0 </value>\n")
                    out.write("<unknown_sec> 0 </unknown_sec>\n")
                    out.write("</ds>\n")
                writenDS = True
            if "</cdp_prep>" in line:
                for _ in attrlist:
                    out.write("<ds><value> NaN </value>\n")
                    out.write("<unknown_datapoints> 0 </unknown_datapoints></ds>\n")
            if "</database>" in line:
                parse = False
            if parse:
                # The last 7 characters are kept as the row terminator
                # (presumably "</row>" plus the newline) and the new values
                # are inserted right before them
                out.write(line[:-7])
                for _ in attrlist:
                    out.write("<v> NaN </v>")
                out.write(line[-7:])
            else:
                out.write(line)
            if "<database>" in line:
                parse = True



# Function used by verifyRRD (in Factory and Frontend), invoked during reconfig/upgrade
# No logging available, output is to stdout/err

[docs]
def verifyHelper(filename, data_dict, fix_rrd=False, backup=True):
    """Helper function for verifyRRD.
    Checks one file, prints out errors.
    if fix_rrd, will attempt to dump out rrd to xml, add the missing attributes, then restore.
    Original file is backed up with time stamp if backup is True, obliterated otherwise.
    The original file is touched only after the fixed RRD has been successfully restored.

    A missing file is only reported with a warning and is not counted as a problem.
    Extra attributes are always a problem (they cannot be fixed).
    Missing attributes are a problem only when fix_rrd is False.

    Args:
        filename(str): filename of rrd to check
        data_dict(dict): expected dictionary
        fix_rrd(bool): if True, will attempt to add missing attrs
        backup(bool): if not True skip the backup of original rrd

    Returns:
        bool: True if there were some problem with the RRD file, False if all OK

    """
    if not os.path.exists(filename):
        print(f"WARNING: {filename} missing, will be created on restart")
        # nothing to verify; return a proper bool, as documented
        return False
    rrd_obj = rrdSupport()
    (missing, extra) = rrd_obj.verify_rrd(filename, data_dict)
    for attr in extra:
        print(f"ERROR: {filename} has extra attribute {attr}")
        if fix_rrd:
            print("ERROR: fix_rrd cannot fix extra attributes")
    rrd_problems_found = len(extra) > 0
    if len(missing) > 0:
        if fix_rrd:
            _rebuild_rrd_with_attrs(filename, missing, backup)
        else:
            for attr in missing:
                print(f"ERROR: {filename} missing attribute {attr}")
            rrd_problems_found = True
    return rrd_problems_found


def _rebuild_rrd_with_attrs(filename, missing, backup):
    """Recreate the RRD file with the missing data sources added (dump, edit the XML, restore)."""
    # Use exe version since dump, restore not available in rrdtool
    dump_obj = rrdtool_exe()
    outstr = dump_obj.dump(filename)

    (dump_fd, tempfilename) = tempfile.mkstemp()
    (fixed_fd, tempfilename2) = tempfile.mkstemp()
    (restored_fd, restoredfilename) = tempfile.mkstemp()
    os.close(fixed_fd)
    os.close(restored_fd)
    # Only the unique name is needed: the file is removed so that the restore creates it
    # (presumably rrdtool restore refuses to overwrite an existing file - TODO confirm)
    os.unlink(restoredfilename)
    try:
        with os.fdopen(dump_fd, "wb") as dump_file:
            for line in outstr:
                # dump is returning an array of strings decoded w/ utf-8
                dump_file.write(f"{line}\n".encode(defaults.BINARY_ENCODING_DEFAULT))
        addDataStore(tempfilename, tempfilename2, missing)
        dump_obj.restore(tempfilename2, restoredfilename)
        # The fixed RRD is ready: only now it is safe to replace the original
        if backup:
            backup_str = str(int(time.time())) + ".backup"
            print(f"Fixing {filename}... (backed up to {filename + backup_str})")
            # Move file to back up location
            shutil.move(filename, filename + backup_str)
        else:
            print(f"Fixing {filename}... (no back up)")
            os.unlink(filename)
        shutil.move(restoredfilename, filename)
    finally:
        # Remove whatever temporary file is still around, also in case of errors
        for leftover in (tempfilename, tempfilename2, restoredfilename):
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass  # already moved away or never created